Index: projects/clang391-import/bin/kenv/kenv.c =================================================================== --- projects/clang391-import/bin/kenv/kenv.c (revision 309262) +++ projects/clang391-import/bin/kenv/kenv.c (revision 309263) @@ -1,206 +1,206 @@ /*- * Copyright (c) 2000 Peter Wemm * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include static void usage(void); static int kdumpenv(void); static int kgetenv(const char *); static int ksetenv(const char *, char *); static int kunsetenv(const char *); static int hflag = 0; static int Nflag = 0; static int qflag = 0; static int uflag = 0; static int vflag = 0; static void usage(void) { (void)fprintf(stderr, "%s\n%s\n%s\n", "usage: kenv [-hNq]", " kenv [-qv] variable[=value]", " kenv [-q] -u variable"); exit(1); } int main(int argc, char **argv) { char *env, *eq, *val; int ch, error; - error = 0; val = NULL; env = NULL; while ((ch = getopt(argc, argv, "hNquv")) != -1) { switch (ch) { case 'h': hflag++; break; case 'N': Nflag++; break; case 'q': qflag++; break; case 'u': uflag++; break; case 'v': vflag++; break; default: usage(); } } argc -= optind; argv += optind; if (argc > 0) { env = argv[0]; eq = strchr(env, '='); if (eq != NULL) { *eq++ = '\0'; val = eq; } argv++; argc--; } if ((hflag || Nflag) && env != NULL) usage(); if (argc > 0 || ((uflag || vflag) && env == NULL)) usage(); if (env == NULL) { error = kdumpenv(); if (error && !qflag) warn("kdumpenv"); } else if (val == NULL) { if (uflag) { error = kunsetenv(env); if (error && !qflag) warnx("unable to unset %s", env); } else { error = kgetenv(env); if (error && !qflag) warnx("unable to get %s", env); } } else { error = ksetenv(env, val); if (error && !qflag) warnx("unable to set %s to %s", env, val); } return (error); } static int kdumpenv(void) { - char *buf, *cp; + char *buf, *bp, *cp; int buflen, envlen; envlen = kenv(KENV_DUMP, NULL, NULL, 0); if (envlen < 0) return (-1); for (;;) { buflen = envlen * 120 / 100; - buf = malloc(buflen + 1); + buf = calloc(1, buflen + 1); if (buf == NULL) return (-1); - memset(buf, 0, buflen + 1); /* Be defensive */ envlen = kenv(KENV_DUMP, NULL, buf, buflen); if (envlen < 0) { free(buf); return (-1); } if (envlen > buflen) free(buf); else break; } - for (; 
*buf != '\0'; buf += strlen(buf) + 1) { + for (bp = buf; *bp != '\0'; bp += strlen(bp) + 1) { if (hflag) { - if (strncmp(buf, "hint.", 5) != 0) + if (strncmp(bp, "hint.", 5) != 0) continue; } - cp = strchr(buf, '='); + cp = strchr(bp, '='); if (cp == NULL) continue; *cp++ = '\0'; if (Nflag) - printf("%s\n", buf); + printf("%s\n", bp); else - printf("%s=\"%s\"\n", buf, cp); - buf = cp; + printf("%s=\"%s\"\n", bp, cp); + bp = cp; } + + free(buf); return (0); } static int kgetenv(const char *env) { char buf[1024]; int ret; ret = kenv(KENV_GET, env, buf, sizeof(buf)); if (ret == -1) return (ret); if (vflag) printf("%s=\"%s\"\n", env, buf); else printf("%s\n", buf); return (0); } static int ksetenv(const char *env, char *val) { int ret; - ret = kenv(KENV_SET, env, val, strlen(val)+1); + ret = kenv(KENV_SET, env, val, strlen(val) + 1); if (ret == 0) printf("%s=\"%s\"\n", env, val); return (ret); } static int kunsetenv(const char *env) { int ret; - + ret = kenv(KENV_UNSET, env, NULL, 0); return (ret); } Index: projects/clang391-import/release/packages/clang.ucl =================================================================== --- projects/clang391-import/release/packages/clang.ucl (revision 309262) +++ projects/clang391-import/release/packages/clang.ucl (revision 309263) @@ -1,24 +1,24 @@ # # $FreeBSD$ # name = "FreeBSD-%PKGNAME%" origin = "base" version = "%VERSION%" comment = "%COMMENT%" categories = [ base ] maintainer = "re@FreeBSD.org" www = "https://www.FreeBSD.org" prefix = "/" licenselogic = "single" -licenses = [ BSD2CLAUSE ] +licenses = [ NCSA ] desc = <> CTR_DLINE_SHIFT) #define CTR_ILINE_SHIFT 0 #define CTR_ILINE_MASK (0xf << CTR_ILINE_SHIFT) #define CTR_ILINE_SIZE(reg) (((reg) & CTR_ILINE_MASK) >> CTR_ILINE_SHIFT) /* DCZID_EL0 - Data Cache Zero ID register */ #define DCZID_DZP (1 << 4) /* DC ZVA prohibited if non-0 */ #define DCZID_BS_SHIFT 0 #define DCZID_BS_MASK (0xf << DCZID_BS_SHIFT) #define DCZID_BS_SIZE(reg) (((reg) & DCZID_BS_MASK) >> DCZID_BS_SHIFT) /* ESR_ELx */ #define ESR_ELx_ISS_MASK 0x00ffffff #define ISS_INSN_FnV (0x01 << 10) #define ISS_INSN_EA (0x01 << 9) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_INSN_IFSC_MASK (0x1f << 0) #define ISS_DATA_ISV (0x01 << 24) #define ISS_DATA_SAS_MASK (0x03 << 22) #define ISS_DATA_SSE (0x01 << 21) #define ISS_DATA_SRT_MASK (0x1f << 16) #define ISS_DATA_SF (0x01 << 15) #define ISS_DATA_AR (0x01 << 14) #define ISS_DATA_FnV (0x01 << 10) #define ISS_DATa_EA (0x01 << 9) #define ISS_DATa_CM (0x01 << 8) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_DATa_WnR (0x01 << 6) -#define ISS_DATA_DFSC_MASK (0x1f << 0) +#define ISS_DATA_DFSC_MASK (0x3f << 0) #define ISS_DATA_DFSC_ASF_L0 (0x00 << 0) #define ISS_DATA_DFSC_ASF_L1 (0x01 << 0) #define ISS_DATA_DFSC_ASF_L2 (0x02 << 0) #define ISS_DATA_DFSC_ASF_L3 (0x03 << 0) #define ISS_DATA_DFSC_TF_L0 (0x04 << 0) #define ISS_DATA_DFSC_TF_L1 (0x05 << 0) #define ISS_DATA_DFSC_TF_L2 (0x06 << 0) #define ISS_DATA_DFSC_TF_L3 (0x07 << 0) #define ISS_DATA_DFSC_AFF_L1 (0x09 << 0) #define ISS_DATA_DFSC_AFF_L2 (0x0a << 0) #define ISS_DATA_DFSC_AFF_L3 (0x0b << 0) #define ISS_DATA_DFSC_PF_L1 (0x0d << 0) #define ISS_DATA_DFSC_PF_L2 (0x0e << 0) #define ISS_DATA_DFSC_PF_L3 (0x0f << 0) #define ISS_DATA_DFSC_EXT (0x10 << 0) #define ISS_DATA_DFSC_EXT_L0 (0x14 << 0) #define ISS_DATA_DFSC_EXT_L1 (0x15 << 0) #define ISS_DATA_DFSC_EXT_L2 (0x16 << 0) #define ISS_DATA_DFSC_EXT_L3 (0x17 << 0) #define ISS_DATA_DFSC_ECC (0x18 << 0) #define ISS_DATA_DFSC_ECC_L0 (0x1c << 0) #define ISS_DATA_DFSC_ECC_L1 (0x1d << 0) #define 
ISS_DATA_DFSC_ECC_L2 (0x1e << 0) #define ISS_DATA_DFSC_ECC_L3 (0x1f << 0) #define ISS_DATA_DFSC_ALIGN (0x21 << 0) #define ISS_DATA_DFSC_TLB_CONFLICT (0x30 << 0) #define ESR_ELx_IL (0x01 << 25) #define ESR_ELx_EC_SHIFT 26 #define ESR_ELx_EC_MASK (0x3f << 26) #define ESR_ELx_EXCEPTION(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define EXCP_UNKNOWN 0x00 /* Unknown exception */ #define EXCP_FP_SIMD 0x07 /* VFP/SIMD trap */ #define EXCP_ILL_STATE 0x0e /* Illegal execution state */ #define EXCP_SVC 0x15 /* SVC trap */ #define EXCP_MSR 0x18 /* MSR/MRS trap */ #define EXCP_INSN_ABORT_L 0x20 /* Instruction abort, from lower EL */ #define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */ #define EXCP_PC_ALIGN 0x22 /* PC alignment fault */ #define EXCP_DATA_ABORT_L 0x24 /* Data abort, from lower EL */ #define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */ #define EXCP_SP_ALIGN 0x26 /* SP alignment fault */ #define EXCP_TRAP_FP 0x2c /* Trapped FP exception */ #define EXCP_SERROR 0x2f /* SError interrupt */ #define EXCP_SOFTSTP_EL0 0x32 /* Software Step, from lower EL */ #define EXCP_SOFTSTP_EL1 0x33 /* Software Step, from same EL */ #define EXCP_WATCHPT_EL1 0x35 /* Watchpoint, from same EL */ #define EXCP_BRK 0x3c /* Breakpoint */ /* ICC_CTLR_EL1 */ #define ICC_CTLR_EL1_EOIMODE (1U << 1) /* ICC_IAR1_EL1 */ #define ICC_IAR1_EL1_SPUR (0x03ff) /* ICC_IGRPEN0_EL1 */ #define ICC_IGRPEN0_EL1_EN (1U << 0) /* ICC_PMR_EL1 */ #define ICC_PMR_EL1_PRIO_MASK (0xFFUL) /* ICC_SGI1R_EL1 */ #define ICC_SGI1R_EL1_TL_MASK 0xffffUL #define ICC_SGI1R_EL1_AFF1_SHIFT 16 #define ICC_SGI1R_EL1_SGIID_SHIFT 24 #define ICC_SGI1R_EL1_AFF2_SHIFT 32 #define ICC_SGI1R_EL1_AFF3_SHIFT 48 #define ICC_SGI1R_EL1_SGIID_MASK 0xfUL #define ICC_SGI1R_EL1_IRM (0x1UL << 40) /* ICC_SRE_EL1 */ #define ICC_SRE_EL1_SRE (1U << 0) /* ICC_SRE_EL2 */ #define ICC_SRE_EL2_SRE (1U << 0) #define ICC_SRE_EL2_EN (1U << 3) /* ID_AA64DFR0_EL1 */ #define ID_AA64DFR0_MASK 0xf0f0ffff #define ID_AA64DFR0_DEBUG_VER_SHIFT 0 #define ID_AA64DFR0_DEBUG_VER_MASK (0xf << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_DEBUG_VER(x) ((x) & ID_AA64DFR0_DEBUG_VER_MASK) #define ID_AA64DFR0_DEBUG_VER_8 (0x6 << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_DEBUG_VER_8_VHE (0x7 << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER_SHIFT 4 #define ID_AA64DFR0_TRACE_VER_MASK (0xf << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER(x) ((x) & ID_AA64DFR0_TRACE_VER_MASK) #define ID_AA64DFR0_TRACE_VER_NONE (0x0 << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER_IMPL (0x1 << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_SHIFT 8 #define ID_AA64DFR0_PMU_VER_MASK (0xf << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER(x) ((x) & ID_AA64DFR0_PMU_VER_MASK) #define ID_AA64DFR0_PMU_VER_NONE (0x0 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_3 (0x1 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_3_1 (0x4 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_IMPL (0xf << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_BRPS_SHIFT 12 #define ID_AA64DFR0_BRPS_MASK (0xf << ID_AA64DFR0_BRPS_SHIFT) #define ID_AA64DFR0_BRPS(x) \ ((((x) >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_WRPS_SHIFT 20 #define ID_AA64DFR0_WRPS_MASK (0xf << ID_AA64DFR0_WRPS_SHIFT) #define ID_AA64DFR0_WRPS(x) \ ((((x) >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_CTX_CMPS_MASK (0xf << ID_AA64DFR0_CTX_CMPS_SHIFT) #define ID_AA64DFR0_CTX_CMPS(x) \ ((((x) >>
ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) + 1) /* ID_AA64ISAR0_EL1 */ #define ID_AA64ISAR0_MASK 0xf0fffff0 #define ID_AA64ISAR0_AES_SHIFT 4 #define ID_AA64ISAR0_AES_MASK (0xf << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES(x) ((x) & ID_AA64ISAR0_AES_MASK) #define ID_AA64ISAR0_AES_NONE (0x0 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_BASE (0x1 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_PMULL (0x2 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_SHA1_MASK (0xf << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1(x) ((x) & ID_AA64ISAR0_SHA1_MASK) #define ID_AA64ISAR0_SHA1_NONE (0x0 << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1_BASE (0x1 << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA2_SHIFT 12 #define ID_AA64ISAR0_SHA2_MASK (0xf << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2(x) ((x) & ID_AA64ISAR0_SHA2_MASK) #define ID_AA64ISAR0_SHA2_NONE (0x0 << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2_BASE (0x1 << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_CRC32_SHIFT 16 #define ID_AA64ISAR0_CRC32_MASK (0xf << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32(x) ((x) & ID_AA64ISAR0_CRC32_MASK) #define ID_AA64ISAR0_CRC32_NONE (0x0 << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32_BASE (0x1 << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_ATOMIC_SHIFT 20 #define ID_AA64ISAR0_ATOMIC_MASK (0xf << ID_AA64ISAR0_ATOMIC_SHIFT) #define ID_AA64ISAR0_ATOMIC(x) ((x) & ID_AA64ISAR0_ATOMIC_MASK) #define ID_AA64ISAR0_ATOMIC_NONE (0x0 << ID_AA64ISAR0_ATOMIC_SHIFT) #define ID_AA64ISAR0_ATOMIC_IMPL (0x2 << ID_AA64ISAR0_ATOMIC_SHIFT) #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_RDM_MASK (0xf << ID_AA64ISAR0_RDM_SHIFT) #define ID_AA64ISAR0_RDM(x) ((x) & ID_AA64ISAR0_RDM_MASK) #define ID_AA64ISAR0_RDM_NONE (0x0 << ID_AA64ISAR0_RDM_SHIFT) #define ID_AA64ISAR0_RDM_IMPL (0x1 << ID_AA64ISAR0_RDM_SHIFT) /* ID_AA64MMFR0_EL1 */ #define ID_AA64MMFR0_MASK 0xffffffff #define ID_AA64MMFR0_PA_RANGE_SHIFT 0 #define ID_AA64MMFR0_PA_RANGE_MASK (0xf << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE(x) ((x) & ID_AA64MMFR0_PA_RANGE_MASK) #define ID_AA64MMFR0_PA_RANGE_4G (0x0 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_64G (0x1 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_1T (0x2 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_4T (0x3 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_16T (0x4 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_256T (0x5 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_ASID_BITS_SHIFT 4 #define ID_AA64MMFR0_ASID_BITS_MASK (0xf << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_ASID_BITS(x) ((x) & ID_AA64MMFR0_ASID_BITS_MASK) #define ID_AA64MMFR0_ASID_BITS_8 (0x0 << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_ASID_BITS_16 (0x2 << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_BIGEND_SHIFT 8 #define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_BIGEND(x) ((x) & ID_AA64MMFR0_BIGEND_MASK) #define ID_AA64MMFR0_BIGEND_FIXED (0x0 << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_BIGEND_MIXED (0x1 << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_S_NS_MEM_SHIFT 12 #define ID_AA64MMFR0_S_NS_MEM_MASK (0xf << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_S_NS_MEM(x) ((x) & ID_AA64MMFR0_S_NS_MEM_MASK) #define ID_AA64MMFR0_S_NS_MEM_NONE (0x0 << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_S_NS_MEM_DISTINCT (0x1 << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0_SHIFT 16 
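/*
 * A minimal usage sketch for the ID-register field macros above
 * (hypothetical helper, not part of this change; dfr0/isar0 are
 * assumed to hold the raw register values).  The FIELD(x) accessors
 * mask the field in place, so results compare directly against the
 * shifted *_NONE/*_BASE/*_IMPL constants, while helpers such as
 * ID_AA64DFR0_BRPS() extract the raw value and add the +1 bias.
 */
static void
example_id_regs(uint64_t dfr0, uint64_t isar0)
{

	/* Compare in place: neither side needs shifting. */
	if (ID_AA64ISAR0_AES(isar0) == ID_AA64ISAR0_AES_PMULL)
		printf("AES + PMULL instructions present\n");

	/* BRPS/WRPS are encoded minus one; the accessors add it back. */
	printf("%d breakpoints, %d watchpoints\n",
	    (int)ID_AA64DFR0_BRPS(dfr0), (int)ID_AA64DFR0_WRPS(dfr0));
}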
#define ID_AA64MMFR0_BIGEND_EL0_MASK (0xf << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0(x) ((x) & ID_AA64MMFR0_BIGEND_EL0_MASK) #define ID_AA64MMFR0_BIGEND_EL0_FIXED (0x0 << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0_MIXED (0x1 << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_TGRAN16_SHIFT 20 #define ID_AA64MMFR0_TGRAN16_MASK (0xf << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN16(x) ((x) & ID_AA64MMFR0_TGRAN16_MASK) #define ID_AA64MMFR0_TGRAN16_NONE (0x0 << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN16_IMPL (0x1 << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN64_SHIFT 24 #define ID_AA64MMFR0_TGRAN64_MASK (0xf << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN64(x) ((x) & ID_AA64MMFR0_TGRAN64_MASK) #define ID_AA64MMFR0_TGRAN64_IMPL (0x0 << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN64_NONE (0xf << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN4_MASK (0xf << ID_AA64MMFR0_TGRAN4_SHIFT) #define ID_AA64MMFR0_TGRAN4(x) ((x) & ID_AA64MMFR0_TGRAN4_MASK) #define ID_AA64MMFR0_TGRAN4_IMPL (0x0 << ID_AA64MMFR0_TGRAN4_SHIFT) #define ID_AA64MMFR0_TGRAN4_NONE (0xf << ID_AA64MMFR0_TGRAN4_SHIFT) /* ID_AA64MMFR1_EL1 */ #define ID_AA64MMFR1_MASK 0x00ffffff #define ID_AA64MMFR1_HAFDBS_SHIFT 0 #define ID_AA64MMFR1_HAFDBS_MASK (0xf << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS(x) ((x) & ID_AA64MMFR1_HAFDBS_MASK) #define ID_AA64MMFR1_HAFDBS_NONE (0x0 << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS_AF (0x1 << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_HAFDBS_AF_DBS (0x2 << ID_AA64MMFR1_HAFDBS_SHIFT) #define ID_AA64MMFR1_VMIDBITS_SHIFT 4 #define ID_AA64MMFR1_VMIDBITS_MASK (0xf << ID_AA64MMFR1_VMIDBITS_SHIFT) #define ID_AA64MMFR1_VMIDBITS(x) ((x) & ID_AA64MMFR1_VMIDBITS_MASK) #define ID_AA64MMFR1_VMIDBITS_8 (0x0 << ID_AA64MMFR1_VMIDBITS_SHIFT) #define ID_AA64MMFR1_VMIDBITS_16 (0x2 << ID_AA64MMFR1_VMIDBITS_SHIFT) #define ID_AA64MMFR1_VH_SHIFT 8 #define ID_AA64MMFR1_VH_MASK (0xf << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_VH(x) ((x) & ID_AA64MMFR1_VH_MASK) #define ID_AA64MMFR1_VH_NONE (0x0 << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_VH_IMPL (0x1 << ID_AA64MMFR1_VH_SHIFT) #define ID_AA64MMFR1_HPDS_SHIFT 12 #define ID_AA64MMFR1_HPDS_MASK (0xf << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_HPDS(x) ((x) & ID_AA64MMFR1_HPDS_MASK) #define ID_AA64MMFR1_HPDS_NONE (0x0 << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_HPDS_IMPL (0x1 << ID_AA64MMFR1_HPDS_SHIFT) #define ID_AA64MMFR1_LO_SHIFT 16 #define ID_AA64MMFR1_LO_MASK (0xf << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_LO(x) ((x) & ID_AA64MMFR1_LO_MASK) #define ID_AA64MMFR1_LO_NONE (0x0 << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_LO_IMPL (0x1 << ID_AA64MMFR1_LO_SHIFT) #define ID_AA64MMFR1_PAN_SHIFT 20 #define ID_AA64MMFR1_PAN_MASK (0xf << ID_AA64MMFR1_PAN_SHIFT) #define ID_AA64MMFR1_PAN(x) ((x) & ID_AA64MMFR1_PAN_MASK) #define ID_AA64MMFR1_PAN_NONE (0x0 << ID_AA64MMFR1_PAN_SHIFT) #define ID_AA64MMFR1_PAN_IMPL (0x1 << ID_AA64MMFR1_PAN_SHIFT) /* ID_AA64PFR0_EL1 */ #define ID_AA64PFR0_MASK 0x0fffffff #define ID_AA64PFR0_EL0_SHIFT 0 #define ID_AA64PFR0_EL0_MASK (0xf << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0(x) ((x) & ID_AA64PFR0_EL0_MASK) #define ID_AA64PFR0_EL0_64 (1 << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0_64_32 (2 << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL1_SHIFT 4 #define ID_AA64PFR0_EL1_MASK (0xf << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1(x) ((x) & ID_AA64PFR0_EL1_MASK) 
#define ID_AA64PFR0_EL1_64 (1 << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1_64_32 (2 << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL2_SHIFT 8 #define ID_AA64PFR0_EL2_MASK (0xf << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2(x) ((x) & ID_AA64PFR0_EL2_MASK) #define ID_AA64PFR0_EL2_NONE (0 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64 (1 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64_32 (2 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL3_SHIFT 12 #define ID_AA64PFR0_EL3_MASK (0xf << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3(x) ((x) & ID_AA64PFR0_EL3_MASK) #define ID_AA64PFR0_EL3_NONE (0 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64 (1 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64_32 (2 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_FP_SHIFT 16 #define ID_AA64PFR0_FP_MASK (0xf << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP(x) ((x) & ID_AA64PFR0_FP_MASK) #define ID_AA64PFR0_FP_IMPL (0x0 << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP_NONE (0xf << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_ADV_SIMD_SHIFT 20 #define ID_AA64PFR0_ADV_SIMD_MASK (0xf << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_ADV_SIMD(x) ((x) & ID_AA64PFR0_ADV_SIMD_MASK) #define ID_AA64PFR0_ADV_SIMD_IMPL (0x0 << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_ADV_SIMD_NONE (0xf << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_GIC_BITS 0x4 /* Number of bits in GIC field */ #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_GIC_MASK (0xf << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC(x) ((x) & ID_AA64PFR0_GIC_MASK) #define ID_AA64PFR0_GIC_CPUIF_NONE (0x0 << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC_CPUIF_EN (0x1 << ID_AA64PFR0_GIC_SHIFT) /* MAIR_EL1 - Memory Attribute Indirection Register */ #define MAIR_ATTR_MASK(idx) (0xff << ((idx) * 8)) #define MAIR_ATTR(attr, idx) ((attr) << ((idx) * 8)) #define MAIR_DEVICE_nGnRnE 0x00 #define MAIR_NORMAL_NC 0x44 #define MAIR_NORMAL_WT 0x88 #define MAIR_NORMAL_WB 0xff /* PAR_EL1 - Physical Address Register */ #define PAR_F_SHIFT 0 #define PAR_F (0x1 << PAR_F_SHIFT) #define PAR_SUCCESS(x) (((x) & PAR_F) == 0) /* When PAR_F == 0 (success) */ #define PAR_SH_SHIFT 7 #define PAR_SH_MASK (0x3 << PAR_SH_SHIFT) #define PAR_NS_SHIFT 9 #define PAR_NS_MASK (0x3 << PAR_NS_SHIFT) #define PAR_PA_SHIFT 12 #define PAR_PA_MASK 0x0000fffffffff000 #define PAR_ATTR_SHIFT 56 #define PAR_ATTR_MASK (0xff << PAR_ATTR_SHIFT) /* When PAR_F == 1 (aborted) */ #define PAR_FST_SHIFT 1 #define PAR_FST_MASK (0x3f << PAR_FST_SHIFT) #define PAR_PTW_SHIFT 8 #define PAR_PTW_MASK (0x1 << PAR_PTW_SHIFT) #define PAR_S_SHIFT 9 #define PAR_S_MASK (0x1 << PAR_S_SHIFT) /* SCTLR_EL1 - System Control Register */ #define SCTLR_RES0 0xc8222400 /* Reserved, write 0 */ #define SCTLR_RES1 0x30d00800 /* Reserved, write 1 */ #define SCTLR_M 0x00000001 #define SCTLR_A 0x00000002 #define SCTLR_C 0x00000004 #define SCTLR_SA 0x00000008 #define SCTLR_SA0 0x00000010 #define SCTLR_CP15BEN 0x00000020 #define SCTLR_THEE 0x00000040 #define SCTLR_ITD 0x00000080 #define SCTLR_SED 0x00000100 #define SCTLR_UMA 0x00000200 #define SCTLR_I 0x00001000 #define SCTLR_DZE 0x00004000 #define SCTLR_UCT 0x00008000 #define SCTLR_nTWI 0x00010000 #define SCTLR_nTWE 0x00040000 #define SCTLR_WXN 0x00080000 #define SCTLR_EOE 0x01000000 #define SCTLR_EE 0x02000000 #define SCTLR_UCI 0x04000000 /* SPSR_EL1 */ /* * When the exception is taken in AArch64: * M[4] is 0 for AArch64 mode * M[3:2] is the exception level * M[1] is unused * M[0] is the SP select: * 0: always SP0 * 1: current EL's SP */ #define PSR_M_EL0t 0x00000000
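/*
 * Worked example of the M-field layout documented above: an exception
 * taken from EL1 while on the kernel (SPx) stack reads back
 * SPSR_EL1.M = 0b00101 -- M[4] = 0 (AArch64), M[3:2] = 0b01 (EL1),
 * M[0] = 1 (current EL's SP) -- i.e. the PSR_M_EL1h value defined
 * below, so (spsr & PSR_M_MASK) == PSR_M_EL1h identifies that state.
 */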
#define PSR_M_EL1t 0x00000004 #define PSR_M_EL1h 0x00000005 #define PSR_M_EL2t 0x00000008 #define PSR_M_EL2h 0x00000009 #define PSR_M_MASK 0x0000001f #define PSR_F 0x00000040 #define PSR_I 0x00000080 #define PSR_A 0x00000100 #define PSR_D 0x00000200 #define PSR_IL 0x00100000 #define PSR_SS 0x00200000 #define PSR_V 0x10000000 #define PSR_C 0x20000000 #define PSR_Z 0x40000000 #define PSR_N 0x80000000 /* TCR_EL1 - Translation Control Register */ #define TCR_ASID_16 (1 << 36) #define TCR_IPS_SHIFT 32 #define TCR_IPS_32BIT (0 << TCR_IPS_SHIFT) #define TCR_IPS_36BIT (1 << TCR_IPS_SHIFT) #define TCR_IPS_40BIT (2 << TCR_IPS_SHIFT) #define TCR_IPS_42BIT (3 << TCR_IPS_SHIFT) #define TCR_IPS_44BIT (4 << TCR_IPS_SHIFT) #define TCR_IPS_48BIT (5 << TCR_IPS_SHIFT) #define TCR_TG1_SHIFT 30 #define TCR_TG1_16K (1 << TCR_TG1_SHIFT) #define TCR_TG1_4K (2 << TCR_TG1_SHIFT) #define TCR_TG1_64K (3 << TCR_TG1_SHIFT) #define TCR_SH1_SHIFT 28 #define TCR_SH1_IS (0x3UL << TCR_SH1_SHIFT) #define TCR_ORGN1_SHIFT 26 #define TCR_ORGN1_WBWA (0x1UL << TCR_ORGN1_SHIFT) #define TCR_IRGN1_SHIFT 24 #define TCR_IRGN1_WBWA (0x1UL << TCR_IRGN1_SHIFT) #define TCR_SH0_SHIFT 12 #define TCR_SH0_IS (0x3UL << TCR_SH0_SHIFT) #define TCR_ORGN0_SHIFT 10 #define TCR_ORGN0_WBWA (0x1UL << TCR_ORGN0_SHIFT) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_WBWA (0x1UL << TCR_IRGN0_SHIFT) #define TCR_CACHE_ATTRS ((TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) |\ (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)) #ifdef SMP #define TCR_SMP_ATTRS (TCR_SH0_IS | TCR_SH1_IS) #else #define TCR_SMP_ATTRS 0 #endif #define TCR_T1SZ_SHIFT 16 #define TCR_T0SZ_SHIFT 0 #define TCR_T1SZ(x) ((x) << TCR_T1SZ_SHIFT) #define TCR_T0SZ(x) ((x) << TCR_T0SZ_SHIFT) #define TCR_TxSZ(x) (TCR_T1SZ(x) | TCR_T0SZ(x)) /* Saved Program Status Register */ #define DBG_SPSR_SS (0x1 << 21) /* Monitor Debug System Control Register */ #define DBG_MDSCR_SS (0x1 << 0) #define DBG_MDSCR_KDE (0x1 << 13) #define DBG_MDSCR_MDE (0x1 << 15) /* Performance Monitoring Counters */ #define PMCR_E (1 << 0) /* Enable all counters */ #define PMCR_P (1 << 1) /* Reset all counters */ #define PMCR_C (1 << 2) /* Clock counter reset */ #define PMCR_D (1 << 3) /* CNTR counts every 64 clk cycles */ #define PMCR_X (1 << 4) /* Export to ext. monitoring (ETM) */ #define PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug */ #define PMCR_LC (1 << 6) /* Long cycle count enable */ #define PMCR_IMP_SHIFT 24 /* Implementer code */ #define PMCR_IMP_MASK (0xff << PMCR_IMP_SHIFT) #define PMCR_IDCODE_SHIFT 16 /* Identification code */ #define PMCR_IDCODE_MASK (0xff << PMCR_IDCODE_SHIFT) #define PMCR_IDCODE_CORTEX_A57 0x01 #define PMCR_IDCODE_CORTEX_A72 0x02 #define PMCR_IDCODE_CORTEX_A53 0x03 #define PMCR_N_SHIFT 11 /* Number of counters implemented */ #define PMCR_N_MASK (0x1f << PMCR_N_SHIFT) #endif /* !_MACHINE_ARMREG_H_ */ Index: projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c =================================================================== --- projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (revision 309262) +++ projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (revision 309263) @@ -1,1164 +1,1171 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) && defined(_KERNEL) #include #include #endif /* * ZFS Write Throttle * ------------------ * * ZFS must limit the rate of incoming writes to the rate at which it is able * to sync data modifications to the backend storage. Throttling by too much * creates an artificial limit; throttling by too little can only be sustained * for short periods and would lead to highly lumpy performance. On a per-pool * basis, ZFS tracks the amount of modified (dirty) data. As operations change * data, the amount of dirty data increases; as ZFS syncs out data, the amount * of dirty data decreases. When the amount of dirty data exceeds a * predetermined threshold further modifications are blocked until the amount * of dirty data decreases (as data is synced out). * * The limit on dirty data is tunable, and should be adjusted according to * both the IO capacity and available memory of the system. The larger the * window, the more ZFS is able to aggregate and amortize metadata (and data) * changes. However, memory is a limited resource, and allowing for more dirty * data comes at the cost of keeping other useful data in memory (for example * ZFS data cached by the ARC). * * Implementation * * As buffers are modified dsl_pool_willuse_space() increments both the per- * txg (dp_dirty_pertxg[]) and poolwide (dp_dirty_total) accounting of * dirty space used; dsl_pool_dirty_space() decrements those values as data * is synced out from dsl_pool_sync(). While only the poolwide value is * relevant, the per-txg value is useful for debugging. The tunable * zfs_dirty_data_max determines the dirty space limit. Once that value is * exceeded, new writes are halted until space frees up. * * The zfs_dirty_data_sync tunable dictates the threshold at which we * ensure that there is a txg syncing (see the comment in txg.c for a full * description of transaction group stages). * * The IO scheduler uses both the dirty space limit and current amount of * dirty data as inputs. Those values affect the number of concurrent IOs ZFS * issues. See the comment in vdev_queue.c for details of the IO scheduler. * * The delay is also calculated based on the amount of dirty data. See the * comment above dmu_tx_delay() for details. */ /* * zfs_dirty_data_max will be set to zfs_dirty_data_max_percent% of all memory, * capped at zfs_dirty_data_max_max. It can also be overridden in /etc/system. 
*/ uint64_t zfs_dirty_data_max; uint64_t zfs_dirty_data_max_max = 4ULL * 1024 * 1024 * 1024; int zfs_dirty_data_max_percent = 10; /* * If there is at least this much dirty data, push out a txg. */ uint64_t zfs_dirty_data_sync = 64 * 1024 * 1024; /* * Once there is this amount of dirty data, dmu_tx_delay() will kick in * and delay each transaction. * This value should be >= zfs_vdev_async_write_active_max_dirty_percent. */ int zfs_delay_min_dirty_percent = 60; /* * This controls how quickly the delay approaches infinity. * Larger values cause it to delay more for a given amount of dirty data. * Therefore larger values will cause there to be less dirty data for a * given throughput. * * For the smoothest delay, this value should be about 1 billion divided * by the maximum number of operations per second. This will smoothly * handle between 10x and 1/10th this number. * * Note: zfs_delay_scale * zfs_dirty_data_max must be < 2^64, due to the * multiply in dmu_tx_delay(). */ uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000; #if defined(__FreeBSD__) && defined(_KERNEL) extern int zfs_vdev_async_write_active_max_dirty_percent; SYSCTL_DECL(_vfs_zfs); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_max, CTLFLAG_RWTUN, &zfs_dirty_data_max, 0, "The maximum amount of dirty data in bytes after which new writes are " "halted until space becomes available"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_max_max, CTLFLAG_RDTUN, &zfs_dirty_data_max_max, 0, "The absolute cap on dirty_data_max when auto calculating"); static int sysctl_zfs_dirty_data_max_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs, OID_AUTO, dirty_data_max_percent, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_dirty_data_max_percent, "I", "The percent of physical memory used to auto calculate dirty_data_max"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_sync, CTLFLAG_RWTUN, &zfs_dirty_data_sync, 0, "Force a txg if the number of dirty buffer bytes exceeds this value"); static int sysctl_zfs_delay_min_dirty_percent(SYSCTL_HANDLER_ARGS); /* No zfs_delay_min_dirty_percent tunable due to limit requirements */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, delay_min_dirty_percent, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(int), sysctl_zfs_delay_min_dirty_percent, "I", "The limit of outstanding dirty data before transactions are delayed"); static int sysctl_zfs_delay_scale(SYSCTL_HANDLER_ARGS); /* No zfs_delay_scale tunable due to limit requirements */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, delay_scale, CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t), sysctl_zfs_delay_scale, "QU", "Controls how quickly the delay approaches infinity"); static int sysctl_zfs_dirty_data_max_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_dirty_data_max_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100) return (EINVAL); zfs_dirty_data_max_percent = val; return (0); } static int sysctl_zfs_delay_min_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_delay_min_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < zfs_vdev_async_write_active_max_dirty_percent) return (EINVAL); zfs_delay_min_dirty_percent = val; return (0); } static int sysctl_zfs_delay_scale(SYSCTL_HANDLER_ARGS) { uint64_t val; int err; val = zfs_delay_scale; err = sysctl_handle_64(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val > UINT64_MAX / zfs_dirty_data_max) return
(EINVAL); zfs_delay_scale = val; return (0); } #endif hrtime_t zfs_throttle_delay = MSEC2NSEC(10); hrtime_t zfs_throttle_resolution = MSEC2NSEC(10); int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) { uint64_t obj; int err; err = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(dp->dp_root_dir)->dd_child_dir_zapobj, name, sizeof (obj), 1, &obj); if (err) return (err); return (dsl_dir_hold_obj(dp, obj, name, dp, ddp)); } static dsl_pool_t * dsl_pool_open_impl(spa_t *spa, uint64_t txg) { dsl_pool_t *dp; blkptr_t *bp = spa_get_rootblkptr(spa); dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); dp->dp_spa = spa; dp->dp_meta_rootbp = *bp; rrw_init(&dp->dp_config_rwlock, B_TRUE); txg_init(dp, txg); txg_list_create(&dp->dp_dirty_datasets, offsetof(dsl_dataset_t, ds_dirty_link)); txg_list_create(&dp->dp_dirty_zilogs, offsetof(zilog_t, zl_dirty_link)); txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, offsetof(dsl_sync_task_t, dst_node)); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL); dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri, 1, 4, 0); return (dp); } int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &dp->dp_meta_objset); if (err != 0) dsl_pool_close(dp); else *dpp = dp; return (err); } int dsl_pool_open(dsl_pool_t *dp) { int err; dsl_dir_t *dd; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); if (err) goto out; if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) { err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; err = dsl_dataset_hold_obj(dp, dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds); if (err == 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } dsl_dir_rele(dd, dp); if (err) goto out; } if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir); if (err) goto out; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } /* * Note: errors ignored, because the leak dir will not exist if we * have not encountered a leak yet. 
*/ (void) dsl_pool_open_special_dir(dp, LEAK_DIR_NAME, &dp->dp_leak_dir); if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj); if (err != 0) goto out; } if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMPTY_BPOBJ)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, &dp->dp_empty_bpobj); if (err != 0) goto out; } err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj); if (err == ENOENT) err = 0; if (err) goto out; err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: rrw_exit(&dp->dp_config_rwlock, FTAG); return (err); } void dsl_pool_close(dsl_pool_t *dp) { /* * Drop our references from dsl_pool_open(). * * Since we held the origin_snap from "syncing" context (which * includes pool-opening context), it actually only got a "ref" * and not a hold, so just drop that here. */ if (dp->dp_origin_snap) dsl_dataset_rele(dp->dp_origin_snap, dp); if (dp->dp_mos_dir) dsl_dir_rele(dp->dp_mos_dir, dp); if (dp->dp_free_dir) dsl_dir_rele(dp->dp_free_dir, dp); if (dp->dp_leak_dir) dsl_dir_rele(dp->dp_leak_dir, dp); if (dp->dp_root_dir) dsl_dir_rele(dp->dp_root_dir, dp); bpobj_close(&dp->dp_free_bpobj); /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ if (dp->dp_meta_objset) dmu_objset_evict(dp->dp_meta_objset); txg_list_destroy(&dp->dp_dirty_datasets); txg_list_destroy(&dp->dp_dirty_zilogs); txg_list_destroy(&dp->dp_sync_tasks); txg_list_destroy(&dp->dp_dirty_dirs); /* * We can't set retry to TRUE since we're explicitly specifying * a spa to flush. This is good enough; any missed buffers for * this spa won't cause trouble, and they'll eventually fall * out of the ARC just like any other unused buffer. 
*/ arc_flush(dp->dp_spa, FALSE); txg_fini(dp); dsl_scan_fini(dp); dmu_buf_user_evict_wait(); rrw_destroy(&dp->dp_config_rwlock); mutex_destroy(&dp->dp_lock); taskq_destroy(dp->dp_vnrele_taskq); if (dp->dp_blkstats) kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); kmem_free(dp, sizeof (dsl_pool_t)); } dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); objset_t *os; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT0(err); /* Initialize scan structures */ VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); rrw_exit(&ds->ds_bp_rwlock, FTAG); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif dsl_dataset_rele(ds, FTAG); dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); return (dp); } /* * Account for the meta-objset space in its placeholder dsl_dir. */ void dsl_pool_mos_diduse_space(dsl_pool_t *dp, int64_t used, int64_t comp, int64_t uncomp) { ASSERT3U(comp, ==, uncomp); /* it's all metadata */ mutex_enter(&dp->dp_lock); dp->dp_mos_used_delta += used; dp->dp_mos_compressed_delta += comp; dp->dp_mos_uncompressed_delta += uncomp; mutex_exit(&dp->dp_lock); } static void dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx) { zio_t *zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dmu_objset_sync(dp->dp_meta_objset, zio, tx); VERIFY0(zio_wait(zio)); dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); } static void dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta) { ASSERT(MUTEX_HELD(&dp->dp_lock)); if (delta < 0) ASSERT3U(-delta, <=, dp->dp_dirty_total); dp->dp_dirty_total += delta; /* * Note: we signal even when increasing dp_dirty_total. * This ensures forward progress -- each thread wakes the next waiter. 
*/ if (dp->dp_dirty_total <= zfs_dirty_data_max) cv_signal(&dp->dp_spaceavail_cv); } void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) { zio_t *zio; dmu_tx_t *tx; dsl_dir_t *dd; dsl_dataset_t *ds; objset_t *mos = dp->dp_meta_objset; list_t synced_datasets; list_create(&synced_datasets, sizeof (dsl_dataset_t), offsetof(dsl_dataset_t, ds_synced_link)); tx = dmu_tx_create_assigned(dp, txg); /* * Write out all dirty blocks of dirty datasets. */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { /* * We must not sync any non-MOS datasets twice, because * we may have taken a snapshot of them. However, we * may sync newly-created datasets on pass 2. */ ASSERT(!list_link_active(&ds->ds_synced_link)); list_insert_tail(&synced_datasets, ds); dsl_dataset_sync(ds, zio, tx); } VERIFY0(zio_wait(zio)); /* * We have written all of the accounted dirty data, so our * dp_space_towrite should now be zero. However, some seldom-used * code paths do not adhere to this (e.g. dbuf_undirty(), also * rounding error in dbuf_write_physdone). * Shore up the accounting of any dirtied space now. */ dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); /* * After the data blocks have been written (ensured by the zio_wait() * above), update the user/group space accounting. */ for (ds = list_head(&synced_datasets); ds != NULL; ds = list_next(&synced_datasets, ds)) { dmu_objset_do_userquota_updates(ds->ds_objset, tx); } /* * Sync the datasets again to push out the changes due to * userspace updates. This must be done before we process the * sync tasks, so that any snapshots will have the correct * user accounting information (and we won't get confused * about which blocks are part of the snapshot). */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { ASSERT(list_link_active(&ds->ds_synced_link)); dmu_buf_rele(ds->ds_dbuf, ds); dsl_dataset_sync(ds, zio, tx); } VERIFY0(zio_wait(zio)); /* * Now that the datasets have been completely synced, we can * clean up our in-memory structures accumulated while syncing: * * - move dead blocks from the pending deadlist to the on-disk deadlist * - release hold from dsl_dataset_dirty() */ while ((ds = list_remove_head(&synced_datasets)) != NULL) { dsl_dataset_sync_done(ds, tx); } while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) { dsl_dir_sync(dd, tx); } /* * The MOS's space is accounted for in the pool/$MOS * (dp_mos_dir). We can't modify the mos while we're syncing * it, so we remember the deltas and apply them here. */ if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 || dp->dp_mos_uncompressed_delta != 0) { dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD, dp->dp_mos_used_delta, dp->dp_mos_compressed_delta, dp->dp_mos_uncompressed_delta, tx); dp->dp_mos_used_delta = 0; dp->dp_mos_compressed_delta = 0; dp->dp_mos_uncompressed_delta = 0; } if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { dsl_pool_sync_mos(dp, tx); } /* * If we modify a dataset in the same txg that we want to destroy it, * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it. * dsl_dir_destroy_check() will fail if there are unexpected holds. * Therefore, we want to sync the MOS (thus syncing the dd_dbuf * and clearing the hold on it) before we process the sync_tasks. 
* The MOS data dirtied by the sync_tasks will be synced on the next * pass. */ if (!txg_list_empty(&dp->dp_sync_tasks, txg)) { dsl_sync_task_t *dst; /* * No more sync tasks should have been added while we * were syncing. */ ASSERT3U(spa_sync_pass(dp->dp_spa), ==, 1); while ((dst = txg_list_remove(&dp->dp_sync_tasks, txg)) != NULL) dsl_sync_task_sync(dst, tx); } dmu_tx_commit(tx); DTRACE_PROBE2(dsl_pool_sync__done, dsl_pool_t *dp, dp, uint64_t, txg); } void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) { zilog_t *zilog; - while (zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg)) { + while (zilog = txg_list_head(&dp->dp_dirty_zilogs, txg)) { dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); + /* + * We don't remove the zilog from the dp_dirty_zilogs + * list until after we've cleaned it. This ensures that + * callers of zilog_is_dirty() receive an accurate + * answer when they are racing with the spa sync thread. + */ zil_clean(zilog, txg); + (void) txg_list_remove_this(&dp->dp_dirty_zilogs, zilog, txg); ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg)); dmu_buf_rele(ds->ds_dbuf, zilog); } ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); } /* * TRUE if the current thread is the tx_sync_thread or if we * are being called from SPA context during pool initialization. */ int dsl_pool_sync_context(dsl_pool_t *dp) { return (curthread == dp->dp_tx.tx_sync_thread || spa_is_initializing(dp->dp_spa)); } uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) { uint64_t space, resv; /* * If we're trying to assess whether it's OK to do a free, * cut the reservation in half to allow forward progress * (e.g. make it possible to rm(1) files from a full pool). */ space = spa_get_dspace(dp->dp_spa); resv = spa_get_slop_space(dp->dp_spa); if (netfree) resv >>= 1; return (space - resv); } boolean_t dsl_pool_need_dirty_delay(dsl_pool_t *dp) { uint64_t delay_min_bytes = zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; boolean_t rv; mutex_enter(&dp->dp_lock); if (dp->dp_dirty_total > zfs_dirty_data_sync) txg_kick(dp); rv = (dp->dp_dirty_total > delay_min_bytes); mutex_exit(&dp->dp_lock); return (rv); } void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) { if (space > 0) { mutex_enter(&dp->dp_lock); dp->dp_dirty_pertxg[tx->tx_txg & TXG_MASK] += space; dsl_pool_dirty_delta(dp, space); mutex_exit(&dp->dp_lock); } } void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg) { ASSERT3S(space, >=, 0); if (space == 0) return; mutex_enter(&dp->dp_lock); if (dp->dp_dirty_pertxg[txg & TXG_MASK] < space) { /* XXX writing something we didn't dirty? */ space = dp->dp_dirty_pertxg[txg & TXG_MASK]; } ASSERT3U(dp->dp_dirty_pertxg[txg & TXG_MASK], >=, space); dp->dp_dirty_pertxg[txg & TXG_MASK] -= space; ASSERT3U(dp->dp_dirty_total, >=, space); dsl_pool_dirty_delta(dp, -space); mutex_exit(&dp->dp_lock); } /* ARGSUSED */ static int upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds, *prev = NULL; int err; err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object) break; dsl_dataset_rele(ds, FTAG); ds = prev; prev = NULL; } if (prev == NULL) { prev = dp->dp_origin_snap; /* * The $ORIGIN can't have any data, or the accounting * will be wrong. 
*/ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth); rrw_exit(&ds->ds_bp_rwlock, FTAG); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { dsl_dataset_rele(ds, FTAG); return (0); } dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_prev_snap_obj = prev->ds_object; dsl_dataset_phys(ds)->ds_prev_snap_txg = dsl_dataset_phys(prev)->ds_creation_txg; dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); dsl_dir_phys(ds->ds_dir)->dd_origin_obj = prev->ds_object; dmu_buf_will_dirty(prev->ds_dbuf, tx); dsl_dataset_phys(prev)->ds_num_children++; if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) { ASSERT(ds->ds_prev == NULL); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); } } ASSERT3U(dsl_dir_phys(ds->ds_dir)->dd_origin_obj, ==, prev->ds_object); ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_obj, ==, prev->ds_object); if (dsl_dataset_phys(prev)->ds_next_clones_obj == 0) { dmu_buf_will_dirty(prev->ds_dbuf, tx); dsl_dataset_phys(prev)->ds_next_clones_obj = zap_create(dp->dp_meta_objset, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dataset_phys(prev)->ds_next_clones_obj, ds->ds_object, tx)); dsl_dataset_rele(ds, FTAG); if (prev != dp->dp_origin_snap) dsl_dataset_rele(prev, FTAG); return (0); } void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap != NULL); VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE)); } /* ARGSUSED */ static int upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { dmu_tx_t *tx = arg; objset_t *mos = dp->dp_meta_objset; if (dsl_dir_phys(ds->ds_dir)->dd_origin_obj != 0) { dsl_dataset_t *origin; VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &origin)); if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); dsl_dir_phys(origin->ds_dir)->dd_clones = zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dir_phys(origin->ds_dir)->dd_clones, ds->ds_object, tx)); dsl_dataset_rele(origin, FTAG); } return (0); } void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); uint64_t obj; (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* * We can't use bpobj_alloc(), because spa_version() still * returns the old version, and we need a new-version bpobj with * subobj support. So call dmu_object_alloc() directly. 
*/ obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, SPA_OLD_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE)); } void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) { uint64_t dsobj; dsl_dataset_t *ds; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap == NULL); ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER)); /* create the origin dir, ds, & snap-ds */ dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, NULL, 0, kcred, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap)); dsl_dataset_rele(ds, FTAG); } taskq_t * dsl_pool_vnrele_taskq(dsl_pool_t *dp) { return (dp->dp_vnrele_taskq); } /* * Walk through the pool-wide zap object of temporary snapshot user holds * and release them. */ void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) { zap_attribute_t za; zap_cursor_t zc; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; nvlist_t *holds; if (zapobj == 0) return; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); holds = fnvlist_alloc(); for (zap_cursor_init(&zc, mos, zapobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { char *htag; nvlist_t *tags; htag = strchr(za.za_name, '-'); *htag = '\0'; ++htag; if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) { tags = fnvlist_alloc(); fnvlist_add_boolean(tags, htag); fnvlist_add_nvlist(holds, za.za_name, tags); fnvlist_free(tags); } else { fnvlist_add_boolean(tags, htag); } } dsl_dataset_user_release_tmp(dp, holds); fnvlist_free(holds); zap_cursor_fini(&zc); } /* * Create the pool-wide zap object for storing temporary snapshot holds. */ void dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; ASSERT(dp->dp_tmp_userrefs_obj == 0); ASSERT(dmu_tx_is_syncing(tx)); dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx); } static int dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding) { objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; char *name; int error; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); ASSERT(dmu_tx_is_syncing(tx)); /* * If the pool was created prior to SPA_VERSION_USERREFS, the * zap object for temporary holds might not exist yet. */ if (zapobj == 0) { if (holding) { dsl_pool_user_hold_create_obj(dp, tx); zapobj = dp->dp_tmp_userrefs_obj; } else { return (SET_ERROR(ENOENT)); } } name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); if (holding) error = zap_add(mos, zapobj, name, 8, 1, &now, tx); else error = zap_remove(mos, zapobj, name, tx); strfree(name); return (error); } /* * Add a temporary hold for the given dataset object and tag. */ int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, uint64_t now, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); } /* * Release a temporary hold for the given dataset object and tag. 
*/ int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0, tx, B_FALSE)); } /* * DSL Pool Configuration Lock * * The dp_config_rwlock protects against changes to DSL state (e.g. dataset * creation / destruction / rename / property setting). It must be held for * read to hold a dataset or dsl_dir. I.e. you must call * dsl_pool_config_enter() or dsl_pool_hold() before calling * dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock * must be held continuously until all datasets and dsl_dirs are released. * * The only exception to this rule is that if a "long hold" is placed on * a dataset, then the dp_config_rwlock may be dropped while the dataset * is still held. The long hold will prevent the dataset from being * destroyed -- the destroy will fail with EBUSY. A long hold can be * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset * (by calling dsl_{dataset,objset}_{try}own{_obj}). * * Legitimate long-holders (including owners) should be long-running, cancelable * tasks that should cause "zfs destroy" to fail. This includes DMU * consumers (i.e. a ZPL filesystem being mounted or ZVOL being open), * "zfs send", and "zfs diff". There are several other long-holders whose * uses are suboptimal (e.g. "zfs promote", and zil_suspend()). * * The usual formula for long-holding would be: * dsl_pool_hold() * dsl_dataset_hold() * ... perform checks ... * dsl_dataset_long_hold() * dsl_pool_rele() * ... perform long-running task ... * dsl_dataset_long_rele() * dsl_dataset_rele() * * Note that when the long hold is released, the dataset is still held but * the pool is not held. The dataset may change arbitrarily during this time * (e.g. it could be destroyed). Therefore you shouldn't do anything to the * dataset except release it. * * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only * or modifying operations. * * Modifying operations should generally use dsl_sync_task(). The synctask * infrastructure enforces proper locking strategy with respect to the * dp_config_rwlock. See the comment above dsl_sync_task() for details. * * Read-only operations will manually hold the pool, then the dataset, obtain * information from the dataset, then release the pool and dataset. * dmu_objset_{hold,rele}() are convenience routines that also do the pool * hold/rele. */ int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp) { spa_t *spa; int error; error = spa_open(name, &spa, tag); if (error == 0) { *dp = spa_get_dsl(spa); dsl_pool_config_enter(*dp, tag); } return (error); } void dsl_pool_rele(dsl_pool_t *dp, void *tag) { dsl_pool_config_exit(dp, tag); spa_close(dp->dp_spa, tag); } void dsl_pool_config_enter(dsl_pool_t *dp, void *tag) { /* * We use a "reentrant" reader-writer lock, but not reentrantly. * * The rrwlock can (with the track_all flag) track all reading threads, * which is very useful for debugging which code path failed to release * the lock, and for verifying that the *current* thread does hold * the lock. * * (Unlike a rwlock, which knows that N threads hold it for * read, but not *which* threads, so rw_held(RW_READER) returns TRUE * if any thread holds it for read, even if this thread doesn't). 
*/ ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); } void dsl_pool_config_enter_prio(dsl_pool_t *dp, void *tag) { ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); rrw_enter_read_prio(&dp->dp_config_rwlock, tag); } void dsl_pool_config_exit(dsl_pool_t *dp, void *tag) { rrw_exit(&dp->dp_config_rwlock, tag); } boolean_t dsl_pool_config_held(dsl_pool_t *dp) { return (RRW_LOCK_HELD(&dp->dp_config_rwlock)); } boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp) { return (RRW_WRITE_HELD(&dp->dp_config_rwlock)); } Index: projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c =================================================================== --- projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (revision 309262) +++ projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (revision 309263) @@ -1,7351 +1,7356 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Martin Matuska . All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome */ /* * SPA: Storage Pool Allocator * * This file contains all the routines used when modifying on-disk SPA state. * This includes opening, importing, destroying, exporting a pool, and syncing a * pool. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #include #endif /* _KERNEL */ #include "zfs_prop.h" #include "zfs_comutil.h" /* Check hostid on import? */ static int check_hostid = 1; /* * The interval, in seconds, at which failed configuration cache file writes * should be retried. 
*/ static int zfs_ccw_retry_interval = 300; SYSCTL_DECL(_vfs_zfs); SYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RWTUN, &check_hostid, 0, "Check hostid on import?"); TUNABLE_INT("vfs.zfs.ccw_retry_interval", &zfs_ccw_retry_interval); SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RW, &zfs_ccw_retry_interval, 0, "Configuration cache file write, retry after failure, interval (seconds)"); typedef enum zti_modes { ZTI_MODE_FIXED, /* value is # of threads (min 1) */ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ ZTI_MODE_NULL, /* don't create a taskq */ ZTI_NMODES } zti_modes_t; #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } #define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 } #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 } #define ZTI_N(n) ZTI_P(n, 1) #define ZTI_ONE ZTI_N(1) typedef struct zio_taskq_info { zti_modes_t zti_mode; uint_t zti_value; uint_t zti_count; } zio_taskq_info_t; static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { "issue", "issue_high", "intr", "intr_high" }; /* * This table defines the taskq settings for each ZFS I/O type. When * initializing a pool, we use this table to create an appropriately sized * taskq. Some operations are low volume and therefore have a small, static * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE * macros. Other operations process a large amount of data; the ZTI_BATCH * macro causes us to create a taskq oriented for throughput. Some operations * are so high frequency and short-lived that the taskq itself can become a * point of lock contention. The ZTI_P(#, #) macro indicates that we need an * additional degree of parallelism specified by the number of threads per-taskq * and the number of taskqs; when dispatching an event in this case, the * particular taskq is chosen at random. * * The different taskq priorities are to handle the different contexts (issue * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that * need to be handled with minimum delay. */ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ { ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ { ZTI_BATCH, ZTI_N(5), ZTI_N(8), ZTI_N(5) }, /* WRITE */ { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ }; static sysevent_t *spa_event_create(spa_t *spa, vdev_t *vd, const char *name); static void spa_event_post(sysevent_t *ev); static void spa_sync_version(void *arg, dmu_tx_t *tx); static void spa_sync_props(void *arg, dmu_tx_t *tx); static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, char **ereport); static void spa_vdev_resilver_done(spa_t *spa); uint_t zio_taskq_batch_pct = 75; /* 1 thread per cpu in pset */ #ifdef PSRSET_BIND id_t zio_taskq_psrset_bind = PS_NONE; #endif #ifdef SYSDC boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ #endif uint_t zio_taskq_basedc = 80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ extern int zfs_sync_pass_deferred_free; #ifndef illumos extern void spa_deadman(void *arg); #endif /* * This (illegal) pool name is used when temporarily importing a spa_t in order * to get the vdev stats associated with the imported devices.
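 *
 * (Context, for illustration: pool names are required to start with a
 * letter, so "$import" can never clash with a real pool. The tryimport
 * path loads the pool under this temporary name to collect its config
 * and vdev stats, then discards it.)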
*/ #define TRYIMPORT_NAME "$import" /* * ========================================================================== * SPA properties routines * ========================================================================== */ /* * Add a (source=src, propname=propval) list to an nvlist. */ static void spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, uint64_t intval, zprop_source_t src) { const char *propname = zpool_prop_to_name(prop); nvlist_t *propval; VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); if (strval != NULL) VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); else VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); nvlist_free(propval); } /* * Get property values from the spa configuration. */ static void spa_prop_get_config(spa_t *spa, nvlist_t **nvp) { vdev_t *rvd = spa->spa_root_vdev; dsl_pool_t *pool = spa->spa_dsl_pool; uint64_t size, alloc, cap, version; zprop_source_t src = ZPROP_SRC_NONE; spa_config_dirent_t *dp; metaslab_class_t *mc = spa_normal_class(spa); ASSERT(MUTEX_HELD(&spa->spa_props_lock)); if (rvd != NULL) { alloc = metaslab_class_get_alloc(spa_normal_class(spa)); size = metaslab_class_get_space(spa_normal_class(spa)); spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, size - alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, metaslab_class_fragmentation(mc), src); spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, metaslab_class_expandable_space(mc), src); spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, (spa_mode(spa) == FREAD), src); cap = (size == 0) ? 0 : (alloc * 100 / size); spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, ddt_get_pool_dedup_ratio(spa), src); spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, rvd->vdev_state, src); version = spa_version(spa); if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) src = ZPROP_SRC_DEFAULT; else src = ZPROP_SRC_LOCAL; spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); } if (pool != NULL) { /* * The $FREE directory was introduced in SPA_VERSION_DEADLISTS; * when opening pools before this version, freedir will be NULL.
*/ if (pool->dp_free_dir != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, src); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, 0, src); } if (pool->dp_leak_dir != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, src); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, 0, src); } } spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); if (spa->spa_comment != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, 0, ZPROP_SRC_LOCAL); } if (spa->spa_root != NULL) spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 0, ZPROP_SRC_LOCAL); if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE); } if ((dp = list_head(&spa->spa_config_list)) != NULL) { if (dp->scd_path == NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, "none", 0, ZPROP_SRC_LOCAL); } else if (strcmp(dp->scd_path, spa_config_path) != 0) { spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, dp->scd_path, 0, ZPROP_SRC_LOCAL); } } } /* * Get zpool property values. */ int spa_prop_get(spa_t *spa, nvlist_t **nvp) { objset_t *mos = spa->spa_meta_objset; zap_cursor_t zc; zap_attribute_t za; int err; VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); mutex_enter(&spa->spa_props_lock); /* * Get properties from the spa config. */ spa_prop_get_config(spa, nvp); /* If no pool property object, no more prop to get. */ if (mos == NULL || spa->spa_pool_props_object == 0) { mutex_exit(&spa->spa_props_lock); return (0); } /* * Get properties from the MOS pool property object. */ for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); (err = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { uint64_t intval = 0; char *strval = NULL; zprop_source_t src = ZPROP_SRC_DEFAULT; zpool_prop_t prop; if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) continue; switch (za.za_integer_length) { case 8: /* integer property */ if (za.za_first_integer != zpool_prop_default_numeric(prop)) src = ZPROP_SRC_LOCAL; if (prop == ZPOOL_PROP_BOOTFS) { dsl_pool_t *dp; dsl_dataset_t *ds = NULL; dp = spa_get_dsl(spa); dsl_pool_config_enter(dp, FTAG); if (err = dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &ds)) { dsl_pool_config_exit(dp, FTAG); break; } strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); dsl_pool_config_exit(dp, FTAG); } else { strval = NULL; intval = za.za_first_integer; } spa_prop_add_list(*nvp, prop, strval, intval, src); if (strval != NULL) kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); break; case 1: /* string property */ strval = kmem_alloc(za.za_num_integers, KM_SLEEP); err = zap_lookup(mos, spa->spa_pool_props_object, za.za_name, 1, za.za_num_integers, strval); if (err) { kmem_free(strval, za.za_num_integers); break; } spa_prop_add_list(*nvp, prop, strval, 0, src); kmem_free(strval, za.za_num_integers); break; default: break; } } zap_cursor_fini(&zc); mutex_exit(&spa->spa_props_lock); out: if (err && err != ENOENT) { nvlist_free(*nvp); *nvp = NULL; return (err); } return (0); } /* * Validate the given pool properties nvlist and modify the list * for the property values to be set. 
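 *
 * For illustration (a hypothetical caller; "tank/ROOT" is a made-up
 * dataset name), the nvlist arriving here is keyed by property name:
 *
 *    nvlist_t *props;
 *    VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 *    VERIFY(nvlist_add_string(props,
 *        zpool_prop_to_name(ZPOOL_PROP_BOOTFS), "tank/ROOT") == 0);
 *    error = spa_prop_set(spa, props);
 *
 * Validation may rewrite entries in place, e.g. replacing the bootfs
 * string with the dataset's object number (see reset_bootfs below).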
*/ static int spa_prop_validate(spa_t *spa, nvlist_t *props) { nvpair_t *elem; int error = 0, reset_bootfs = 0; uint64_t objnum = 0; boolean_t has_feature = B_FALSE; elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { uint64_t intval; char *strval, *slash, *check, *fname; const char *propname = nvpair_name(elem); zpool_prop_t prop = zpool_name_to_prop(propname); switch (prop) { case ZPROP_INVAL: if (!zpool_prop_feature(propname)) { error = SET_ERROR(EINVAL); break; } /* * Sanitize the input. */ if (nvpair_type(elem) != DATA_TYPE_UINT64) { error = SET_ERROR(EINVAL); break; } if (nvpair_value_uint64(elem, &intval) != 0) { error = SET_ERROR(EINVAL); break; } if (intval != 0) { error = SET_ERROR(EINVAL); break; } fname = strchr(propname, '@') + 1; if (zfeature_lookup_name(fname, NULL) != 0) { error = SET_ERROR(EINVAL); break; } has_feature = B_TRUE; break; case ZPOOL_PROP_VERSION: error = nvpair_value_uint64(elem, &intval); if (!error && (intval < spa_version(spa) || intval > SPA_VERSION_BEFORE_FEATURES || has_feature)) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_DELEGATION: case ZPOOL_PROP_AUTOREPLACE: case ZPOOL_PROP_LISTSNAPS: case ZPOOL_PROP_AUTOEXPAND: error = nvpair_value_uint64(elem, &intval); if (!error && intval > 1) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_BOOTFS: /* * If the pool version is less than SPA_VERSION_BOOTFS, * or the pool is still being created (version == 0), * the bootfs property cannot be set. */ if (spa_version(spa) < SPA_VERSION_BOOTFS) { error = SET_ERROR(ENOTSUP); break; } /* * Make sure the vdev config is bootable */ if (!vdev_is_bootable(spa->spa_root_vdev)) { error = SET_ERROR(ENOTSUP); break; } reset_bootfs = 1; error = nvpair_value_string(elem, &strval); if (!error) { objset_t *os; uint64_t propval; if (strval == NULL || strval[0] == '\0') { objnum = zpool_prop_default_numeric( ZPOOL_PROP_BOOTFS); break; } if (error = dmu_objset_hold(strval, FTAG, &os)) break; /* * Must be ZPL, and its property settings * must be supported by GRUB (compression * is not gzip, and large blocks are not used). */ if (dmu_objset_type(os) != DMU_OST_ZFS) { error = SET_ERROR(ENOTSUP); } else if ((error = dsl_prop_get_int_ds(dmu_objset_ds(os), zfs_prop_to_name(ZFS_PROP_COMPRESSION), &propval)) == 0 && !BOOTFS_COMPRESS_VALID(propval)) { error = SET_ERROR(ENOTSUP); } else { objnum = dmu_objset_id(os); } dmu_objset_rele(os, FTAG); } break; case ZPOOL_PROP_FAILUREMODE: error = nvpair_value_uint64(elem, &intval); if (!error && (intval < ZIO_FAILURE_MODE_WAIT || intval > ZIO_FAILURE_MODE_PANIC)) error = SET_ERROR(EINVAL); /* * This is a special case which only occurs when * the pool has completely failed. This allows * the user to change the in-core failmode property * without syncing it out to disk (I/Os might * currently be blocked). We do this by returning * EIO to the caller (spa_prop_set) to trick it * into thinking we encountered a property validation * error. 
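 *
 * (Illustration, assuming a pool "tank" that is currently suspended:
 * "zpool set failmode=continue tank" flows through here, updates the
 * in-core spa_failmode so the new policy takes effect immediately, and
 * still returns an error to the caller so that spa_prop_set() does not
 * try to sync the change to disk.)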
*/ if (!error && spa_suspended(spa)) { spa->spa_failmode = intval; error = SET_ERROR(EIO); } break; case ZPOOL_PROP_CACHEFILE: if ((error = nvpair_value_string(elem, &strval)) != 0) break; if (strval[0] == '\0') break; if (strcmp(strval, "none") == 0) break; if (strval[0] != '/') { error = SET_ERROR(EINVAL); break; } slash = strrchr(strval, '/'); ASSERT(slash != NULL); if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || strcmp(slash, "/..") == 0) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_COMMENT: if ((error = nvpair_value_string(elem, &strval)) != 0) break; for (check = strval; *check != '\0'; check++) { /* * The kernel doesn't have an easy isprint() * check. For this kernel check, we merely * check ASCII apart from DEL. Fix this if * there is an easy-to-use kernel isprint(). */ if (*check >= 0x7f) { error = SET_ERROR(EINVAL); break; } } if (strlen(strval) > ZPROP_MAX_COMMENT) error = E2BIG; break; case ZPOOL_PROP_DEDUPDITTO: if (spa_version(spa) < SPA_VERSION_DEDUP) error = SET_ERROR(ENOTSUP); else error = nvpair_value_uint64(elem, &intval); if (error == 0 && intval != 0 && intval < ZIO_DEDUPDITTO_MIN) error = SET_ERROR(EINVAL); break; } if (error) break; } if (!error && reset_bootfs) { error = nvlist_remove(props, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); if (!error) { error = nvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); } } return (error); } void spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) { char *cachefile; spa_config_dirent_t *dp; if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), &cachefile) != 0) return; dp = kmem_alloc(sizeof (spa_config_dirent_t), KM_SLEEP); if (cachefile[0] == '\0') dp->scd_path = spa_strdup(spa_config_path); else if (strcmp(cachefile, "none") == 0) dp->scd_path = NULL; else dp->scd_path = spa_strdup(cachefile); list_insert_head(&spa->spa_config_list, dp); if (need_sync) spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); } int spa_prop_set(spa_t *spa, nvlist_t *nvp) { int error; nvpair_t *elem = NULL; boolean_t need_sync = B_FALSE; if ((error = spa_prop_validate(spa, nvp)) != 0) return (error); while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT || prop == ZPOOL_PROP_READONLY) continue; if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) { uint64_t ver; if (prop == ZPOOL_PROP_VERSION) { VERIFY(nvpair_value_uint64(elem, &ver) == 0); } else { ASSERT(zpool_prop_feature(nvpair_name(elem))); ver = SPA_VERSION_FEATURES; need_sync = B_TRUE; } /* Save time if the version is already set. */ if (ver == spa_version(spa)) continue; /* * In addition to the pool directory object, we might * create the pool properties object, the features for * read object, the features for write object, or the * feature descriptions object. */ error = dsl_sync_task(spa->spa_name, NULL, spa_sync_version, &ver, 6, ZFS_SPACE_CHECK_RESERVED); if (error) return (error); continue; } need_sync = B_TRUE; break; } if (need_sync) { return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, nvp, 6, ZFS_SPACE_CHECK_RESERVED)); } return (0); } /* * If the bootfs property value is dsobj, clear it. 
*/ void spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) { if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { VERIFY(zap_remove(spa->spa_meta_objset, spa->spa_pool_props_object, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); spa->spa_bootfs = 0; } } /*ARGSUSED*/ static int spa_change_guid_check(void *arg, dmu_tx_t *tx) { uint64_t *newguid = arg; spa_t *spa = dmu_tx_pool(tx)->dp_spa; vdev_t *rvd = spa->spa_root_vdev; uint64_t vdev_state; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); vdev_state = rvd->vdev_state; spa_config_exit(spa, SCL_STATE, FTAG); if (vdev_state != VDEV_STATE_HEALTHY) return (SET_ERROR(ENXIO)); ASSERT3U(spa_guid(spa), !=, *newguid); return (0); } static void spa_change_guid_sync(void *arg, dmu_tx_t *tx) { uint64_t *newguid = arg; spa_t *spa = dmu_tx_pool(tx)->dp_spa; uint64_t oldguid; vdev_t *rvd = spa->spa_root_vdev; oldguid = spa_guid(spa); spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); rvd->vdev_guid = *newguid; rvd->vdev_guid_sum += (*newguid - oldguid); vdev_config_dirty(rvd); spa_config_exit(spa, SCL_STATE, FTAG); spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", oldguid, *newguid); } /* * Change the GUID for the pool. This is done so that we can later * re-import a pool built from a clone of our own vdevs. We will modify * the root vdev's guid, our own pool guid, and then mark all of our * vdevs dirty. Note that we must make sure that all our vdevs are * online when we do this, or else any vdevs that weren't present * would be orphaned from our pool. We are also going to issue a * sysevent to update any watchers. */ int spa_change_guid(spa_t *spa) { int error; uint64_t guid; mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); guid = spa_generate_guid(NULL); error = dsl_sync_task(spa->spa_name, spa_change_guid_check, spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); if (error == 0) { spa_config_sync(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID); } mutex_exit(&spa_namespace_lock); mutex_exit(&spa->spa_vdev_top_lock); return (error); } /* * ========================================================================== * SPA state manipulation (open/create/destroy/import/export) * ========================================================================== */ static int spa_error_entry_compare(const void *a, const void *b) { spa_error_entry_t *sa = (spa_error_entry_t *)a; spa_error_entry_t *sb = (spa_error_entry_t *)b; int ret; ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, sizeof (zbookmark_phys_t)); if (ret < 0) return (-1); else if (ret > 0) return (1); else return (0); } /* * Utility function which retrieves copies of the current logs and * re-initializes them in the process. 
*/ void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) { ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); avl_create(&spa->spa_errlist_scrub, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); } static void spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; enum zti_modes mode = ztip->zti_mode; uint_t value = ztip->zti_value; uint_t count = ztip->zti_count; spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; char name[32]; uint_t flags = 0; boolean_t batch = B_FALSE; if (mode == ZTI_MODE_NULL) { tqs->stqs_count = 0; tqs->stqs_taskq = NULL; return; } ASSERT3U(count, >, 0); tqs->stqs_count = count; tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); switch (mode) { case ZTI_MODE_FIXED: ASSERT3U(value, >=, 1); value = MAX(value, 1); break; case ZTI_MODE_BATCH: batch = B_TRUE; flags |= TASKQ_THREADS_CPU_PCT; value = zio_taskq_batch_pct; break; default: panic("unrecognized mode for %s_%s taskq (%u:%u) in " "spa_activate()", zio_type_name[t], zio_taskq_types[q], mode, value); break; } for (uint_t i = 0; i < count; i++) { taskq_t *tq; if (count > 1) { (void) snprintf(name, sizeof (name), "%s_%s_%u", zio_type_name[t], zio_taskq_types[q], i); } else { (void) snprintf(name, sizeof (name), "%s_%s", zio_type_name[t], zio_taskq_types[q]); } #ifdef SYSDC if (zio_taskq_sysdc && spa->spa_proc != &p0) { if (batch) flags |= TASKQ_DC_BATCH; tq = taskq_create_sysdc(name, value, 50, INT_MAX, spa->spa_proc, zio_taskq_basedc, flags); } else { #endif pri_t pri = maxclsyspri; /* * The write issue taskq can be extremely CPU * intensive. Run it at slightly lower priority * than the other taskqs. */ if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) pri++; tq = taskq_create_proc(name, value, pri, 50, INT_MAX, spa->spa_proc, flags); #ifdef SYSDC } #endif tqs->stqs_taskq[i] = tq; } } static void spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; if (tqs->stqs_taskq == NULL) { ASSERT0(tqs->stqs_count); return; } for (uint_t i = 0; i < tqs->stqs_count; i++) { ASSERT3P(tqs->stqs_taskq[i], !=, NULL); taskq_destroy(tqs->stqs_taskq[i]); } kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); tqs->stqs_taskq = NULL; } /* * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. * Note that a type may have multiple discrete taskqs to avoid lock contention * on the taskq itself. In that case we choose which taskq at random by using * the low bits of gethrtime(). 
*/ void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) { spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; taskq_t *tq; ASSERT3P(tqs->stqs_taskq, !=, NULL); ASSERT3U(tqs->stqs_count, !=, 0); if (tqs->stqs_count == 1) { tq = tqs->stqs_taskq[0]; } else { #ifdef _KERNEL tq = tqs->stqs_taskq[cpu_ticks() % tqs->stqs_count]; #else tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count]; #endif } taskq_dispatch_ent(tq, func, arg, flags, ent); } static void spa_create_zio_taskqs(spa_t *spa) { for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { spa_taskqs_init(spa, t, q); } } } #ifdef _KERNEL #ifdef SPA_PROCESS static void spa_thread(void *arg) { callb_cpr_t cprinfo; spa_t *spa = arg; user_t *pu = PTOU(curproc); CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, spa->spa_name); ASSERT(curproc != &p0); (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), "zpool-%s", spa->spa_name); (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); #ifdef PSRSET_BIND /* bind this thread to the requested psrset */ if (zio_taskq_psrset_bind != PS_NONE) { pool_lock(); mutex_enter(&cpu_lock); mutex_enter(&pidlock); mutex_enter(&curproc->p_lock); if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, 0, NULL, NULL) == 0) { curthread->t_bind_pset = zio_taskq_psrset_bind; } else { cmn_err(CE_WARN, "Couldn't bind process for zfs pool \"%s\" to " "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); } mutex_exit(&curproc->p_lock); mutex_exit(&pidlock); mutex_exit(&cpu_lock); pool_unlock(); } #endif #ifdef SYSDC if (zio_taskq_sysdc) { sysdc_thread_enter(curthread, 100, 0); } #endif spa->spa_proc = curproc; spa->spa_did = curthread->t_did; spa_create_zio_taskqs(spa); mutex_enter(&spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); spa->spa_proc_state = SPA_PROC_ACTIVE; cv_broadcast(&spa->spa_proc_cv); CALLB_CPR_SAFE_BEGIN(&cprinfo); while (spa->spa_proc_state == SPA_PROC_ACTIVE) cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); spa->spa_proc_state = SPA_PROC_GONE; spa->spa_proc = &p0; cv_broadcast(&spa->spa_proc_cv); CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ mutex_enter(&curproc->p_lock); lwp_exit(); } #endif /* SPA_PROCESS */ #endif /* * Activate an uninitialized pool. */ static void spa_activate(spa_t *spa, int mode) { ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); /* Try to create a covering process */ mutex_enter(&spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_NONE); ASSERT(spa->spa_proc == &p0); spa->spa_did = 0; #ifdef SPA_PROCESS /* Only create a process if we're going to be around a while. 
*/ if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, NULL, 0) == 0) { spa->spa_proc_state = SPA_PROC_CREATED; while (spa->spa_proc_state == SPA_PROC_CREATED) { cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); } ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); ASSERT(spa->spa_proc != &p0); ASSERT(spa->spa_did != 0); } else { #ifdef _KERNEL cmn_err(CE_WARN, "Couldn't create process for zfs pool \"%s\"\n", spa->spa_name); #endif } } #endif /* SPA_PROCESS */ mutex_exit(&spa->spa_proc_lock); /* If we didn't create a process, we need to create our taskqs. */ ASSERT(spa->spa_proc == &p0); if (spa->spa_proc == &p0) { spa_create_zio_taskqs(spa); } /* * Start TRIM thread. */ trim_thread_create(spa); list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), offsetof(vdev_t, vdev_config_dirty_node)); list_create(&spa->spa_evicting_os_list, sizeof (objset_t), offsetof(objset_t, os_evicting_node)); list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), offsetof(vdev_t, vdev_state_dirty_node)); txg_list_create(&spa->spa_vdev_txg_list, offsetof(struct vdev, vdev_txg_node)); avl_create(&spa->spa_errlist_scrub, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); } /* * Opposite of spa_activate(). */ static void spa_deactivate(spa_t *spa) { ASSERT(spa->spa_sync_on == B_FALSE); ASSERT(spa->spa_dsl_pool == NULL); ASSERT(spa->spa_root_vdev == NULL); ASSERT(spa->spa_async_zio_root == NULL); ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); /* * Stop TRIM thread in case spa_unload() wasn't called directly * before spa_deactivate(). */ trim_thread_destroy(spa); spa_evicting_os_wait(spa); txg_list_destroy(&spa->spa_vdev_txg_list); list_destroy(&spa->spa_config_dirty_list); list_destroy(&spa->spa_evicting_os_list); list_destroy(&spa->spa_state_dirty_list); for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { spa_taskqs_fini(spa, t, q); } } metaslab_class_destroy(spa->spa_normal_class); spa->spa_normal_class = NULL; metaslab_class_destroy(spa->spa_log_class); spa->spa_log_class = NULL; /* * If this was part of an import or the open otherwise failed, we may * still have errors left in the queues. Empty them just in case. */ spa_errlog_drain(spa); avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_last); spa->spa_state = POOL_STATE_UNINITIALIZED; mutex_enter(&spa->spa_proc_lock); if (spa->spa_proc_state != SPA_PROC_NONE) { ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); spa->spa_proc_state = SPA_PROC_DEACTIVATE; cv_broadcast(&spa->spa_proc_cv); while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { ASSERT(spa->spa_proc != &p0); cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); } ASSERT(spa->spa_proc_state == SPA_PROC_GONE); spa->spa_proc_state = SPA_PROC_NONE; } ASSERT(spa->spa_proc == &p0); mutex_exit(&spa->spa_proc_lock); #ifdef SPA_PROCESS /* * We want to make sure spa_thread() has actually exited the ZFS * module, so that the module can't be unloaded out from underneath * it. */ if (spa->spa_did != 0) { thread_join(spa->spa_did); spa->spa_did = 0; } #endif /* SPA_PROCESS */ } /* * Verify a pool configuration, and construct the vdev tree appropriately. This * will create all the necessary vdevs in the appropriate layout, with each vdev * in the CLOSED state. This will prep the pool before open/creation/import. 
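 *
 * For illustration, the nvlist given to spa_config_parse() mirrors the
 * intended vdev tree; a two-way mirror would look roughly like this
 * (a sketch; device paths are made up):
 *
 *    vdev_tree: type "root"
 *        children[0]: type "mirror"
 *            children[0]: type "disk", path "/dev/da0"
 *            children[1]: type "disk", path "/dev/da1"
 *
 * The function recurses over each ZPOOL_CONFIG_CHILDREN array to build
 * the matching vdev_t hierarchy.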
* All vdev validation is done by the vdev_alloc() routine. */ static int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, int atype) { nvlist_t **child; uint_t children; int error; if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) return (error); if ((*vdp)->vdev_ops->vdev_op_leaf) return (0); error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children); if (error == ENOENT) return (0); if (error) { vdev_free(*vdp); *vdp = NULL; return (SET_ERROR(EINVAL)); } for (int c = 0; c < children; c++) { vdev_t *vd; if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, atype)) != 0) { vdev_free(*vdp); *vdp = NULL; return (error); } } ASSERT(*vdp != NULL); return (0); } /* * Opposite of spa_load(). */ static void spa_unload(spa_t *spa) { int i; ASSERT(MUTEX_HELD(&spa_namespace_lock)); /* * Stop TRIM thread. */ trim_thread_destroy(spa); /* * Stop async tasks. */ spa_async_suspend(spa); /* * Stop syncing. */ if (spa->spa_sync_on) { txg_sync_stop(spa->spa_dsl_pool); spa->spa_sync_on = B_FALSE; } /* * Wait for any outstanding async I/O to complete. */ if (spa->spa_async_zio_root != NULL) { for (int i = 0; i < max_ncpus; i++) (void) zio_wait(spa->spa_async_zio_root[i]); kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); spa->spa_async_zio_root = NULL; } bpobj_close(&spa->spa_deferred_bpobj); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * Close all vdevs. */ if (spa->spa_root_vdev) vdev_free(spa->spa_root_vdev); ASSERT(spa->spa_root_vdev == NULL); /* * Close the dsl pool. */ if (spa->spa_dsl_pool) { dsl_pool_close(spa->spa_dsl_pool); spa->spa_dsl_pool = NULL; spa->spa_meta_objset = NULL; } ddt_unload(spa); /* * Drop and purge level 2 cache */ spa_l2cache_drop(spa); for (i = 0; i < spa->spa_spares.sav_count; i++) vdev_free(spa->spa_spares.sav_vdevs[i]); if (spa->spa_spares.sav_vdevs) { kmem_free(spa->spa_spares.sav_vdevs, spa->spa_spares.sav_count * sizeof (void *)); spa->spa_spares.sav_vdevs = NULL; } if (spa->spa_spares.sav_config) { nvlist_free(spa->spa_spares.sav_config); spa->spa_spares.sav_config = NULL; } spa->spa_spares.sav_count = 0; for (i = 0; i < spa->spa_l2cache.sav_count; i++) { vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); vdev_free(spa->spa_l2cache.sav_vdevs[i]); } if (spa->spa_l2cache.sav_vdevs) { kmem_free(spa->spa_l2cache.sav_vdevs, spa->spa_l2cache.sav_count * sizeof (void *)); spa->spa_l2cache.sav_vdevs = NULL; } if (spa->spa_l2cache.sav_config) { nvlist_free(spa->spa_l2cache.sav_config); spa->spa_l2cache.sav_config = NULL; } spa->spa_l2cache.sav_count = 0; spa->spa_async_suspended = 0; if (spa->spa_comment != NULL) { spa_strfree(spa->spa_comment); spa->spa_comment = NULL; } spa_config_exit(spa, SCL_ALL, FTAG); } /* * Load (or re-load) the current list of vdevs describing the active spares for * this pool. When this is called, we have some form of basic information in * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and * then re-generate a more complete list including status information. */ static void spa_load_spares(spa_t *spa) { nvlist_t **spares; uint_t nspares; int i; vdev_t *vd, *tvd; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); /* * First, close and free any existing spare vdevs. 
*/ for (i = 0; i < spa->spa_spares.sav_count; i++) { vd = spa->spa_spares.sav_vdevs[i]; /* Undo the call to spa_activate() below */ if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, B_FALSE)) != NULL && tvd->vdev_isspare) spa_spare_remove(tvd); vdev_close(vd); vdev_free(vd); } if (spa->spa_spares.sav_vdevs) kmem_free(spa->spa_spares.sav_vdevs, spa->spa_spares.sav_count * sizeof (void *)); if (spa->spa_spares.sav_config == NULL) nspares = 0; else VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); spa->spa_spares.sav_count = (int)nspares; spa->spa_spares.sav_vdevs = NULL; if (nspares == 0) return; /* * Construct the array of vdevs, opening them to get status in the * process. For each spare, there are potentially two different vdev_t * structures associated with it: one in the list of spares (used only * for basic validation purposes) and one in the active vdev * configuration (if it's spared in). During this phase we open and * validate each vdev on the spare list. If the vdev also exists in the * active configuration, then we also mark this vdev as an active spare. */ spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) { VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, VDEV_ALLOC_SPARE) == 0); ASSERT(vd != NULL); spa->spa_spares.sav_vdevs[i] = vd; if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, B_FALSE)) != NULL) { if (!tvd->vdev_isspare) spa_spare_add(tvd); /* * We only mark the spare active if we were successfully * able to load the vdev. Otherwise, importing a pool * with a bad active spare would result in strange * behavior, because multiple pools would think the spare * is actively in use. * * There is a vulnerability here to an equally bizarre * circumstance, where a dead active spare is later * brought back to life (onlined or otherwise). Given * the rarity of this scenario, and the extra complexity * it adds, we ignore the possibility. */ if (!vdev_is_dead(tvd)) spa_spare_activate(tvd); } vd->vdev_top = vd; vd->vdev_aux = &spa->spa_spares; if (vdev_open(vd) != 0) continue; if (vdev_validate_aux(vd) == 0) spa_spare_add(vd); } /* * Recompute the stashed list of spares, with status information * this time. */ VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) spares[i] = vdev_config_generate(spa, spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); for (i = 0; i < spa->spa_spares.sav_count; i++) nvlist_free(spares[i]); kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); } /* * Load (or re-load) the current list of vdevs describing the active l2cache for * this pool. When this is called, we have some form of basic information in * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and * then re-generate a more complete list including status information. * Devices which are already active have their details maintained, and are * not re-opened.
*/ static void spa_load_l2cache(spa_t *spa) { nvlist_t **l2cache; uint_t nl2cache; int i, j, oldnvdevs; uint64_t guid; vdev_t *vd, **oldvdevs, **newvdevs; spa_aux_vdev_t *sav = &spa->spa_l2cache; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); if (sav->sav_config != NULL) { VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); } else { nl2cache = 0; newvdevs = NULL; } oldvdevs = sav->sav_vdevs; oldnvdevs = sav->sav_count; sav->sav_vdevs = NULL; sav->sav_count = 0; /* * Process new nvlist of vdevs. */ for (i = 0; i < nl2cache; i++) { VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, &guid) == 0); newvdevs[i] = NULL; for (j = 0; j < oldnvdevs; j++) { vd = oldvdevs[j]; if (vd != NULL && guid == vd->vdev_guid) { /* * Retain previous vdev for add/remove ops. */ newvdevs[i] = vd; oldvdevs[j] = NULL; break; } } if (newvdevs[i] == NULL) { /* * Create new vdev */ VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, VDEV_ALLOC_L2CACHE) == 0); ASSERT(vd != NULL); newvdevs[i] = vd; /* * Commit this vdev as an l2cache device, * even if it fails to open. */ spa_l2cache_add(vd); vd->vdev_top = vd; vd->vdev_aux = sav; spa_l2cache_activate(vd); if (vdev_open(vd) != 0) continue; (void) vdev_validate_aux(vd); if (!vdev_is_dead(vd)) l2arc_add_vdev(spa, vd); } } /* * Purge vdevs that were dropped */ for (i = 0; i < oldnvdevs; i++) { uint64_t pool; vd = oldvdevs[i]; if (vd != NULL) { ASSERT(vd->vdev_isl2cache); if (spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL && l2arc_vdev_present(vd)) l2arc_remove_vdev(vd); vdev_clear_stats(vd); vdev_free(vd); } } if (oldvdevs) kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); if (sav->sav_config == NULL) goto out; sav->sav_vdevs = newvdevs; sav->sav_count = (int)nl2cache; /* * Recompute the stashed list of l2cache devices, with status * information this time. */ VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < sav->sav_count; i++) l2cache[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); VERIFY(nvlist_add_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); out: for (i = 0; i < sav->sav_count; i++) nvlist_free(l2cache[i]); if (sav->sav_count) kmem_free(l2cache, sav->sav_count * sizeof (void *)); } static int load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) { dmu_buf_t *db; char *packed = NULL; size_t nvsize = 0; int error; *value = NULL; error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db); if (error != 0) return (error); nvsize = *(uint64_t *)db->db_data; dmu_buf_rele(db, FTAG); packed = kmem_alloc(nvsize, KM_SLEEP); error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, DMU_READ_PREFETCH); if (error == 0) error = nvlist_unpack(packed, nvsize, value, 0); kmem_free(packed, nvsize); return (error); } /* * Checks to see if the given vdev could not be opened, in which case we post a * sysevent to notify the autoreplace code that the device has been removed. 
*/ static void spa_check_removed(vdev_t *vd) { for (int c = 0; c < vd->vdev_children; c++) spa_check_removed(vd->vdev_child[c]); if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && !vd->vdev_ishole) { zfs_post_autoreplace(vd->vdev_spa, vd); spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); } } static void spa_config_valid_zaps(vdev_t *vd, vdev_t *mvd) { ASSERT3U(vd->vdev_children, ==, mvd->vdev_children); vd->vdev_top_zap = mvd->vdev_top_zap; vd->vdev_leaf_zap = mvd->vdev_leaf_zap; for (uint64_t i = 0; i < vd->vdev_children; i++) { spa_config_valid_zaps(vd->vdev_child[i], mvd->vdev_child[i]); } } /* * Validate the current config against the MOS config */ static boolean_t spa_config_valid(spa_t *spa, nvlist_t *config) { vdev_t *mrvd, *rvd = spa->spa_root_vdev; nvlist_t *nv; VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children); /* * If we're doing a normal import, then build up any additional * diagnostic information about missing devices in this config. * We'll pass this up to the user for further processing. */ if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { nvlist_t **child, *nv; uint64_t idx = 0; child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **), KM_SLEEP); VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; vdev_t *mtvd = mrvd->vdev_child[c]; if (tvd->vdev_ops == &vdev_missing_ops && mtvd->vdev_ops != &vdev_missing_ops && mtvd->vdev_islog) child[idx++] = vdev_config_generate(spa, mtvd, B_FALSE, 0); } if (idx) { VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, child, idx) == 0); VERIFY(nvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0); for (int i = 0; i < idx; i++) nvlist_free(child[i]); } nvlist_free(nv); kmem_free(child, rvd->vdev_children * sizeof (char **)); } /* * Compare the root vdev tree with the information we have * from the MOS config (mrvd). Check each top-level vdev * with the corresponding MOS config top-level (mtvd). */ for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; vdev_t *mtvd = mrvd->vdev_child[c]; /* * Resolve any "missing" vdevs in the current configuration. * If we find that the MOS config has more accurate information * about the top-level vdev then use that vdev instead. */ if (tvd->vdev_ops == &vdev_missing_ops && mtvd->vdev_ops != &vdev_missing_ops) { if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) continue; /* * Device specific actions. */ if (mtvd->vdev_islog) { spa_set_log_state(spa, SPA_LOG_CLEAR); } else { /* * XXX - once we have 'readonly' pool * support we should be able to handle * missing data devices by transitioning * the pool to readonly. */ continue; } /* * Swap the missing vdev with the data we were * able to obtain from the MOS config. */ vdev_remove_child(rvd, tvd); vdev_remove_child(mrvd, mtvd); vdev_add_child(rvd, mtvd); vdev_add_child(mrvd, tvd); spa_config_exit(spa, SCL_ALL, FTAG); vdev_load(mtvd); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_reopen(rvd); } else { if (mtvd->vdev_islog) { /* * Load the slog device's state from the MOS * config since it's possible that the label * does not contain the most up-to-date * information. */ vdev_load_log_state(tvd, mtvd); vdev_reopen(tvd); } /* * Per-vdev ZAP info is stored exclusively in the MOS. 
*/ spa_config_valid_zaps(tvd, mtvd); } } vdev_free(mrvd); spa_config_exit(spa, SCL_ALL, FTAG); /* * Ensure we were able to validate the config. */ return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum); } /* * Check for missing log devices */ static boolean_t spa_check_logs(spa_t *spa) { boolean_t rv = B_FALSE; dsl_pool_t *dp = spa_get_dsl(spa); switch (spa->spa_log_state) { case SPA_LOG_MISSING: /* need to recheck in case slog has been restored */ case SPA_LOG_UNKNOWN: rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj, zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0); if (rv) spa_set_log_state(spa, SPA_LOG_MISSING); break; } return (rv); } static boolean_t spa_passivate_log(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; boolean_t slog_found = B_FALSE; ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); if (!spa_has_slogs(spa)) return (B_FALSE); for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (tvd->vdev_islog) { metaslab_group_passivate(mg); slog_found = B_TRUE; } } return (slog_found); } static void spa_activate_log(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (tvd->vdev_islog) metaslab_group_activate(mg); } } int spa_offline_log(spa_t *spa) { int error; error = dmu_objset_find(spa_name(spa), zil_vdev_offline, NULL, DS_FIND_CHILDREN); if (error == 0) { /* * We successfully offlined the log device, sync out the * current txg so that the "stubby" block can be removed * by zil_sync(). */ txg_wait_synced(spa->spa_dsl_pool, 0); } return (error); } static void spa_aux_check_removed(spa_aux_vdev_t *sav) { int i; for (i = 0; i < sav->sav_count; i++) spa_check_removed(sav->sav_vdevs[i]); } void spa_claim_notify(zio_t *zio) { spa_t *spa = zio->io_spa; if (zio->io_error) return; mutex_enter(&spa->spa_props_lock); /* any mutex will do */ if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) spa->spa_claim_max_txg = zio->io_bp->blk_birth; mutex_exit(&spa->spa_props_lock); } typedef struct spa_load_error { uint64_t sle_meta_count; uint64_t sle_data_count; } spa_load_error_t; static void spa_load_verify_done(zio_t *zio) { blkptr_t *bp = zio->io_bp; spa_load_error_t *sle = zio->io_private; dmu_object_type_t type = BP_GET_TYPE(bp); int error = zio->io_error; spa_t *spa = zio->io_spa; if (error) { if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && type != DMU_OT_INTENT_LOG) atomic_inc_64(&sle->sle_meta_count); else atomic_inc_64(&sle->sle_data_count); } zio_data_buf_free(zio->io_data, zio->io_size); mutex_enter(&spa->spa_scrub_lock); spa->spa_scrub_inflight--; cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); } /* * Maximum number of concurrent scrub i/os to create while verifying * a pool while importing it. 
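 *
 * (Illustration: exported below as vfs.zfs.spa_load_verify_maxinflight;
 * e.g.
 *    sysctl vfs.zfs.spa_load_verify_maxinflight=1000
 * would throttle import-time verification to at most 1000 in-flight
 * scrub reads. 1000 is an arbitrary example value.)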
*/ int spa_load_verify_maxinflight = 10000; boolean_t spa_load_verify_metadata = B_TRUE; boolean_t spa_load_verify_data = B_TRUE; SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_maxinflight, CTLFLAG_RWTUN, &spa_load_verify_maxinflight, 0, "Maximum number of concurrent scrub I/Os to create while verifying a " "pool while importing it"); SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_metadata, CTLFLAG_RWTUN, &spa_load_verify_metadata, 0, "Check metadata on import?"); SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_data, CTLFLAG_RWTUN, &spa_load_verify_data, 0, "Check user data on import?"); /*ARGSUSED*/ static int spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); /* * Note: normally this routine will not be called if * spa_load_verify_metadata is not set. However, it may be useful * to manually set the flag after the traversal has begun. */ if (!spa_load_verify_metadata) return (0); if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data) return (0); zio_t *rio = arg; size_t size = BP_GET_PSIZE(bp); void *data = zio_data_buf_alloc(size); mutex_enter(&spa->spa_scrub_lock); while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); spa->spa_scrub_inflight++; mutex_exit(&spa->spa_scrub_lock); zio_nowait(zio_read(rio, spa, bp, data, size, spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); return (0); } /* ARGSUSED */ int verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); return (0); } static int spa_load_verify(spa_t *spa) { zio_t *rio; spa_load_error_t sle = { 0 }; zpool_rewind_policy_t policy; boolean_t verify_ok = B_FALSE; int error = 0; zpool_get_rewind_policy(spa->spa_config, &policy); if (policy.zrp_request & ZPOOL_NEVER_REWIND) return (0); dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); error = dmu_objset_find_dp(spa->spa_dsl_pool, spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, DS_FIND_CHILDREN); dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); if (error != 0) return (error); rio = zio_root(spa, NULL, &sle, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); if (spa_load_verify_metadata) { error = traverse_pool(spa, spa->spa_verify_min_txg, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, spa_load_verify_cb, rio); } (void) zio_wait(rio); spa->spa_load_meta_errors = sle.sle_meta_count; spa->spa_load_data_errors = sle.sle_data_count; if (!error && sle.sle_meta_count <= policy.zrp_maxmeta && sle.sle_data_count <= policy.zrp_maxdata) { int64_t loss = 0; verify_ok = B_TRUE; spa->spa_load_txg = spa->spa_uberblock.ub_txg; spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; VERIFY(nvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); VERIFY(nvlist_add_int64(spa->spa_load_info, ZPOOL_CONFIG_REWIND_TIME, loss) == 0); VERIFY(nvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); } else { spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; } if (error) { if (error != ENXIO && error != EIO) error = SET_ERROR(EIO); return (error); } return (verify_ok ? 0 : EIO); } /* * Find a value in the pool props object. 
*/ static void spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) { (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); } /* * Find a value in the pool directory object. */ static int spa_dir_prop(spa_t *spa, const char *name, uint64_t *val) { return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, name, sizeof (uint64_t), 1, val)); } static int spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) { vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); return (err); } /* * Fix up config after a partly-completed split. This is done with the * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off * pool have that entry in their config, but only the splitting one contains * a list of all the guids of the vdevs that are being split off. * * This function determines what to do with that list: either rejoin * all the disks to the pool, or complete the splitting process. To attempt * the rejoin, each disk that is offlined is marked online again, and * we do a reopen() call. If the vdev label for every disk that was * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) * then we call vdev_split() on each disk, and complete the split. * * Otherwise we leave the config alone, with all the vdevs in place in * the original pool. */ static void spa_try_repair(spa_t *spa, nvlist_t *config) { uint_t extracted; uint64_t *glist; uint_t i, gcount; nvlist_t *nvl; vdev_t **vd; boolean_t attempt_reopen; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) return; /* check that the config is complete */ if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, &glist, &gcount) != 0) return; vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); /* attempt to online all the vdevs & validate */ attempt_reopen = B_TRUE; for (i = 0; i < gcount; i++) { if (glist[i] == 0) /* vdev is hole */ continue; vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); if (vd[i] == NULL) { /* * Don't bother attempting to reopen the disks; * just do the split. */ attempt_reopen = B_FALSE; } else { /* attempt to re-online it */ vd[i]->vdev_offline = B_FALSE; } } if (attempt_reopen) { vdev_reopen(spa->spa_root_vdev); /* check each device to see what state it's in */ for (extracted = 0, i = 0; i < gcount; i++) { if (vd[i] != NULL && vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) break; ++extracted; } } /* * If every disk has been moved to the new pool, or if we never * even attempted to look at them, then we split them off for * good. */ if (!attempt_reopen || gcount == extracted) { for (i = 0; i < gcount; i++) if (vd[i] != NULL) vdev_split(vd[i]); vdev_reopen(spa->spa_root_vdev); } kmem_free(vd, gcount * sizeof (vdev_t *)); } static int spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig) { nvlist_t *config = spa->spa_config; char *ereport = FM_EREPORT_ZFS_POOL; char *comment; int error; uint64_t pool_guid; nvlist_t *nvl; if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) return (SET_ERROR(EINVAL)); ASSERT(spa->spa_comment == NULL); if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) spa->spa_comment = spa_strdup(comment); /* * Versioning wasn't explicitly added to the label until later, so if * it's not present treat it as the initial version. 
*/ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &spa->spa_ubsync.ub_version) != 0) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &spa->spa_config_txg); if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && spa_guid_exists(pool_guid, 0)) { error = SET_ERROR(EEXIST); } else { spa->spa_config_guid = pool_guid; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) { VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting, KM_SLEEP) == 0); } nvlist_free(spa->spa_load_info); spa->spa_load_info = fnvlist_alloc(); gethrestime(&spa->spa_loaded_ts); error = spa_load_impl(spa, pool_guid, config, state, type, mosconfig, &ereport); } /* * Don't count references from objsets that are already closed * and are making their way through the eviction process. */ spa_evicting_os_wait(spa); spa->spa_minref = refcount_count(&spa->spa_refcount); if (error) { if (error != EEXIST) { spa->spa_loaded_ts.tv_sec = 0; spa->spa_loaded_ts.tv_nsec = 0; } if (error != EBADF) { zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); } } spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; spa->spa_ena = 0; return (error); } /* * Count the number of per-vdev ZAPs associated with all of the vdevs in the * vdev tree rooted in the given vd, and ensure that each ZAP is present in the * spa's per-vdev ZAP list. */ static uint64_t vdev_count_verify_zaps(vdev_t *vd) { spa_t *spa = vd->vdev_spa; uint64_t total = 0; if (vd->vdev_top_zap != 0) { total++; ASSERT0(zap_lookup_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps, vd->vdev_top_zap)); } if (vd->vdev_leaf_zap != 0) { total++; ASSERT0(zap_lookup_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); } for (uint64_t i = 0; i < vd->vdev_children; i++) { total += vdev_count_verify_zaps(vd->vdev_child[i]); } return (total); } /* * Load an existing storage pool, using the pool's builtin spa_config as a * source of configuration information. */ static int spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, char **ereport) { int error = 0; nvlist_t *nvroot = NULL; nvlist_t *label; vdev_t *rvd; uberblock_t *ub = &spa->spa_uberblock; uint64_t children, config_cache_txg = spa->spa_config_txg; int orig_mode = spa->spa_mode; int parse; uint64_t obj; boolean_t missing_feat_write = B_FALSE; /* * If this is an untrusted config, access the pool in read-only mode. * This prevents things like resilvering recently removed devices. */ if (!mosconfig) spa->spa_mode = FREAD; ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa->spa_load_state = state; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)) return (SET_ERROR(EINVAL)); parse = (type == SPA_IMPORT_EXISTING ? VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); /* * Create "The Godfather" zio to hold all async IOs */ spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); for (int i = 0; i < max_ncpus; i++) { spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); } /* * Parse the configuration into a vdev tree. We explicitly set the * value that will be returned by spa_version() since parsing the * configuration requires knowing the version number. 
*/ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) return (error); ASSERT(spa->spa_root_vdev == rvd); ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT); ASSERT3U(spa->spa_max_ashift, <=, SPA_MAXBLOCKSHIFT); if (type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_guid(spa) == pool_guid); } /* * Try to open all vdevs, loading each label in the process. */ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = vdev_open(rvd); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) return (error); /* * We need to validate the vdev labels against the configuration that * we have in hand, which is dependent on the setting of mosconfig. If * mosconfig is true then we're validating the vdev labels based on * that config. Otherwise, we're validating against the cached config * (zpool.cache) that was read when we loaded the zfs module, and then * later we will recursively call spa_load() and validate against * the vdev config. * * If we're assembling a new pool that's been split off from an * existing pool, the labels haven't yet been updated so we skip * validation for now. */ if (type != SPA_IMPORT_ASSEMBLE) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = vdev_validate(rvd, mosconfig); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) return (error); if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) return (SET_ERROR(ENXIO)); } /* * Find the best uberblock. */ vdev_uberblock_load(rvd, ub, &label); /* * If we weren't able to find a single valid uberblock, return failure. */ if (ub->ub_txg == 0) { nvlist_free(label); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); } /* * If the pool has an unsupported version we can't open it. */ if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { nvlist_free(label); return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); } if (ub->ub_version >= SPA_VERSION_FEATURES) { nvlist_t *features; /* * If we weren't able to find what's necessary for reading the * MOS in the label, return failure. */ if (label == NULL || nvlist_lookup_nvlist(label, ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) { nvlist_free(label); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); } /* * Update our in-core representation with the definitive values * from the label. */ nvlist_free(spa->spa_label_features); VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); } nvlist_free(label); /* * Look through entries in the label nvlist's features_for_read. If * there is a feature listed there which we don't understand then we * cannot open a pool. */ if (ub->ub_version >= SPA_VERSION_FEATURES) { nvlist_t *unsup_feat; VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, NULL); nvp != NULL; nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { if (!zfeature_is_supported(nvpair_name(nvp))) { VERIFY(nvlist_add_string(unsup_feat, nvpair_name(nvp), "") == 0); } } if (!nvlist_empty(unsup_feat)) { VERIFY(nvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); nvlist_free(unsup_feat); return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); } nvlist_free(unsup_feat); } /* * If the vdev guid sum doesn't match the uberblock, we have an * incomplete configuration. We first check to see if the pool * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN). * If it is, defer the vdev_guid_sum check till later so we * can handle missing vdevs. 
*/ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE && rvd->vdev_guid_sum != ub->ub_guid_sum) return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_try_repair(spa, config); spa_config_exit(spa, SCL_ALL, FTAG); nvlist_free(spa->spa_config_splitting); spa->spa_config_splitting = NULL; } /* * Initialize internal SPA structures. */ spa->spa_state = POOL_STATE_ACTIVE; spa->spa_ubsync = spa->spa_uberblock; spa->spa_verify_min_txg = spa->spa_extreme_rewind ? TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; spa->spa_first_txg = spa->spa_last_ubsync_txg ? spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; spa->spa_claim_max_txg = spa->spa_first_txg; spa->spa_prev_software_version = ub->ub_software_version; error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); if (error) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (spa_version(spa) >= SPA_VERSION_FEATURES) { boolean_t missing_feat_read = B_FALSE; nvlist_t *unsup_feat, *enabled_feat; if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, &spa->spa_feat_for_read_obj) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, &spa->spa_feat_for_write_obj) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, &spa->spa_feat_desc_obj) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } enabled_feat = fnvlist_alloc(); unsup_feat = fnvlist_alloc(); if (!spa_features_check(spa, B_FALSE, unsup_feat, enabled_feat)) missing_feat_read = B_TRUE; if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) { if (!spa_features_check(spa, B_TRUE, unsup_feat, enabled_feat)) { missing_feat_write = B_TRUE; } } fnvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); if (!nvlist_empty(unsup_feat)) { fnvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); } fnvlist_free(enabled_feat); fnvlist_free(unsup_feat); if (!missing_feat_read) { fnvlist_add_boolean(spa->spa_load_info, ZPOOL_CONFIG_CAN_RDONLY); } /* * If the state is SPA_LOAD_TRYIMPORT, our objective is * twofold: to determine whether the pool is available for * import in read-write mode and (if it is not) whether the * pool is available for import in read-only mode. If the pool * is available for import in read-write mode, it is displayed * as available in userland; if it is not available for import * in read-only mode, it is displayed as unavailable in * userland. If the pool is available for import in read-only * mode but not read-write mode, it is displayed as unavailable * in userland with a special note that the pool is actually * available for open in read-only mode. * * As a result, if the state is SPA_LOAD_TRYIMPORT and we are * missing a feature for write, we must first determine whether * the pool can be opened read-only before returning to * userland in order to know whether to display the * abovementioned note. 
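 *
 * To summarize the cases described above (roughly):
 *
 *	missing_feat_read	missing_feat_write	shown to userland as
 *	-----------------	------------------	--------------------
 *	false			false			importable read-write
 *	false			true			importable read-only
 *	true			(either)		unavailable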
*/ if (missing_feat_read || (missing_feat_write && spa_writeable(spa))) { return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); } /* * Load refcounts for ZFS features from disk into an in-memory * cache during SPA initialization. */ for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { uint64_t refcount; error = feature_get_refcount_from_disk(spa, &spa_feature_table[i], &refcount); if (error == 0) { spa->spa_feat_refcount_cache[i] = refcount; } else if (error == ENOTSUP) { spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED; } else { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } } } if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG, &spa->spa_feat_enabled_txg_obj) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } spa->spa_is_initializing = B_TRUE; error = dsl_pool_open(spa->spa_dsl_pool); spa->spa_is_initializing = B_FALSE; if (error != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (!mosconfig) { uint64_t hostid; nvlist_t *policy = NULL, *nvconfig; if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, ZPOOL_CONFIG_HOSTID, &hostid) == 0) { char *hostname; unsigned long myhostid = 0; VERIFY(nvlist_lookup_string(nvconfig, ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); #ifdef _KERNEL myhostid = zone_get_hostid(NULL); #else /* _KERNEL */ /* * We're emulating the system's hostid in userland, so * we can't use zone_get_hostid(). */ (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); #endif /* _KERNEL */ if (check_hostid && hostid != 0 && myhostid != 0 && hostid != myhostid) { nvlist_free(nvconfig); cmn_err(CE_WARN, "pool '%s' could not be " "loaded as it was last accessed by " "another system (host: %s hostid: 0x%lx). " "See: http://illumos.org/msg/ZFS-8000-EY", spa_name(spa), hostname, (unsigned long)hostid); return (SET_ERROR(EBADF)); } } if (nvlist_lookup_nvlist(spa->spa_config, ZPOOL_REWIND_POLICY, &policy) == 0) VERIFY(nvlist_add_nvlist(nvconfig, ZPOOL_REWIND_POLICY, policy) == 0); spa_config_set(spa, nvconfig); spa_unload(spa); spa_deactivate(spa); spa_activate(spa, orig_mode); return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE)); } /* Grab the secret checksum salt from the MOS. */ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, sizeof (spa->spa_cksum_salt.zcs_bytes), spa->spa_cksum_salt.zcs_bytes); if (error == ENOENT) { /* Generate a new salt for subsequent use */ (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, sizeof (spa->spa_cksum_salt.zcs_bytes)); } else if (error != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); if (error != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the bit that tells us to use the new accounting function * (raid-z deflation). If we have an older pool, this will not * be present. */ error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, &spa->spa_creation_version); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the persistent error log. 
If we have an older pool, this will * not be present. */ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, &spa->spa_errlog_scrub); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the history object. If we have an older pool, this * will not be present. */ error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the per-vdev ZAP map. If we have an older pool, this will not * be present; in this case, defer its creation to a later time to * avoid dirtying the MOS this early / out of sync context. See * spa_sync_config_object. */ /* The sentinel is only available in the MOS config. */ nvlist_t *mos_config; if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP, &spa->spa_all_vdev_zaps); if (error != ENOENT && error != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } else if (error == 0 && !nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) { /* * An older version of ZFS overwrote the sentinel value, so * we have orphaned per-vdev ZAPs in the MOS. Defer their * destruction to later; see spa_sync_config_object. */ spa->spa_avz_action = AVZ_ACTION_DESTROY; /* * We're assuming that no vdevs have had their ZAPs created * before this. Better be sure of it. */ ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); } nvlist_free(mos_config); /* * If we're assembling the pool from the split-off vdevs of * an existing pool, we don't want to attach the spares & cache * devices. */ /* * Load any hot spares for this pool. */ error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); if (load_nvlist(spa, spa->spa_spares.sav_object, &spa->spa_spares.sav_config) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); } else if (error == 0) { spa->spa_spares.sav_sync = B_TRUE; } /* * Load any level 2 ARC devices for this pool. 
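 *
 * Like the spares above, this follows the usual pattern for optional
 * MOS directory entries, where ENOENT merely means the pool predates
 * the feature.  A sketch of the pattern (illustrative only):
 *
 *	error = spa_dir_prop(spa, DMU_POOL_L2CACHE, &sav_object);
 *	if (error != 0 && error != ENOENT)
 *		return (corrupt-data error);
 *	if (error == 0)
 *		load the nvlist and attach the devices;
 *	else
 *		nothing to do -- older pool;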
*/ error = spa_dir_prop(spa, DMU_POOL_L2CACHE, &spa->spa_l2cache.sav_object); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); if (load_nvlist(spa, spa->spa_l2cache.sav_object, &spa->spa_l2cache.sav_config) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); } else if (error == 0) { spa->spa_l2cache.sav_sync = B_TRUE; } spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object); if (error && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0) { uint64_t autoreplace; spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, &spa->spa_dedup_ditto); spa->spa_autoreplace = (autoreplace != 0); } /* * If the 'autoreplace' property is set, then post a resource notifying * the ZFS DE that it should not issue any faults for unopenable * devices. We also iterate over the vdevs, and post a sysevent for any * unopenable vdevs so that the normal autoreplace handler can take * over. */ if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { spa_check_removed(spa->spa_root_vdev); /* * For the import case, this is done in spa_import(), because * at this point we're using the spare definitions from * the MOS config, not necessarily from the userland config. */ if (state != SPA_LOAD_IMPORT) { spa_aux_check_removed(&spa->spa_spares); spa_aux_check_removed(&spa->spa_l2cache); } } /* * Load the vdev state for all toplevel vdevs. */ vdev_load(rvd); /* * Propagate the leaf DTLs we just loaded all the way up the tree. */ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_dtl_reassess(rvd, 0, 0, B_FALSE); spa_config_exit(spa, SCL_ALL, FTAG); /* * Load the DDTs (dedup tables). */ error = ddt_load(spa); if (error != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa_update_dspace(spa); /* * Validate the config, using the MOS config to fill in any * information which might be missing. If we fail to validate * the config then declare the pool unfit for use. If we're * assembling a pool from a split, the log is not transferred * over. */ if (type != SPA_IMPORT_ASSEMBLE) { nvlist_t *nvconfig; if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (!spa_config_valid(spa, nvconfig)) { nvlist_free(nvconfig); return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); } nvlist_free(nvconfig); /* * Now that we've validated the config, check the state of the * root vdev. If it can't be opened, it indicates one or * more toplevel vdevs are faulted. */ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) return (SET_ERROR(ENXIO)); if (spa_writeable(spa) && spa_check_logs(spa)) { *ereport = FM_EREPORT_ZFS_LOG_REPLAY; return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO)); } } if (missing_feat_write) { ASSERT(state == SPA_LOAD_TRYIMPORT); /* * At this point, we know that we can open the pool in * read-only mode but not read-write mode. We now have enough * information and can return to userland. 
	 */
		return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
	}

	/*
	 * We've successfully opened the pool, verify that we're ready
	 * to start pushing transactions.
	 */
	if (state != SPA_LOAD_TRYIMPORT) {
		if ((error = spa_load_verify(spa)) != 0)
			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
			    error));
	}

	if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
	    spa->spa_load_max_txg == UINT64_MAX)) {
		dmu_tx_t *tx;
		int need_update = B_FALSE;
		dsl_pool_t *dp = spa_get_dsl(spa);

		ASSERT(state != SPA_LOAD_TRYIMPORT);

		/*
		 * Claim log blocks that haven't been committed yet.
		 * This must all happen in a single txg.
		 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
		 * invoked from zil_claim_log_block()'s i/o done callback.
		 * Price of rollback is that we abandon the log.
		 */
		spa->spa_claiming = B_TRUE;

		tx = dmu_tx_create_assigned(dp, spa_first_txg(spa));
		(void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
		    zil_claim, tx, DS_FIND_CHILDREN);
		dmu_tx_commit(tx);

		spa->spa_claiming = B_FALSE;

		spa_set_log_state(spa, SPA_LOG_GOOD);
		spa->spa_sync_on = B_TRUE;
		txg_sync_start(spa->spa_dsl_pool);

		/*
		 * Wait for all claims to sync.  We sync up to the highest
		 * claimed log block birth time so that claimed log blocks
		 * don't appear to be from the future.  spa_claim_max_txg
		 * will have been set for us by either zil_check_log_chain()
		 * (invoked from spa_check_logs()) or zil_claim() above.
		 */
		txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);

		/*
		 * If the config cache is stale, or we have uninitialized
		 * metaslabs (see spa_vdev_add()), then update the config.
		 *
		 * If this is a verbatim import, trust the current
		 * in-core spa_config and update the disk labels.
		 */
		if (config_cache_txg != spa->spa_config_txg ||
		    state == SPA_LOAD_IMPORT ||
		    state == SPA_LOAD_RECOVER ||
		    (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
			need_update = B_TRUE;

		for (int c = 0; c < rvd->vdev_children; c++)
			if (rvd->vdev_child[c]->vdev_ms_array == 0)
				need_update = B_TRUE;

		/*
		 * Update the config cache asynchronously in case we're the
		 * root pool, in which case the config cache isn't writable
		 * yet.
		 */
		if (need_update)
			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		/*
		 * Check all DTLs to see if anything needs resilvering.
		 */
		if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
		    vdev_resilver_needed(rvd, NULL, NULL))
			spa_async_request(spa, SPA_ASYNC_RESILVER);

		/*
		 * Log the fact that we booted up (so that we can detect if
		 * we rebooted in the middle of an operation).
		 */
		spa_history_log_version(spa, "open");

		/*
		 * Delete any inconsistent datasets.
		 */
		(void) dmu_objset_find(spa_name(spa),
		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);

		/*
		 * Clean up any stale temporary dataset userrefs.
		 */
		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
	}

	return (0);
}

static int
spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
{
	int mode = spa->spa_mode;

	spa_unload(spa);
	spa_deactivate(spa);

	spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;

	spa_activate(spa, mode);
	spa_async_suspend(spa);

	return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
}

/*
 * If spa_load() fails, this function will try loading prior txgs. If
 * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool
 * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this
 * function will not rewind the pool and will return the same error as
 * spa_load().
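 *
 * The behavior is steered by 'rewind_flags'; briefly (a summary of the
 * logic below, not an exhaustive contract):
 *
 *	ZPOOL_NEVER_REWIND	give up immediately with spa_load()'s error
 *	ZPOOL_EXTREME_REWIND	permit rewinding all the way back to
 *				TXG_INITIAL instead of only TXG_DEFER_SIZE
 *				txgs
 *
 * (Callers request rewind via ZPOOL_DO_REWIND in the rewind policy,
 * which makes 'state' SPA_LOAD_RECOVER.)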
*/ static int spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, uint64_t max_request, int rewind_flags) { nvlist_t *loadinfo = NULL; nvlist_t *config = NULL; int load_error, rewind_error; uint64_t safe_rewind_txg; uint64_t min_txg; if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { spa->spa_load_max_txg = spa->spa_load_txg; spa_set_log_state(spa, SPA_LOG_CLEAR); } else { spa->spa_load_max_txg = max_request; if (max_request != UINT64_MAX) spa->spa_extreme_rewind = B_TRUE; } load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig); if (load_error == 0) return (0); if (spa->spa_root_vdev != NULL) config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; if (rewind_flags & ZPOOL_NEVER_REWIND) { nvlist_free(config); return (load_error); } if (state == SPA_LOAD_RECOVER) { /* Price of rolling back is discarding txgs, including log */ spa_set_log_state(spa, SPA_LOG_CLEAR); } else { /* * If we aren't rolling back save the load info from our first * import attempt so that we can restore it after attempting * to rewind. */ loadinfo = spa->spa_load_info; spa->spa_load_info = fnvlist_alloc(); } spa->spa_load_max_txg = spa->spa_last_ubsync_txg; safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? TXG_INITIAL : safe_rewind_txg; /* * Continue as long as we're finding errors, we're still within * the acceptable rewind range, and we're still finding uberblocks */ while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { if (spa->spa_load_max_txg < safe_rewind_txg) spa->spa_extreme_rewind = B_TRUE; rewind_error = spa_load_retry(spa, state, mosconfig); } spa->spa_extreme_rewind = B_FALSE; spa->spa_load_max_txg = UINT64_MAX; if (config && (rewind_error || state != SPA_LOAD_RECOVER)) spa_config_set(spa, config); if (state == SPA_LOAD_RECOVER) { ASSERT3P(loadinfo, ==, NULL); return (rewind_error); } else { /* Store the rewind info as part of the initial load info */ fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, spa->spa_load_info); /* Restore the initial load info */ fnvlist_free(spa->spa_load_info); spa->spa_load_info = loadinfo; return (load_error); } } /* * Pool Open/Import * * The import case is identical to an open except that the configuration is sent * down from userland, instead of grabbed from the configuration cache. For the * case of an open, the pool configuration will exist in the * POOL_STATE_UNINITIALIZED state. * * The stats information (gen/count/ustats) is used to gather vdev statistics at * the same time open the pool, without having to keep around the spa_t in some * ambiguous state. */ static int spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, nvlist_t **config) { spa_t *spa; spa_load_state_t state = SPA_LOAD_OPEN; int error; int locked = B_FALSE; int firstopen = B_FALSE; *spapp = NULL; /* * As disgusting as this is, we need to support recursive calls to this * function because dsl_dir_open() is called during spa_load(), and ends * up calling spa_open() again. The real fix is to figure out how to * avoid dsl_dir_open() calling this in the first place. 
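 *
 * For reference, a typical external caller pairs spa_open() with
 * spa_close(); an illustrative sketch (pool name hypothetical):
 *
 *	spa_t *spa;
 *	int err;
 *
 *	if ((err = spa_open("tank", &spa, FTAG)) == 0) {
 *		... use the pool ...
 *		spa_close(spa, FTAG);
 *	}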
*/ if (mutex_owner(&spa_namespace_lock) != curthread) { mutex_enter(&spa_namespace_lock); locked = B_TRUE; } if ((spa = spa_lookup(pool)) == NULL) { if (locked) mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } if (spa->spa_state == POOL_STATE_UNINITIALIZED) { zpool_rewind_policy_t policy; firstopen = B_TRUE; zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config, &policy); if (policy.zrp_request & ZPOOL_DO_REWIND) state = SPA_LOAD_RECOVER; spa_activate(spa, spa_mode_global); if (state != SPA_LOAD_RECOVER) spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, policy.zrp_request); if (error == EBADF) { /* * If vdev_validate() returns failure (indicated by * EBADF), it indicates that one of the vdevs indicates * that the pool has been exported or destroyed. If * this is the case, the config cache is out of sync and * we should remove the pool from the namespace. */ spa_unload(spa); spa_deactivate(spa); spa_config_sync(spa, B_TRUE, B_TRUE); spa_remove(spa); if (locked) mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } if (error) { /* * We can't open the pool, but we still have useful * information: the state of each vdev after the * attempted vdev_open(). Return this to the user. */ if (config != NULL && spa->spa_config) { VERIFY(nvlist_dup(spa->spa_config, config, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); } spa_unload(spa); spa_deactivate(spa); spa->spa_last_open_failed = error; if (locked) mutex_exit(&spa_namespace_lock); *spapp = NULL; return (error); } } spa_open_ref(spa, tag); if (config != NULL) *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); /* * If we've recovered the pool, pass back any information we * gathered while doing the load. */ if (state == SPA_LOAD_RECOVER) { VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); } if (locked) { spa->spa_last_open_failed = 0; spa->spa_last_ubsync_txg = 0; spa->spa_load_txg = 0; mutex_exit(&spa_namespace_lock); #ifdef __FreeBSD__ #ifdef _KERNEL if (firstopen) zvol_create_minors(spa->spa_name); #endif #endif } *spapp = spa; return (0); } int spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, nvlist_t **config) { return (spa_open_common(name, spapp, tag, policy, config)); } int spa_open(const char *name, spa_t **spapp, void *tag) { return (spa_open_common(name, spapp, tag, NULL, NULL)); } /* * Lookup the given spa_t, incrementing the inject count in the process, * preventing it from being exported or destroyed. */ spa_t * spa_inject_addref(char *name) { spa_t *spa; mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); return (NULL); } spa->spa_inject_ref++; mutex_exit(&spa_namespace_lock); return (spa); } void spa_inject_delref(spa_t *spa) { mutex_enter(&spa_namespace_lock); spa->spa_inject_ref--; mutex_exit(&spa_namespace_lock); } /* * Add spares device information to the nvlist. 
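 *
 * The result is a ZPOOL_CONFIG_SPARES array attached to the config's
 * vdev tree, roughly (sketch):
 *
 *	ZPOOL_CONFIG_VDEV_TREE:
 *		...
 *		ZPOOL_CONFIG_SPARES = [
 *			{ ZPOOL_CONFIG_GUID = ...,
 *			  ZPOOL_CONFIG_VDEV_STATS = ... },
 *			... ]
 *
 * with the stats of any spare that is currently in use rewritten to
 * VDEV_STATE_CANT_OPEN / VDEV_AUX_SPARED, as done below.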
*/ static void spa_add_spares(spa_t *spa, nvlist_t *config) { nvlist_t **spares; uint_t i, nspares; nvlist_t *nvroot; uint64_t guid; vdev_stat_t *vs; uint_t vsc; uint64_t pool; ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); if (spa->spa_spares.sav_count == 0) return; VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); if (nspares != 0) { VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); /* * Go through and find any spares which have since been * repurposed as an active spare. If this is the case, update * their status appropriately. */ for (i = 0; i < nspares; i++) { VERIFY(nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID, &guid) == 0); if (spa_spare_exists(guid, &pool, NULL) && pool != 0ULL) { VERIFY(nvlist_lookup_uint64_array( spares[i], ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); vs->vs_state = VDEV_STATE_CANT_OPEN; vs->vs_aux = VDEV_AUX_SPARED; } } } } /* * Add l2cache device information to the nvlist, including vdev stats. */ static void spa_add_l2cache(spa_t *spa, nvlist_t *config) { nvlist_t **l2cache; uint_t i, j, nl2cache; nvlist_t *nvroot; uint64_t guid; vdev_t *vd; vdev_stat_t *vs; uint_t vsc; ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); if (spa->spa_l2cache.sav_count == 0) return; VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); if (nl2cache != 0) { VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); /* * Update level 2 cache device stats. */ for (i = 0; i < nl2cache; i++) { VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, &guid) == 0); vd = NULL; for (j = 0; j < spa->spa_l2cache.sav_count; j++) { if (guid == spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { vd = spa->spa_l2cache.sav_vdevs[j]; break; } } ASSERT(vd != NULL); VERIFY(nvlist_lookup_uint64_array(l2cache[i], ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); vdev_get_stats(vd, vs); } } } static void spa_add_feature_stats(spa_t *spa, nvlist_t *config) { nvlist_t *features; zap_cursor_t zc; zap_attribute_t za; ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0); /* We may be unable to read features if pool is suspended. 
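 *
 * Consumers find the result under ZPOOL_CONFIG_FEATURE_STATS in the
 * returned config; an illustrative sketch (the feature name here is
 * hypothetical):
 *
 *	nvlist_t *feat;
 *	uint64_t refcount;
 *
 *	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
 *	    &feat) == 0 &&
 *	    nvlist_lookup_uint64(feat, "com.example:feature",
 *	    &refcount) == 0)
 *		... refcount is the on-disk refcount for that feature ...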
*/ if (spa_suspended(spa)) goto out; if (spa->spa_feat_for_read_obj != 0) { for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_feat_for_read_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { ASSERT(za.za_integer_length == sizeof (uint64_t) && za.za_num_integers == 1); VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, za.za_first_integer)); } zap_cursor_fini(&zc); } if (spa->spa_feat_for_write_obj != 0) { for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_feat_for_write_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { ASSERT(za.za_integer_length == sizeof (uint64_t) && za.za_num_integers == 1); VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, za.za_first_integer)); } zap_cursor_fini(&zc); } out: VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, features) == 0); nvlist_free(features); } int spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) { int error; spa_t *spa; *config = NULL; error = spa_open_common(name, &spa, FTAG, NULL, config); if (spa != NULL) { /* * This still leaves a window of inconsistency where the spares * or l2cache devices could change and the config would be * self-inconsistent. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); if (*config != NULL) { uint64_t loadtimes[2]; loadtimes[0] = spa->spa_loaded_ts.tv_sec; loadtimes[1] = spa->spa_loaded_ts.tv_nsec; VERIFY(nvlist_add_uint64_array(*config, ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, spa_get_errlog_size(spa)) == 0); if (spa_suspended(spa)) VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_SUSPENDED, spa->spa_failmode) == 0); spa_add_spares(spa, *config); spa_add_l2cache(spa, *config); spa_add_feature_stats(spa, *config); } } /* * We want to get the alternate root even for faulted pools, so we cheat * and call spa_lookup() directly. */ if (altroot) { if (spa == NULL) { mutex_enter(&spa_namespace_lock); spa = spa_lookup(name); if (spa) spa_altroot(spa, altroot, buflen); else altroot[0] = '\0'; spa = NULL; mutex_exit(&spa_namespace_lock); } else { spa_altroot(spa, altroot, buflen); } } if (spa != NULL) { spa_config_exit(spa, SCL_CONFIG, FTAG); spa_close(spa, FTAG); } return (error); } /* * Validate that the auxiliary device array is well formed. We must have an * array of nvlists, each which describes a valid leaf vdev. If this is an * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be * specified, as long as they are well-formed. */ static int spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, spa_aux_vdev_t *sav, const char *config, uint64_t version, vdev_labeltype_t label) { nvlist_t **dev; uint_t i, ndev; vdev_t *vd; int error; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); /* * It's acceptable to have no devs specified. */ if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) return (0); if (ndev == 0) return (SET_ERROR(EINVAL)); /* * Make sure the pool is formatted with a version that supports this * device type. */ if (spa_version(spa) < version) return (SET_ERROR(ENOTSUP)); /* * Set the pending device list so we correctly handle device in-use * checking. */ sav->sav_pending = dev; sav->sav_npending = ndev; for (i = 0; i < ndev; i++) { if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, mode)) != 0) goto out; if (!vd->vdev_ops->vdev_op_leaf) { vdev_free(vd); error = SET_ERROR(EINVAL); goto out; } /* * The L2ARC currently only supports disk devices in * kernel context. 
		 * For user-level testing, we allow it.
		 */
#ifdef _KERNEL
		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
			error = SET_ERROR(ENOTBLK);
			vdev_free(vd);
			goto out;
		}
#endif
		vd->vdev_top = vd;

		if ((error = vdev_open(vd)) == 0 &&
		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
			    vd->vdev_guid) == 0);
		}

		vdev_free(vd);

		if (error &&
		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
			goto out;
		else
			error = 0;
	}

out:
	sav->sav_pending = NULL;
	sav->sav_npending = 0;
	return (error);
}

static int
spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
{
	int error;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
	    VDEV_LABEL_SPARE)) != 0) {
		return (error);
	}

	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
	    VDEV_LABEL_L2CACHE));
}

static void
spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
    const char *config)
{
	int i;

	if (sav->sav_config != NULL) {
		nvlist_t **olddevs;
		uint_t oldndevs;
		nvlist_t **newdevs;

		/*
		 * Generate new dev list by concatenating with the
		 * current dev list.
		 */
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
		    &olddevs, &oldndevs) == 0);
		newdevs = kmem_alloc(sizeof (void *) *
		    (ndevs + oldndevs), KM_SLEEP);
		for (i = 0; i < oldndevs; i++)
			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
			    KM_SLEEP) == 0);
		for (i = 0; i < ndevs; i++)
			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
			    KM_SLEEP) == 0);

		VERIFY(nvlist_remove(sav->sav_config, config,
		    DATA_TYPE_NVLIST_ARRAY) == 0);

		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
		    config, newdevs, ndevs + oldndevs) == 0);
		for (i = 0; i < oldndevs + ndevs; i++)
			nvlist_free(newdevs[i]);
		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
	} else {
		/*
		 * Generate a new dev list.
		 */
		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
		    devs, ndevs) == 0);
	}
}

/*
 * Stop and drop level 2 ARC devices.
 */
void
spa_l2cache_drop(spa_t *spa)
{
	vdev_t *vd;
	int i;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	for (i = 0; i < sav->sav_count; i++) {
		uint64_t pool;

		vd = sav->sav_vdevs[i];
		ASSERT(vd != NULL);

		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
		    pool != 0ULL && l2arc_vdev_present(vd))
			l2arc_remove_vdev(vd);
	}
}

/*
 * Pool Creation
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version, obj;
	boolean_t has_features;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (SET_ERROR(EEXIST));
	}

	/*
	 * Allocate a new spa_t structure.
*/ (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); spa = spa_add(pool, NULL, altroot); spa_activate(spa, spa_mode_global); if (props && (error = spa_prop_validate(spa, props))) { spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } has_features = B_FALSE; for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL; elem = nvlist_next_nvpair(props, elem)) { if (zpool_prop_feature(nvpair_name(elem))) has_features = B_TRUE; } if (has_features || nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { version = SPA_VERSION; } ASSERT(SPA_VERSION_IS_SUPPORTED(version)); spa->spa_first_txg = txg; spa->spa_uberblock.ub_txg = txg - 1; spa->spa_uberblock.ub_version = version; spa->spa_ubsync = spa->spa_uberblock; spa->spa_load_state = SPA_LOAD_CREATE; /* * Create "The Godfather" zio to hold all async IOs */ spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); for (int i = 0; i < max_ncpus; i++) { spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); } /* * Create the root vdev. */ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); ASSERT(error != 0 || rvd != NULL); ASSERT(error != 0 || spa->spa_root_vdev == rvd); if (error == 0 && !zfs_allocatable_devs(nvroot)) error = SET_ERROR(EINVAL); if (error == 0 && (error = vdev_create(rvd, txg, B_FALSE)) == 0 && (error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) == 0) { for (int c = 0; c < rvd->vdev_children; c++) { vdev_ashift_optimize(rvd->vdev_child[c]); vdev_metaslab_set_size(rvd->vdev_child[c]); vdev_expand(rvd->vdev_child[c], txg); } } spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } /* * Get the list of spares, if specified. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_spares.sav_sync = B_TRUE; } /* * Get the list of level 2 cache devices, if specified. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_l2cache.sav_sync = B_TRUE; } spa->spa_is_initializing = B_TRUE; spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); spa->spa_meta_objset = dp->dp_meta_objset; spa->spa_is_initializing = B_FALSE; /* * Create DDTs (dedup tables). */ ddt_create(spa); spa_update_dspace(spa); tx = dmu_tx_create_assigned(dp, txg); /* * Create the pool config object. 
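 *
 * The config object is a packed nvlist stored in the MOS and indexed
 * from the pool directory; at load time it is read back using the
 * same pattern seen in spa_load_impl() (sketch):
 *
 *	if (spa_dir_prop(spa, DMU_POOL_CONFIG,
 *	    &spa->spa_config_object) == 0)
 *		(void) load_nvlist(spa, spa->spa_config_object, &nvconfig);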
*/ spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { cmn_err(CE_PANIC, "failed to add pool config"); } if (spa_version(spa) >= SPA_VERSION_FEATURES) spa_feature_create_zap_objects(spa, tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, sizeof (uint64_t), 1, &version, tx) != 0) { cmn_err(CE_PANIC, "failed to add pool version"); } /* Newly created pools with the right version are always deflated. */ if (version >= SPA_VERSION_RAIDZ_DEFLATE) { spa->spa_deflate = TRUE; if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { cmn_err(CE_PANIC, "failed to add deflate"); } } /* * Create the deferred-free bpobj. Turn off compression * because sync-to-convergence takes longer if the blocksize * keeps changing. */ obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); dmu_object_set_compress(spa->spa_meta_objset, obj, ZIO_COMPRESS_OFF, tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, sizeof (uint64_t), 1, &obj, tx) != 0) { cmn_err(CE_PANIC, "failed to add bpobj"); } VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj)); /* * Create the pool's history object. */ if (version >= SPA_VERSION_ZPOOL_HISTORY) spa_history_create_obj(spa, tx); /* * Generate some random noise for salted checksums to operate on. */ (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, sizeof (spa->spa_cksum_salt.zcs_bytes)); /* * Set pool properties. */ spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); if (props != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_sync_props(props, tx); } dmu_tx_commit(tx); spa->spa_sync_on = B_TRUE; txg_sync_start(spa->spa_dsl_pool); /* * We explicitly wait for the first transaction to complete so that our * bean counters are appropriately updated. */ txg_wait_synced(spa->spa_dsl_pool, txg); spa_config_sync(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, ESC_ZFS_POOL_CREATE); spa_history_log_version(spa, "create"); /* * Don't count references from objsets that are already closed * and are making their way through the eviction process. */ spa_evicting_os_wait(spa); spa->spa_minref = refcount_count(&spa->spa_refcount); spa->spa_load_state = SPA_LOAD_NONE; mutex_exit(&spa_namespace_lock); return (0); } #ifdef _KERNEL #ifdef illumos /* * Get the root pool information from the root disk, then import the root pool * during the system boot up time. */ extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); static nvlist_t * spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) { nvlist_t *config; nvlist_t *nvtop, *nvroot; uint64_t pgid; if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) return (NULL); /* * Add this top-level vdev to the child array. 
*/ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); /* * Put this pool's top-level vdevs into a root vdev. */ VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &nvtop, 1) == 0); /* * Replace the existing vdev_tree with the new root vdev in * this pool's configuration (remove the old, add the new). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); return (config); } /* * Walk the vdev tree and see if we can find a device with "better" * configuration. A configuration is "better" if the label on that * device has a more recent txg. */ static void spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) { for (int c = 0; c < vd->vdev_children; c++) spa_alt_rootvdev(vd->vdev_child[c], avd, txg); if (vd->vdev_ops->vdev_op_leaf) { nvlist_t *label; uint64_t label_txg; if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, &label) != 0) return; VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, &label_txg) == 0); /* * Do we have a better boot device? */ if (label_txg > *txg) { *txg = label_txg; *avd = vd; } nvlist_free(label); } } /* * Import a root pool. * * For x86. devpath_list will consist of devid and/or physpath name of * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). * The GRUB "findroot" command will return the vdev we should boot. * * For Sparc, devpath_list consists the physpath name of the booting device * no matter the rootpool is a single device pool or a mirrored pool. * e.g. * "/pci@1f,0/ide@d/disk@0,0:a" */ int spa_import_rootpool(char *devpath, char *devid) { spa_t *spa; vdev_t *rvd, *bvd, *avd = NULL; nvlist_t *config, *nvtop; uint64_t guid, txg; char *pname; int error; /* * Read the label from the boot device and generate a configuration. */ config = spa_generate_rootconf(devpath, devid, &guid); #if defined(_OBP) && defined(_KERNEL) if (config == NULL) { if (strstr(devpath, "/iscsi/ssd") != NULL) { /* iscsi boot */ get_iscsi_bootpath_phy(devpath); config = spa_generate_rootconf(devpath, devid, &guid); } } #endif if (config == NULL) { cmn_err(CE_NOTE, "Cannot read the pool label from '%s'", devpath); return (SET_ERROR(EIO)); } VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(pname)) != NULL) { /* * Remove the existing root pool from the namespace so that we * can replace it with the correct config we just read in. */ spa_remove(spa); } spa = spa_add(pname, config, NULL); spa->spa_is_root = B_TRUE; spa->spa_import_flags = ZFS_IMPORT_VERBATIM; /* * Build up a vdev tree based on the boot device's label config. 
*/ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", pname); return (error); } /* * Get the boot vdev. */ if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", (u_longlong_t)guid); error = SET_ERROR(ENOENT); goto out; } /* * Determine if there is a better boot device. */ avd = bvd; spa_alt_rootvdev(rvd, &avd, &txg); if (avd != bvd) { cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " "try booting from '%s'", avd->vdev_path); error = SET_ERROR(EINVAL); goto out; } /* * If the boot device is part of a spare vdev then ensure that * we're booting off the active spare. */ if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && !bvd->vdev_isspare) { cmn_err(CE_NOTE, "The boot device is currently spared. Please " "try booting from '%s'", bvd->vdev_parent-> vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); error = SET_ERROR(EINVAL); goto out; } error = 0; out: spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); spa_config_exit(spa, SCL_ALL, FTAG); mutex_exit(&spa_namespace_lock); nvlist_free(config); return (error); } #else /* !illumos */ extern int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs, uint64_t *count); static nvlist_t * spa_generate_rootconf(const char *name) { nvlist_t **configs, **tops; nvlist_t *config; nvlist_t *best_cfg, *nvtop, *nvroot; uint64_t *holes; uint64_t best_txg; uint64_t nchildren; uint64_t pgid; uint64_t count; uint64_t i; uint_t nholes; if (vdev_geom_read_pool_label(name, &configs, &count) != 0) return (NULL); ASSERT3U(count, !=, 0); best_txg = 0; for (i = 0; i < count; i++) { uint64_t txg; VERIFY(nvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG, &txg) == 0); if (txg > best_txg) { best_txg = txg; best_cfg = configs[i]; } } nchildren = 1; nvlist_lookup_uint64(best_cfg, ZPOOL_CONFIG_VDEV_CHILDREN, &nchildren); holes = NULL; nvlist_lookup_uint64_array(best_cfg, ZPOOL_CONFIG_HOLE_ARRAY, &holes, &nholes); tops = kmem_zalloc(nchildren * sizeof(void *), KM_SLEEP); for (i = 0; i < nchildren; i++) { if (i >= count) break; if (configs[i] == NULL) continue; VERIFY(nvlist_lookup_nvlist(configs[i], ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); nvlist_dup(nvtop, &tops[i], KM_SLEEP); } for (i = 0; holes != NULL && i < nholes; i++) { if (i >= nchildren) continue; if (tops[holes[i]] != NULL) continue; nvlist_alloc(&tops[holes[i]], NV_UNIQUE_NAME, KM_SLEEP); VERIFY(nvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID, holes[i]) == 0); VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID, 0) == 0); } for (i = 0; i < nchildren; i++) { if (tops[i] != NULL) continue; nvlist_alloc(&tops[i], NV_UNIQUE_NAME, KM_SLEEP); VERIFY(nvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE, VDEV_TYPE_MISSING) == 0); VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID, i) == 0); VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID, 0) == 0); } /* * Create pool config based on the best vdev config. */ nvlist_dup(best_cfg, &config, KM_SLEEP); /* * Put this pool's top-level vdevs into a root vdev. 
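 *
 * That is, the label's vdev_tree (a single top-level vdev) is wrapped
 * under a synthesized "root" node, with holes and absent children
 * padded out; roughly (sketch):
 *
 *	before:	vdev_tree = { type = "disk" | "mirror" | ..., guid, ... }
 *	after:	vdev_tree = { type = "root", id = 0, guid = <pool guid>,
 *			children = [ tops[0] ... tops[nchildren - 1] ] }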
*/ VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) == 0); VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, tops, nchildren) == 0); /* * Replace the existing vdev_tree with the new root vdev in * this pool's configuration (remove the old, add the new). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); /* * Drop vdev config elements that should not be present at pool level. */ nvlist_remove(config, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64); nvlist_remove(config, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64); for (i = 0; i < count; i++) nvlist_free(configs[i]); kmem_free(configs, count * sizeof(void *)); for (i = 0; i < nchildren; i++) nvlist_free(tops[i]); kmem_free(tops, nchildren * sizeof(void *)); nvlist_free(nvroot); return (config); } int spa_import_rootpool(const char *name) { spa_t *spa; vdev_t *rvd, *bvd, *avd = NULL; nvlist_t *config, *nvtop; uint64_t txg; char *pname; int error; /* * Read the label from the boot device and generate a configuration. */ config = spa_generate_rootconf(name); mutex_enter(&spa_namespace_lock); if (config != NULL) { VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0 && strcmp(name, pname) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); if ((spa = spa_lookup(pname)) != NULL) { /* * Remove the existing root pool from the namespace so * that we can replace it with the correct config * we just read in. */ spa_remove(spa); } spa = spa_add(pname, config, NULL); /* * Set spa_ubsync.ub_version as it can be used in vdev_alloc() * via spa_version(). */ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &spa->spa_ubsync.ub_version) != 0) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; } else if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Cannot find the pool label for '%s'", name); return (EIO); } else { VERIFY(nvlist_dup(spa->spa_config, &config, KM_SLEEP) == 0); } spa->spa_is_root = B_TRUE; spa->spa_import_flags = ZFS_IMPORT_VERBATIM; /* * Build up a vdev tree based on the boot device's label config. */ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", pname); return (error); } spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); spa_config_exit(spa, SCL_ALL, FTAG); mutex_exit(&spa_namespace_lock); nvlist_free(config); return (0); } #endif /* illumos */ #endif /* _KERNEL */ /* * Import a non-root pool into the system. */ int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) { spa_t *spa; char *altroot = NULL; spa_load_state_t state = SPA_LOAD_IMPORT; zpool_rewind_policy_t policy; uint64_t mode = spa_mode_global; uint64_t readonly = B_FALSE; int error; nvlist_t *nvroot; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; /* * If a pool with this name exists, return failure. 
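 *
 * A minimal caller looks like the following (sketch; 'config' as
 * produced by a device scan or tryimport, pool name hypothetical):
 *
 *	error = spa_import("tank", config, NULL, ZFS_IMPORT_NORMAL);
 *
 * Passing ZFS_IMPORT_VERBATIM in 'flags' instead takes the
 * short-circuit path below, inserting the pool into the namespace
 * without loading it.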
*/ mutex_enter(&spa_namespace_lock); if (spa_lookup(pool) != NULL) { mutex_exit(&spa_namespace_lock); return (SET_ERROR(EEXIST)); } /* * Create and initialize the spa structure. */ (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); if (readonly) mode = FREAD; spa = spa_add(pool, config, altroot); spa->spa_import_flags = flags; /* * Verbatim import - Take a pool and insert it into the namespace * as if it had been loaded at boot. */ if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { if (props != NULL) spa_configfile_set(spa, props, B_FALSE); spa_config_sync(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, ESC_ZFS_POOL_IMPORT); mutex_exit(&spa_namespace_lock); return (0); } spa_activate(spa, mode); /* * Don't start async tasks until we know everything is healthy. */ spa_async_suspend(spa); zpool_get_rewind_policy(config, &policy); if (policy.zrp_request & ZPOOL_DO_REWIND) state = SPA_LOAD_RECOVER; /* * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig * because the user-supplied config is actually the one to trust when * doing an import. */ if (state != SPA_LOAD_RECOVER) spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg, policy.zrp_request); /* * Propagate anything learned while loading the pool and pass it * back to caller (i.e. rewind info, missing devices, etc). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * Toss any existing sparelist, as it doesn't have any validity * anymore, and conflicts with spa_has_spare(). */ if (spa->spa_spares.sav_config) { nvlist_free(spa->spa_spares.sav_config); spa->spa_spares.sav_config = NULL; spa_load_spares(spa); } if (spa->spa_l2cache.sav_config) { nvlist_free(spa->spa_l2cache.sav_config); spa->spa_l2cache.sav_config = NULL; spa_load_l2cache(spa); } VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (error == 0) error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE); if (error == 0) error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_L2CACHE); spa_config_exit(spa, SCL_ALL, FTAG); if (props != NULL) spa_configfile_set(spa, props, B_FALSE); if (error != 0 || (props && spa_writeable(spa) && (error = spa_prop_set(spa, props)))) { spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } spa_async_resume(spa); /* * Override any spares and level 2 cache devices as specified by * the user, as these may have correct device names/devids, etc. 
*/ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { if (spa->spa_spares.sav_config) VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); else VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_spares.sav_sync = B_TRUE; } if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { if (spa->spa_l2cache.sav_config) VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); else VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_l2cache.sav_sync = B_TRUE; } /* * Check for any removed devices. */ if (spa->spa_autoreplace) { spa_aux_check_removed(&spa->spa_spares); spa_aux_check_removed(&spa->spa_l2cache); } if (spa_writeable(spa)) { /* * Update the config cache to include the newly-imported pool. */ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); } /* * It's possible that the pool was expanded while it was exported. * We kick off an async task to handle this for us. */ spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); spa_history_log_version(spa, "import"); spa_event_notify(spa, NULL, ESC_ZFS_POOL_IMPORT); mutex_exit(&spa_namespace_lock); #ifdef __FreeBSD__ #ifdef _KERNEL zvol_create_minors(pool); #endif #endif return (0); } nvlist_t * spa_tryimport(nvlist_t *tryconfig) { nvlist_t *config = NULL; char *poolname; spa_t *spa; uint64_t state; int error; if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) return (NULL); if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) return (NULL); /* * Create and initialize the spa structure. */ mutex_enter(&spa_namespace_lock); spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); spa_activate(spa, FREAD); /* * Pass off the heavy lifting to spa_load(). * Pass TRUE for mosconfig because the user-supplied config * is actually the one to trust when doing an import. */ error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE); /* * If 'tryconfig' was at least parsable, return the current config. */ if (spa->spa_root_vdev != NULL) { config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, poolname) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, spa->spa_uberblock.ub_timestamp) == 0); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); /* * If the bootfs property exists on this pool then we * copy it out so that external consumers can tell which * pools are bootable. */ if ((!error || error == EEXIST) && spa->spa_bootfs) { char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); /* * We have to play games with the name since the * pool was opened as TRYIMPORT_NAME. 
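 *
 * For instance (names hypothetical): if the bootfs object resolves to
 * "$import/ROOT/default" and the pool's real name is "rpool", the
 * rewrite below yields "rpool/ROOT/default".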
*/ if (dsl_dsobj_to_dsname(spa_name(spa), spa->spa_bootfs, tmpname) == 0) { char *cp; char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); cp = strchr(tmpname, '/'); if (cp == NULL) { (void) strlcpy(dsname, tmpname, MAXPATHLEN); } else { (void) snprintf(dsname, MAXPATHLEN, "%s/%s", poolname, ++cp); } VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_BOOTFS, dsname) == 0); kmem_free(dsname, MAXPATHLEN); } kmem_free(tmpname, MAXPATHLEN); } /* * Add the list of hot spares and level 2 cache devices. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa_add_spares(spa, config); spa_add_l2cache(spa, config); spa_config_exit(spa, SCL_CONFIG, FTAG); } spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (config); } /* * Pool export/destroy * * The act of destroying or exporting a pool is very simple. We make sure there * is no more pending I/O and any references to the pool are gone. Then, we * update the pool state and sync all the labels to disk, removing the * configuration from the cache afterwards. If the 'hardforce' flag is set, then * we don't sync the labels or remove the configuration cache. */ static int spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, boolean_t force, boolean_t hardforce) { spa_t *spa; if (oldconfig) *oldconfig = NULL; if (!(spa_mode_global & FWRITE)) return (SET_ERROR(EROFS)); mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(pool)) == NULL) { mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } /* * Put a hold on the pool, drop the namespace lock, stop async tasks, * reacquire the namespace lock, and see if we can export. */ spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); spa_async_suspend(spa); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); /* * The pool will be in core if it's openable, * in which case we can modify its state. */ if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { /* * Objsets may be open only because they're dirty, so we * have to force it to sync before checking spa_refcnt. */ txg_wait_synced(spa->spa_dsl_pool, 0); spa_evicting_os_wait(spa); /* * A pool cannot be exported or destroyed if there are active * references. If we are resetting a pool, allow references by * fault injection handlers. */ if (!spa_refcount_zero(spa) || (spa->spa_inject_ref != 0 && new_state != POOL_STATE_UNINITIALIZED)) { spa_async_resume(spa); mutex_exit(&spa_namespace_lock); return (SET_ERROR(EBUSY)); } /* * A pool cannot be exported if it has an active shared spare. * This is to prevent other pools stealing the active spare * from an exported pool. At user's own will, such pool can * be forcedly exported. */ if (!force && new_state == POOL_STATE_EXPORTED && spa_has_active_shared_spare(spa)) { spa_async_resume(spa); mutex_exit(&spa_namespace_lock); return (SET_ERROR(EXDEV)); } /* * We want this to be reflected on every label, * so mark them all dirty. spa_unload() will do the * final sync that pushes these changes out. 
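 *
 * For reference, the entry points below map onto spa_export_common()
 * as follows:
 *
 *	spa_destroy()	new_state = POOL_STATE_DESTROYED
 *	spa_export()	new_state = POOL_STATE_EXPORTED
 *	spa_reset()	new_state = POOL_STATE_UNINITIALIZED
 *			(unload/reset only; the pool stays in the
 *			namespace)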
*/ if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa->spa_state = new_state; spa->spa_final_txg = spa_last_synced_txg(spa) + TXG_DEFER_SIZE + 1; vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); } } spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { spa_unload(spa); spa_deactivate(spa); } if (oldconfig && spa->spa_config) VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); if (new_state != POOL_STATE_UNINITIALIZED) { if (!hardforce) spa_config_sync(spa, B_TRUE, B_TRUE); spa_remove(spa); } mutex_exit(&spa_namespace_lock); return (0); } /* * Destroy a storage pool. */ int spa_destroy(char *pool) { return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, B_FALSE, B_FALSE)); } /* * Export a storage pool. */ int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, boolean_t hardforce) { return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, force, hardforce)); } /* * Similar to spa_export(), this unloads the spa_t without actually removing it * from the namespace in any way. */ int spa_reset(char *pool) { return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, B_FALSE, B_FALSE)); } /* * ========================================================================== * Device manipulation * ========================================================================== */ /* * Add a device to a storage pool. */ int spa_vdev_add(spa_t *spa, nvlist_t *nvroot) { uint64_t txg, id; int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *tvd; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, VDEV_ALLOC_ADD)) != 0) return (spa_vdev_exit(spa, NULL, txg, error)); spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) != 0) nspares = 0; if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) != 0) nl2cache = 0; if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) return (spa_vdev_exit(spa, vd, txg, EINVAL)); if (vd->vdev_children != 0 && (error = vdev_create(vd, txg, B_FALSE)) != 0) return (spa_vdev_exit(spa, vd, txg, error)); /* * We must validate the spares and l2cache devices after checking the * children. Otherwise, vdev_inuse() will blindly overwrite the spare. */ if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) return (spa_vdev_exit(spa, vd, txg, error)); /* * Transfer each new top-level vdev from vd to rvd. */ for (int c = 0; c < vd->vdev_children; c++) { /* * Set the vdev id to the first hole, if one exists. */ for (id = 0; id < rvd->vdev_children; id++) { if (rvd->vdev_child[id]->vdev_ishole) { vdev_free(rvd->vdev_child[id]); break; } } tvd = vd->vdev_child[c]; vdev_remove_child(vd, tvd); tvd->vdev_id = id; vdev_add_child(rvd, tvd); vdev_config_dirty(tvd); } if (nspares != 0) { spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, ZPOOL_CONFIG_SPARES); spa_load_spares(spa); spa->spa_spares.sav_sync = B_TRUE; } if (nl2cache != 0) { spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, ZPOOL_CONFIG_L2CACHE); spa_load_l2cache(spa); spa->spa_l2cache.sav_sync = B_TRUE; } /* * We have to be careful when adding new vdevs to an existing pool. 
* If other threads start allocating from these vdevs before we * sync the config cache, and we lose power, then upon reboot we may * fail to open the pool because there are DVAs that the config cache * can't translate. Therefore, we first add the vdevs without * initializing metaslabs; sync the config cache (via spa_vdev_exit()); * and then let spa_config_update() initialize the new metaslabs. * * spa_load() checks for added-but-not-initialized vdevs, so that * if we lose power at any point in this sequence, the remaining * steps will be completed the next time we load the pool. */ (void) spa_vdev_exit(spa, vd, txg, 0); mutex_enter(&spa_namespace_lock); spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); spa_event_notify(spa, NULL, ESC_ZFS_VDEV_ADD); mutex_exit(&spa_namespace_lock); return (0); } /* * Attach a device to a mirror. The arguments are the path to any device * in the mirror, and the nvroot for the new device. If the path specifies * a device that is not mirrored, we automatically insert the mirror vdev. * * If 'replacing' is specified, the new device is intended to replace the * existing device; in this case the two devices are made into their own * mirror using the 'replacing' vdev, which is functionally identical to * the mirror vdev (it actually reuses all the same ops) but has a few * extra rules: you can't attach to it after it's been created, and upon * completion of resilvering, the first disk (the one being replaced) * is automatically detached. */ int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) { uint64_t txg, dtl_max_txg; vdev_t *rvd = spa->spa_root_vdev; vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; vdev_ops_t *pvops; char *oldvdpath, *newvdpath; int newvd_isspare; int error; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); if (oldvd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); if (!oldvd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); pvd = oldvd->vdev_parent; if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, VDEV_ALLOC_ATTACH)) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); if (newrootvd->vdev_children != 1) return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); newvd = newrootvd->vdev_child[0]; if (!newvd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); if ((error = vdev_create(newrootvd, txg, replacing)) != 0) return (spa_vdev_exit(spa, newrootvd, txg, error)); /* * Spares can't replace logs */ if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); if (!replacing) { /* * For attach, the only allowable parent is a mirror or the root * vdev. */ if (pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_root_ops) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); pvops = &vdev_mirror_ops; } else { /* * Active hot spares can only be replaced by inactive hot * spares. */ if (pvd->vdev_ops == &vdev_spare_ops && oldvd->vdev_isspare && !spa_has_spare(spa, newvd->vdev_guid)) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); /* * If the source is a hot spare, and the parent isn't already a * spare, then we want to create a new hot spare. Otherwise, we * want to create a replacing vdev. The user is not allowed to * attach to a spared vdev child unless the 'isspare' state is * the same (spare replaces spare, non-spare replaces * non-spare). 
*/ if (pvd->vdev_ops == &vdev_replacing_ops && spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } else if (pvd->vdev_ops == &vdev_spare_ops && newvd->vdev_isspare != oldvd->vdev_isspare) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } if (newvd->vdev_isspare) pvops = &vdev_spare_ops; else pvops = &vdev_replacing_ops; } /* * Make sure the new device is big enough. */ if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); /* * The new device cannot have a higher alignment requirement * than the top-level vdev. */ if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); /* * If this is an in-place replacement, update oldvd's path and devid * to make it distinguishable from newvd, and unopenable from now on. */ if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { spa_strfree(oldvd->vdev_path); oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, KM_SLEEP); (void) sprintf(oldvd->vdev_path, "%s/%s", newvd->vdev_path, "old"); if (oldvd->vdev_devid != NULL) { spa_strfree(oldvd->vdev_devid); oldvd->vdev_devid = NULL; } } /* mark the device being resilvered */ newvd->vdev_resilver_txg = txg; /* * If the parent is not a mirror, or if we're replacing, insert the new * mirror/replacing/spare vdev above oldvd. */ if (pvd->vdev_ops != pvops) pvd = vdev_add_parent(oldvd, pvops); ASSERT(pvd->vdev_top->vdev_parent == rvd); ASSERT(pvd->vdev_ops == pvops); ASSERT(oldvd->vdev_parent == pvd); /* * Extract the new device from its root and add it to pvd. */ vdev_remove_child(newrootvd, newvd); newvd->vdev_id = pvd->vdev_children; newvd->vdev_crtxg = oldvd->vdev_crtxg; vdev_add_child(pvd, newvd); tvd = newvd->vdev_top; ASSERT(pvd->vdev_top == tvd); ASSERT(tvd->vdev_parent == rvd); vdev_config_dirty(tvd); /* * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account * for any dmu_sync-ed blocks. It will propagate upward when * spa_vdev_exit() calls vdev_dtl_reassess(). */ dtl_max_txg = txg + TXG_CONCURRENT_STATES; vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, dtl_max_txg - TXG_INITIAL); if (newvd->vdev_isspare) { spa_spare_activate(newvd); spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); } oldvdpath = spa_strdup(oldvd->vdev_path); newvdpath = spa_strdup(newvd->vdev_path); newvd_isspare = newvd->vdev_isspare; /* * Mark newvd's DTL dirty in this txg. */ vdev_dirty(tvd, VDD_DTL, newvd, txg); /* * Schedule the resilver to restart in the future. We do this to * ensure that dmu_sync-ed blocks have been stitched into the * respective datasets. */ dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); if (spa->spa_bootfs) spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH); spa_event_notify(spa, newvd, ESC_ZFS_VDEV_ATTACH); /* * Commit the config */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); spa_history_log_internal(spa, "vdev attach", NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? "spare in" : replacing ? "replace" : "attach", newvdpath, replacing ? "for" : "to", oldvdpath); spa_strfree(oldvdpath); spa_strfree(newvdpath); return (0); } /* * Detach a device from a mirror or replacing vdev. * * If 'replace_done' is specified, only detach if the parent * is a replacing vdev. 
*/ int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) { uint64_t txg; int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *pvd, *cvd, *tvd; boolean_t unspare = B_FALSE; uint64_t unspare_guid = 0; char *vdpath; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); vd = spa_lookup_by_guid(spa, guid, B_FALSE); if (vd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); if (!vd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); pvd = vd->vdev_parent; /* * If the parent/child relationship is not as expected, don't do it. * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing * vdev that's replacing B with C. The user's intent in replacing * is to go from M(A,B) to M(A,C). If the user decides to cancel * the replace by detaching C, the expected behavior is to end up * M(A,B). But suppose that right after deciding to detach C, * the replacement of B completes. We would have M(A,C), and then * ask to detach C, which would leave us with just A -- not what * the user wanted. To prevent this, we make sure that the * parent/child relationship hasn't changed -- in this example, * that C's parent is still the replacing vdev R. */ if (pvd->vdev_guid != pguid && pguid != 0) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); /* * Only 'replacing' or 'spare' vdevs can be replaced. */ if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && pvd->vdev_ops != &vdev_spare_ops) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); ASSERT(pvd->vdev_ops != &vdev_spare_ops || spa_version(spa) >= SPA_VERSION_SPARES); /* * Only mirror, replacing, and spare vdevs support detach. */ if (pvd->vdev_ops != &vdev_replacing_ops && pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_spare_ops) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); /* * If this device has the only valid copy of some data, * we cannot safely detach it. */ if (vdev_dtl_required(vd)) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); ASSERT(pvd->vdev_children >= 2); /* * If we are detaching the second disk from a replacing vdev, then * check to see if we changed the original vdev's path to have "/old" * at the end in spa_vdev_attach(). If so, undo that change now. */ if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && vd->vdev_path != NULL) { size_t len = strlen(vd->vdev_path); for (int c = 0; c < pvd->vdev_children; c++) { cvd = pvd->vdev_child[c]; if (cvd == vd || cvd->vdev_path == NULL) continue; if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && strcmp(cvd->vdev_path + len, "/old") == 0) { spa_strfree(cvd->vdev_path); cvd->vdev_path = spa_strdup(vd->vdev_path); break; } } } /* * If we are detaching the original disk from a spare, then it implies * that the spare should become a real disk, and be removed from the * active spare list for the pool. */ if (pvd->vdev_ops == &vdev_spare_ops && vd->vdev_id == 0 && pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) unspare = B_TRUE; /* * Erase the disk labels so the disk can be used for other things. * This must be done after all other error cases are handled, * but before we disembowel vd (so we can still do I/O to it). * But if we can't do it, don't treat the error as fatal -- * it may be that the unwritability of the disk is the reason * it's being detached! */ error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); /* * Remove vd from its parent and compact the parent's children. */ vdev_remove_child(pvd, vd); vdev_compact_children(pvd); /* * Remember one of the remaining children so we can get tvd below. 
*/ cvd = pvd->vdev_child[pvd->vdev_children - 1]; /* * If we need to remove the remaining child from the list of hot spares, * do it now, marking the vdev as no longer a spare in the process. * We must do this before vdev_remove_parent(), because that can * change the GUID if it creates a new toplevel GUID. For a similar * reason, we must remove the spare now, in the same txg as the detach; * otherwise someone could attach a new sibling, change the GUID, and * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. */ if (unspare) { ASSERT(cvd->vdev_isspare); spa_spare_remove(cvd); unspare_guid = cvd->vdev_guid; (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); cvd->vdev_unspare = B_TRUE; } /* * If the parent mirror/replacing vdev only has one child, * the parent is no longer needed. Remove it from the tree. */ if (pvd->vdev_children == 1) { if (pvd->vdev_ops == &vdev_spare_ops) cvd->vdev_unspare = B_FALSE; vdev_remove_parent(cvd); } /* * We don't set tvd until now because the parent we just removed * may have been the previous top-level vdev. */ tvd = cvd->vdev_top; ASSERT(tvd->vdev_parent == rvd); /* * Reevaluate the parent vdev state. */ vdev_propagate_state(cvd); /* * If the 'autoexpand' property is set on the pool then automatically * try to expand the size of the pool. For example if the device we * just detached was smaller than the others, it may be possible to * add metaslabs (i.e. grow the pool). We need to reopen the vdev * first so that we can obtain the updated sizes of the leaf vdevs. */ if (spa->spa_autoexpand) { vdev_reopen(tvd); vdev_expand(tvd, txg); } vdev_config_dirty(tvd); /* * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that * vd->vdev_detached is set and free vd's DTL object in syncing context. * But first make sure we're not on any *other* txg's DTL list, to * prevent vd from being accessed after it's freed. */ vdpath = spa_strdup(vd->vdev_path); for (int t = 0; t < TXG_SIZE; t++) (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); vd->vdev_detached = B_TRUE; vdev_dirty(tvd, VDD_DTL, vd, txg); spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); /* hang on to the spa before we release the lock */ spa_open_ref(spa, FTAG); error = spa_vdev_exit(spa, vd, txg, 0); spa_history_log_internal(spa, "detach", NULL, "vdev=%s", vdpath); spa_strfree(vdpath); /* * If this was the removal of the original device in a hot spare vdev, * then we want to go through and remove the device from the hot spare * list of every other pool. */ if (unspare) { spa_t *altspa = NULL; mutex_enter(&spa_namespace_lock); while ((altspa = spa_next(altspa)) != NULL) { if (altspa->spa_state != POOL_STATE_ACTIVE || altspa == spa) continue; spa_open_ref(altspa, FTAG); mutex_exit(&spa_namespace_lock); (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); mutex_enter(&spa_namespace_lock); spa_close(altspa, FTAG); } mutex_exit(&spa_namespace_lock); /* search the rest of the vdevs for spares to remove */ spa_vdev_resilver_done(spa); } /* all done with the spa; OK to release */ mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); mutex_exit(&spa_namespace_lock); return (error); } /* * Split a set of devices from their mirrors, and create a new pool from them. 
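* The split proceeds in stages: validate the request, offline one side * of each mirror, assemble the new pool from those disks, and finally * detach them from the original pool.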
*/ int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, nvlist_t *props, boolean_t exp) { int error = 0; uint64_t txg, *glist; spa_t *newspa; uint_t c, children, lastlog; nvlist_t **child, *nvl, *tmp; dmu_tx_t *tx; char *altroot = NULL; vdev_t *rvd, **vml = NULL; /* vdev modify list */ boolean_t activate_slog; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); /* clear the log and flush everything up to now */ activate_slog = spa_passivate_log(spa); (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); error = spa_offline_log(spa); txg = spa_vdev_config_enter(spa); if (activate_slog) spa_activate_log(spa); if (error != 0) return (spa_vdev_exit(spa, NULL, txg, error)); /* check new spa name before going any further */ if (spa_lookup(newname) != NULL) return (spa_vdev_exit(spa, NULL, txg, EEXIST)); /* * scan through all the children to ensure they're all mirrors */ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); /* first, check to ensure we've got the right child count */ rvd = spa->spa_root_vdev; lastlog = 0; for (c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; /* don't count the holes & logs as children */ if (vd->vdev_islog || vd->vdev_ishole) { if (lastlog == 0) lastlog = c; continue; } lastlog = 0; } if (children != (lastlog != 0 ? lastlog : rvd->vdev_children)) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); /* next, ensure no spare or cache devices are part of the split */ if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); /* then, loop over each vdev and validate it */ for (c = 0; c < children; c++) { uint64_t is_hole = 0; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &is_hole); if (is_hole != 0) { if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || spa->spa_root_vdev->vdev_child[c]->vdev_islog) { continue; } else { error = SET_ERROR(EINVAL); break; } } /* which disk is going to be split? 
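* Each child of the supplied split config must name an existing leaf * vdev by GUID; the GUID is looked up and vetted below.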
*/ if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, &glist[c]) != 0) { error = SET_ERROR(EINVAL); break; } /* look it up in the spa */ vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); if (vml[c] == NULL) { error = SET_ERROR(ENODEV); break; } /* make sure there's nothing stopping the split */ if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || vml[c]->vdev_islog || vml[c]->vdev_ishole || vml[c]->vdev_isspare || vml[c]->vdev_isl2cache || !vdev_writeable(vml[c]) || vml[c]->vdev_children != 0 || vml[c]->vdev_state != VDEV_STATE_HEALTHY || c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { error = SET_ERROR(EINVAL); break; } if (vdev_dtl_required(vml[c])) { error = SET_ERROR(EBUSY); break; } /* we need certain info from the top level */ VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, vml[c]->vdev_top->vdev_ms_array) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, vml[c]->vdev_top->vdev_ms_shift) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, vml[c]->vdev_top->vdev_asize) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, vml[c]->vdev_top->vdev_ashift) == 0); /* transfer per-vdev ZAPs */ ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0); VERIFY0(nvlist_add_uint64(child[c], ZPOOL_CONFIG_VDEV_LEAF_ZAP, vml[c]->vdev_leaf_zap)); ASSERT3U(vml[c]->vdev_top->vdev_top_zap, !=, 0); VERIFY0(nvlist_add_uint64(child[c], ZPOOL_CONFIG_VDEV_TOP_ZAP, vml[c]->vdev_parent->vdev_top_zap)); } if (error != 0) { kmem_free(vml, children * sizeof (vdev_t *)); kmem_free(glist, children * sizeof (uint64_t)); return (spa_vdev_exit(spa, NULL, txg, error)); } /* stop writers from using the disks */ for (c = 0; c < children; c++) { if (vml[c] != NULL) vml[c]->vdev_offline = B_TRUE; } vdev_reopen(spa->spa_root_vdev); /* * Temporarily record the splitting vdevs in the spa config. This * will disappear once the config is regenerated. */ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, glist, children) == 0); kmem_free(glist, children * sizeof (uint64_t)); mutex_enter(&spa->spa_props_lock); VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, nvl) == 0); mutex_exit(&spa->spa_props_lock); spa->spa_config_splitting = nvl; vdev_config_dirty(spa->spa_root_vdev); /* configure and create the new pool */ VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, exp ? 
POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, spa->spa_config_txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_generate_guid(NULL)) == 0); VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); /* add the new pool to the namespace */ newspa = spa_add(newname, config, altroot); newspa->spa_avz_action = AVZ_ACTION_REBUILD; newspa->spa_config_txg = spa->spa_config_txg; spa_set_log_state(newspa, SPA_LOG_CLEAR); /* release the spa config lock, retaining the namespace lock */ spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 1); spa_activate(newspa, spa_mode_global); spa_async_suspend(newspa); #ifndef illumos /* mark that we are creating new spa by splitting */ newspa->spa_splitting_newspa = B_TRUE; #endif /* create the new pool from the disks of the original pool */ error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); #ifndef illumos newspa->spa_splitting_newspa = B_FALSE; #endif if (error) goto out; /* if that worked, generate a real config for the new pool */ if (newspa->spa_root_vdev != NULL) { VERIFY(nvlist_alloc(&newspa->spa_config_splitting, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, B_TRUE)); } /* set the props */ if (props != NULL) { spa_configfile_set(newspa, props, B_FALSE); error = spa_prop_set(newspa, props); if (error) goto out; } /* flush everything */ txg = spa_vdev_config_enter(newspa); vdev_config_dirty(newspa->spa_root_vdev); (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 2); spa_async_resume(newspa); /* finally, update the original pool's config */ txg = spa_vdev_config_enter(spa); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); error = dmu_tx_assign(tx, TXG_WAIT); if (error != 0) dmu_tx_abort(tx); for (c = 0; c < children; c++) { if (vml[c] != NULL) { vdev_split(vml[c]); if (error == 0) spa_history_log_internal(spa, "detach", tx, "vdev=%s", vml[c]->vdev_path); vdev_free(vml[c]); } } spa->spa_avz_action = AVZ_ACTION_REBUILD; vdev_config_dirty(spa->spa_root_vdev); spa->spa_config_splitting = NULL; nvlist_free(nvl); if (error == 0) dmu_tx_commit(tx); (void) spa_vdev_exit(spa, NULL, txg, 0); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 3); /* split is complete; log a history record */ spa_history_log_internal(newspa, "split", NULL, "from pool %s", spa_name(spa)); kmem_free(vml, children * sizeof (vdev_t *)); /* if we're not going to mount the filesystems in userland, export */ if (exp) error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, B_FALSE, B_FALSE); return (error); out: spa_unload(newspa); spa_deactivate(newspa); spa_remove(newspa); txg = spa_vdev_config_enter(spa); /* re-online all offlined disks */ for (c = 0; c < children; c++) { if (vml[c] != NULL) vml[c]->vdev_offline = B_FALSE; } vdev_reopen(spa->spa_root_vdev); nvlist_free(spa->spa_config_splitting); spa->spa_config_splitting = NULL; (void) spa_vdev_exit(spa, NULL, txg, error); kmem_free(vml, children * sizeof (vdev_t *)); return (error); } static nvlist_t * spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, 
uint64_t target_guid) { for (int i = 0; i < count; i++) { uint64_t guid; VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, &guid) == 0); if (guid == target_guid) return (nvpp[i]); } return (NULL); } static void spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, nvlist_t *dev_to_remove) { nvlist_t **newdev = NULL; if (count > 1) newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); for (int i = 0, j = 0; i < count; i++) { if (dev[i] == dev_to_remove) continue; VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); } VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); for (int i = 0; i < count - 1; i++) nvlist_free(newdev[i]); if (count > 1) kmem_free(newdev, (count - 1) * sizeof (void *)); } /* * Evacuate the device. */ static int spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) { uint64_t txg; int error = 0; ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); ASSERT(vd == vd->vdev_top); /* * Evacuate the device. We don't hold the config lock as writer * since we need to do I/O but we do keep the * spa_namespace_lock held. Once this completes the device * should no longer have any blocks allocated on it. */ if (vd->vdev_islog) { if (vd->vdev_stat.vs_alloc != 0) error = spa_offline_log(spa); } else { error = SET_ERROR(ENOTSUP); } if (error) return (error); /* * The evacuation succeeded. Remove any remaining MOS metadata * associated with this vdev, and wait for these changes to sync. */ ASSERT0(vd->vdev_stat.vs_alloc); txg = spa_vdev_config_enter(spa); vd->vdev_removing = B_TRUE; vdev_dirty_leaves(vd, VDD_DTL, txg); vdev_config_dirty(vd); spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); return (0); } /* * Complete the removal by cleaning up the namespace. */ static void spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) { vdev_t *rvd = spa->spa_root_vdev; uint64_t id = vd->vdev_id; boolean_t last_vdev = (id == (rvd->vdev_children - 1)); ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ASSERT(vd == vd->vdev_top); /* * Only remove any devices which are empty. */ if (vd->vdev_stat.vs_alloc != 0) return; (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); if (list_link_active(&vd->vdev_state_dirty_node)) vdev_state_clean(vd); if (list_link_active(&vd->vdev_config_dirty_node)) vdev_config_clean(vd); vdev_free(vd); if (last_vdev) { vdev_compact_children(rvd); } else { vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); vdev_add_child(rvd, vd); } vdev_config_dirty(rvd); /* * Reassess the health of our root vdev. */ vdev_reopen(rvd); } /* * Remove a device from the pool - * * Removing a device from the vdev namespace requires several steps * and can take a significant amount of time. As a result we use * the spa_vdev_config_[enter/exit] functions which allow us to * grab and release the spa_config_lock while still holding the namespace * lock. During each step the configuration is synced out. * * Currently, this supports removing only hot spares, slogs, and level 2 ARC * devices. 
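* A request to remove any other kind of vdev falls through to the * ENOTSUP case below.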
*/ int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) { vdev_t *vd; sysevent_t *ev = NULL; metaslab_group_t *mg; nvlist_t **spares, **l2cache, *nv; uint64_t txg = 0; uint_t nspares, nl2cache; int error = 0; boolean_t locked = MUTEX_HELD(&spa_namespace_lock); ASSERT(spa_writeable(spa)); if (!locked) txg = spa_vdev_enter(spa); vd = spa_lookup_by_guid(spa, guid, B_FALSE); if (spa->spa_spares.sav_vdevs != NULL && nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { /* * Only remove the hot spare if it's not currently in use * in this pool. */ if (vd == NULL || unspare) { if (vd == NULL) vd = spa_lookup_by_guid(spa, guid, B_TRUE); ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_AUX); spa_vdev_remove_aux(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares, nv); spa_load_spares(spa); spa->spa_spares.sav_sync = B_TRUE; } else { error = SET_ERROR(EBUSY); } } else if (spa->spa_l2cache.sav_vdevs != NULL && nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { /* * Cache devices can always be removed. */ vd = spa_lookup_by_guid(spa, guid, B_TRUE); ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_AUX); spa_vdev_remove_aux(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); spa_load_l2cache(spa); spa->spa_l2cache.sav_sync = B_TRUE; } else if (vd != NULL && vd->vdev_islog) { ASSERT(!locked); ASSERT(vd == vd->vdev_top); mg = vd->vdev_mg; /* * Stop allocating from this vdev. */ metaslab_group_passivate(mg); /* * Wait for the youngest allocations and frees to sync, * and then wait for the deferral of those frees to finish. */ spa_vdev_config_exit(spa, NULL, txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); /* * Attempt to evacuate the vdev. */ error = spa_vdev_remove_evacuate(spa, vd); txg = spa_vdev_config_enter(spa); /* * If we couldn't evacuate the vdev, unwind. */ if (error) { metaslab_group_activate(mg); return (spa_vdev_exit(spa, NULL, txg, error)); } /* * Clean up the vdev namespace. */ ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_DEV); spa_vdev_remove_from_namespace(spa, vd); } else if (vd != NULL) { /* * Normal vdevs cannot be removed (yet). */ error = SET_ERROR(ENOTSUP); } else { /* * There is no vdev of any kind with the specified guid. */ error = SET_ERROR(ENOENT); } if (!locked) error = spa_vdev_exit(spa, NULL, txg, error); if (ev) spa_event_post(ev); return (error); } /* * Find any device that's done replacing, or a vdev marked 'unspare' that's * currently spared, so we can detach it. */ static vdev_t * spa_vdev_resilver_done_hunt(vdev_t *vd) { vdev_t *newvd, *oldvd; for (int c = 0; c < vd->vdev_children; c++) { oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); if (oldvd != NULL) return (oldvd); } /* * Check for a completed replacement. We always consider the first * vdev in the list to be the oldest vdev, and the last one to be * the newest (see spa_vdev_attach() for how that works). In * the case where the newest vdev is faulted, we will not automatically * remove it after a resilver completes. This is OK as it will require * user intervention to determine which disk the admin wishes to keep. 
*/ if (vd->vdev_ops == &vdev_replacing_ops) { ASSERT(vd->vdev_children > 1); newvd = vd->vdev_child[vd->vdev_children - 1]; oldvd = vd->vdev_child[0]; if (vdev_dtl_empty(newvd, DTL_MISSING) && vdev_dtl_empty(newvd, DTL_OUTAGE) && !vdev_dtl_required(oldvd)) return (oldvd); } /* * Check for a completed resilver with the 'unspare' flag set. */ if (vd->vdev_ops == &vdev_spare_ops) { vdev_t *first = vd->vdev_child[0]; vdev_t *last = vd->vdev_child[vd->vdev_children - 1]; if (last->vdev_unspare) { oldvd = first; newvd = last; } else if (first->vdev_unspare) { oldvd = last; newvd = first; } else { oldvd = NULL; } if (oldvd != NULL && vdev_dtl_empty(newvd, DTL_MISSING) && vdev_dtl_empty(newvd, DTL_OUTAGE) && !vdev_dtl_required(oldvd)) return (oldvd); /* * If there are more than two spares attached to a disk, * and those spares are not required, then we want to * attempt to free them up now so that they can be used * by other pools. Once we're back down to a single * disk+spare, we stop removing them. */ if (vd->vdev_children > 2) { newvd = vd->vdev_child[1]; if (newvd->vdev_isspare && last->vdev_isspare && vdev_dtl_empty(last, DTL_MISSING) && vdev_dtl_empty(last, DTL_OUTAGE) && !vdev_dtl_required(newvd)) return (newvd); } } return (NULL); } static void spa_vdev_resilver_done(spa_t *spa) { vdev_t *vd, *pvd, *ppvd; uint64_t guid, sguid, pguid, ppguid; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { pvd = vd->vdev_parent; ppvd = pvd->vdev_parent; guid = vd->vdev_guid; pguid = pvd->vdev_guid; ppguid = ppvd->vdev_guid; sguid = 0; /* * If we have just finished replacing a hot spared device, then * we need to detach the parent's first child (the original hot * spare) as well. */ if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 && ppvd->vdev_children == 2) { ASSERT(pvd->vdev_ops == &vdev_replacing_ops); sguid = ppvd->vdev_child[1]->vdev_guid; } ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd)); spa_config_exit(spa, SCL_ALL, FTAG); if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) return; if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) return; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); } spa_config_exit(spa, SCL_ALL, FTAG); } /* * Update the stored path or FRU for this vdev. */ int spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, boolean_t ispath) { vdev_t *vd; boolean_t sync = B_FALSE; ASSERT(spa_writeable(spa)); spa_vdev_state_enter(spa, SCL_ALL); if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) return (spa_vdev_state_exit(spa, NULL, ENOENT)); if (!vd->vdev_ops->vdev_op_leaf) return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); if (ispath) { if (strcmp(value, vd->vdev_path) != 0) { spa_strfree(vd->vdev_path); vd->vdev_path = spa_strdup(value); sync = B_TRUE; } } else { if (vd->vdev_fru == NULL) { vd->vdev_fru = spa_strdup(value); sync = B_TRUE; } else if (strcmp(value, vd->vdev_fru) != 0) { spa_strfree(vd->vdev_fru); vd->vdev_fru = spa_strdup(value); sync = B_TRUE; } } return (spa_vdev_state_exit(spa, sync ? 
vd : NULL, 0)); } int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) { return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); } int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) { return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); } /* * ========================================================================== * SPA Scanning * ========================================================================== */ int spa_scan_stop(spa_t *spa) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (dsl_scan_resilvering(spa->spa_dsl_pool)) return (SET_ERROR(EBUSY)); return (dsl_scan_cancel(spa->spa_dsl_pool)); } int spa_scan(spa_t *spa, pool_scan_func_t func) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) return (SET_ERROR(ENOTSUP)); /* * If a resilver was requested, but there is no DTL on a * writeable leaf device, we have nothing to do. */ if (func == POOL_SCAN_RESILVER && !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); return (0); } return (dsl_scan(spa->spa_dsl_pool, func)); } /* * ========================================================================== * SPA async task processing * ========================================================================== */ static void spa_async_remove(spa_t *spa, vdev_t *vd) { if (vd->vdev_remove_wanted) { vd->vdev_remove_wanted = B_FALSE; vd->vdev_delayed_close = B_FALSE; vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); /* * We want to clear the stats, but we don't want to do a full * vdev_clear() as that will cause us to throw away * degraded/faulted state as well as attempt to reopen the * device, all of which is a waste. */ vd->vdev_stat.vs_read_errors = 0; vd->vdev_stat.vs_write_errors = 0; vd->vdev_stat.vs_checksum_errors = 0; vdev_state_dirty(vd->vdev_top); /* Tell userspace that the vdev is gone. */ zfs_post_remove(spa, vd); } for (int c = 0; c < vd->vdev_children; c++) spa_async_remove(spa, vd->vdev_child[c]); } static void spa_async_probe(spa_t *spa, vdev_t *vd) { if (vd->vdev_probe_wanted) { vd->vdev_probe_wanted = B_FALSE; vdev_reopen(vd); /* vdev_open() does the actual probe */ } for (int c = 0; c < vd->vdev_children; c++) spa_async_probe(spa, vd->vdev_child[c]); } static void spa_async_autoexpand(spa_t *spa, vdev_t *vd) { sysevent_id_t eid; nvlist_t *attr; char *physpath; if (!spa->spa_autoexpand) return; for (int c = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; spa_async_autoexpand(spa, cvd); } if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) return; physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, ESC_ZFS_VDEV_AUTOEXPAND, attr, &eid, DDI_SLEEP); nvlist_free(attr); kmem_free(physpath, MAXPATHLEN); } static void spa_async_thread(void *arg) { spa_t *spa = arg; int tasks; ASSERT(spa->spa_sync_on); mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; spa->spa_async_tasks &= SPA_ASYNC_REMOVE; mutex_exit(&spa->spa_async_lock); /* * See if the config needs to be updated. 
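* A config update can grow the pool, so the usable space before and * after the update is compared below and any growth is logged to the * pool history.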
*/ if (tasks & SPA_ASYNC_CONFIG_UPDATE) { uint64_t old_space, new_space; mutex_enter(&spa_namespace_lock); old_space = metaslab_class_get_space(spa_normal_class(spa)); spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); new_space = metaslab_class_get_space(spa_normal_class(spa)); mutex_exit(&spa_namespace_lock); /* * If the pool grew as a result of the config update, * then log an internal history event. */ if (new_space != old_space) { spa_history_log_internal(spa, "vdev online", NULL, "pool '%s' size: %llu(+%llu)", spa_name(spa), new_space, new_space - old_space); } } if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa_async_autoexpand(spa, spa->spa_root_vdev); spa_config_exit(spa, SCL_CONFIG, FTAG); } /* * See if any devices need to be probed. */ if (tasks & SPA_ASYNC_PROBE) { spa_vdev_state_enter(spa, SCL_NONE); spa_async_probe(spa, spa->spa_root_vdev); (void) spa_vdev_state_exit(spa, NULL, 0); } /* * If any devices are done replacing, detach them. */ if (tasks & SPA_ASYNC_RESILVER_DONE) spa_vdev_resilver_done(spa); /* * Kick off a resilver. */ if (tasks & SPA_ASYNC_RESILVER) dsl_resilver_restart(spa->spa_dsl_pool, 0); /* * Let the world know that we're done. */ mutex_enter(&spa->spa_async_lock); spa->spa_async_thread = NULL; cv_broadcast(&spa->spa_async_cv); mutex_exit(&spa->spa_async_lock); thread_exit(); } static void spa_async_thread_vd(void *arg) { spa_t *spa = arg; int tasks; ASSERT(spa->spa_sync_on); mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; retry: spa->spa_async_tasks &= ~SPA_ASYNC_REMOVE; mutex_exit(&spa->spa_async_lock); /* * See if any devices need to be marked REMOVED. */ if (tasks & SPA_ASYNC_REMOVE) { spa_vdev_state_enter(spa, SCL_NONE); spa_async_remove(spa, spa->spa_root_vdev); for (int i = 0; i < spa->spa_l2cache.sav_count; i++) spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); for (int i = 0; i < spa->spa_spares.sav_count; i++) spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); (void) spa_vdev_state_exit(spa, NULL, 0); } /* * Let the world know that we're done. 
*/ mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; if ((tasks & SPA_ASYNC_REMOVE) != 0) goto retry; spa->spa_async_thread_vd = NULL; cv_broadcast(&spa->spa_async_cv); mutex_exit(&spa->spa_async_lock); thread_exit(); } void spa_async_suspend(spa_t *spa) { mutex_enter(&spa->spa_async_lock); spa->spa_async_suspended++; /* Wait until both async threads have exited. */ while (spa->spa_async_thread != NULL || spa->spa_async_thread_vd != NULL) cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); mutex_exit(&spa->spa_async_lock); } void spa_async_resume(spa_t *spa) { mutex_enter(&spa->spa_async_lock); ASSERT(spa->spa_async_suspended != 0); spa->spa_async_suspended--; mutex_exit(&spa->spa_async_lock); } static boolean_t spa_async_tasks_pending(spa_t *spa) { uint_t non_config_tasks; uint_t config_task; boolean_t config_task_suspended; non_config_tasks = spa->spa_async_tasks & ~(SPA_ASYNC_CONFIG_UPDATE | SPA_ASYNC_REMOVE); config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE; if (spa->spa_ccw_fail_time == 0) { config_task_suspended = B_FALSE; } else { config_task_suspended = (gethrtime() - spa->spa_ccw_fail_time) < (zfs_ccw_retry_interval * NANOSEC); } return (non_config_tasks || (config_task && !config_task_suspended)); } static void spa_async_dispatch(spa_t *spa) { mutex_enter(&spa->spa_async_lock); if (spa_async_tasks_pending(spa) && !spa->spa_async_suspended && spa->spa_async_thread == NULL && rootdir != NULL) spa->spa_async_thread = thread_create(NULL, 0, spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); } static void spa_async_dispatch_vd(spa_t *spa) { mutex_enter(&spa->spa_async_lock); if ((spa->spa_async_tasks & SPA_ASYNC_REMOVE) != 0 && !spa->spa_async_suspended && spa->spa_async_thread_vd == NULL && rootdir != NULL) spa->spa_async_thread_vd = thread_create(NULL, 0, spa_async_thread_vd, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); } void spa_async_request(spa_t *spa, int task) { zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task); mutex_enter(&spa->spa_async_lock); spa->spa_async_tasks |= task; mutex_exit(&spa->spa_async_lock); spa_async_dispatch_vd(spa); } /* * ========================================================================== * SPA syncing routines * ========================================================================== */ static int bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { bpobj_t *bpo = arg; bpobj_enqueue(bpo, bp, tx); return (0); } static int spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { zio_t *zio = arg; zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, BP_GET_PSIZE(bp), zio->io_flags)); return (0); } /* * Note: this simple function is not inlined to make it easier to dtrace the * amount of time spent syncing frees. */ static void spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx) { zio_t *zio = zio_root(spa, NULL, NULL, 0); bplist_iterate(bpl, spa_free_sync_cb, zio, tx); VERIFY(zio_wait(zio) == 0); } /* * Note: this simple function is not inlined to make it easier to dtrace the * amount of time spent syncing deferred frees.
*/ static void spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx) { zio_t *zio = zio_root(spa, NULL, NULL, 0); VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj, spa_free_sync_cb, zio, tx), ==, 0); VERIFY0(zio_wait(zio)); } static void spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) { char *packed = NULL; size_t bufsize; size_t nvsize = 0; dmu_buf_t *db; VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); /* * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration * information. This avoids the dmu_buf_will_dirty() path and * saves us a pre-read to get data we don't actually care about. */ bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE); packed = kmem_alloc(bufsize, KM_SLEEP); VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0); bzero(packed + nvsize, bufsize - nvsize); dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); kmem_free(packed, bufsize); VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = nvsize; dmu_buf_rele(db, FTAG); } static void spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, const char *config, const char *entry) { nvlist_t *nvroot; nvlist_t **list; int i; if (!sav->sav_sync) return; /* * Update the MOS nvlist describing the list of available devices. * spa_validate_aux() will have already made sure this nvlist is * valid and the vdevs are labeled appropriately. */ if (sav->sav_object == 0) { sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); VERIFY(zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, &sav->sav_object, tx) == 0); } VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (sav->sav_count == 0) { VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); } else { list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < sav->sav_count; i++) list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_FALSE, VDEV_CONFIG_L2CACHE); VERIFY(nvlist_add_nvlist_array(nvroot, config, list, sav->sav_count) == 0); for (i = 0; i < sav->sav_count; i++) nvlist_free(list[i]); kmem_free(list, sav->sav_count * sizeof (void *)); } spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); nvlist_free(nvroot); sav->sav_sync = B_FALSE; } /* * Rebuild spa's all-vdev ZAP from the vdev ZAPs indicated in each vdev_t. * The all-vdev ZAP must be empty. */ static void spa_avz_build(vdev_t *vd, uint64_t avz, dmu_tx_t *tx) { spa_t *spa = vd->vdev_spa; if (vd->vdev_top_zap != 0) { VERIFY0(zap_add_int(spa->spa_meta_objset, avz, vd->vdev_top_zap, tx)); } if (vd->vdev_leaf_zap != 0) { VERIFY0(zap_add_int(spa->spa_meta_objset, avz, vd->vdev_leaf_zap, tx)); } for (uint64_t i = 0; i < vd->vdev_children; i++) { spa_avz_build(vd->vdev_child[i], avz, tx); } } static void spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) { nvlist_t *config; /* * If the pool is being imported from a pre-per-vdev-ZAP version of ZFS, * its config may not be dirty but we still need to build per-vdev ZAPs. * Similarly, if the pool is being assembled (e.g. after a split), we * need to rebuild the AVZ although the config may not be dirty. 
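* The rebuild case below constructs a fresh AVZ, destroys any per-vdev * ZAPs listed only in the old one, and then installs the new AVZ in the * MOS directory object.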
*/ if (list_is_empty(&spa->spa_config_dirty_list) && spa->spa_avz_action == AVZ_ACTION_NONE) return; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); ASSERT(spa->spa_avz_action == AVZ_ACTION_NONE || spa->spa_all_vdev_zaps != 0); if (spa->spa_avz_action == AVZ_ACTION_REBUILD) { /* Make and build the new AVZ */ uint64_t new_avz = zap_create(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); spa_avz_build(spa->spa_root_vdev, new_avz, tx); /* Diff old AVZ with new one */ zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_all_vdev_zaps); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t vdzap = za.za_first_integer; if (zap_lookup_int(spa->spa_meta_objset, new_avz, vdzap) == ENOENT) { /* * ZAP is listed in old AVZ but not in new one; * destroy it */ VERIFY0(zap_destroy(spa->spa_meta_objset, vdzap, tx)); } } zap_cursor_fini(&zc); /* Destroy the old AVZ */ VERIFY0(zap_destroy(spa->spa_meta_objset, spa->spa_all_vdev_zaps, tx)); /* Replace the old AVZ in the dir obj with the new one */ VERIFY0(zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, sizeof (new_avz), 1, &new_avz, tx)); spa->spa_all_vdev_zaps = new_avz; } else if (spa->spa_avz_action == AVZ_ACTION_DESTROY) { zap_cursor_t zc; zap_attribute_t za; /* Walk through the AVZ and destroy all listed ZAPs */ for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_all_vdev_zaps); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t zap = za.za_first_integer; VERIFY0(zap_destroy(spa->spa_meta_objset, zap, tx)); } zap_cursor_fini(&zc); /* Destroy and unlink the AVZ itself */ VERIFY0(zap_destroy(spa->spa_meta_objset, spa->spa_all_vdev_zaps, tx)); VERIFY0(zap_remove(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx)); spa->spa_all_vdev_zaps = 0; } if (spa->spa_all_vdev_zaps == 0) { spa->spa_all_vdev_zaps = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx); } spa->spa_avz_action = AVZ_ACTION_NONE; /* Create ZAPs for vdevs that don't have them. */ vdev_construct_zaps(spa->spa_root_vdev, tx); config = spa_config_generate(spa, spa->spa_root_vdev, dmu_tx_get_txg(tx), B_FALSE); /* * If we're upgrading the spa version then make sure that * the config object gets updated with the correct version. */ if (spa->spa_ubsync.ub_version < spa->spa_uberblock.ub_version) fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa->spa_uberblock.ub_version); spa_config_exit(spa, SCL_STATE, FTAG); nvlist_free(spa->spa_config_syncing); spa->spa_config_syncing = config; spa_sync_nvlist(spa, spa->spa_config_object, config, tx); } static void spa_sync_version(void *arg, dmu_tx_t *tx) { uint64_t *versionp = arg; uint64_t version = *versionp; spa_t *spa = dmu_tx_pool(tx)->dp_spa; /* * Setting the version is special cased when first creating the pool. */ ASSERT(tx->tx_txg != TXG_INITIAL); ASSERT(SPA_VERSION_IS_SUPPORTED(version)); ASSERT(version >= spa_version(spa)); spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); spa_history_log_internal(spa, "set", tx, "version=%lld", version); } /* * Set zpool properties. 
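* Each nvpair is dispatched below: feature@ properties enable the * corresponding feature, a handful of non-persistent or already-synced * properties are special-cased, and everything else is written to the * pool properties ZAP object, with frequently used values mirrored in * the in-core spa_t.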
*/ static void spa_sync_props(void *arg, dmu_tx_t *tx) { nvlist_t *nvp = arg; spa_t *spa = dmu_tx_pool(tx)->dp_spa; objset_t *mos = spa->spa_meta_objset; nvpair_t *elem = NULL; mutex_enter(&spa->spa_props_lock); while ((elem = nvlist_next_nvpair(nvp, elem))) { uint64_t intval; char *strval, *fname; zpool_prop_t prop; const char *propname; zprop_type_t proptype; spa_feature_t fid; switch (prop = zpool_name_to_prop(nvpair_name(elem))) { case ZPROP_INVAL: /* * We checked this earlier in spa_prop_validate(). */ ASSERT(zpool_prop_feature(nvpair_name(elem))); fname = strchr(nvpair_name(elem), '@') + 1; VERIFY0(zfeature_lookup_name(fname, &fid)); spa_feature_enable(spa, fid, tx); spa_history_log_internal(spa, "set", tx, "%s=enabled", nvpair_name(elem)); break; case ZPOOL_PROP_VERSION: intval = fnvpair_value_uint64(elem); /* * The version is synced separately before other * properties and should be correct by now. */ ASSERT3U(spa_version(spa), >=, intval); break; case ZPOOL_PROP_ALTROOT: /* * 'altroot' is a non-persistent property. It should * have been set temporarily at creation or import time. */ ASSERT(spa->spa_root != NULL); break; case ZPOOL_PROP_READONLY: case ZPOOL_PROP_CACHEFILE: /* * 'readonly' and 'cachefile' are also non-persistent * properties. */ break; case ZPOOL_PROP_COMMENT: strval = fnvpair_value_string(elem); if (spa->spa_comment != NULL) spa_strfree(spa->spa_comment); spa->spa_comment = spa_strdup(strval); /* * We need to dirty the configuration on all the vdevs * so that their labels get updated. It's unnecessary * to do this for pool creation since the vdev's * configuration has already been dirtied. */ if (tx->tx_txg != TXG_INITIAL) vdev_config_dirty(spa->spa_root_vdev); spa_history_log_internal(spa, "set", tx, "%s=%s", nvpair_name(elem), strval); break; default: /* * Set pool property values in the poolprops mos object. */ if (spa->spa_pool_props_object == 0) { spa->spa_pool_props_object = zap_create_link(mos, DMU_OT_POOL_PROPS, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, tx); } /* normalize the property name */ propname = zpool_prop_to_name(prop); proptype = zpool_prop_get_type(prop); if (nvpair_type(elem) == DATA_TYPE_STRING) { ASSERT(proptype == PROP_TYPE_STRING); strval = fnvpair_value_string(elem); VERIFY0(zap_update(mos, spa->spa_pool_props_object, propname, 1, strlen(strval) + 1, strval, tx)); spa_history_log_internal(spa, "set", tx, "%s=%s", nvpair_name(elem), strval); } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { intval = fnvpair_value_uint64(elem); if (proptype == PROP_TYPE_INDEX) { const char *unused; VERIFY0(zpool_prop_index_to_string( prop, intval, &unused)); } VERIFY0(zap_update(mos, spa->spa_pool_props_object, propname, 8, 1, &intval, tx)); spa_history_log_internal(spa, "set", tx, "%s=%lld", nvpair_name(elem), intval); } else { ASSERT(0); /* not allowed */ } switch (prop) { case ZPOOL_PROP_DELEGATION: spa->spa_delegation = intval; break; case ZPOOL_PROP_BOOTFS: spa->spa_bootfs = intval; break; case ZPOOL_PROP_FAILUREMODE: spa->spa_failmode = intval; break; case ZPOOL_PROP_AUTOEXPAND: spa->spa_autoexpand = intval; if (tx->tx_txg != TXG_INITIAL) spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); break; case ZPOOL_PROP_DEDUPDITTO: spa->spa_dedup_ditto = intval; break; default: break; } } } mutex_exit(&spa->spa_props_lock); } /* * Perform one-time upgrade on-disk changes. spa_version() does not * reflect the new version this txg, so there must be no changes this * txg to anything that the upgrade code depends on after it executes.
* Therefore this must be called after dsl_pool_sync() does the sync * tasks. */ static void spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) { dsl_pool_t *dp = spa->spa_dsl_pool; ASSERT(spa->spa_sync_pass == 1); rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { dsl_pool_create_origin(dp, tx); /* Keeping the origin open increases spa_minref */ spa->spa_minref += 3; } if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { dsl_pool_upgrade_clones(dp, tx); } if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES && spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) { dsl_pool_upgrade_dir_clones(dp, tx); /* Keeping the freedir open increases spa_minref */ spa->spa_minref += 3; } if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES && spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { spa_feature_create_zap_objects(spa, tx); } /* * The LZ4_COMPRESS feature's behaviour was changed to activate_on_enable * when the ability to use lz4 compression for metadata was added. * Old pools that have this feature enabled must be upgraded to have * this feature active. */ if (spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { boolean_t lz4_en = spa_feature_is_enabled(spa, SPA_FEATURE_LZ4_COMPRESS); boolean_t lz4_ac = spa_feature_is_active(spa, SPA_FEATURE_LZ4_COMPRESS); if (lz4_en && !lz4_ac) spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx); } /* * If we haven't written the salt, do so now. Note that the * feature may not be activated yet, but that's fine since * the presence of this ZAP entry is backwards compatible. */ if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT) == ENOENT) { VERIFY0(zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, sizeof (spa->spa_cksum_salt.zcs_bytes), spa->spa_cksum_salt.zcs_bytes, tx)); } rrw_exit(&dp->dp_config_rwlock, FTAG); } /* * Sync the specified transaction group. New blocks may be dirtied as * part of the process, so we iterate until it converges. */ void spa_sync(spa_t *spa, uint64_t txg) { dsl_pool_t *dp = spa->spa_dsl_pool; objset_t *mos = spa->spa_meta_objset; bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd; dmu_tx_t *tx; int error; uint32_t max_queue_depth = zfs_vdev_async_write_max_active * zfs_vdev_queue_depth_pct / 100; VERIFY(spa_writeable(spa)); /* * Lock out configuration changes. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa->spa_syncing_txg = txg; spa->spa_sync_pass = 0; mutex_enter(&spa->spa_alloc_lock); VERIFY0(avl_numnodes(&spa->spa_alloc_tree)); mutex_exit(&spa->spa_alloc_lock); /* * If there are any pending vdev state changes, convert them * into config changes that go out with this transaction group. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); while (list_head(&spa->spa_state_dirty_list) != NULL) { /* * We need the write lock here because, for aux vdevs, * calling vdev_config_dirty() modifies sav_config. * This is ugly and will become unnecessary when we * eliminate the aux vdev wart by integrating all vdevs * into the root vdev tree.
*/ spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { vdev_state_clean(vd); vdev_config_dirty(vd); } spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); } spa_config_exit(spa, SCL_STATE, FTAG); tx = dmu_tx_create_assigned(dp, txg); spa->spa_sync_starttime = gethrtime(); #ifdef illumos VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, spa->spa_sync_starttime + spa->spa_deadman_synctime)); #else /* !illumos */ #ifdef _KERNEL callout_reset(&spa->spa_deadman_cycid, hz * spa->spa_deadman_synctime / NANOSEC, spa_deadman, spa); #endif #endif /* illumos */ /* * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, * set spa_deflate if we have no raid-z vdevs. */ if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { int i; for (i = 0; i < rvd->vdev_children; i++) { vd = rvd->vdev_child[i]; if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) break; } if (i == rvd->vdev_children) { spa->spa_deflate = TRUE; VERIFY(0 == zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, sizeof (uint64_t), 1, &spa->spa_deflate, tx)); } } /* * Set the top-level vdev's max queue depth. Evaluate each * top-level's async write queue depth in case it changed. * The max queue depth will not change in the middle of syncing * out this txg. */ uint64_t queue_depth_total = 0; for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (mg == NULL || mg->mg_class != spa_normal_class(spa) || !metaslab_group_initialized(mg)) continue; /* * It is safe to do a lock-free check here because only async * allocations look at mg_max_alloc_queue_depth, and async * allocations all happen from spa_sync(). */ ASSERT0(refcount_count(&mg->mg_alloc_queue_depth)); mg->mg_max_alloc_queue_depth = max_queue_depth; queue_depth_total += mg->mg_max_alloc_queue_depth; } metaslab_class_t *mc = spa_normal_class(spa); ASSERT0(refcount_count(&mc->mc_alloc_slots)); mc->mc_alloc_max_slots = queue_depth_total; mc->mc_alloc_throttle_enabled = zio_dva_throttle_enabled; ASSERT3U(mc->mc_alloc_max_slots, <=, max_queue_depth * rvd->vdev_children); /* * Iterate to convergence. */ do { int pass = ++spa->spa_sync_pass; spa_sync_config_object(spa, tx); spa_sync_aux_dev(spa, &spa->spa_spares, tx, ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); spa_errlog_sync(spa, txg); dsl_pool_sync(dp, txg); if (pass < zfs_sync_pass_deferred_free) { spa_sync_frees(spa, free_bpl, tx); } else { /* * We can not defer frees in pass 1, because * we sync the deferred frees later in pass 1. */ ASSERT3U(pass, >, 1); bplist_iterate(free_bpl, bpobj_enqueue_cb, &spa->spa_deferred_bpobj, tx); } ddt_sync(spa, txg); dsl_scan_sync(dp, tx); while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) vdev_sync(vd, txg); if (pass == 1) { spa_sync_upgrades(spa, tx); ASSERT3U(txg, >=, spa->spa_uberblock.ub_rootbp.blk_birth); /* * Note: We need to check if the MOS is dirty * because we could have marked the MOS dirty * without updating the uberblock (e.g. if we * have sync tasks but no dirty user data). 
We * need to check the uberblock's rootbp because * it is updated if we have synced out dirty * data (though in this case the MOS will most * likely also be dirty due to second order * effects, we don't want to rely on that here). */ if (spa->spa_uberblock.ub_rootbp.blk_birth < txg && !dmu_objset_is_dirty(mos, txg)) { /* * Nothing changed on the first pass, * therefore this TXG is a no-op. Avoid * syncing deferred frees, so that we * can keep this TXG as a no-op. */ ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg)); break; } spa_sync_deferred_frees(spa, tx); } } while (dmu_objset_is_dirty(mos, txg)); if (!list_is_empty(&spa->spa_config_dirty_list)) { /* * Make sure that the number of ZAPs for all the vdevs matches * the number of ZAPs in the per-vdev ZAP list. This only gets * called if the config is dirty; otherwise there may be * outstanding AVZ operations that weren't completed in * spa_sync_config_object. */ uint64_t all_vdev_zap_entry_count; ASSERT0(zap_count(spa->spa_meta_objset, spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count)); ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==, all_vdev_zap_entry_count); } /* * Rewrite the vdev configuration (which includes the uberblock) * to commit the transaction group. * * If there are no dirty vdevs, we sync the uberblock to a few * random top-level vdevs that are known to be visible in the * config cache (see spa_vdev_add() for a complete description). * If there *are* dirty vdevs, sync the uberblock to all vdevs. */ for (;;) { /* * We hold SCL_STATE to prevent vdev open/close/etc. * while we're attempting to write the vdev labels. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); if (list_is_empty(&spa->spa_config_dirty_list)) { vdev_t *svd[SPA_DVAS_PER_BP]; int svdcount = 0; int children = rvd->vdev_children; int c0 = spa_get_random(children); for (int c = 0; c < children; c++) { vd = rvd->vdev_child[(c0 + c) % children]; if (vd->vdev_ms_array == 0 || vd->vdev_islog) continue; svd[svdcount++] = vd; if (svdcount == SPA_DVAS_PER_BP) break; } error = vdev_config_sync(svd, svdcount, txg); } else { error = vdev_config_sync(rvd->vdev_child, rvd->vdev_children, txg); } if (error == 0) spa->spa_last_synced_guid = rvd->vdev_guid; spa_config_exit(spa, SCL_STATE, FTAG); if (error == 0) break; zio_suspend(spa, NULL); zio_resume_wait(spa); } dmu_tx_commit(tx); #ifdef illumos VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY)); #else /* !illumos */ #ifdef _KERNEL callout_drain(&spa->spa_deadman_cycid); #endif #endif /* illumos */ /* * Clear the dirty config list. */ while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) vdev_config_clean(vd); /* * Now that the new config has synced transactionally, * let it become visible to the config cache. */ if (spa->spa_config_syncing != NULL) { spa_config_set(spa, spa->spa_config_syncing); spa->spa_config_txg = txg; spa->spa_config_syncing = NULL; } - spa->spa_ubsync = spa->spa_uberblock; - dsl_pool_sync_done(dp, txg); mutex_enter(&spa->spa_alloc_lock); VERIFY0(avl_numnodes(&spa->spa_alloc_tree)); mutex_exit(&spa->spa_alloc_lock); /* * Update usable space statistics. */ while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) vdev_sync_done(vd, txg); spa_update_dspace(spa); /* * It had better be the case that we didn't dirty anything * since vdev_config_sync(). 
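 */

/*
 * When nothing in the config is dirty, the loop above writes the uberblock
 * to at most SPA_DVAS_PER_BP healthy top-level vdevs, scanning the children
 * from a random starting point so label wear is spread evenly. A standalone
 * sketch of that selection over a plain array; the types are stand-ins and
 * MAX_PICK plays the role of SPA_DVAS_PER_BP.
 */
#include <stddef.h>
#include <stdlib.h>

#define MAX_PICK	3

struct tvd {
	int usable;	/* cf. vdev_ms_array != 0 && !vdev_islog */
};

static size_t
pick_vdevs(struct tvd *child, size_t children, struct tvd **out)
{
	size_t c0 = (size_t)rand() % children;	/* cf. spa_get_random() */
	size_t picked = 0;

	/* Walk every child once, wrapping around from the random start. */
	for (size_t c = 0; c < children && picked < MAX_PICK; c++) {
		struct tvd *vd = &child[(c0 + c) % children];

		if (vd->usable)
			out[picked++] = vd;
	}
	return (picked);
}

/*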
*/ ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); spa->spa_sync_pass = 0; + /* + * Update the last synced uberblock here. We want to do this at + * the end of spa_sync() so that consumers of spa_last_synced_txg() + * will be guaranteed that all the processing associated with + * that txg has been completed. + */ + spa->spa_ubsync = spa->spa_uberblock; spa_config_exit(spa, SCL_CONFIG, FTAG); spa_handle_ignored_writes(spa); /* * If any async tasks have been requested, kick them off. */ spa_async_dispatch(spa); spa_async_dispatch_vd(spa); } /* * Sync all pools. We don't want to hold the namespace lock across these * operations, so we take a reference on the spa_t and drop the lock during the * sync. */ void spa_sync_allpools(void) { spa_t *spa = NULL; mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) { if (spa_state(spa) != POOL_STATE_ACTIVE || !spa_writeable(spa) || spa_suspended(spa)) continue; spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); txg_wait_synced(spa_get_dsl(spa), 0); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); } mutex_exit(&spa_namespace_lock); } /* * ========================================================================== * Miscellaneous routines * ========================================================================== */ /* * Remove all pools in the system. */ void spa_evict_all(void) { spa_t *spa; /* * Remove all cached state. All pools should be closed now, * so every spa in the AVL tree should be unreferenced. */ mutex_enter(&spa_namespace_lock); while ((spa = spa_next(NULL)) != NULL) { /* * Stop async tasks. The async thread may need to detach * a device that's been replaced, which requires grabbing * spa_namespace_lock, so we must drop it here. */ spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); spa_async_suspend(spa); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { spa_unload(spa); spa_deactivate(spa); } spa_remove(spa); } mutex_exit(&spa_namespace_lock); } vdev_t * spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) { vdev_t *vd; int i; if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) return (vd); if (aux) { for (i = 0; i < spa->spa_l2cache.sav_count; i++) { vd = spa->spa_l2cache.sav_vdevs[i]; if (vd->vdev_guid == guid) return (vd); } for (i = 0; i < spa->spa_spares.sav_count; i++) { vd = spa->spa_spares.sav_vdevs[i]; if (vd->vdev_guid == guid) return (vd); } } return (NULL); } void spa_upgrade(spa_t *spa, uint64_t version) { ASSERT(spa_writeable(spa)); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * This should only be called for a non-faulted pool, and since a * future version would result in an unopenable pool, this shouldn't be * possible. 
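 */

/*
 * spa_sync_allpools() above (and spa_evict_all() after it) use the same
 * idiom: never block while holding spa_namespace_lock. Take a reference on
 * the current element, drop the lock for the slow call, then retake it and
 * release the reference. A condensed pthreads sketch of that shape; the
 * list, refcount and slow_operation() are stand-ins, and in the real code
 * it is the reference that keeps the element (and its next pointer) alive
 * while the lock is down.
 */
#include <pthread.h>

struct pool {
	struct pool *next;
	int refcnt;
};

static pthread_mutex_t ns_lock = PTHREAD_MUTEX_INITIALIZER;
static struct pool *pools;

static void
slow_operation(struct pool *p)
{
	(void)p;	/* e.g. txg_wait_synced() on the pool's DSL */
}

static void
sync_all(void)
{
	pthread_mutex_lock(&ns_lock);
	for (struct pool *p = pools; p != NULL; p = p->next) {
		p->refcnt++;			/* cf. spa_open_ref() */
		pthread_mutex_unlock(&ns_lock);	/* never wait under the lock */
		slow_operation(p);
		pthread_mutex_lock(&ns_lock);
		p->refcnt--;			/* cf. spa_close() */
	}
	pthread_mutex_unlock(&ns_lock);
}

/*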
*/ ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version)); ASSERT3U(version, >=, spa->spa_uberblock.ub_version); spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); txg_wait_synced(spa_get_dsl(spa), 0); } boolean_t spa_has_spare(spa_t *spa, uint64_t guid) { int i; uint64_t spareguid; spa_aux_vdev_t *sav = &spa->spa_spares; for (i = 0; i < sav->sav_count; i++) if (sav->sav_vdevs[i]->vdev_guid == guid) return (B_TRUE); for (i = 0; i < sav->sav_npending; i++) { if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, &spareguid) == 0 && spareguid == guid) return (B_TRUE); } return (B_FALSE); } /* * Check if a pool has an active shared spare device. * Note: reference count of an active spare is 2, as a spare and as a replace */ static boolean_t spa_has_active_shared_spare(spa_t *spa) { int i, refcnt; uint64_t pool; spa_aux_vdev_t *sav = &spa->spa_spares; for (i = 0; i < sav->sav_count; i++) { if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, &refcnt) && pool != 0ULL && pool == spa_guid(spa) && refcnt > 2) return (B_TRUE); } return (B_FALSE); } static sysevent_t * spa_event_create(spa_t *spa, vdev_t *vd, const char *name) { sysevent_t *ev = NULL; #ifdef _KERNEL sysevent_attr_list_t *attr = NULL; sysevent_value_t value; ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", SE_SLEEP); ASSERT(ev != NULL); value.value_type = SE_DATA_TYPE_STRING; value.value.sv_string = spa_name(spa); if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) goto done; value.value_type = SE_DATA_TYPE_UINT64; value.value.sv_uint64 = spa_guid(spa); if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) goto done; if (vd) { value.value_type = SE_DATA_TYPE_UINT64; value.value.sv_uint64 = vd->vdev_guid; if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, SE_SLEEP) != 0) goto done; if (vd->vdev_path) { value.value_type = SE_DATA_TYPE_STRING; value.value.sv_string = vd->vdev_path; if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, &value, SE_SLEEP) != 0) goto done; } } if (sysevent_attach_attributes(ev, attr) != 0) goto done; attr = NULL; done: if (attr) sysevent_free_attr(attr); #endif return (ev); } static void spa_event_post(sysevent_t *ev) { #ifdef _KERNEL sysevent_id_t eid; (void) log_sysevent(ev, SE_SLEEP, &eid); sysevent_free(ev); #endif } /* * Post a sysevent corresponding to the given event. The 'name' must be one of * the event definitions in sys/sysevent/eventdefs.h. The payload will be * filled in from the spa and (optionally) the vdev. This doesn't do anything * in the userland libzpool, as we don't want consumers to misinterpret ztest * or zdb as real changes. */ void spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) { spa_event_post(spa_event_create(spa, vd, name)); } Index: projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c =================================================================== --- projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c (revision 309262) +++ projects/clang391-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c (revision 309263) @@ -1,2150 +1,2196 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The zfs intent log (ZIL) saves transaction records of system calls * that change the file system in memory with enough information * to be able to replay them. These are stored in memory until * either the DMU transaction group (txg) commits them to the stable pool * and they can be discarded, or they are flushed to the stable log * (also in the pool) due to a fsync, O_DSYNC or other synchronous * requirement. In the event of a panic or power fail then those log * records (transactions) are replayed. * * There is one ZIL per file system. Its on-disk (pool) format consists * of 3 parts: * * - ZIL header * - ZIL blocks * - ZIL records * * A log record holds a system call transaction. Log blocks can * hold many log records and the blocks are chained together. * Each ZIL block contains a block pointer (blkptr_t) to the next * ZIL block in the chain. The ZIL header points to the first * block in the chain. Note there is not a fixed place in the pool * to hold blocks. They are dynamically allocated and freed as * needed from the blocks available. Figure X shows the ZIL structure: */ /* * Disable intent logging replay. This global ZIL switch affects all pools. */ int zil_replay_disable = 0; SYSCTL_DECL(_vfs_zfs); SYSCTL_INT(_vfs_zfs, OID_AUTO, zil_replay_disable, CTLFLAG_RWTUN, &zil_replay_disable, 0, "Disable intent logging replay"); /* * Tunable parameter for debugging or performance analysis. Setting * zfs_nocacheflush will cause corruption on power loss if a volatile * out-of-order write cache is enabled. */ boolean_t zfs_nocacheflush = B_FALSE; SYSCTL_INT(_vfs_zfs, OID_AUTO, cache_flush_disable, CTLFLAG_RDTUN, &zfs_nocacheflush, 0, "Disable cache flush"); boolean_t zfs_trim_enabled = B_TRUE; SYSCTL_DECL(_vfs_zfs_trim); SYSCTL_INT(_vfs_zfs_trim, OID_AUTO, enabled, CTLFLAG_RDTUN, &zfs_trim_enabled, 0, "Enable ZFS TRIM"); /* * Limit SLOG write size per commit executed with synchronous priority. * Any writes above that executed with lower (asynchronous) priority to * limit potential SLOG device abuse by single active ZIL writer. */ uint64_t zil_slog_limit = 768 * 1024; SYSCTL_QUAD(_vfs_zfs, OID_AUTO, zil_slog_limit, CTLFLAG_RWTUN, &zil_slog_limit, 0, "Maximal SLOG commit size with sync priority"); static kmem_cache_t *zil_lwb_cache; #define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&lwb->lwb_blk) - \ sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused)) /* * ziltest is by and large an ugly hack, but very useful in * checking replay without tedious work. 
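 */

/*
 * The zil_slog_limit tunable above caps how much of one commit is written
 * to a dedicated log device at synchronous priority; zil_lwb_write_init()
 * later in this file demotes further slog writes to asynchronous priority
 * so a single busy ZIL cannot monopolize the device. A standalone sketch
 * of just that decision, with stand-in names for the priorities.
 */
#include <stdint.h>

enum prio { PRIO_SYNC_WRITE, PRIO_ASYNC_WRITE };

static uint64_t slog_limit = 768 * 1024;	/* cf. zil_slog_limit */

static enum prio
lwb_priority(uint64_t cur_used, int on_slog)
{
	/* Main-pool writes, and slog writes under the cap, stay sync. */
	if (cur_used <= slog_limit || !on_slog)
		return (PRIO_SYNC_WRITE);
	return (PRIO_ASYNC_WRITE);
}

/*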
* When running ziltest we want to keep all itx's and so maintain * a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG * We subtract TXG_CONCURRENT_STATES to allow for common code. */ #define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES) static int zil_bp_compare(const void *x1, const void *x2) { const dva_t *dva1 = &((zil_bp_node_t *)x1)->zn_dva; const dva_t *dva2 = &((zil_bp_node_t *)x2)->zn_dva; if (DVA_GET_VDEV(dva1) < DVA_GET_VDEV(dva2)) return (-1); if (DVA_GET_VDEV(dva1) > DVA_GET_VDEV(dva2)) return (1); if (DVA_GET_OFFSET(dva1) < DVA_GET_OFFSET(dva2)) return (-1); if (DVA_GET_OFFSET(dva1) > DVA_GET_OFFSET(dva2)) return (1); return (0); } static void zil_bp_tree_init(zilog_t *zilog) { avl_create(&zilog->zl_bp_tree, zil_bp_compare, sizeof (zil_bp_node_t), offsetof(zil_bp_node_t, zn_node)); } static void zil_bp_tree_fini(zilog_t *zilog) { avl_tree_t *t = &zilog->zl_bp_tree; zil_bp_node_t *zn; void *cookie = NULL; while ((zn = avl_destroy_nodes(t, &cookie)) != NULL) kmem_free(zn, sizeof (zil_bp_node_t)); avl_destroy(t); } int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp) { avl_tree_t *t = &zilog->zl_bp_tree; const dva_t *dva; zil_bp_node_t *zn; avl_index_t where; if (BP_IS_EMBEDDED(bp)) return (0); dva = BP_IDENTITY(bp); if (avl_find(t, dva, &where) != NULL) return (SET_ERROR(EEXIST)); zn = kmem_alloc(sizeof (zil_bp_node_t), KM_SLEEP); zn->zn_dva = *dva; avl_insert(t, zn, where); return (0); } static zil_header_t * zil_header_in_syncing_context(zilog_t *zilog) { return ((zil_header_t *)zilog->zl_header); } static void zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) { zio_cksum_t *zc = &bp->blk_cksum; zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL); zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL); zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); zc->zc_word[ZIL_ZC_SEQ] = 1ULL; } /* * Read a log block and make sure it's valid. */ static int zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, char **end) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf = NULL; zbookmark_phys_t zb; int error; if (zilog->zl_header->zh_claim_txg == 0) zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) zio_flags |= ZIO_FLAG_SPECULATIVE; SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { zio_cksum_t cksum = bp->blk_cksum; /* * Validate the checksummed log block. * * Sequence numbers should be... sequential. The checksum * verifier for the next block should be bp's checksum plus 1. * * Also check the log chain linkage and size used. 
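 */

/*
 * The validation described above relies on the ZIL's self-describing
 * checksum: each block stores in zc_next_blk the checksum its successor
 * must carry, namely this block's checksum with the sequence word bumped
 * by one. A standalone sketch of that successor check; the four-word
 * layout mirrors zio_cksum_t but the types here are stand-ins.
 */
#include <stdint.h>
#include <string.h>

#define ZC_SEQ	3	/* cf. ZIL_ZC_SEQ, the sequence-number word */

struct cksum {
	uint64_t word[4];
};

/* Nonzero iff 'next' legitimately follows the block checksummed 'cur'. */
static int
chain_ok(const struct cksum *cur, const struct cksum *next)
{
	struct cksum want = *cur;

	want.word[ZC_SEQ]++;
	return (memcmp(&want, next, sizeof (want)) == 0);
}

/*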
*/ cksum.zc_word[ZIL_ZC_SEQ]++; if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { zil_chain_t *zilc = abuf->b_data; char *lr = (char *)(zilc + 1); uint64_t len = zilc->zc_nused - sizeof (zil_chain_t); if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) { error = SET_ERROR(ECKSUM); } else { ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE); bcopy(lr, dst, len); *end = (char *)dst + len; *nbp = zilc->zc_next_blk; } } else { char *lr = abuf->b_data; uint64_t size = BP_GET_LSIZE(bp); zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1; if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) || (zilc->zc_nused > (size - sizeof (*zilc)))) { error = SET_ERROR(ECKSUM); } else { ASSERT3U(zilc->zc_nused, <=, SPA_OLD_MAXBLOCKSIZE); bcopy(lr, dst, zilc->zc_nused); *end = (char *)dst + zilc->zc_nused; *nbp = zilc->zc_next_blk; } } arc_buf_destroy(abuf, &abuf); } return (error); } /* * Read a TX_WRITE log data block. */ static int zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; const blkptr_t *bp = &lr->lr_blkptr; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf = NULL; zbookmark_phys_t zb; int error; if (BP_IS_HOLE(bp)) { if (wbuf != NULL) bzero(wbuf, MAX(BP_GET_LSIZE(bp), lr->lr_length)); return (0); } if (zilog->zl_header->zh_claim_txg == 0) zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { if (wbuf != NULL) bcopy(abuf->b_data, wbuf, arc_buf_size(abuf)); arc_buf_destroy(abuf, &abuf); } return (error); } /* * Parse the intent log, and call parse_func for each valid record within. */ int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) { const zil_header_t *zh = zilog->zl_header; boolean_t claimed = !!zh->zh_claim_txg; uint64_t claim_blk_seq = claimed ? zh->zh_claim_blk_seq : UINT64_MAX; uint64_t claim_lr_seq = claimed ? zh->zh_claim_lr_seq : UINT64_MAX; uint64_t max_blk_seq = 0; uint64_t max_lr_seq = 0; uint64_t blk_count = 0; uint64_t lr_count = 0; blkptr_t blk, next_blk; char *lrbuf, *lrp; int error = 0; /* * Old logs didn't record the maximum zh_claim_lr_seq. */ if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) claim_lr_seq = UINT64_MAX; /* * Starting at the block pointed to by zh_log we read the log chain. * For each block in the chain we strongly check that block to * ensure its validity. We stop when an invalid block is found. * For each block pointer in the chain we call parse_blk_func(). * For each record in each valid block we call parse_lr_func(). * If the log has been claimed, stop if we encounter a sequence * number greater than the highest claimed sequence number. 
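 */

/*
 * Inside each valid block, zil_parse() below walks a packed sequence of
 * variable-length records, advancing by each record's lrc_reclen. A
 * standalone sketch of that inner loop; 'struct rec' is a stand-in for
 * lr_t, and the corruption check stands in for the ASSERT on reclen.
 */
#include <stddef.h>
#include <stdint.h>

struct rec {
	uint64_t txtype;
	uint64_t reclen;	/* total record size, header included */
};

typedef int (*rec_cb)(const struct rec *, void *);

/* Apply cb to every record in [buf, buf + len); stop on nonzero. */
static int
walk_records(const char *buf, size_t len, rec_cb cb, void *arg)
{
	const char *p = buf;

	while (p < buf + len) {
		const struct rec *r = (const struct rec *)p;
		int err;

		if (r->reclen < sizeof (*r))	/* corrupt: would never advance */
			return (-1);
		if ((err = cb(r, arg)) != 0)
			return (err);
		p += r->reclen;
	}
	return (0);
}

/*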
*/ lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE); zil_bp_tree_init(zilog); for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) { uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ]; int reclen; char *end; if (blk_seq > claim_blk_seq) break; if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0) break; ASSERT3U(max_blk_seq, <, blk_seq); max_blk_seq = blk_seq; blk_count++; if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) break; error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); if (error != 0) break; for (lrp = lrbuf; lrp < end; lrp += reclen) { lr_t *lr = (lr_t *)lrp; reclen = lr->lrc_reclen; ASSERT3U(reclen, >=, sizeof (lr_t)); if (lr->lrc_seq > claim_lr_seq) goto done; if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0) goto done; ASSERT3U(max_lr_seq, <, lr->lrc_seq); max_lr_seq = lr->lrc_seq; lr_count++; } } done: zilog->zl_parse_error = error; zilog->zl_parse_blk_seq = max_blk_seq; zilog->zl_parse_lr_seq = max_lr_seq; zilog->zl_parse_blk_count = blk_count; zilog->zl_parse_lr_count = lr_count; ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); zil_bp_tree_fini(zilog); zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); return (error); } static int zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg) { /* * Claim log block if not already committed and not already claimed. * If tx == NULL, just verify that the block is claimable. */ if (BP_IS_HOLE(bp) || bp->blk_birth < first_txg || zil_bp_tree_add(zilog, bp) != 0) return (0); return (zio_wait(zio_claim(NULL, zilog->zl_spa, tx == NULL ? 0 : first_txg, bp, spa_claim_notify, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB))); } static int zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg) { lr_write_t *lr = (lr_write_t *)lrc; int error; if (lrc->lrc_txtype != TX_WRITE) return (0); /* * If the block is not readable, don't claim it. This can happen * in normal operation when a log block is written to disk before * some of the dmu_sync() blocks it points to. In this case, the * transaction cannot have been committed to anyone (we would have * waited for all writes to be stable first), so it is semantically * correct to declare this the end of the log. */ if (lr->lr_blkptr.blk_birth >= first_txg && (error = zil_read_log_data(zilog, lr, NULL)) != 0) return (error); return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); } /* ARGSUSED */ static int zil_free_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t claim_txg) { zio_free_zil(zilog->zl_spa, dmu_tx_get_txg(tx), bp); return (0); } static int zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg) { lr_write_t *lr = (lr_write_t *)lrc; blkptr_t *bp = &lr->lr_blkptr; /* * If we previously claimed it, we need to free it. 
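 */

/*
 * The claim and free callbacks above funnel every block pointer through
 * zil_bp_tree_add(), whose EEXIST return is what guarantees each log block
 * is processed exactly once even if the chain is walked again. A standalone
 * insert-if-absent sketch over a sorted array instead of an AVL tree.
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>

#define MAX_SEEN	128

static uint64_t seen[MAX_SEEN];
static size_t nseen;

static int
seen_add(uint64_t id)
{
	size_t lo = 0, hi = nseen;

	while (lo < hi) {			/* binary search */
		size_t mid = (lo + hi) / 2;

		if (seen[mid] < id)
			lo = mid + 1;
		else
			hi = mid;
	}
	if (lo < nseen && seen[lo] == id)
		return (EEXIST);		/* already processed */
	if (nseen == MAX_SEEN)
		return (ENOMEM);
	memmove(&seen[lo + 1], &seen[lo], (nseen - lo) * sizeof (seen[0]));
	seen[lo] = id;
	nseen++;
	return (0);
}

/*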
*/ if (claim_txg != 0 && lrc->lrc_txtype == TX_WRITE && bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp)) zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); return (0); } static lwb_t * zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg) { lwb_t *lwb; lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); lwb->lwb_zilog = zilog; lwb->lwb_blk = *bp; lwb->lwb_slog = slog; lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp)); lwb->lwb_max_txg = txg; lwb->lwb_zio = NULL; lwb->lwb_tx = NULL; if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { lwb->lwb_nused = sizeof (zil_chain_t); lwb->lwb_sz = BP_GET_LSIZE(bp); } else { lwb->lwb_nused = 0; lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t); } mutex_enter(&zilog->zl_lock); list_insert_tail(&zilog->zl_lwb_list, lwb); mutex_exit(&zilog->zl_lock); return (lwb); } /* * Called when we create in-memory log transactions so that we know * to clean up the itxs at the end of spa_sync(). */ void zilog_dirty(zilog_t *zilog, uint64_t txg) { dsl_pool_t *dp = zilog->zl_dmu_pool; dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); if (ds->ds_is_snapshot) panic("dirtying snapshot!"); if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, zilog); } } +/* + * Determine if the zil is dirty in the specified txg. Callers wanting to + * ensure that the dirty state does not change must hold the itxg_lock for + * the specified txg. Holding the lock will ensure that the zil cannot be + * dirtied (zil_itx_assign) or cleaned (zil_clean) while we check its current + * state. + */ boolean_t +zilog_is_dirty_in_txg(zilog_t *zilog, uint64_t txg) +{ + dsl_pool_t *dp = zilog->zl_dmu_pool; + + if (txg_list_member(&dp->dp_dirty_zilogs, zilog, txg & TXG_MASK)) + return (B_TRUE); + return (B_FALSE); +} + +/* + * Determine if the zil is dirty. The zil is considered dirty if it has + * any pending itx records that have not been cleaned by zil_clean(). + */ +boolean_t zilog_is_dirty(zilog_t *zilog) { dsl_pool_t *dp = zilog->zl_dmu_pool; for (int t = 0; t < TXG_SIZE; t++) { if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t)) return (B_TRUE); } return (B_FALSE); } /* * Create an on-disk intent log. */ static lwb_t * zil_create(zilog_t *zilog) { const zil_header_t *zh = zilog->zl_header; lwb_t *lwb = NULL; uint64_t txg = 0; dmu_tx_t *tx = NULL; blkptr_t blk; int error = 0; boolean_t slog = FALSE; /* * Wait for any previous destroy to complete. */ txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); ASSERT(zh->zh_claim_txg == 0); ASSERT(zh->zh_replay_seq == 0); blk = zh->zh_log; /* * Allocate an initial log block if: * - there isn't one already * - the existing block is the wrong endianness */ if (BP_IS_HOLE(&blk) || BP_SHOULD_BYTESWAP(&blk)) { tx = dmu_tx_create(zilog->zl_os); VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); if (!BP_IS_HOLE(&blk)) { zio_free_zil(zilog->zl_spa, txg, &blk); BP_ZERO(&blk); } error = zio_alloc_zil(zilog->zl_spa, txg, &blk, NULL, ZIL_MIN_BLKSZ, &slog); if (error == 0) zil_init_log_chain(zilog, &blk); } /* * Allocate a log write buffer (lwb) for the first log block. */ if (error == 0) lwb = zil_alloc_lwb(zilog, &blk, slog, txg); /* * If we just allocated the first log block, commit our transaction * and wait for zil_sync() to stuff the block pointer into zh_log. * (zh is part of the MOS, so we cannot modify it in open context.)
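 */

/*
 * zilog_dirty() and the new zilog_is_dirty_in_txg() above index per-txg
 * state with txg & TXG_MASK: only TXG_SIZE transaction groups exist at
 * once, so a small power-of-two ring indexed by the txg's low bits
 * suffices, provided each slot records which txg owns it (cf. the
 * itxg_txg check in zil_itx_assign()). A standalone sketch of the idiom
 * with the constants declared locally.
 */
#include <stdint.h>

#define TXG_SIZE	4	/* must stay a power of two */
#define TXG_MASK	(TXG_SIZE - 1)

struct per_txg {
	uint64_t txg;	/* owner of this slot right now */
	int dirty;
};

static struct per_txg slots[TXG_SIZE];

static void
mark_dirty(uint64_t txg)
{
	struct per_txg *s = &slots[txg & TXG_MASK];

	s->txg = txg;	/* a later txg with the same low bits reuses the slot */
	s->dirty = 1;
}

static int
is_dirty(uint64_t txg)
{
	struct per_txg *s = &slots[txg & TXG_MASK];

	/* Stale slots (owned by another txg) don't count. */
	return (s->txg == txg && s->dirty);
}

/*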
*/ if (tx != NULL) { dmu_tx_commit(tx); txg_wait_synced(zilog->zl_dmu_pool, txg); } ASSERT(bcmp(&blk, &zh->zh_log, sizeof (blk)) == 0); return (lwb); } /* * In one tx, free all log blocks and clear the log header. * If keep_first is set, then we're replaying a log with no content. * We want to keep the first block, however, so that the first * synchronous transaction doesn't require a txg_wait_synced() * in zil_create(). We don't need to txg_wait_synced() here either * when keep_first is set, because both zil_create() and zil_destroy() * will wait for any in-progress destroys to complete. */ void zil_destroy(zilog_t *zilog, boolean_t keep_first) { const zil_header_t *zh = zilog->zl_header; lwb_t *lwb; dmu_tx_t *tx; uint64_t txg; /* * Wait for any previous destroy to complete. */ txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); zilog->zl_old_header = *zh; /* debugging aid */ if (BP_IS_HOLE(&zh->zh_log)) return; tx = dmu_tx_create(zilog->zl_os); VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); mutex_enter(&zilog->zl_lock); ASSERT3U(zilog->zl_destroy_txg, <, txg); zilog->zl_destroy_txg = txg; zilog->zl_keep_first = keep_first; if (!list_is_empty(&zilog->zl_lwb_list)) { ASSERT(zh->zh_claim_txg == 0); VERIFY(!keep_first); while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { list_remove(&zilog->zl_lwb_list, lwb); if (lwb->lwb_buf != NULL) zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); zio_free_zil(zilog->zl_spa, txg, &lwb->lwb_blk); kmem_cache_free(zil_lwb_cache, lwb); } } else if (!keep_first) { zil_destroy_sync(zilog, tx); } mutex_exit(&zilog->zl_lock); dmu_tx_commit(tx); } void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) { ASSERT(list_is_empty(&zilog->zl_lwb_list)); (void) zil_parse(zilog, zil_free_log_block, zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); } int zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) { dmu_tx_t *tx = txarg; uint64_t first_txg = dmu_tx_get_txg(tx); zilog_t *zilog; zil_header_t *zh; objset_t *os; int error; error = dmu_objset_own_obj(dp, ds->ds_object, DMU_OST_ANY, B_FALSE, FTAG, &os); if (error != 0) { /* * EBUSY indicates that the objset is inconsistent, in which * case it can not have a ZIL. */ if (error != EBUSY) { cmn_err(CE_WARN, "can't open objset for %llu, error %u", (unsigned long long)ds->ds_object, error); } return (0); } zilog = dmu_objset_zil(os); zh = zil_header_in_syncing_context(zilog); if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR) { if (!BP_IS_HOLE(&zh->zh_log)) zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); BP_ZERO(&zh->zh_log); dsl_dataset_dirty(dmu_objset_ds(os), tx); dmu_objset_disown(os, FTAG); return (0); } /* * Claim all log blocks if we haven't already done so, and remember * the highest claimed sequence number. This ensures that if we can * read only part of the log now (e.g. due to a missing device), * but we can read the entire log later, we will not try to replay * or destroy beyond the last block we successfully claimed. 
*/ ASSERT3U(zh->zh_claim_txg, <=, first_txg); if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { (void) zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, first_txg); zh->zh_claim_txg = first_txg; zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1) zh->zh_flags |= ZIL_REPLAY_NEEDED; zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID; dsl_dataset_dirty(dmu_objset_ds(os), tx); } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); dmu_objset_disown(os, FTAG); return (0); } /* * Check the log by walking the log chain. * Checksum errors are ok as they indicate the end of the chain. * Any other error (no device or read failure) returns an error. */ /* ARGSUSED */ int zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) { zilog_t *zilog; objset_t *os; blkptr_t *bp; int error; ASSERT(tx == NULL); error = dmu_objset_from_ds(ds, &os); if (error != 0) { cmn_err(CE_WARN, "can't open objset %llu, error %d", (unsigned long long)ds->ds_object, error); return (0); } zilog = dmu_objset_zil(os); bp = (blkptr_t *)&zilog->zl_header->zh_log; /* * Check the first block and determine if it's on a log device * which may have been removed or faulted prior to loading this * pool. If so, there's no point in checking the rest of the log * as its content should have already been synced to the pool. */ if (!BP_IS_HOLE(bp)) { vdev_t *vd; boolean_t valid = B_TRUE; spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER); vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0])); if (vd->vdev_islog && vdev_is_dead(vd)) valid = vdev_log_state_valid(vd); spa_config_exit(os->os_spa, SCL_STATE, FTAG); if (!valid) return (0); } /* * Because tx == NULL, zil_claim_log_block() will not actually claim * any blocks, but just determine whether it is possible to do so. * In addition to checking the log chain, zil_claim_log_block() * will invoke zio_claim() with a done func of spa_claim_notify(), * which will update spa_max_claim_txg. See spa_load() for details. */ error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa)); return ((error == ECKSUM || error == ENOENT) ? 0 : error); } static int zil_vdev_compare(const void *x1, const void *x2) { const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev; const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev; if (v1 < v2) return (-1); if (v1 > v2) return (1); return (0); } void zil_add_block(zilog_t *zilog, const blkptr_t *bp) { avl_tree_t *t = &zilog->zl_vdev_tree; avl_index_t where; zil_vdev_node_t *zv, zvsearch; int ndvas = BP_GET_NDVAS(bp); int i; if (zfs_nocacheflush) return; ASSERT(zilog->zl_writer); /* * Even though we're zl_writer, we still need a lock because the * zl_get_data() callbacks may have dmu_sync() done callbacks * that will run concurrently. */ mutex_enter(&zilog->zl_vdev_lock); for (i = 0; i < ndvas; i++) { zvsearch.zv_vdev = DVA_GET_VDEV(&bp->blk_dva[i]); if (avl_find(t, &zvsearch, &where) == NULL) { zv = kmem_alloc(sizeof (*zv), KM_SLEEP); zv->zv_vdev = zvsearch.zv_vdev; avl_insert(t, zv, where); } } mutex_exit(&zilog->zl_vdev_lock); } static void zil_flush_vdevs(zilog_t *zilog) { spa_t *spa = zilog->zl_spa; avl_tree_t *t = &zilog->zl_vdev_tree; void *cookie = NULL; zil_vdev_node_t *zv; zio_t *zio; ASSERT(zilog->zl_writer); /* * We don't need zl_vdev_lock here because we're the zl_writer, * and all zl_get_data() callbacks are done. 
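 */

/*
 * zil_add_block() and zil_flush_vdevs() above form a gather-then-flush
 * pair: every log write records the vdevs it touched in a deduplicating
 * tree, and commit time flushes each write cache exactly once. A standalone
 * sketch of the same pattern using a bitmap in place of the AVL tree.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_VDEVS	64

static uint64_t touched;	/* one bit per vdev id, cf. zl_vdev_tree */

/* Note a vdev a log block landed on; duplicates collapse for free. */
static void
note_vdev(unsigned vdev)
{
	if (vdev < MAX_VDEVS)
		touched |= UINT64_C(1) << vdev;
}

/* Flush every touched vdev once, then forget the set. */
static void
flush_vdevs(void)
{
	for (unsigned v = 0; v < MAX_VDEVS; v++) {
		if (touched & (UINT64_C(1) << v))
			printf("flush write cache of vdev %u\n", v);
	}
	touched = 0;
}

/*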
*/ if (avl_numnodes(t) == 0) return; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); if (vd != NULL) zio_flush(zio, vd); kmem_free(zv, sizeof (*zv)); } /* * Wait for all the flushes to complete. Not all devices actually * support the DKIOCFLUSHWRITECACHE ioctl, so it's OK if it fails. */ (void) zio_wait(zio); spa_config_exit(spa, SCL_STATE, FTAG); } /* * Function called when a log block write completes */ static void zil_lwb_write_done(zio_t *zio) { lwb_t *lwb = zio->io_private; zilog_t *zilog = lwb->lwb_zilog; dmu_tx_t *tx = lwb->lwb_tx; ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG); ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); ASSERT(BP_GET_BYTEORDER(zio->io_bp) == ZFS_HOST_BYTEORDER); ASSERT(!BP_IS_GANG(zio->io_bp)); ASSERT(!BP_IS_HOLE(zio->io_bp)); ASSERT(BP_GET_FILL(zio->io_bp) == 0); /* * Ensure the lwb buffer pointer is cleared before releasing * the txg. If we have had an allocation failure and * the txg is waiting to sync then we want zil_sync() * to remove the lwb so that it's not picked up as the next new * one in zil_commit_writer(). zil_sync() will only remove * the lwb if lwb_buf is null. */ zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); mutex_enter(&zilog->zl_lock); lwb->lwb_buf = NULL; lwb->lwb_tx = NULL; mutex_exit(&zilog->zl_lock); /* * Now that we've written this log block, we have a stable pointer * to the next block in the chain, so it's OK to let the txg in * which we allocated the next block sync. */ dmu_tx_commit(tx); } /* * Initialize the io for a log block. */ static void zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) { zbookmark_phys_t zb; zio_priority_t prio; SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); if (zilog->zl_root_zio == NULL) { zilog->zl_root_zio = zio_root(zilog->zl_spa, NULL, NULL, ZIO_FLAG_CANFAIL); } if (lwb->lwb_zio == NULL) { if (zilog->zl_cur_used <= zil_slog_limit || !lwb->lwb_slog) prio = ZIO_PRIORITY_SYNC_WRITE; else prio = ZIO_PRIORITY_ASYNC_WRITE; lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa, 0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE, &zb); } } /* * Define a limited set of intent log block sizes. * * These must be a multiple of 4KB. Note only the amount used (again * aligned to 4KB) actually gets written. However, we can't always just * allocate SPA_OLD_MAXBLOCKSIZE as the slog space could be exhausted. */ uint64_t zil_block_buckets[] = { 4096, /* non TX_WRITE */ 8192+4096, /* data base */ 32*1024 + 4096, /* NFS writes */ UINT64_MAX }; /* * Start a log block write and advance to the next log block. * Calls are serialized. */ static lwb_t * zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) { lwb_t *nlwb = NULL; zil_chain_t *zilc; spa_t *spa = zilog->zl_spa; blkptr_t *bp; dmu_tx_t *tx; uint64_t txg; uint64_t zil_blksz, wsz; int i, error; boolean_t slog; if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) { zilc = (zil_chain_t *)lwb->lwb_buf; bp = &zilc->zc_next_blk; } else { zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz); bp = &zilc->zc_next_blk; } ASSERT(lwb->lwb_nused <= lwb->lwb_sz); /* * Allocate the next block and save its address in this block * before writing it in order to establish the log chain.
* Note that if the allocation of nlwb synced before we wrote * the block that points at it (lwb), we'd leak it if we crashed. * Therefore, we don't do dmu_tx_commit() until zil_lwb_write_done(). * We dirty the dataset to ensure that zil_sync() will be called * to clean up in the event of allocation failure or I/O failure. */ tx = dmu_tx_create(zilog->zl_os); VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); lwb->lwb_tx = tx; /* * Log blocks are pre-allocated. Here we select the size of the next * block, based on size used in the last block. * - first find the smallest bucket that will fit the block from a * limited set of block sizes. This is because it's faster to write * blocks allocated from the same metaslab as they are adjacent or * close. * - next find the maximum from the new suggested size and an array of * previous sizes. This lessens a picket fence effect of wrongly * guessing the size if we have a stream of say 2k, 64k, 2k, 64k * requests. * * Note we only write what is used, but we can't just allocate * the maximum block size because we can exhaust the available * pool log space. */ zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t); for (i = 0; zil_blksz > zil_block_buckets[i]; i++) continue; zil_blksz = zil_block_buckets[i]; if (zil_blksz == UINT64_MAX) zil_blksz = SPA_OLD_MAXBLOCKSIZE; zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz; for (i = 0; i < ZIL_PREV_BLKS; i++) zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]); zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1); BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz, &slog); if (error == 0) { ASSERT3U(bp->blk_birth, ==, txg); bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; /* * Allocate a new log write buffer (lwb). */ nlwb = zil_alloc_lwb(zilog, bp, slog, txg); /* Record the block for later vdev flushing */ zil_add_block(zilog, &lwb->lwb_blk); } if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) { /* For Slim ZIL only write what is used. */ wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t); ASSERT3U(wsz, <=, lwb->lwb_sz); zio_shrink(lwb->lwb_zio, wsz); } else { wsz = lwb->lwb_sz; } zilc->zc_pad = 0; zilc->zc_nused = lwb->lwb_nused; zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum; /* * clear unused data for security */ bzero(lwb->lwb_buf + lwb->lwb_nused, wsz - lwb->lwb_nused); zio_nowait(lwb->lwb_zio); /* Kick off the write for the old log block */ /* * If there was an allocation failure then nlwb will be null which * forces a txg_wait_synced(). */ return (nlwb); } static lwb_t * zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) { lr_t *lrcb, *lrc = &itx->itx_lr; /* common log record */ lr_write_t *lrwb, *lrw = (lr_write_t *)lrc; char *lr_buf; uint64_t txg = lrc->lrc_txg; uint64_t reclen = lrc->lrc_reclen; uint64_t dlen = 0; uint64_t dnow, lwb_sp; if (lwb == NULL) return (NULL); ASSERT(lwb->lwb_buf != NULL); - ASSERT(zilog_is_dirty(zilog) || - spa_freeze_txg(zilog->zl_spa) != UINT64_MAX); if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) dlen = P2ROUNDUP_TYPED( lrw->lr_length, sizeof (uint64_t), uint64_t); zilog->zl_cur_used += (reclen + dlen); zil_lwb_write_init(zilog, lwb); cont: /* * If this record won't fit in the current log block, start a new one.
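 */

/*
 * Two heuristics above choose the next block size: first the smallest
 * bucket that fits the current demand, then a rolling maximum over the
 * last few choices so an alternating small/large workload doesn't whipsaw
 * the size (the "picket fence" the comment describes). A standalone sketch
 * with the same bucket table; PREV_BLKS stands in for ZIL_PREV_BLKS and
 * max_blksz for SPA_OLD_MAXBLOCKSIZE.
 */
#include <stdint.h>

#define PREV_BLKS	16	/* power of two */

static const uint64_t buckets[] = {
	4096, 8192 + 4096, 32 * 1024 + 4096, UINT64_MAX
};
static uint64_t prev[PREV_BLKS];
static unsigned rotor;

static uint64_t
next_blksz(uint64_t need, uint64_t max_blksz)
{
	uint64_t sz;
	int i;

	/* Smallest bucket that fits; the UINT64_MAX sentinel ends the scan. */
	for (i = 0; need > buckets[i]; i++)
		continue;
	sz = (buckets[i] == UINT64_MAX) ? max_blksz : buckets[i];

	/* Rolling max over recent picks damps the picket-fence effect. */
	prev[rotor] = sz;
	for (i = 0; i < PREV_BLKS; i++) {
		if (prev[i] > sz)
			sz = prev[i];
	}
	rotor = (rotor + 1) & (PREV_BLKS - 1);
	return (sz);
}

/*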
* For WR_NEED_COPY optimize layout for minimal number of chunks, but * try to keep wasted space within reasonable range (12%). */ lwb_sp = lwb->lwb_sz - lwb->lwb_nused; if (reclen > lwb_sp || (reclen + dlen > lwb_sp && lwb_sp < ZIL_MAX_LOG_DATA / 8 && (dlen % ZIL_MAX_LOG_DATA == 0 || lwb_sp < reclen + dlen % ZIL_MAX_LOG_DATA))) { lwb = zil_lwb_write_start(zilog, lwb); if (lwb == NULL) return (NULL); zil_lwb_write_init(zilog, lwb); ASSERT(LWB_EMPTY(lwb)); lwb_sp = lwb->lwb_sz - lwb->lwb_nused; ASSERT3U(reclen + MIN(dlen, sizeof(uint64_t)), <=, lwb_sp); } dnow = MIN(dlen, lwb_sp - reclen); lr_buf = lwb->lwb_buf + lwb->lwb_nused; bcopy(lrc, lr_buf, reclen); lrcb = (lr_t *)lr_buf; lrwb = (lr_write_t *)lrcb; /* * If it's a write, fetch the data or get its blkptr as appropriate. */ if (lrc->lrc_txtype == TX_WRITE) { if (txg > spa_freeze_txg(zilog->zl_spa)) txg_wait_synced(zilog->zl_dmu_pool, txg); if (itx->itx_wr_state != WR_COPIED) { char *dbuf; int error; if (itx->itx_wr_state == WR_NEED_COPY) { dbuf = lr_buf + reclen; lrcb->lrc_reclen += dnow; if (lrwb->lr_length > dnow) lrwb->lr_length = dnow; lrw->lr_offset += dnow; lrw->lr_length -= dnow; } else { ASSERT(itx->itx_wr_state == WR_INDIRECT); dbuf = NULL; } error = zilog->zl_get_data( itx->itx_private, lrwb, dbuf, lwb->lwb_zio); if (error == EIO) { txg_wait_synced(zilog->zl_dmu_pool, txg); return (lwb); } if (error != 0) { ASSERT(error == ENOENT || error == EEXIST || error == EALREADY); return (lwb); } } } /* * We're actually making an entry, so update lrc_seq to be the * log record sequence number. Note that this is generally not * equal to the itx sequence number because not all transactions * are synchronous, and sometimes spa_sync() gets there first. */ lrcb->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */ lwb->lwb_nused += reclen + dnow; lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg); ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz); ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t))); dlen -= dnow; if (dlen > 0) { zilog->zl_cur_used += reclen; goto cont; } return (lwb); } itx_t * zil_itx_create(uint64_t txtype, size_t lrsize) { itx_t *itx; lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t); itx = kmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP); itx->itx_lr.lrc_txtype = txtype; itx->itx_lr.lrc_reclen = lrsize; itx->itx_lr.lrc_seq = 0; /* defensive */ itx->itx_sync = B_TRUE; /* default is synchronous */ return (itx); } void zil_itx_destroy(itx_t *itx) { kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } /* * Free up the sync and async itxs. The itxs_t has already been detached * so no locks are needed.
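 */

/*
 * zil_lwb_commit() above rounds record and data lengths to 8-byte
 * multiples with P2ROUNDUP_TYPED, which is what keeps lwb_nused aligned
 * (the ASSERT0(P2PHASE(...)) check). For a power-of-two alignment the
 * round-up is a single mask operation; a standalone sketch.
 */
#include <assert.h>
#include <stdint.h>

/* Round x up to the next multiple of align; align must be a power of two. */
static uint64_t
p2roundup(uint64_t x, uint64_t align)
{
	return ((x + align - 1) & ~(align - 1));
}

int
main(void)
{
	assert(p2roundup(1, 8) == 8);
	assert(p2roundup(8, 8) == 8);			/* already aligned */
	assert(p2roundup(13, sizeof (uint64_t)) == 16);
	return (0);
}

/*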
*/ static void zil_itxg_clean(itxs_t *itxs) { itx_t *itx; list_t *list; avl_tree_t *t; void *cookie; itx_async_node_t *ian; list = &itxs->i_sync_list; while ((itx = list_head(list)) != NULL) { list_remove(list, itx); kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } cookie = NULL; t = &itxs->i_async_tree; while ((ian = avl_destroy_nodes(t, &cookie)) != NULL) { list = &ian->ia_list; while ((itx = list_head(list)) != NULL) { list_remove(list, itx); kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } list_destroy(list); kmem_free(ian, sizeof (itx_async_node_t)); } avl_destroy(t); kmem_free(itxs, sizeof (itxs_t)); } static int zil_aitx_compare(const void *x1, const void *x2) { const uint64_t o1 = ((itx_async_node_t *)x1)->ia_foid; const uint64_t o2 = ((itx_async_node_t *)x2)->ia_foid; if (o1 < o2) return (-1); if (o1 > o2) return (1); return (0); } /* * Remove all async itx with the given oid. */ static void zil_remove_async(zilog_t *zilog, uint64_t oid) { uint64_t otxg, txg; itx_async_node_t *ian; avl_tree_t *t; avl_index_t where; list_t clean_list; itx_t *itx; ASSERT(oid != 0); list_create(&clean_list, sizeof (itx_t), offsetof(itx_t, itx_node)); if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ otxg = ZILTEST_TXG; else otxg = spa_last_synced_txg(zilog->zl_spa) + 1; for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; mutex_enter(&itxg->itxg_lock); if (itxg->itxg_txg != txg) { mutex_exit(&itxg->itxg_lock); continue; } /* * Locate the object node and append its list. */ t = &itxg->itxg_itxs->i_async_tree; ian = avl_find(t, &oid, &where); if (ian != NULL) list_move_tail(&clean_list, &ian->ia_list); mutex_exit(&itxg->itxg_lock); } while ((itx = list_head(&clean_list)) != NULL) { list_remove(&clean_list, itx); kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } list_destroy(&clean_list); } void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) { uint64_t txg; itxg_t *itxg; itxs_t *itxs, *clean = NULL; /* * Object ids can be re-instantiated in the next txg so * remove any async transactions to avoid future leaks. * This can happen if a fsync occurs on the re-instantiated * object for a WR_INDIRECT or WR_NEED_COPY write, which gets * the new file data and flushes a write record for the old object. */ if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE) zil_remove_async(zilog, itx->itx_oid); /* * Ensure the data of a renamed file is committed before the rename. */ if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME) zil_async_to_sync(zilog, itx->itx_oid); if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) txg = ZILTEST_TXG; else txg = dmu_tx_get_txg(tx); itxg = &zilog->zl_itxg[txg & TXG_MASK]; mutex_enter(&itxg->itxg_lock); itxs = itxg->itxg_itxs; if (itxg->itxg_txg != txg) { if (itxs != NULL) { /* * The zil_clean callback hasn't got around to cleaning * this itxg. Save the itxs for release below. * This should be rare. 
*/ clean = itxg->itxg_itxs; } itxg->itxg_txg = txg; itxs = itxg->itxg_itxs = kmem_zalloc(sizeof (itxs_t), KM_SLEEP); list_create(&itxs->i_sync_list, sizeof (itx_t), offsetof(itx_t, itx_node)); avl_create(&itxs->i_async_tree, zil_aitx_compare, sizeof (itx_async_node_t), offsetof(itx_async_node_t, ia_node)); } if (itx->itx_sync) { list_insert_tail(&itxs->i_sync_list, itx); } else { avl_tree_t *t = &itxs->i_async_tree; uint64_t foid = ((lr_ooo_t *)&itx->itx_lr)->lr_foid; itx_async_node_t *ian; avl_index_t where; ian = avl_find(t, &foid, &where); if (ian == NULL) { ian = kmem_alloc(sizeof (itx_async_node_t), KM_SLEEP); list_create(&ian->ia_list, sizeof (itx_t), offsetof(itx_t, itx_node)); ian->ia_foid = foid; avl_insert(t, ian, where); } list_insert_tail(&ian->ia_list, itx); } itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx); zilog_dirty(zilog, txg); mutex_exit(&itxg->itxg_lock); /* Release the old itxs now that we've dropped the lock */ if (clean != NULL) zil_itxg_clean(clean); } /* * If there are any in-memory intent log transactions which have now been * synced then start up a taskq to free them. We should only do this after we * have written out the uberblocks (i.e. txg has been committed) so that * we don't inadvertently clean out in-memory log records that would be required * by zil_commit(). */ void zil_clean(zilog_t *zilog, uint64_t synced_txg) { itxg_t *itxg = &zilog->zl_itxg[synced_txg & TXG_MASK]; itxs_t *clean_me; mutex_enter(&itxg->itxg_lock); if (itxg->itxg_itxs == NULL || itxg->itxg_txg == ZILTEST_TXG) { mutex_exit(&itxg->itxg_lock); return; } ASSERT3U(itxg->itxg_txg, <=, synced_txg); ASSERT(itxg->itxg_txg != 0); ASSERT(zilog->zl_clean_taskq != NULL); clean_me = itxg->itxg_itxs; itxg->itxg_itxs = NULL; itxg->itxg_txg = 0; mutex_exit(&itxg->itxg_lock); /* * Preferably start a task queue to free up the old itxs but * if taskq_dispatch can't allocate resources to do that then * free it in-line. This should be rare. Note, using TQ_SLEEP * created a bad performance problem. */ if (taskq_dispatch(zilog->zl_clean_taskq, (void (*)(void *))zil_itxg_clean, clean_me, TQ_NOSLEEP) == 0) zil_itxg_clean(clean_me); } /* * Get the list of itxs to commit into zl_itx_commit_list. */ static void zil_get_commit_list(zilog_t *zilog) { uint64_t otxg, txg; list_t *commit_list = &zilog->zl_itx_commit_list; if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ otxg = ZILTEST_TXG; else otxg = spa_last_synced_txg(zilog->zl_spa) + 1; + /* + * This is inherently racy, since there is nothing to prevent + * the last synced txg from changing. That's okay since we'll + * only commit things in the future. + */ for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; mutex_enter(&itxg->itxg_lock); if (itxg->itxg_txg != txg) { mutex_exit(&itxg->itxg_lock); continue; } + /* + * If we're adding itx records to the zl_itx_commit_list, + * then the zil better be dirty in this "txg". We can assert + * that here since we're holding the itxg_lock which will + * prevent spa_sync from cleaning it. Once we add the itxs + * to the zl_itx_commit_list we must commit it to disk even + * if it's unnecessary (i.e. the txg was synced). + */ + ASSERT(zilog_is_dirty_in_txg(zilog, txg) || + spa_freeze_txg(zilog->zl_spa) != UINT64_MAX); list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list); mutex_exit(&itxg->itxg_lock); } } /* * Move the async itxs for a specified object to commit into sync lists.
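 */

/*
 * zil_clean() above prefers handing the freed itxs to a taskq but falls
 * back to freeing them inline when a non-blocking dispatch can't get
 * resources; the work happens either way, just not always off-thread. A
 * standalone sketch of that shape; try_dispatch() is a stand-in that,
 * like taskq_dispatch(..., TQ_NOSLEEP), returns 0 on failure.
 */
typedef void (*task_fn)(void *);

static int
try_dispatch(task_fn fn, void *arg)
{
	(void)fn;
	(void)arg;
	return (0);	/* pretend the queue is out of resources */
}

static void
dispatch_or_inline(task_fn fn, void *arg)
{
	/* On dispatch failure, run the work here instead of sleeping. */
	if (try_dispatch(fn, arg) == 0)
		fn(arg);
}

/*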
*/ void zil_async_to_sync(zilog_t *zilog, uint64_t foid) { uint64_t otxg, txg; itx_async_node_t *ian; avl_tree_t *t; avl_index_t where; if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ otxg = ZILTEST_TXG; else otxg = spa_last_synced_txg(zilog->zl_spa) + 1; + /* + * This is inherently racy, since there is nothing to prevent + * the last synced txg from changing. + */ for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; mutex_enter(&itxg->itxg_lock); if (itxg->itxg_txg != txg) { mutex_exit(&itxg->itxg_lock); continue; } /* * If a foid is specified then find that node and append its * list. Otherwise walk the tree appending all the lists * to the sync list. We add to the end rather than the * beginning to ensure the create has happened. */ t = &itxg->itxg_itxs->i_async_tree; if (foid != 0) { ian = avl_find(t, &foid, &where); if (ian != NULL) { list_move_tail(&itxg->itxg_itxs->i_sync_list, &ian->ia_list); } } else { void *cookie = NULL; while ((ian = avl_destroy_nodes(t, &cookie)) != NULL) { list_move_tail(&itxg->itxg_itxs->i_sync_list, &ian->ia_list); list_destroy(&ian->ia_list); kmem_free(ian, sizeof (itx_async_node_t)); } } mutex_exit(&itxg->itxg_lock); } } static void zil_commit_writer(zilog_t *zilog) { uint64_t txg; itx_t *itx; lwb_t *lwb; spa_t *spa = zilog->zl_spa; int error = 0; ASSERT(zilog->zl_root_zio == NULL); mutex_exit(&zilog->zl_lock); zil_get_commit_list(zilog); /* * Return if there's nothing to commit before we dirty the fs by * calling zil_create(). */ if (list_head(&zilog->zl_itx_commit_list) == NULL) { mutex_enter(&zilog->zl_lock); return; } if (zilog->zl_suspend) { lwb = NULL; } else { lwb = list_tail(&zilog->zl_lwb_list); if (lwb == NULL) lwb = zil_create(zilog); } DTRACE_PROBE1(zil__cw1, zilog_t *, zilog); while (itx = list_head(&zilog->zl_itx_commit_list)) { txg = itx->itx_lr.lrc_txg; - ASSERT(txg); + ASSERT3U(txg, !=, 0); + /* + * This is inherently racy and may result in us writing + * out a log block for a txg that was just synced. This is + * ok since we'll end up cleaning that log block the next + * time we call zil_sync(). + */ if (txg > spa_last_synced_txg(spa) || txg > spa_freeze_txg(spa)) lwb = zil_lwb_commit(zilog, itx, lwb); list_remove(&zilog->zl_itx_commit_list, itx); kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } DTRACE_PROBE1(zil__cw2, zilog_t *, zilog); /* write the last block out */ if (lwb != NULL && lwb->lwb_zio != NULL) lwb = zil_lwb_write_start(zilog, lwb); zilog->zl_cur_used = 0; /* * Wait if necessary for the log blocks to be on stable storage. */ if (zilog->zl_root_zio) { error = zio_wait(zilog->zl_root_zio); zilog->zl_root_zio = NULL; zil_flush_vdevs(zilog); } if (error || lwb == NULL) txg_wait_synced(zilog->zl_dmu_pool, 0); mutex_enter(&zilog->zl_lock); /* * Remember the highest committed log sequence number for ztest. * We only update this value when all the log writes succeeded, * because ztest wants to ASSERT that it got the whole log chain. */ if (error == 0 && lwb != NULL) zilog->zl_commit_lr_seq = zilog->zl_lr_seq; } /* * Commit zfs transactions to stable storage. * If foid is 0 push out all transactions, otherwise push only those * for that object or that might reference that object. * * itxs are committed in batches. In a heavily stressed zil there will be * a commit writer thread who is writing out a bunch of itxs to the log * for a set of committing threads (cthreads) in the same batch as the writer.
* Those cthreads are all waiting on the same cv for that batch. * * There will also be a different and growing batch of threads that are * waiting to commit (qthreads). When the committing batch completes * a transition occurs such that the cthreads exit and the qthreads become * cthreads. One of the new cthreads becomes the writer thread for the * batch. Any new threads arriving become new qthreads. * * Only 2 condition variables are needed and there's no transition * between the two cvs. They just flip-flop between qthreads * and cthreads. * * Using this scheme we can efficiently wake up only those threads * whose batch has been committed. */ void zil_commit(zilog_t *zilog, uint64_t foid) { uint64_t mybatch; if (zilog->zl_sync == ZFS_SYNC_DISABLED) return; /* move the async itxs for the foid to the sync queues */ zil_async_to_sync(zilog, foid); mutex_enter(&zilog->zl_lock); mybatch = zilog->zl_next_batch; while (zilog->zl_writer) { cv_wait(&zilog->zl_cv_batch[mybatch & 1], &zilog->zl_lock); if (mybatch <= zilog->zl_com_batch) { mutex_exit(&zilog->zl_lock); return; } } zilog->zl_next_batch++; zilog->zl_writer = B_TRUE; zil_commit_writer(zilog); zilog->zl_com_batch = mybatch; zilog->zl_writer = B_FALSE; mutex_exit(&zilog->zl_lock); /* wake up one thread to become the next writer */ cv_signal(&zilog->zl_cv_batch[(mybatch+1) & 1]); /* wake up all threads waiting for this batch to be committed */ cv_broadcast(&zilog->zl_cv_batch[mybatch & 1]); } /* * Called in syncing context to free committed log blocks and update log header. */ void zil_sync(zilog_t *zilog, dmu_tx_t *tx) { zil_header_t *zh = zil_header_in_syncing_context(zilog); uint64_t txg = dmu_tx_get_txg(tx); spa_t *spa = zilog->zl_spa; uint64_t *replayed_seq = &zilog->zl_replayed_seq[txg & TXG_MASK]; lwb_t *lwb; /* * We don't zero out zl_destroy_txg, so make sure we don't try * to destroy it twice. */ if (spa_sync_pass(spa) != 1) return; mutex_enter(&zilog->zl_lock); ASSERT(zilog->zl_stop_sync == 0); if (*replayed_seq != 0) { ASSERT(zh->zh_replay_seq < *replayed_seq); zh->zh_replay_seq = *replayed_seq; *replayed_seq = 0; } if (zilog->zl_destroy_txg == txg) { blkptr_t blk = zh->zh_log; ASSERT(list_head(&zilog->zl_lwb_list) == NULL); bzero(zh, sizeof (zil_header_t)); bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq)); if (zilog->zl_keep_first) { /* * If this block was part of a log chain that couldn't * be claimed because a device was missing during * zil_claim(), but that device later returns, * then this block could erroneously appear valid. * To guard against this, assign a new GUID to the new * log chain so it doesn't matter what blk points to. */ zil_init_log_chain(zilog, &blk); zh->zh_log = blk; } } while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { zh->zh_log = lwb->lwb_blk; if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg) break; list_remove(&zilog->zl_lwb_list, lwb); zio_free_zil(spa, txg, &lwb->lwb_blk); kmem_cache_free(zil_lwb_cache, lwb); /* * If we don't have anything left in the lwb list then * we've had an allocation failure and we need to zero * out the zil_header blkptr so that we don't end * up freeing the same block twice.
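 */

/*
 * A condensed pthreads sketch of the batch scheme zil_commit() above
 * implements: arriving threads wait on the condition variable matching
 * their batch's parity, one waiter is promoted to writer for the next
 * batch, and the whole finished batch is broadcast awake. Names are
 * stand-ins; in ZFS the writer drops zl_lock inside zil_commit_writer()
 * while the log I/O is in flight.
 */
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t zl_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t batch_cv[2] = {
	PTHREAD_COND_INITIALIZER, PTHREAD_COND_INITIALIZER
};
static uint64_t next_batch = 1, com_batch;
static int writer;

static void
write_out_batch(void)
{
	/* cf. zil_commit_writer(): gather itxs, issue and wait for I/O */
}

static void
commit(void)
{
	pthread_mutex_lock(&zl_lock);
	uint64_t mybatch = next_batch;

	while (writer) {
		/* Sleep on the cv for this batch's parity. */
		pthread_cond_wait(&batch_cv[mybatch & 1], &zl_lock);
		if (mybatch <= com_batch) {	/* our batch already hit disk */
			pthread_mutex_unlock(&zl_lock);
			return;
		}
	}
	next_batch++;
	writer = 1;
	write_out_batch();
	com_batch = mybatch;
	writer = 0;
	pthread_mutex_unlock(&zl_lock);
	/* Promote one waiter of the following batch to be its writer... */
	pthread_cond_signal(&batch_cv[(mybatch + 1) & 1]);
	/* ...and release everyone who rode in this batch. */
	pthread_cond_broadcast(&batch_cv[mybatch & 1]);
}

/*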
*/ if (list_head(&zilog->zl_lwb_list) == NULL) BP_ZERO(&zh->zh_log); } mutex_exit(&zilog->zl_lock); } void zil_init(void) { zil_lwb_cache = kmem_cache_create("zil_lwb_cache", sizeof (struct lwb), 0, NULL, NULL, NULL, NULL, NULL, 0); } void zil_fini(void) { kmem_cache_destroy(zil_lwb_cache); } void zil_set_sync(zilog_t *zilog, uint64_t sync) { zilog->zl_sync = sync; } void zil_set_logbias(zilog_t *zilog, uint64_t logbias) { zilog->zl_logbias = logbias; } zilog_t * zil_alloc(objset_t *os, zil_header_t *zh_phys) { zilog_t *zilog; zilog = kmem_zalloc(sizeof (zilog_t), KM_SLEEP); zilog->zl_header = zh_phys; zilog->zl_os = os; zilog->zl_spa = dmu_objset_spa(os); zilog->zl_dmu_pool = dmu_objset_pool(os); zilog->zl_destroy_txg = TXG_INITIAL - 1; zilog->zl_logbias = dmu_objset_logbias(os); zilog->zl_sync = dmu_objset_syncprop(os); zilog->zl_next_batch = 1; mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); for (int i = 0; i < TXG_SIZE; i++) { mutex_init(&zilog->zl_itxg[i].itxg_lock, NULL, MUTEX_DEFAULT, NULL); } list_create(&zilog->zl_lwb_list, sizeof (lwb_t), offsetof(lwb_t, lwb_node)); list_create(&zilog->zl_itx_commit_list, sizeof (itx_t), offsetof(itx_t, itx_node)); mutex_init(&zilog->zl_vdev_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&zilog->zl_vdev_tree, zil_vdev_compare, sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node)); cv_init(&zilog->zl_cv_writer, NULL, CV_DEFAULT, NULL); cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL); cv_init(&zilog->zl_cv_batch[0], NULL, CV_DEFAULT, NULL); cv_init(&zilog->zl_cv_batch[1], NULL, CV_DEFAULT, NULL); return (zilog); } void zil_free(zilog_t *zilog) { zilog->zl_stop_sync = 1; ASSERT0(zilog->zl_suspend); ASSERT0(zilog->zl_suspending); ASSERT(list_is_empty(&zilog->zl_lwb_list)); list_destroy(&zilog->zl_lwb_list); avl_destroy(&zilog->zl_vdev_tree); mutex_destroy(&zilog->zl_vdev_lock); ASSERT(list_is_empty(&zilog->zl_itx_commit_list)); list_destroy(&zilog->zl_itx_commit_list); for (int i = 0; i < TXG_SIZE; i++) { /* * It's possible for an itx to be generated that doesn't dirty * a txg (e.g. ztest TX_TRUNCATE). So there's no zil_clean() * callback to remove the entry. We remove those here. * * Also free up the ziltest itxs. */ if (zilog->zl_itxg[i].itxg_itxs) zil_itxg_clean(zilog->zl_itxg[i].itxg_itxs); mutex_destroy(&zilog->zl_itxg[i].itxg_lock); } mutex_destroy(&zilog->zl_lock); cv_destroy(&zilog->zl_cv_writer); cv_destroy(&zilog->zl_cv_suspend); cv_destroy(&zilog->zl_cv_batch[0]); cv_destroy(&zilog->zl_cv_batch[1]); kmem_free(zilog, sizeof (zilog_t)); } /* * Open an intent log. */ zilog_t * zil_open(objset_t *os, zil_get_data_t *get_data) { zilog_t *zilog = dmu_objset_zil(os); ASSERT(zilog->zl_clean_taskq == NULL); ASSERT(zilog->zl_get_data == NULL); ASSERT(list_is_empty(&zilog->zl_lwb_list)); zilog->zl_get_data = get_data; zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri, 2, 2, TASKQ_PREPOPULATE); return (zilog); } /* * Close an intent log. */ void zil_close(zilog_t *zilog) { lwb_t *lwb; uint64_t txg = 0; zil_commit(zilog, 0); /* commit all itx */ /* * The lwb_max_txg for the stubby lwb will reflect the last activity * for the zil. After a txg_wait_synced() on the txg we know all the * callbacks have occurred that may clean the zil. Only then can we * destroy the zl_clean_taskq. 
 */
	mutex_enter(&zilog->zl_lock);
	lwb = list_tail(&zilog->zl_lwb_list);
	if (lwb != NULL)
		txg = lwb->lwb_max_txg;
	mutex_exit(&zilog->zl_lock);
	if (txg)
		txg_wait_synced(zilog->zl_dmu_pool, txg);
-	ASSERT(!zilog_is_dirty(zilog));
+
+	if (zilog_is_dirty(zilog))
+		zfs_dbgmsg("zil (%p) is dirty, txg %llu", zilog, txg);
+	VERIFY(!zilog_is_dirty(zilog));

	taskq_destroy(zilog->zl_clean_taskq);
	zilog->zl_clean_taskq = NULL;
	zilog->zl_get_data = NULL;

	/*
	 * We should have only one LWB left on the list; remove it now.
	 */
	mutex_enter(&zilog->zl_lock);
	lwb = list_head(&zilog->zl_lwb_list);
	if (lwb != NULL) {
		ASSERT(lwb == list_tail(&zilog->zl_lwb_list));
		list_remove(&zilog->zl_lwb_list, lwb);
		zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
		kmem_cache_free(zil_lwb_cache, lwb);
	}
	mutex_exit(&zilog->zl_lock);
}

static char *suspend_tag = "zil suspending";

/*
 * Suspend an intent log.  While in suspended mode, we still honor
 * synchronous semantics, but we rely on txg_wait_synced() to do it.
 * On old version pools, we suspend the log briefly when taking a
 * snapshot so that it will have an empty intent log.
 *
 * Long holds are not really intended to be used the way we do here --
 * held for such a short time.  A concurrent caller of dsl_dataset_long_held()
 * could fail.  Therefore we take pains to only put a long hold if it is
 * actually necessary.  Fortunately, it will only be necessary if the
 * objset is currently mounted (or the ZVOL equivalent).  In that case it
 * will already have a long hold, so we are not really making things any worse.
 *
 * Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or
 * zvol_state_t), and use their mechanism to prevent their hold from being
 * dropped (e.g. VFS_HOLD()).  However, that would be even more pain for
 * very little gain.
 *
 * if cookiep == NULL, this does both the suspend & resume.
 * Otherwise, it returns with the dataset "long held", and the cookie
 * should be passed into zil_resume().
 */
int
zil_suspend(const char *osname, void **cookiep)
{
	objset_t *os;
	zilog_t *zilog;
	const zil_header_t *zh;
	int error;

	error = dmu_objset_hold(osname, suspend_tag, &os);
	if (error != 0)
		return (error);
	zilog = dmu_objset_zil(os);

	mutex_enter(&zilog->zl_lock);
	zh = zilog->zl_header;

	if (zh->zh_flags & ZIL_REPLAY_NEEDED) {		/* unplayed log */
		mutex_exit(&zilog->zl_lock);
		dmu_objset_rele(os, suspend_tag);
		return (SET_ERROR(EBUSY));
	}

	/*
	 * Don't put a long hold in the cases where we can avoid it.  This
	 * is when there is no cookie so we are doing a suspend & resume
	 * (i.e. called from zil_vdev_offline()), and there's nothing to do
	 * for the suspend because it's already suspended, or there's no ZIL.
	 */
	if (cookiep == NULL && !zilog->zl_suspending &&
	    (zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
		mutex_exit(&zilog->zl_lock);
		dmu_objset_rele(os, suspend_tag);
		return (0);
	}

	dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag);
	dsl_pool_rele(dmu_objset_pool(os), suspend_tag);

	zilog->zl_suspend++;

	if (zilog->zl_suspend > 1) {
		/*
		 * Someone else is already suspending it.
		 * Just wait for them to finish.
		 */
		while (zilog->zl_suspending)
			cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock);
		mutex_exit(&zilog->zl_lock);

		if (cookiep == NULL)
			zil_resume(os);
		else
			*cookiep = os;
		return (0);
	}

	/*
	 * If there is no pointer to an on-disk block, this ZIL must not
	 * be active (e.g. filesystem not mounted), so there's nothing
	 * to clean up.
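 *
 * (Aside: the dbgmsg-before-VERIFY pattern introduced in zil_close() above
 * generalizes.  A minimal sketch of a hypothetical wrapper -- note that it
 * re-evaluates the condition, which is fine for idempotent predicates:
 *
 *	#define	VERIFY_DBG(cond, fmt, ...) do {			\
 *		if (!(cond))					\
 *			zfs_dbgmsg(fmt, __VA_ARGS__);		\
 *		VERIFY(cond);					\
 *	} while (0)
 *
 * Usage: VERIFY_DBG(!zilog_is_dirty(zilog), "zil (%p) is dirty, txg %llu",
 * zilog, txg);)
 *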
*/ if (BP_IS_HOLE(&zh->zh_log)) { ASSERT(cookiep != NULL); /* fast path already handled */ *cookiep = os; mutex_exit(&zilog->zl_lock); return (0); } zilog->zl_suspending = B_TRUE; mutex_exit(&zilog->zl_lock); zil_commit(zilog, 0); zil_destroy(zilog, B_FALSE); mutex_enter(&zilog->zl_lock); zilog->zl_suspending = B_FALSE; cv_broadcast(&zilog->zl_cv_suspend); mutex_exit(&zilog->zl_lock); if (cookiep == NULL) zil_resume(os); else *cookiep = os; return (0); } void zil_resume(void *cookie) { objset_t *os = cookie; zilog_t *zilog = dmu_objset_zil(os); mutex_enter(&zilog->zl_lock); ASSERT(zilog->zl_suspend != 0); zilog->zl_suspend--; mutex_exit(&zilog->zl_lock); dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag); dsl_dataset_rele(dmu_objset_ds(os), suspend_tag); } typedef struct zil_replay_arg { zil_replay_func_t **zr_replay; void *zr_arg; boolean_t zr_byteswap; char *zr_lr; } zil_replay_arg_t; static int zil_replay_error(zilog_t *zilog, lr_t *lr, int error) { char name[ZFS_MAX_DATASET_NAME_LEN]; zilog->zl_replaying_seq--; /* didn't actually replay this one */ dmu_objset_name(zilog->zl_os, name); cmn_err(CE_WARN, "ZFS replay transaction error %d, " "dataset %s, seq 0x%llx, txtype %llu %s\n", error, name, (u_longlong_t)lr->lrc_seq, (u_longlong_t)(lr->lrc_txtype & ~TX_CI), (lr->lrc_txtype & TX_CI) ? "CI" : ""); return (error); } static int zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) { zil_replay_arg_t *zr = zra; const zil_header_t *zh = zilog->zl_header; uint64_t reclen = lr->lrc_reclen; uint64_t txtype = lr->lrc_txtype; int error = 0; zilog->zl_replaying_seq = lr->lrc_seq; if (lr->lrc_seq <= zh->zh_replay_seq) /* already replayed */ return (0); if (lr->lrc_txg < claim_txg) /* already committed */ return (0); /* Strip case-insensitive bit, still present in log record */ txtype &= ~TX_CI; if (txtype == 0 || txtype >= TX_MAX_TYPE) return (zil_replay_error(zilog, lr, EINVAL)); /* * If this record type can be logged out of order, the object * (lr_foid) may no longer exist. That's legitimate, not an error. */ if (TX_OOO(txtype)) { error = dmu_object_info(zilog->zl_os, ((lr_ooo_t *)lr)->lr_foid, NULL); if (error == ENOENT || error == EEXIST) return (0); } /* * Make a copy of the data so we can revise and extend it. */ bcopy(lr, zr->zr_lr, reclen); /* * If this is a TX_WRITE with a blkptr, suck in the data. */ if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) { error = zil_read_log_data(zilog, (lr_write_t *)lr, zr->zr_lr + reclen); if (error != 0) return (zil_replay_error(zilog, lr, error)); } /* * The log block containing this lr may have been byteswapped * so that we can easily examine common fields like lrc_txtype. * However, the log is a mix of different record types, and only the * replay vectors know how to byteswap their records. Therefore, if * the lr was byteswapped, undo it before invoking the replay vector. */ if (zr->zr_byteswap) byteswap_uint64_array(zr->zr_lr, reclen); /* * We must now do two things atomically: replay this log record, * and update the log header sequence number to reflect the fact that * we did so. At the end of each replay function the sequence number * is updated if we are in replay mode. */ error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap); if (error != 0) { /* * The DMU's dnode layer doesn't see removes until the txg * commits, so a subsequent claim can spuriously fail with * EEXIST. So if we receive any error we try syncing out * any removes then retry the transaction. 
Note that we * specify B_FALSE for byteswap now, so we don't do it twice. */ txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0); error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE); if (error != 0) return (zil_replay_error(zilog, lr, error)); } return (0); } /* ARGSUSED */ static int zil_incr_blks(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) { zilog->zl_replay_blks++; return (0); } /* * If this dataset has a non-empty intent log, replay it and destroy it. */ void zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) { zilog_t *zilog = dmu_objset_zil(os); const zil_header_t *zh = zilog->zl_header; zil_replay_arg_t zr; if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) { zil_destroy(zilog, B_TRUE); return; } zr.zr_replay = replay_func; zr.zr_arg = arg; zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); zr.zr_lr = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); /* * Wait for in-progress removes to sync before starting replay. */ txg_wait_synced(zilog->zl_dmu_pool, 0); zilog->zl_replay = B_TRUE; zilog->zl_replay_time = ddi_get_lbolt(); ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, zh->zh_claim_txg); kmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); zil_destroy(zilog, B_FALSE); txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); zilog->zl_replay = B_FALSE; } boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx) { if (zilog->zl_sync == ZFS_SYNC_DISABLED) return (B_TRUE); if (zilog->zl_replay) { dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = zilog->zl_replaying_seq; return (B_TRUE); } return (B_FALSE); } /* ARGSUSED */ int zil_vdev_offline(const char *osname, void *arg) { int error; error = zil_suspend(osname, NULL); if (error != 0) return (SET_ERROR(EEXIST)); return (0); } Index: projects/clang391-import/sys/cddl/contrib/opensolaris =================================================================== --- projects/clang391-import/sys/cddl/contrib/opensolaris (revision 309262) +++ projects/clang391-import/sys/cddl/contrib/opensolaris (revision 309263) Property changes on: projects/clang391-import/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,2 ## Merged /head/sys/cddl/contrib/opensolaris:r309166-309262 Merged /vendor-sys/illumos/dist:r309249 Index: projects/clang391-import/sys/dev/ahci/ahci.c =================================================================== --- projects/clang391-import/sys/dev/ahci/ahci.c (revision 309262) +++ projects/clang391-import/sys/dev/ahci/ahci.c (revision 309263) @@ -1,2737 +1,2733 @@ /*- * Copyright (c) 2009-2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ahci.h" #include #include #include #include #include /* local prototypes */ static void ahci_intr(void *data); static void ahci_intr_one(void *data); static void ahci_intr_one_edge(void *data); static int ahci_ch_init(device_t dev); static int ahci_ch_deinit(device_t dev); static int ahci_ch_suspend(device_t dev); static int ahci_ch_resume(device_t dev); static void ahci_ch_pm(void *arg); static void ahci_ch_intr(void *arg); static void ahci_ch_intr_direct(void *arg); static void ahci_ch_intr_main(struct ahci_channel *ch, uint32_t istatus); static void ahci_begin_transaction(struct ahci_channel *ch, union ccb *ccb); static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void ahci_execute_transaction(struct ahci_slot *slot); static void ahci_timeout(struct ahci_slot *slot); static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et); static int ahci_setup_fis(struct ahci_channel *ch, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag); static void ahci_dmainit(device_t dev); static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static void ahci_dmafini(device_t dev); static void ahci_slotsalloc(device_t dev); static void ahci_slotsfree(device_t dev); static void ahci_reset(struct ahci_channel *ch); static void ahci_start(struct ahci_channel *ch, int fbs); static void ahci_stop(struct ahci_channel *ch); static void ahci_clo(struct ahci_channel *ch); static void ahci_start_fr(struct ahci_channel *ch); static void ahci_stop_fr(struct ahci_channel *ch); static int ahci_sata_connect(struct ahci_channel *ch); static int ahci_sata_phy_reset(struct ahci_channel *ch); static int ahci_wait_ready(struct ahci_channel *ch, int t, int t0); static void ahci_issue_recovery(struct ahci_channel *ch); static void ahci_process_read_log(struct ahci_channel *ch, union ccb *ccb); static void ahci_process_request_sense(struct ahci_channel *ch, union ccb *ccb); static void ahciaction(struct cam_sim *sim, union ccb *ccb); static void ahcipoll(struct cam_sim *sim); static MALLOC_DEFINE(M_AHCI, "AHCI driver", "AHCI driver data buffers"); #define recovery_type spriv_field0 #define RECOVERY_NONE 0 #define RECOVERY_READ_LOG 1 #define RECOVERY_REQUEST_SENSE 2 #define recovery_slot spriv_field1 int ahci_ctlr_setup(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); /* Clear interrupts */ ATA_OUTL(ctlr->r_mem, AHCI_IS, ATA_INL(ctlr->r_mem, AHCI_IS)); /* Configure CCC */ if (ctlr->ccc) { ATA_OUTL(ctlr->r_mem, AHCI_CCCP, ATA_INL(ctlr->r_mem, AHCI_PI)); ATA_OUTL(ctlr->r_mem, AHCI_CCCC, (ctlr->ccc << AHCI_CCCC_TV_SHIFT) | (4 << AHCI_CCCC_CC_SHIFT) | AHCI_CCCC_EN); ctlr->cccv = (ATA_INL(ctlr->r_mem, AHCI_CCCC) & AHCI_CCCC_INT_MASK) >> AHCI_CCCC_INT_SHIFT; if (bootverbose) { device_printf(dev, "CCC with %dms/4cmd enabled on vector %d\n", ctlr->ccc, ctlr->cccv); 
} } /* Enable AHCI interrupts */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, ATA_INL(ctlr->r_mem, AHCI_GHC) | AHCI_GHC_IE); return (0); } int ahci_ctlr_reset(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int timeout; /* Enable AHCI mode */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE); /* Reset AHCI controller */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE|AHCI_GHC_HR); for (timeout = 1000; timeout > 0; timeout--) { DELAY(1000); if ((ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_HR) == 0) break; } if (timeout == 0) { device_printf(dev, "AHCI controller reset failure\n"); return (ENXIO); } /* Reenable AHCI mode */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE); if (ctlr->quirks & AHCI_Q_RESTORE_CAP) { /* * Restore capability field. * This is write to a read-only register to restore its state. * On fully standard-compliant hardware this is not needed and * this operation shall not take place. See ahci_pci.c for * platforms using this quirk. */ ATA_OUTL(ctlr->r_mem, AHCI_CAP, ctlr->caps); } return (0); } int ahci_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int error, i, speed, unit; uint32_t u, version; device_t child; ctlr->dev = dev; ctlr->ccc = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "ccc", &ctlr->ccc); /* Setup our own memory management for channels. */ ctlr->sc_iomem.rm_start = rman_get_start(ctlr->r_mem); ctlr->sc_iomem.rm_end = rman_get_end(ctlr->r_mem); ctlr->sc_iomem.rm_type = RMAN_ARRAY; ctlr->sc_iomem.rm_descr = "I/O memory addresses"; if ((error = rman_init(&ctlr->sc_iomem)) != 0) { ahci_free_mem(dev); return (error); } if ((error = rman_manage_region(&ctlr->sc_iomem, rman_get_start(ctlr->r_mem), rman_get_end(ctlr->r_mem))) != 0) { ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (error); } /* Get the HW capabilities */ version = ATA_INL(ctlr->r_mem, AHCI_VS); ctlr->caps = ATA_INL(ctlr->r_mem, AHCI_CAP); if (version >= 0x00010200) ctlr->caps2 = ATA_INL(ctlr->r_mem, AHCI_CAP2); if (ctlr->caps & AHCI_CAP_EMS) ctlr->capsem = ATA_INL(ctlr->r_mem, AHCI_EM_CTL); if (ctlr->quirks & AHCI_Q_FORCE_PI) { /* * Enable ports. * The spec says that BIOS sets up bits corresponding to * available ports. On platforms where this information * is missing, the driver can define available ports on its own. */ int nports = (ctlr->caps & AHCI_CAP_NPMASK) + 1; int nmask = (1 << nports) - 1; ATA_OUTL(ctlr->r_mem, AHCI_PI, nmask); device_printf(dev, "Forcing PI to %d ports (mask = %x)\n", nports, nmask); } ctlr->ichannels = ATA_INL(ctlr->r_mem, AHCI_PI); /* Identify and set separate quirks for HBA and RAID f/w Marvells. */ if ((ctlr->quirks & AHCI_Q_ALTSIG) && (ctlr->caps & AHCI_CAP_SPM) == 0) ctlr->quirks |= AHCI_Q_NOBSYRES; if (ctlr->quirks & AHCI_Q_1CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->ichannels &= 0x01; } if (ctlr->quirks & AHCI_Q_2CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->caps |= 1; ctlr->ichannels &= 0x03; } if (ctlr->quirks & AHCI_Q_4CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->caps |= 3; ctlr->ichannels &= 0x0f; } ctlr->channels = MAX(flsl(ctlr->ichannels), (ctlr->caps & AHCI_CAP_NPMASK) + 1); if (ctlr->quirks & AHCI_Q_NOPMP) ctlr->caps &= ~AHCI_CAP_SPM; if (ctlr->quirks & AHCI_Q_NONCQ) ctlr->caps &= ~AHCI_CAP_SNCQ; if ((ctlr->caps & AHCI_CAP_CCCS) == 0) ctlr->ccc = 0; ctlr->emloc = ATA_INL(ctlr->r_mem, AHCI_EM_LOC); /* Create controller-wide DMA tag. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, (ctlr->caps & AHCI_CAP_64BIT) ? 
BUS_SPACE_MAXADDR : BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, 0, NULL, NULL, &ctlr->dma_tag)) { ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (ENXIO); } ahci_ctlr_setup(dev); /* Setup interrupts. */ if ((error = ahci_setup_interrupt(dev)) != 0) { bus_dma_tag_destroy(ctlr->dma_tag); ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (error); } i = 0; for (u = ctlr->ichannels; u != 0; u >>= 1) i += (u & 1); ctlr->direct = (ctlr->msi && (ctlr->numirqs > 1 || i <= 3)); resource_int_value(device_get_name(dev), device_get_unit(dev), "direct", &ctlr->direct); /* Announce HW capabilities. */ speed = (ctlr->caps & AHCI_CAP_ISS) >> AHCI_CAP_ISS_SHIFT; device_printf(dev, "AHCI v%x.%02x with %d %sGbps ports, Port Multiplier %s%s\n", ((version >> 20) & 0xf0) + ((version >> 16) & 0x0f), ((version >> 4) & 0xf0) + (version & 0x0f), (ctlr->caps & AHCI_CAP_NPMASK) + 1, ((speed == 1) ? "1.5":((speed == 2) ? "3": ((speed == 3) ? "6":"?"))), (ctlr->caps & AHCI_CAP_SPM) ? "supported" : "not supported", (ctlr->caps & AHCI_CAP_FBSS) ? " with FBS" : ""); if (ctlr->quirks != 0) { device_printf(dev, "quirks=0x%b\n", ctlr->quirks, AHCI_Q_BIT_STRING); } if (bootverbose) { device_printf(dev, "Caps:%s%s%s%s%s%s%s%s %sGbps", (ctlr->caps & AHCI_CAP_64BIT) ? " 64bit":"", (ctlr->caps & AHCI_CAP_SNCQ) ? " NCQ":"", (ctlr->caps & AHCI_CAP_SSNTF) ? " SNTF":"", (ctlr->caps & AHCI_CAP_SMPS) ? " MPS":"", (ctlr->caps & AHCI_CAP_SSS) ? " SS":"", (ctlr->caps & AHCI_CAP_SALP) ? " ALP":"", (ctlr->caps & AHCI_CAP_SAL) ? " AL":"", (ctlr->caps & AHCI_CAP_SCLO) ? " CLO":"", ((speed == 1) ? "1.5":((speed == 2) ? "3": ((speed == 3) ? "6":"?")))); printf("%s%s%s%s%s%s %dcmd%s%s%s %dports\n", (ctlr->caps & AHCI_CAP_SAM) ? " AM":"", (ctlr->caps & AHCI_CAP_SPM) ? " PM":"", (ctlr->caps & AHCI_CAP_FBSS) ? " FBS":"", (ctlr->caps & AHCI_CAP_PMD) ? " PMD":"", (ctlr->caps & AHCI_CAP_SSC) ? " SSC":"", (ctlr->caps & AHCI_CAP_PSC) ? " PSC":"", ((ctlr->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1, (ctlr->caps & AHCI_CAP_CCCS) ? " CCC":"", (ctlr->caps & AHCI_CAP_EMS) ? " EM":"", (ctlr->caps & AHCI_CAP_SXS) ? " eSATA":"", (ctlr->caps & AHCI_CAP_NPMASK) + 1); } if (bootverbose && version >= 0x00010200) { device_printf(dev, "Caps2:%s%s%s%s%s%s\n", (ctlr->caps2 & AHCI_CAP2_DESO) ? " DESO":"", (ctlr->caps2 & AHCI_CAP2_SADM) ? " SADM":"", (ctlr->caps2 & AHCI_CAP2_SDS) ? " SDS":"", (ctlr->caps2 & AHCI_CAP2_APST) ? " APST":"", (ctlr->caps2 & AHCI_CAP2_NVMP) ? " NVMP":"", (ctlr->caps2 & AHCI_CAP2_BOH) ? " BOH":""); } /* Attach all channels on this controller */ for (unit = 0; unit < ctlr->channels; unit++) { child = device_add_child(dev, "ahcich", -1); if (child == NULL) { device_printf(dev, "failed to add channel device\n"); continue; } device_set_ivars(child, (void *)(intptr_t)unit); if ((ctlr->ichannels & (1 << unit)) == 0) device_disable(child); } if (ctlr->caps & AHCI_CAP_EMS) { child = device_add_child(dev, "ahciem", -1); if (child == NULL) device_printf(dev, "failed to add enclosure device\n"); else device_set_ivars(child, (void *)(intptr_t)-1); } bus_generic_attach(dev); return (0); } int ahci_detach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int i; /* Detach & delete all children */ device_delete_children(dev); /* Free interrupts. 
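 *
 * (Aside: the `for (u = ctlr->ichannels; u != 0; u >>= 1) i += (u & 1);'
 * loop in ahci_attach() above is an open-coded population count of the
 * implemented-ports mask.  An equivalent hypothetical helper, using
 * Kernighan's trick to iterate once per set bit instead:
 *
 *	static int
 *	popcount32(uint32_t v)
 *	{
 *		int n;
 *
 *		for (n = 0; v != 0; v &= v - 1)	// clears the lowest set bit
 *			n++;
 *		return (n);
 *	}
 * )
 *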
*/ for (i = 0; i < ctlr->numirqs; i++) { if (ctlr->irqs[i].r_irq) { bus_teardown_intr(dev, ctlr->irqs[i].r_irq, ctlr->irqs[i].handle); bus_release_resource(dev, SYS_RES_IRQ, ctlr->irqs[i].r_irq_rid, ctlr->irqs[i].r_irq); } } bus_dma_tag_destroy(ctlr->dma_tag); /* Free memory. */ rman_fini(&ctlr->sc_iomem); ahci_free_mem(dev); return (0); } void ahci_free_mem(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); /* Release memory resources */ if (ctlr->r_mem) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); if (ctlr->r_msix_table) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_msix_tab_rid, ctlr->r_msix_table); if (ctlr->r_msix_pba) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_msix_pba_rid, ctlr->r_msix_pba); ctlr->r_msix_pba = ctlr->r_mem = ctlr->r_msix_table = NULL; } int ahci_setup_interrupt(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int i; /* Check for single MSI vector fallback. */ if (ctlr->numirqs > 1 && (ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_MRSM) != 0) { device_printf(dev, "Falling back to one MSI\n"); ctlr->numirqs = 1; } /* Ensure we don't overrun irqs. */ if (ctlr->numirqs > AHCI_MAX_IRQS) { device_printf(dev, "Too many irqs %d > %d (clamping)\n", ctlr->numirqs, AHCI_MAX_IRQS); ctlr->numirqs = AHCI_MAX_IRQS; } /* Allocate all IRQs. */ for (i = 0; i < ctlr->numirqs; i++) { ctlr->irqs[i].ctlr = ctlr; ctlr->irqs[i].r_irq_rid = i + (ctlr->msi ? 1 : 0); if (ctlr->channels == 1 && !ctlr->ccc && ctlr->msi) ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE; else if (ctlr->numirqs == 1 || i >= ctlr->channels || (ctlr->ccc && i == ctlr->cccv)) ctlr->irqs[i].mode = AHCI_IRQ_MODE_ALL; else if (ctlr->channels > ctlr->numirqs && i == ctlr->numirqs - 1) ctlr->irqs[i].mode = AHCI_IRQ_MODE_AFTER; else ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE; if (!(ctlr->irqs[i].r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ctlr->irqs[i].r_irq_rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "unable to map interrupt\n"); return (ENXIO); } if ((bus_setup_intr(dev, ctlr->irqs[i].r_irq, ATA_INTR_FLAGS, NULL, (ctlr->irqs[i].mode != AHCI_IRQ_MODE_ONE) ? ahci_intr : ((ctlr->quirks & AHCI_Q_EDGEIS) ? ahci_intr_one_edge : ahci_intr_one), &ctlr->irqs[i], &ctlr->irqs[i].handle))) { /* SOS XXX release r_irq */ device_printf(dev, "unable to setup interrupt\n"); return (ENXIO); } if (ctlr->numirqs > 1) { bus_describe_intr(dev, ctlr->irqs[i].r_irq, ctlr->irqs[i].handle, ctlr->irqs[i].mode == AHCI_IRQ_MODE_ONE ? "ch%d" : "%d", i); } } return (0); } /* * Common case interrupt handler. */ static void ahci_intr(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; u_int32_t is, ise = 0; void *arg; int unit; if (irq->mode == AHCI_IRQ_MODE_ALL) { unit = 0; if (ctlr->ccc) is = ctlr->ichannels; else is = ATA_INL(ctlr->r_mem, AHCI_IS); } else { /* AHCI_IRQ_MODE_AFTER */ unit = irq->r_irq_rid - 1; is = ATA_INL(ctlr->r_mem, AHCI_IS); is &= (0xffffffff << unit); } /* CCC interrupt is edge triggered. */ if (ctlr->ccc) ise = 1 << ctlr->cccv; /* Some controllers have edge triggered IS. */ if (ctlr->quirks & AHCI_Q_EDGEIS) ise |= is; if (ise != 0) ATA_OUTL(ctlr->r_mem, AHCI_IS, ise); for (; unit < ctlr->channels; unit++) { if ((is & (1 << unit)) != 0 && (arg = ctlr->interrupt[unit].argument)) { ctlr->interrupt[unit].function(arg); } } /* AHCI declares level triggered IS. 
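 *
 * (Aside: the trailing ATA_RBL() read-back below is presumably the common
 * MMIO idiom of forcing a posted write out to the device before the
 * handler returns.  In isolation, with hypothetical register names:
 *
 *	ATA_OUTL(mem, REG_IS, bits);	// clear status; write may be posted
 *	(void) ATA_RBL(mem, REG_IS);	// read back to push it to hardware
 * )
 *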
*/ if (!(ctlr->quirks & AHCI_Q_EDGEIS)) ATA_OUTL(ctlr->r_mem, AHCI_IS, is); ATA_RBL(ctlr->r_mem, AHCI_IS); } /* * Simplified interrupt handler for multivector MSI mode. */ static void ahci_intr_one(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; void *arg; int unit; unit = irq->r_irq_rid - 1; if ((arg = ctlr->interrupt[unit].argument)) ctlr->interrupt[unit].function(arg); /* AHCI declares level triggered IS. */ ATA_OUTL(ctlr->r_mem, AHCI_IS, 1 << unit); ATA_RBL(ctlr->r_mem, AHCI_IS); } static void ahci_intr_one_edge(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; void *arg; int unit; unit = irq->r_irq_rid - 1; /* Some controllers have edge triggered IS. */ ATA_OUTL(ctlr->r_mem, AHCI_IS, 1 << unit); if ((arg = ctlr->interrupt[unit].argument)) ctlr->interrupt[unit].function(arg); ATA_RBL(ctlr->r_mem, AHCI_IS); } struct resource * ahci_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct ahci_controller *ctlr = device_get_softc(dev); struct resource *res; rman_res_t st; int offset, size, unit; unit = (intptr_t)device_get_ivars(child); res = NULL; switch (type) { case SYS_RES_MEMORY: if (unit >= 0) { offset = AHCI_OFFSET + (unit << 7); size = 128; } else if (*rid == 0) { offset = AHCI_EM_CTL; size = 4; } else { offset = (ctlr->emloc & 0xffff0000) >> 14; size = (ctlr->emloc & 0x0000ffff) << 2; if (*rid != 1) { if (*rid == 2 && (ctlr->capsem & (AHCI_EM_XMT | AHCI_EM_SMB)) == 0) offset += size; else break; } } st = rman_get_start(ctlr->r_mem); res = rman_reserve_resource(&ctlr->sc_iomem, st + offset, st + offset + size - 1, size, RF_ACTIVE, child); if (res) { bus_space_handle_t bsh; bus_space_tag_t bst; bsh = rman_get_bushandle(ctlr->r_mem); bst = rman_get_bustag(ctlr->r_mem); bus_space_subregion(bst, bsh, offset, 128, &bsh); rman_set_bushandle(res, bsh); rman_set_bustag(res, bst); } break; case SYS_RES_IRQ: if (*rid == ATA_IRQ_RID) res = ctlr->irqs[0].r_irq; break; } return (res); } int ahci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { switch (type) { case SYS_RES_MEMORY: rman_release_resource(r); return (0); case SYS_RES_IRQ: if (rid != ATA_IRQ_RID) return (ENOENT); return (0); } return (EINVAL); } int ahci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_filter_t *filter, driver_intr_t *function, void *argument, void **cookiep) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child); if (filter != NULL) { printf("ahci.c: we cannot use a filter here\n"); return (EINVAL); } ctlr->interrupt[unit].function = function; ctlr->interrupt[unit].argument = argument; return (0); } int ahci_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child); ctlr->interrupt[unit].function = NULL; ctlr->interrupt[unit].argument = NULL; return (0); } int ahci_print_child(device_t dev, device_t child) { int retval, channel; retval = bus_print_child_header(dev, child); channel = (int)(intptr_t)device_get_ivars(child); if (channel >= 0) retval += printf(" at channel %d", channel); retval += bus_print_child_footer(dev, child); return (retval); } int ahci_child_location_str(device_t dev, device_t child, char *buf, size_t buflen) { int channel; channel = (int)(intptr_t)device_get_ivars(child); if (channel 
>= 0) snprintf(buf, buflen, "channel=%d", channel); return (0); } bus_dma_tag_t ahci_get_dma_tag(device_t dev, device_t child) { struct ahci_controller *ctlr = device_get_softc(dev); return (ctlr->dma_tag); } static int ahci_ch_probe(device_t dev) { device_set_desc_copy(dev, "AHCI channel"); return (BUS_PROBE_DEFAULT); } static int ahci_ch_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(device_get_parent(dev)); struct ahci_channel *ch = device_get_softc(dev); struct cam_devq *devq; int rid, error, i, sata_rev = 0; u_int32_t version; ch->dev = dev; ch->unit = (intptr_t)device_get_ivars(dev); ch->caps = ctlr->caps; ch->caps2 = ctlr->caps2; ch->start = ctlr->ch_start; ch->quirks = ctlr->quirks; ch->vendorid = ctlr->vendorid; ch->deviceid = ctlr->deviceid; ch->subvendorid = ctlr->subvendorid; ch->subdeviceid = ctlr->subdeviceid; ch->numslots = ((ch->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1; mtx_init(&ch->mtx, "AHCI channel lock", NULL, MTX_DEF); ch->pm_level = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "pm_level", &ch->pm_level); STAILQ_INIT(&ch->doneq); if (ch->pm_level > 3) callout_init_mtx(&ch->pm_timer, &ch->mtx, 0); callout_init_mtx(&ch->reset_timer, &ch->mtx, 0); /* JMicron external ports (0) sometimes limited */ if ((ctlr->quirks & AHCI_Q_SATA1_UNIT0) && ch->unit == 0) sata_rev = 1; if (ch->quirks & AHCI_Q_SATA2) sata_rev = 2; resource_int_value(device_get_name(dev), device_get_unit(dev), "sata_rev", &sata_rev); for (i = 0; i < 16; i++) { ch->user[i].revision = sata_rev; ch->user[i].mode = 0; ch->user[i].bytecount = 8192; ch->user[i].tags = ch->numslots; ch->user[i].caps = 0; ch->curr[i] = ch->user[i]; if (ch->pm_level) { ch->user[i].caps = CTS_SATA_CAPS_H_PMREQ | CTS_SATA_CAPS_H_APST | CTS_SATA_CAPS_D_PMREQ | CTS_SATA_CAPS_D_APST; } ch->user[i].caps |= CTS_SATA_CAPS_H_DMAAA | CTS_SATA_CAPS_H_AN; } rid = 0; if (!(ch->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE))) return (ENXIO); ch->chcaps = ATA_INL(ch->r_mem, AHCI_P_CMD); version = ATA_INL(ctlr->r_mem, AHCI_VS); if (version < 0x00010200 && (ctlr->caps & AHCI_CAP_FBSS)) ch->chcaps |= AHCI_P_CMD_FBSCP; if (ch->caps2 & AHCI_CAP2_SDS) ch->chscaps = ATA_INL(ch->r_mem, AHCI_P_DEVSLP); if (bootverbose) { device_printf(dev, "Caps:%s%s%s%s%s%s\n", (ch->chcaps & AHCI_P_CMD_HPCP) ? " HPCP":"", (ch->chcaps & AHCI_P_CMD_MPSP) ? " MPSP":"", (ch->chcaps & AHCI_P_CMD_CPD) ? " CPD":"", (ch->chcaps & AHCI_P_CMD_ESP) ? " ESP":"", (ch->chcaps & AHCI_P_CMD_FBSCP) ? " FBSCP":"", (ch->chscaps & AHCI_P_DEVSLP_DSP) ? " DSP":""); } ahci_dmainit(dev); ahci_slotsalloc(dev); mtx_lock(&ch->mtx); ahci_ch_init(dev); rid = ATA_IRQ_RID; if (!(ch->r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "Unable to map interrupt\n"); error = ENXIO; goto err0; } if ((bus_setup_intr(dev, ch->r_irq, ATA_INTR_FLAGS, NULL, ctlr->direct ? ahci_ch_intr_direct : ahci_ch_intr, ch, &ch->ih))) { device_printf(dev, "Unable to setup interrupt\n"); error = ENXIO; goto err1; } /* Create the device queue for our SIM. */ devq = cam_simq_alloc(ch->numslots); if (devq == NULL) { device_printf(dev, "Unable to allocate simq\n"); error = ENOMEM; goto err1; } /* Construct SIM entry */ ch->sim = cam_sim_alloc(ahciaction, ahcipoll, "ahcich", ch, device_get_unit(dev), (struct mtx *)&ch->mtx, min(2, ch->numslots), (ch->caps & AHCI_CAP_SNCQ) ? 
ch->numslots : 0, devq); if (ch->sim == NULL) { cam_simq_free(devq); device_printf(dev, "unable to allocate sim\n"); error = ENOMEM; goto err1; } if (xpt_bus_register(ch->sim, dev, 0) != CAM_SUCCESS) { device_printf(dev, "unable to register xpt bus\n"); error = ENXIO; goto err2; } if (xpt_create_path(&ch->path, /*periph*/NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { device_printf(dev, "unable to create path\n"); error = ENXIO; goto err3; } if (ch->pm_level > 3) { callout_reset(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8, ahci_ch_pm, ch); } mtx_unlock(&ch->mtx); return (0); err3: xpt_bus_deregister(cam_sim_path(ch->sim)); err2: cam_sim_free(ch->sim, /*free_devq*/TRUE); err1: bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); err0: bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_unlock(&ch->mtx); mtx_destroy(&ch->mtx); return (error); } static int ahci_ch_detach(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); xpt_async(AC_LOST_DEVICE, ch->path, NULL); /* Forget about reset. */ if (ch->resetting) { ch->resetting = 0; xpt_release_simq(ch->sim, TRUE); } xpt_free_path(ch->path); xpt_bus_deregister(cam_sim_path(ch->sim)); cam_sim_free(ch->sim, /*free_devq*/TRUE); mtx_unlock(&ch->mtx); if (ch->pm_level > 3) callout_drain(&ch->pm_timer); callout_drain(&ch->reset_timer); bus_teardown_intr(dev, ch->r_irq, ch->ih); bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); ahci_ch_deinit(dev); ahci_slotsfree(dev); ahci_dmafini(dev); bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_destroy(&ch->mtx); return (0); } static int ahci_ch_init(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); uint64_t work; /* Disable port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Setup work areas */ work = ch->dma.work_bus + AHCI_CL_OFFSET; ATA_OUTL(ch->r_mem, AHCI_P_CLB, work & 0xffffffff); ATA_OUTL(ch->r_mem, AHCI_P_CLBU, work >> 32); work = ch->dma.rfis_bus; ATA_OUTL(ch->r_mem, AHCI_P_FB, work & 0xffffffff); ATA_OUTL(ch->r_mem, AHCI_P_FBU, work >> 32); /* Activate the channel and power/spin up device */ ATA_OUTL(ch->r_mem, AHCI_P_CMD, (AHCI_P_CMD_ACTIVE | AHCI_P_CMD_POD | AHCI_P_CMD_SUD | ((ch->pm_level == 2 || ch->pm_level == 3) ? AHCI_P_CMD_ALPE : 0) | ((ch->pm_level > 2) ? AHCI_P_CMD_ASP : 0 ))); ahci_start_fr(ch); ahci_start(ch, 1); return (0); } static int ahci_ch_deinit(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); /* Disable port interrupts. */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Reset command register. */ ahci_stop(ch); ahci_stop_fr(ch); ATA_OUTL(ch->r_mem, AHCI_P_CMD, 0); /* Allow everything, including partial and slumber modes. */ ATA_OUTL(ch->r_mem, AHCI_P_SCTL, 0); /* Request slumber mode transition and give some time to get there. */ ATA_OUTL(ch->r_mem, AHCI_P_CMD, AHCI_P_CMD_SLUMBER); DELAY(100); /* Disable PHY. */ ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE); return (0); } static int ahci_ch_suspend(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); xpt_freeze_simq(ch->sim, 1); /* Forget about reset. 
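 *
 * (Aside: ahci_ch_init() above programs the 64-bit command-list and FIS
 * receive bases as low/high 32-bit halves.  The split, as a hypothetical
 * helper:
 *
 *	static void
 *	ahci_set_addr64(struct ahci_channel *ch, int lo, int hi, uint64_t a)
 *	{
 *		ATA_OUTL(ch->r_mem, lo, a & 0xffffffff);  // e.g. AHCI_P_CLB
 *		ATA_OUTL(ch->r_mem, hi, a >> 32);	  // e.g. AHCI_P_CLBU
 *	}
 * )
 *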
*/ if (ch->resetting) { ch->resetting = 0; callout_stop(&ch->reset_timer); xpt_release_simq(ch->sim, TRUE); } while (ch->oslots) msleep(ch, &ch->mtx, PRIBIO, "ahcisusp", hz/100); ahci_ch_deinit(dev); mtx_unlock(&ch->mtx); return (0); } static int ahci_ch_resume(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); ahci_ch_init(dev); ahci_reset(ch); xpt_release_simq(ch->sim, TRUE); mtx_unlock(&ch->mtx); return (0); } devclass_t ahcich_devclass; static device_method_t ahcich_methods[] = { DEVMETHOD(device_probe, ahci_ch_probe), DEVMETHOD(device_attach, ahci_ch_attach), DEVMETHOD(device_detach, ahci_ch_detach), DEVMETHOD(device_suspend, ahci_ch_suspend), DEVMETHOD(device_resume, ahci_ch_resume), DEVMETHOD_END }; static driver_t ahcich_driver = { "ahcich", ahcich_methods, sizeof(struct ahci_channel) }; DRIVER_MODULE(ahcich, ahci, ahcich_driver, ahcich_devclass, NULL, NULL); struct ahci_dc_cb_args { bus_addr_t maddr; int error; }; static void ahci_dmainit(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); struct ahci_dc_cb_args dcba; size_t rfsize; /* Command area. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1024, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, AHCI_WORK_SIZE, 1, AHCI_WORK_SIZE, 0, NULL, NULL, &ch->dma.work_tag)) goto error; if (bus_dmamem_alloc(ch->dma.work_tag, (void **)&ch->dma.work, BUS_DMA_ZERO, &ch->dma.work_map)) goto error; if (bus_dmamap_load(ch->dma.work_tag, ch->dma.work_map, ch->dma.work, AHCI_WORK_SIZE, ahci_dmasetupc_cb, &dcba, 0) || dcba.error) { bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map); goto error; } ch->dma.work_bus = dcba.maddr; /* FIS receive area. */ if (ch->chcaps & AHCI_P_CMD_FBSCP) rfsize = 4096; else rfsize = 256; if (bus_dma_tag_create(bus_get_dma_tag(dev), rfsize, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, rfsize, 1, rfsize, 0, NULL, NULL, &ch->dma.rfis_tag)) goto error; if (bus_dmamem_alloc(ch->dma.rfis_tag, (void **)&ch->dma.rfis, 0, &ch->dma.rfis_map)) goto error; if (bus_dmamap_load(ch->dma.rfis_tag, ch->dma.rfis_map, ch->dma.rfis, rfsize, ahci_dmasetupc_cb, &dcba, 0) || dcba.error) { bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map); goto error; } ch->dma.rfis_bus = dcba.maddr; /* Data area. 
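 *
 * (The command and FIS areas above follow the canonical busdma sequence:
 * create a tag describing the constraints, allocate memory against it,
 * then load the map to learn the bus address.  Schematically, with
 * hypothetical align/size values:
 *
 *	bus_dma_tag_t tag;
 *	bus_dmamap_t map;
 *	void *va;
 *	struct ahci_dc_cb_args cb;
 *
 *	if (bus_dma_tag_create(bus_get_dma_tag(dev), align, 0,
 *	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    size, 1, size, 0, NULL, NULL, &tag))
 *		goto error;				// 1: constraints
 *	if (bus_dmamem_alloc(tag, &va, BUS_DMA_ZERO, &map))
 *		goto error;				// 2: matching memory
 *	if (bus_dmamap_load(tag, map, va, size,
 *	    ahci_dmasetupc_cb, &cb, 0) || cb.error)
 *		goto error;				// 3: bus address
 *	// cb.maddr now holds the device-visible address
 *
 * The matching unwind on failure is what ahci_dmafini() below performs.)
 *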
*/ if (bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, AHCI_SG_ENTRIES * PAGE_SIZE * ch->numslots, AHCI_SG_ENTRIES, AHCI_PRD_MAX, 0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag)) { goto error; } return; error: device_printf(dev, "WARNING - DMA initialization failed\n"); ahci_dmafini(dev); } static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct ahci_dc_cb_args *dcba = (struct ahci_dc_cb_args *)xsc; if (!(dcba->error = error)) dcba->maddr = segs[0].ds_addr; } static void ahci_dmafini(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); if (ch->dma.data_tag) { bus_dma_tag_destroy(ch->dma.data_tag); ch->dma.data_tag = NULL; } if (ch->dma.rfis_bus) { bus_dmamap_unload(ch->dma.rfis_tag, ch->dma.rfis_map); bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map); ch->dma.rfis_bus = 0; ch->dma.rfis = NULL; } if (ch->dma.work_bus) { bus_dmamap_unload(ch->dma.work_tag, ch->dma.work_map); bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map); ch->dma.work_bus = 0; ch->dma.work = NULL; } if (ch->dma.work_tag) { bus_dma_tag_destroy(ch->dma.work_tag); ch->dma.work_tag = NULL; } } static void ahci_slotsalloc(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; /* Alloc and setup command/dma slots */ bzero(ch->slot, sizeof(ch->slot)); for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; slot->ch = ch; slot->slot = i; slot->state = AHCI_SLOT_EMPTY; slot->ccb = NULL; callout_init_mtx(&slot->timeout, &ch->mtx, 0); if (bus_dmamap_create(ch->dma.data_tag, 0, &slot->dma.data_map)) device_printf(ch->dev, "FAILURE - create data_map\n"); } } static void ahci_slotsfree(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; /* Free all dma slots */ for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; callout_drain(&slot->timeout); if (slot->dma.data_map) { bus_dmamap_destroy(ch->dma.data_tag, slot->dma.data_map); slot->dma.data_map = NULL; } } } static int ahci_phy_check_events(struct ahci_channel *ch, u_int32_t serr) { if (((ch->pm_level == 0) && (serr & ATA_SE_PHY_CHANGED)) || ((ch->pm_level != 0 || ch->listening) && (serr & ATA_SE_EXCHANGED))) { u_int32_t status = ATA_INL(ch->r_mem, AHCI_P_SSTS); union ccb *ccb; if (bootverbose) { if ((status & ATA_SS_DET_MASK) != ATA_SS_DET_NO_DEVICE) device_printf(ch->dev, "CONNECT requested\n"); else device_printf(ch->dev, "DISCONNECT requested\n"); } ahci_reset(ch); if ((ccb = xpt_alloc_ccb_nowait()) == NULL) return (0); if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); return (0); } xpt_rescan(ccb); return (1); } return (0); } static void ahci_cpd_check_events(struct ahci_channel *ch) { u_int32_t status; union ccb *ccb; device_t dev; if (ch->pm_level == 0) return; status = ATA_INL(ch->r_mem, AHCI_P_CMD); if ((status & AHCI_P_CMD_CPD) == 0) return; if (bootverbose) { dev = ch->dev; if (status & AHCI_P_CMD_CPS) { device_printf(dev, "COLD CONNECT requested\n"); } else device_printf(dev, "COLD DISCONNECT requested\n"); } ahci_reset(ch); if ((ccb = xpt_alloc_ccb_nowait()) == NULL) return; if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); return; } xpt_rescan(ccb); } static void ahci_notify_events(struct ahci_channel *ch, u_int32_t status) { struct cam_path *dpath; int i; if (ch->caps & 
AHCI_CAP_SSNTF)
		ATA_OUTL(ch->r_mem, AHCI_P_SNTF, status);
	if (bootverbose)
		device_printf(ch->dev, "SNTF 0x%04x\n", status);
	for (i = 0; i < 16; i++) {
		if ((status & (1 << i)) == 0)
			continue;
		if (xpt_create_path(&dpath, NULL,
		    xpt_path_path_id(ch->path), i, 0) == CAM_REQ_CMP) {
			xpt_async(AC_SCSI_AEN, dpath, NULL);
			xpt_free_path(dpath);
		}
	}
}

static void
ahci_done(struct ahci_channel *ch, union ccb *ccb)
{

	mtx_assert(&ch->mtx, MA_OWNED);
	if ((ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0 ||
	    ch->batch == 0) {
		xpt_done(ccb);
		return;
	}

	STAILQ_INSERT_TAIL(&ch->doneq, &ccb->ccb_h, sim_links.stqe);
}

static void
ahci_ch_intr(void *arg)
{
	struct ahci_channel *ch = (struct ahci_channel *)arg;
	uint32_t istatus;

	/* Read interrupt statuses. */
	istatus = ATA_INL(ch->r_mem, AHCI_P_IS);
-	if (istatus == 0)
-		return;

	mtx_lock(&ch->mtx);
	ahci_ch_intr_main(ch, istatus);
	mtx_unlock(&ch->mtx);
}

static void
ahci_ch_intr_direct(void *arg)
{
	struct ahci_channel *ch = (struct ahci_channel *)arg;
	struct ccb_hdr *ccb_h;
	uint32_t istatus;
	STAILQ_HEAD(, ccb_hdr) tmp_doneq = STAILQ_HEAD_INITIALIZER(tmp_doneq);

	/* Read interrupt statuses. */
	istatus = ATA_INL(ch->r_mem, AHCI_P_IS);
-	if (istatus == 0)
-		return;

	mtx_lock(&ch->mtx);
	ch->batch = 1;
	ahci_ch_intr_main(ch, istatus);
	ch->batch = 0;
	/*
	 * Prevent the possibility of issues caused by processing the queue
	 * while unlocked below by moving the contents to a local queue.
	 */
	STAILQ_CONCAT(&tmp_doneq, &ch->doneq);
	mtx_unlock(&ch->mtx);
	while ((ccb_h = STAILQ_FIRST(&tmp_doneq)) != NULL) {
		STAILQ_REMOVE_HEAD(&tmp_doneq, sim_links.stqe);
		xpt_done_direct((union ccb *)ccb_h);
	}
}

static void
ahci_ch_pm(void *arg)
{
	struct ahci_channel *ch = (struct ahci_channel *)arg;
	uint32_t work;

	if (ch->numrslots != 0)
		return;
	work = ATA_INL(ch->r_mem, AHCI_P_CMD);
	if (ch->pm_level == 4)
		work |= AHCI_P_CMD_PARTIAL;
	else
		work |= AHCI_P_CMD_SLUMBER;
	ATA_OUTL(ch->r_mem, AHCI_P_CMD, work);
}

static void
ahci_ch_intr_main(struct ahci_channel *ch, uint32_t istatus)
{
	uint32_t cstatus, serr = 0, sntf = 0, ok, err;
	enum ahci_err_type et;
	int i, ccs, port, reset = 0;

	/* Clear interrupt statuses. */
	ATA_OUTL(ch->r_mem, AHCI_P_IS, istatus);
	/* Read command statuses. */
	if (ch->numtslots != 0)
		cstatus = ATA_INL(ch->r_mem, AHCI_P_SACT);
	else
		cstatus = 0;
	if (ch->numrslots != ch->numtslots)
		cstatus |= ATA_INL(ch->r_mem, AHCI_P_CI);

	/*
	 * Read SNTF in one of possible ways.
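	 *
	 * For reference, each 256-byte per-port slot of the FBS receive
	 * area is laid out per the AHCI spec (offsets from the slot base;
	 * the scan below relies on the 0x58 offset and 256-byte stride):
	 *
	 *	+0x00	DMA Setup FIS
	 *	+0x20	PIO Setup FIS
	 *	+0x40	D2H Register FIS
	 *	+0x58	Set Device Bits FIS	(notify bit is fis[1] & 0x80)
	 *	+0x60	Unknown FIS
	 *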
*/ if ((istatus & AHCI_P_IX_SDB) && (ch->pm_present || ch->curr[0].atapi != 0)) { if (ch->caps & AHCI_CAP_SSNTF) sntf = ATA_INL(ch->r_mem, AHCI_P_SNTF); else if (ch->fbs_enabled) { u_int8_t *fis = ch->dma.rfis + 0x58; for (i = 0; i < 16; i++) { if (fis[1] & 0x80) { fis[1] &= 0x7f; sntf |= 1 << i; } fis += 256; } } else { u_int8_t *fis = ch->dma.rfis + 0x58; if (fis[1] & 0x80) sntf = (1 << (fis[1] & 0x0f)); } } /* Process PHY events */ if (istatus & (AHCI_P_IX_PC | AHCI_P_IX_PRC | AHCI_P_IX_OF | AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) { serr = ATA_INL(ch->r_mem, AHCI_P_SERR); if (serr) { ATA_OUTL(ch->r_mem, AHCI_P_SERR, serr); reset = ahci_phy_check_events(ch, serr); } } /* Process cold presence detection events */ if ((istatus & AHCI_P_IX_CPD) && !reset) ahci_cpd_check_events(ch); /* Process command errors */ if (istatus & (AHCI_P_IX_OF | AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) { ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; //device_printf(dev, "%s ERROR is %08x cs %08x ss %08x rs %08x tfd %02x serr %08x fbs %08x ccs %d\n", // __func__, istatus, cstatus, sstatus, ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), // serr, ATA_INL(ch->r_mem, AHCI_P_FBS), ccs); port = -1; if (ch->fbs_enabled) { uint32_t fbs = ATA_INL(ch->r_mem, AHCI_P_FBS); if (fbs & AHCI_P_FBS_SDE) { port = (fbs & AHCI_P_FBS_DWE) >> AHCI_P_FBS_DWE_SHIFT; } else { for (i = 0; i < 16; i++) { if (ch->numrslotspd[i] == 0) continue; if (port == -1) port = i; else if (port != i) { port = -2; break; } } } } err = ch->rslots & cstatus; } else { ccs = 0; err = 0; port = -1; } /* Complete all successful commands. */ ok = ch->rslots & ~cstatus; for (i = 0; i < ch->numslots; i++) { if ((ok >> i) & 1) ahci_end_transaction(&ch->slot[i], AHCI_ERR_NONE); } /* On error, complete the rest of commands with error statuses. */ if (err) { if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } for (i = 0; i < ch->numslots; i++) { /* XXX: reqests in loading state. */ if (((err >> i) & 1) == 0) continue; if (port >= 0 && ch->slot[i].ccb->ccb_h.target_id != port) continue; if (istatus & AHCI_P_IX_TFE) { if (port != -2) { /* Task File Error */ if (ch->numtslotspd[ ch->slot[i].ccb->ccb_h.target_id] == 0) { /* Untagged operation. */ if (i == ccs) et = AHCI_ERR_TFE; else et = AHCI_ERR_INNOCENT; } else { /* Tagged operation. */ et = AHCI_ERR_NCQ; } } else { et = AHCI_ERR_TFE; ch->fatalerr = 1; } } else if (istatus & AHCI_P_IX_IF) { if (ch->numtslots == 0 && i != ccs && port != -2) et = AHCI_ERR_INNOCENT; else et = AHCI_ERR_SATA; } else et = AHCI_ERR_INVALID; ahci_end_transaction(&ch->slot[i], et); } /* * We can't reinit port if there are some other * commands active, use resume to complete them. */ if (ch->rslots != 0 && !ch->recoverycmd) ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN | AHCI_P_FBS_DEC); } /* Process NOTIFY events */ if (sntf) ahci_notify_events(ch, sntf); } /* Must be called with channel locked. */ static int ahci_check_collision(struct ahci_channel *ch, union ccb *ccb) { int t = ccb->ccb_h.target_id; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { /* Tagged command while we have no supported tag free. 
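 *
 * A worked example of the mask test below, with hypothetical values:
 *
 *	uint32_t oslots = 0x0000001f;			// slots 0..4 occupied
 *	int tags = 5;					// device allows 5 tags
 *	uint32_t usable = 0xffffffffu >> (32 - tags);	// == 0x0000001f
 *	int full = ((~oslots) & usable) == 0;		// 1 -> hold the ccb
 *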
*/ if (((~ch->oslots) & (0xffffffff >> (32 - ch->curr[t].tags))) == 0) return (1); /* If we have FBS */ if (ch->fbs_enabled) { /* Tagged command while untagged are active. */ if (ch->numrslotspd[t] != 0 && ch->numtslotspd[t] == 0) return (1); } else { /* Tagged command while untagged are active. */ if (ch->numrslots != 0 && ch->numtslots == 0) return (1); /* Tagged command while tagged to other target is active. */ if (ch->numtslots != 0 && ch->taggedtarget != ccb->ccb_h.target_id) return (1); } } else { /* If we have FBS */ if (ch->fbs_enabled) { /* Untagged command while tagged are active. */ if (ch->numrslotspd[t] != 0 && ch->numtslotspd[t] != 0) return (1); } else { /* Untagged command while tagged are active. */ if (ch->numrslots != 0 && ch->numtslots != 0) return (1); } } if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT))) { /* Atomic command while anything active. */ if (ch->numrslots != 0) return (1); } /* We have some atomic command running. */ if (ch->aslots != 0) return (1); return (0); } /* Must be called with channel locked. */ static void ahci_begin_transaction(struct ahci_channel *ch, union ccb *ccb) { struct ahci_slot *slot; int tag, tags; /* Choose empty slot. */ tags = ch->numslots; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) tags = ch->curr[ccb->ccb_h.target_id].tags; if (ch->lastslot + 1 < tags) tag = ffs(~(ch->oslots >> (ch->lastslot + 1))); else tag = 0; if (tag == 0 || tag + ch->lastslot >= tags) tag = ffs(~ch->oslots) - 1; else tag += ch->lastslot; ch->lastslot = tag; /* Occupy chosen slot. */ slot = &ch->slot[tag]; slot->ccb = ccb; /* Stop PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3) callout_stop(&ch->pm_timer); /* Update channel stats. */ ch->oslots |= (1 << tag); ch->numrslots++; ch->numrslotspd[ccb->ccb_h.target_id]++; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ch->numtslots++; ch->numtslotspd[ccb->ccb_h.target_id]++; ch->taggedtarget = ccb->ccb_h.target_id; } if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT))) ch->aslots |= (1 << tag); if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { slot->state = AHCI_SLOT_LOADING; bus_dmamap_load_ccb(ch->dma.data_tag, slot->dma.data_map, ccb, ahci_dmasetprd, slot, 0); } else { slot->dma.nsegs = 0; ahci_execute_transaction(slot); } } /* Locked by busdma engine. */ static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct ahci_slot *slot = arg; struct ahci_channel *ch = slot->ch; struct ahci_cmd_tab *ctp; struct ahci_dma_prd *prd; int i; if (error) { device_printf(ch->dev, "DMA load error\n"); ahci_end_transaction(slot, AHCI_ERR_INVALID); return; } KASSERT(nsegs <= AHCI_SG_ENTRIES, ("too many DMA segment entries\n")); /* Get a piece of the workspace for this request */ ctp = (struct ahci_cmd_tab *) (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot)); /* Fill S/G table */ prd = &ctp->prd_tab[0]; for (i = 0; i < nsegs; i++) { prd[i].dba = htole64(segs[i].ds_addr); prd[i].dbc = htole32((segs[i].ds_len - 1) & AHCI_PRD_MASK); } slot->dma.nsegs = nsegs; bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, ((slot->ccb->ccb_h.flags & CAM_DIR_IN) ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE)); ahci_execute_transaction(slot); } /* Must be called with channel locked. 
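 *
 * (Aside: the S/G descriptors filled in ahci_dmasetprd() above are
 * little-endian per AHCI regardless of host byte order, hence the
 * htole64()/htole32() conversions.  One entry, schematically:
 *
 *	bus_dma_segment_t seg;		// hypothetical single segment
 *	struct ahci_dma_prd prd;
 *
 *	prd.dba = htole64(seg.ds_addr);			// data base address
 *	prd.dbc = htole32((seg.ds_len - 1) & AHCI_PRD_MASK); // bytes - 1
 * )
 *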
 */
static void
ahci_execute_transaction(struct ahci_slot *slot)
{
	struct ahci_channel *ch = slot->ch;
	struct ahci_cmd_tab *ctp;
	struct ahci_cmd_list *clp;
	union ccb *ccb = slot->ccb;
	int port = ccb->ccb_h.target_id & 0x0f;
	int fis_size, i, softreset;
	uint8_t *fis = ch->dma.rfis + 0x40;
	uint8_t val;

	/* Get a piece of the workspace for this request */
	ctp = (struct ahci_cmd_tab *)
	    (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot));
	/* Setup the FIS for this request */
	if (!(fis_size = ahci_setup_fis(ch, ctp, ccb, slot->slot))) {
		device_printf(ch->dev, "Setting up SATA FIS failed\n");
		ahci_end_transaction(slot, AHCI_ERR_INVALID);
		return;
	}
	/* Setup the command list entry */
	clp = (struct ahci_cmd_list *)
	    (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot));
	clp->cmd_flags = htole16(
	    (ccb->ccb_h.flags & CAM_DIR_OUT ? AHCI_CMD_WRITE : 0) |
	    (ccb->ccb_h.func_code == XPT_SCSI_IO ?
	    (AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH) : 0) |
	    (fis_size / sizeof(u_int32_t)) |
	    (port << 12));
	clp->prd_length = htole16(slot->dma.nsegs);
	/* Special handling for Soft Reset command. */
	if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
	    (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL)) {
		if (ccb->ataio.cmd.control & ATA_A_RESET) {
			softreset = 1;
			/* Kick controller into sane state */
			ahci_stop(ch);
			ahci_clo(ch);
			ahci_start(ch, 0);
			clp->cmd_flags |= AHCI_CMD_RESET | AHCI_CMD_CLR_BUSY;
		} else {
			softreset = 2;
			/* Prepare FIS receive area for check. */
			for (i = 0; i < 20; i++)
				fis[i] = 0xff;
		}
	} else
		softreset = 0;
	clp->bytecount = 0;
	clp->cmd_table_phys = htole64(ch->dma.work_bus + AHCI_CT_OFFSET +
	    (AHCI_CT_SIZE * slot->slot));
	bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map,
	    BUS_DMASYNC_PREREAD);
	/* Set ACTIVE bit for NCQ commands. */
	if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
	    (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) {
		ATA_OUTL(ch->r_mem, AHCI_P_SACT, 1 << slot->slot);
	}
	/* If FBS is enabled, set PMP port. */
	if (ch->fbs_enabled) {
		ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN |
		    (port << AHCI_P_FBS_DEV_SHIFT));
	}
	/* Issue command to the controller. */
	slot->state = AHCI_SLOT_RUNNING;
	ch->rslots |= (1 << slot->slot);
	ATA_OUTL(ch->r_mem, AHCI_P_CI, (1 << slot->slot));
	/* Device reset commands don't interrupt.  Poll them. */
	if (ccb->ccb_h.func_code == XPT_ATA_IO &&
	    (ccb->ataio.cmd.command == ATA_DEVICE_RESET || softreset)) {
		int count, timeout = ccb->ccb_h.timeout * 100;
		enum ahci_err_type et = AHCI_ERR_NONE;

		for (count = 0; count < timeout; count++) {
			DELAY(10);
			if (!(ATA_INL(ch->r_mem, AHCI_P_CI) &
			    (1 << slot->slot)))
				break;
			if ((ATA_INL(ch->r_mem, AHCI_P_TFD) & ATA_S_ERROR) &&
			    softreset != 1) {
#if 0
				device_printf(ch->dev,
				    "Poll error on slot %d, TFD: %04x\n",
				    slot->slot,
				    ATA_INL(ch->r_mem, AHCI_P_TFD));
#endif
				et = AHCI_ERR_TFE;
				break;
			}
			/* Workaround for ATI SB600/SB700 chipsets. */
			if (ccb->ccb_h.target_id == 15 &&
			    (ch->quirks & AHCI_Q_ATI_PMP_BUG) &&
			    (ATA_INL(ch->r_mem, AHCI_P_IS) & AHCI_P_IX_IPM)) {
				et = AHCI_ERR_TIMEOUT;
				break;
			}
		}

		/*
		 * Marvell HBAs with non-RAID firmware do not wait for
		 * readiness after soft reset, so we have to wait here.
		 * Marvell RAIDs do not have this problem, but instead
		 * sometimes forget to update FIS receive area, breaking
		 * this wait.
*/ if ((ch->quirks & AHCI_Q_NOBSYRES) == 0 && (ch->quirks & AHCI_Q_ATI_PMP_BUG) == 0 && softreset == 2 && et == AHCI_ERR_NONE) { for ( ; count < timeout; count++) { bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_POSTREAD); val = fis[2]; bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_PREREAD); if ((val & ATA_S_BUSY) == 0) break; DELAY(10); } } if (timeout && (count >= timeout)) { device_printf(ch->dev, "Poll timeout on slot %d port %d\n", slot->slot, port); device_printf(ch->dev, "is %08x cs %08x ss %08x " "rs %08x tfd %02x serr %08x cmd %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR), ATA_INL(ch->r_mem, AHCI_P_CMD)); et = AHCI_ERR_TIMEOUT; } /* Kick controller into sane state and enable FBS. */ if (softreset == 2) ch->eslots |= (1 << slot->slot); ahci_end_transaction(slot, et); return; } /* Start command execution timeout */ callout_reset_sbt(&slot->timeout, SBT_1MS * ccb->ccb_h.timeout / 2, 0, (timeout_t*)ahci_timeout, slot, 0); return; } /* Must be called with channel locked. */ static void ahci_process_timeout(struct ahci_channel *ch) { int i; mtx_assert(&ch->mtx, MA_OWNED); /* Handle the rest of commands. */ for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; ahci_end_transaction(&ch->slot[i], AHCI_ERR_TIMEOUT); } } /* Must be called with channel locked. */ static void ahci_rearm_timeout(struct ahci_channel *ch) { int i; mtx_assert(&ch->mtx, MA_OWNED); for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; /* Do we have a running request on slot? */ if (slot->state < AHCI_SLOT_RUNNING) continue; if ((ch->toslots & (1 << i)) == 0) continue; callout_reset_sbt(&slot->timeout, SBT_1MS * slot->ccb->ccb_h.timeout / 2, 0, (timeout_t*)ahci_timeout, slot, 0); } } /* Locked by callout mechanism. */ static void ahci_timeout(struct ahci_slot *slot) { struct ahci_channel *ch = slot->ch; device_t dev = ch->dev; uint32_t sstatus; int ccs; int i; /* Check for stale timeout. */ if (slot->state < AHCI_SLOT_RUNNING) return; /* Check if slot was not being executed last time we checked. */ if (slot->state < AHCI_SLOT_EXECUTING) { /* Check if slot started executing. */ sstatus = ATA_INL(ch->r_mem, AHCI_P_SACT); ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; if ((sstatus & (1 << slot->slot)) != 0 || ccs == slot->slot || ch->fbs_enabled || ch->wrongccs) slot->state = AHCI_SLOT_EXECUTING; else if ((ch->rslots & (1 << ccs)) == 0) { ch->wrongccs = 1; slot->state = AHCI_SLOT_EXECUTING; } callout_reset_sbt(&slot->timeout, SBT_1MS * slot->ccb->ccb_h.timeout / 2, 0, (timeout_t*)ahci_timeout, slot, 0); return; } device_printf(dev, "Timeout on slot %d port %d\n", slot->slot, slot->ccb->ccb_h.target_id & 0x0f); device_printf(dev, "is %08x cs %08x ss %08x rs %08x tfd %02x " "serr %08x cmd %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR), ATA_INL(ch->r_mem, AHCI_P_CMD)); /* Handle frozen command. 
*/ if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } if (!ch->fbs_enabled && !ch->wrongccs) { /* Without FBS we know real timeout source. */ ch->fatalerr = 1; /* Handle command with timeout. */ ahci_end_transaction(&ch->slot[slot->slot], AHCI_ERR_TIMEOUT); /* Handle the rest of commands. */ for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT); } } else { /* With FBS we wait for other commands timeout and pray. */ if (ch->toslots == 0) xpt_freeze_simq(ch->sim, 1); ch->toslots |= (1 << slot->slot); if ((ch->rslots & ~ch->toslots) == 0) ahci_process_timeout(ch); else device_printf(dev, " ... waiting for slots %08x\n", ch->rslots & ~ch->toslots); } } /* Must be called with channel locked. */ static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) { struct ahci_channel *ch = slot->ch; union ccb *ccb = slot->ccb; struct ahci_cmd_list *clp; int lastto; uint32_t sig; bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); clp = (struct ahci_cmd_list *) (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot)); /* Read result registers to the result struct * May be incorrect if several commands finished same time, * so read only when sure or have to. */ if (ccb->ccb_h.func_code == XPT_ATA_IO) { struct ata_res *res = &ccb->ataio.res; if ((et == AHCI_ERR_TFE) || (ccb->ataio.cmd.flags & CAM_ATAIO_NEEDRESULT)) { u_int8_t *fis = ch->dma.rfis + 0x40; bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_POSTREAD); if (ch->fbs_enabled) { fis += ccb->ccb_h.target_id * 256; res->status = fis[2]; res->error = fis[3]; } else { uint16_t tfd = ATA_INL(ch->r_mem, AHCI_P_TFD); res->status = tfd; res->error = tfd >> 8; } res->lba_low = fis[4]; res->lba_mid = fis[5]; res->lba_high = fis[6]; res->device = fis[7]; res->lba_low_exp = fis[8]; res->lba_mid_exp = fis[9]; res->lba_high_exp = fis[10]; res->sector_count = fis[12]; res->sector_count_exp = fis[13]; /* * Some weird controllers do not return signature in * FIS receive area. Read it from PxSIG register. */ if ((ch->quirks & AHCI_Q_ALTSIG) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) == 0) { sig = ATA_INL(ch->r_mem, AHCI_P_SIG); res->lba_high = sig >> 24; res->lba_mid = sig >> 16; res->lba_low = sig >> 8; res->sector_count = sig; } } else bzero(res, sizeof(*res)); if ((ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) == 0 && (ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && (ch->quirks & AHCI_Q_NOCOUNT) == 0) { ccb->ataio.resid = ccb->ataio.dxfer_len - le32toh(clp->bytecount); } } else { if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && (ch->quirks & AHCI_Q_NOCOUNT) == 0) { ccb->csio.resid = ccb->csio.dxfer_len - le32toh(clp->bytecount); } } if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, (ccb->ccb_h.flags & CAM_DIR_IN) ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ch->dma.data_tag, slot->dma.data_map); } if (et != AHCI_ERR_NONE) ch->eslots |= (1 << slot->slot); /* In case of error, freeze device for proper recovery. 
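 *
 * (Aside: the non-FBS result path above recovers the ATA shadow registers
 * from PxTFD, which packs status in the low byte and error in the next:
 *
 *	uint16_t tfd = ATA_INL(ch->r_mem, AHCI_P_TFD);
 *	uint8_t status = tfd & 0xff;	// ATA status register image
 *	uint8_t err = tfd >> 8;		// ATA error register image
 * )
 *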
*/ if ((et != AHCI_ERR_NONE) && (!ch->recoverycmd) && !(ccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(ccb->ccb_h.path, 1); ccb->ccb_h.status |= CAM_DEV_QFRZN; } /* Set proper result status. */ ccb->ccb_h.status &= ~CAM_STATUS_MASK; switch (et) { case AHCI_ERR_NONE: ccb->ccb_h.status |= CAM_REQ_CMP; if (ccb->ccb_h.func_code == XPT_SCSI_IO) ccb->csio.scsi_status = SCSI_STATUS_OK; break; case AHCI_ERR_INVALID: ch->fatalerr = 1; ccb->ccb_h.status |= CAM_REQ_INVALID; break; case AHCI_ERR_INNOCENT: ccb->ccb_h.status |= CAM_REQUEUE_REQ; break; case AHCI_ERR_TFE: case AHCI_ERR_NCQ: if (ccb->ccb_h.func_code == XPT_SCSI_IO) { ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND; } else { ccb->ccb_h.status |= CAM_ATA_STATUS_ERROR; } break; case AHCI_ERR_SATA: ch->fatalerr = 1; if (!ch->recoverycmd) { xpt_freeze_simq(ch->sim, 1); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_RELEASE_SIMQ; } ccb->ccb_h.status |= CAM_UNCOR_PARITY; break; case AHCI_ERR_TIMEOUT: if (!ch->recoverycmd) { xpt_freeze_simq(ch->sim, 1); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_RELEASE_SIMQ; } ccb->ccb_h.status |= CAM_CMD_TIMEOUT; break; default: ch->fatalerr = 1; ccb->ccb_h.status |= CAM_REQ_CMP_ERR; } /* Free slot. */ ch->oslots &= ~(1 << slot->slot); ch->rslots &= ~(1 << slot->slot); ch->aslots &= ~(1 << slot->slot); slot->state = AHCI_SLOT_EMPTY; slot->ccb = NULL; /* Update channel stats. */ ch->numrslots--; ch->numrslotspd[ccb->ccb_h.target_id]--; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ch->numtslots--; ch->numtslotspd[ccb->ccb_h.target_id]--; } /* Cancel timeout state if request completed normally. */ if (et != AHCI_ERR_TIMEOUT) { lastto = (ch->toslots == (1 << slot->slot)); ch->toslots &= ~(1 << slot->slot); if (lastto) xpt_release_simq(ch->sim, TRUE); } /* If it was first request of reset sequence and there is no error, * proceed to second request. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) && et == AHCI_ERR_NONE) { ccb->ataio.cmd.control &= ~ATA_A_RESET; ahci_begin_transaction(ch, ccb); return; } /* If it was our READ LOG command - process it. */ if (ccb->ccb_h.recovery_type == RECOVERY_READ_LOG) { ahci_process_read_log(ch, ccb); /* If it was our REQUEST SENSE command - process it. */ } else if (ccb->ccb_h.recovery_type == RECOVERY_REQUEST_SENSE) { ahci_process_request_sense(ch, ccb); /* If it was NCQ or ATAPI command error, put result on hold. */ } else if (et == AHCI_ERR_NCQ || ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_SCSI_STATUS_ERROR && (ccb->ccb_h.flags & CAM_DIS_AUTOSENSE) == 0)) { ch->hold[slot->slot] = ccb; ch->numhslots++; } else ahci_done(ch, ccb); /* If we have no other active commands, ... */ if (ch->rslots == 0) { /* if there was fatal error - reset port. */ if (ch->toslots != 0 || ch->fatalerr) { ahci_reset(ch); } else { /* if we have slots in error, we can reinit port. */ if (ch->eslots != 0) { ahci_stop(ch); ahci_clo(ch); ahci_start(ch, 1); } /* if there commands on hold, we can do READ LOG. */ if (!ch->recoverycmd && ch->numhslots) ahci_issue_recovery(ch); } /* If all the rest of commands are in timeout - give them chance. */ } else if ((ch->rslots & ~ch->toslots) == 0 && et != AHCI_ERR_TIMEOUT) ahci_rearm_timeout(ch); /* Unfreeze frozen command. 
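
After the status mapping, the slot is retired by clearing its bit in each channel mask and adjusting the counters. A trimmed sketch of that bookkeeping, with struct chan_state standing in for the relevant ahci_channel fields:

#include <stdint.h>

struct chan_state {
        uint32_t oslots;        // occupied slots
        uint32_t rslots;        // running slots
        uint32_t aslots;        // slots holding atomic (non-NCQ) commands
        int numrslots;          // running-command counter
};

static void
slot_release(struct chan_state *ch, int slot)
{
        uint32_t bit = 1u << slot;

        ch->oslots &= ~bit;
        ch->rslots &= ~bit;
        ch->aslots &= ~bit;
        ch->numrslots--;
}

Keeping the masks consistent matters because the later decisions in this function are pure mask comparisons: "no other active commands" is ch->rslots == 0, and "everything left has timed out" is (ch->rslots & ~ch->toslots) == 0.
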
*/ if (ch->frozen && !ahci_check_collision(ch, ch->frozen)) { union ccb *fccb = ch->frozen; ch->frozen = NULL; ahci_begin_transaction(ch, fccb); xpt_release_simq(ch->sim, TRUE); } /* Start PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3 && (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) { callout_schedule(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8); } } static void ahci_issue_recovery(struct ahci_channel *ch) { union ccb *ccb; struct ccb_ataio *ataio; struct ccb_scsiio *csio; int i; /* Find some held command. */ for (i = 0; i < ch->numslots; i++) { if (ch->hold[i]) break; } ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { device_printf(ch->dev, "Unable to allocate recovery command\n"); completeall: /* We can't do anything -- complete held commands. */ for (i = 0; i < ch->numslots; i++) { if (ch->hold[i] == NULL) continue; ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_RESRC_UNAVAIL; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } ahci_reset(ch); return; } ccb->ccb_h = ch->hold[i]->ccb_h; /* Reuse old header. */ if (ccb->ccb_h.func_code == XPT_ATA_IO) { /* READ LOG */ ccb->ccb_h.recovery_type = RECOVERY_READ_LOG; ccb->ccb_h.func_code = XPT_ATA_IO; ccb->ccb_h.flags = CAM_DIR_IN; ccb->ccb_h.timeout = 1000; /* 1s should be enough. */ ataio = &ccb->ataio; ataio->data_ptr = malloc(512, M_AHCI, M_NOWAIT); if (ataio->data_ptr == NULL) { xpt_free_ccb(ccb); device_printf(ch->dev, "Unable to allocate memory for READ LOG command\n"); goto completeall; } ataio->dxfer_len = 512; bzero(&ataio->cmd, sizeof(ataio->cmd)); ataio->cmd.flags = CAM_ATAIO_48BIT; ataio->cmd.command = 0x2F; /* READ LOG EXT */ ataio->cmd.sector_count = 1; ataio->cmd.sector_count_exp = 0; ataio->cmd.lba_low = 0x10; ataio->cmd.lba_mid = 0; ataio->cmd.lba_mid_exp = 0; } else { /* REQUEST SENSE */ ccb->ccb_h.recovery_type = RECOVERY_REQUEST_SENSE; ccb->ccb_h.recovery_slot = i; ccb->ccb_h.func_code = XPT_SCSI_IO; ccb->ccb_h.flags = CAM_DIR_IN; ccb->ccb_h.status = 0; ccb->ccb_h.timeout = 1000; /* 1s should be enough. */ csio = &ccb->csio; csio->data_ptr = (void *)&ch->hold[i]->csio.sense_data; csio->dxfer_len = ch->hold[i]->csio.sense_len; csio->cdb_len = 6; bzero(&csio->cdb_io, sizeof(csio->cdb_io)); csio->cdb_io.cdb_bytes[0] = 0x03; csio->cdb_io.cdb_bytes[4] = csio->dxfer_len; } /* Freeze SIM while doing recovery. 
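
The recovery CCB built above for the ATA case is a READ LOG EXT of log address 0x10, the NCQ Command Error log, which is the standard way to learn which queued command failed after an NCQ error. A compact sketch of the same field values, with struct ata_cmd_sketch as a simplified stand-in for the ataio command block:

#include <stdint.h>
#include <string.h>

struct ata_cmd_sketch {
        uint8_t command;
        uint8_t flags_48bit;            // stand-in for CAM_ATAIO_48BIT
        uint8_t sector_count, sector_count_exp;
        uint8_t lba_low, lba_mid, lba_mid_exp;
};

static void
build_read_log_ext(struct ata_cmd_sketch *cmd)
{
        memset(cmd, 0, sizeof(*cmd));
        cmd->flags_48bit = 1;
        cmd->command = 0x2F;            // READ LOG EXT
        cmd->sector_count = 1;          // one 512-byte log sector
        cmd->sector_count_exp = 0;
        cmd->lba_low = 0x10;            // log address 0x10: NCQ error log
        cmd->lba_mid = 0;
        cmd->lba_mid_exp = 0;
}
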
*/ ch->recoverycmd = 1; xpt_freeze_simq(ch->sim, 1); ahci_begin_transaction(ch, ccb); } static void ahci_process_read_log(struct ahci_channel *ch, union ccb *ccb) { uint8_t *data; struct ata_res *res; int i; ch->recoverycmd = 0; data = ccb->ataio.data_ptr; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP && (data[0] & 0x80) == 0) { for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; if (ch->hold[i]->ccb_h.func_code != XPT_ATA_IO) continue; if ((data[0] & 0x1F) == i) { res = &ch->hold[i]->ataio.res; res->status = data[2]; res->error = data[3]; res->lba_low = data[4]; res->lba_mid = data[5]; res->lba_high = data[6]; res->device = data[7]; res->lba_low_exp = data[8]; res->lba_mid_exp = data[9]; res->lba_high_exp = data[10]; res->sector_count = data[12]; res->sector_count_exp = data[13]; } else { ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_REQUEUE_REQ; } ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } } else { if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) device_printf(ch->dev, "Error while READ LOG EXT\n"); else if ((data[0] & 0x80) == 0) { device_printf(ch->dev, "Non-queued command error in READ LOG EXT\n"); } for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; if (ch->hold[i]->ccb_h.func_code != XPT_ATA_IO) continue; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } } free(ccb->ataio.data_ptr, M_AHCI); xpt_free_ccb(ccb); xpt_release_simq(ch->sim, TRUE); } static void ahci_process_request_sense(struct ahci_channel *ch, union ccb *ccb) { int i; ch->recoverycmd = 0; i = ccb->ccb_h.recovery_slot; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { ch->hold[i]->ccb_h.status |= CAM_AUTOSNS_VALID; } else { ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_AUTOSENSE_FAIL; } ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; xpt_free_ccb(ccb); xpt_release_simq(ch->sim, TRUE); } static void ahci_start(struct ahci_channel *ch, int fbs) { u_int32_t cmd; /* Run the channel start callback, if any. */ if (ch->start) ch->start(ch); /* Clear SATA error register */ ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xFFFFFFFF); /* Clear any interrupts pending on this channel */ ATA_OUTL(ch->r_mem, AHCI_P_IS, 0xFFFFFFFF); /* Configure FIS-based switching if supported. */ if (ch->chcaps & AHCI_P_CMD_FBSCP) { ch->fbs_enabled = (fbs && ch->pm_present) ? 1 : 0; ATA_OUTL(ch->r_mem, AHCI_P_FBS, ch->fbs_enabled ? AHCI_P_FBS_EN : 0); } /* Start operations on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); cmd &= ~AHCI_P_CMD_PMA; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_ST | (ch->pm_present ? AHCI_P_CMD_PMA : 0)); } static void ahci_stop(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Kill all activity on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_ST); /* Wait for activity stop. 
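
ahci_process_read_log() above keys off byte 0 of the returned log page: bit 7 is the NQ flag (the error came from a non-queued command, so no tag can be recovered) and bits 4:0 carry the tag of the failed queued command. A small sketch of that decoding:

#include <stdint.h>

#define NCQ_LOG_NQ      0x80
#define NCQ_LOG_TAGMASK 0x1F

// Returns the failing tag (0..31), or -1 if the log does not identify
// a queued command.
static int
ncq_error_log_tag(const uint8_t *data)
{
        if (data[0] & NCQ_LOG_NQ)
                return (-1);
        return (data[0] & NCQ_LOG_TAGMASK);
}
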
*/ timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "stopping AHCI engine failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CR); ch->eslots = 0; } static void ahci_clo(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Issue Command List Override if supported */ if (ch->caps & AHCI_CAP_SCLO) { cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); cmd |= AHCI_P_CMD_CLO; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd); timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "executing CLO failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CLO); } } static void ahci_stop_fr(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Kill all FIS reception on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_FRE); /* Wait for FIS reception stop. */ timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "stopping AHCI FR engine failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_FR); } static void ahci_start_fr(struct ahci_channel *ch) { u_int32_t cmd; /* Start FIS reception on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_FRE); } static int ahci_wait_ready(struct ahci_channel *ch, int t, int t0) { int timeout = 0; uint32_t val; while ((val = ATA_INL(ch->r_mem, AHCI_P_TFD)) & (ATA_S_BUSY | ATA_S_DRQ)) { if (timeout > t) { if (t != 0) { device_printf(ch->dev, "AHCI reset: device not ready after %dms " "(tfd = %08x)\n", MAX(t, 0) + t0, val); } return (EBUSY); } DELAY(1000); timeout++; } if (bootverbose) device_printf(ch->dev, "AHCI reset: device ready after %dms\n", timeout + t0); return (0); } static void ahci_reset_to(void *arg) { struct ahci_channel *ch = arg; if (ch->resetting == 0) return; ch->resetting--; if (ahci_wait_ready(ch, ch->resetting == 0 ? -1 : 0, (310 - ch->resetting) * 100) == 0) { ch->resetting = 0; ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); return; } if (ch->resetting == 0) { ahci_clo(ch); ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); return; } callout_schedule(&ch->reset_timer, hz / 10); } static void ahci_reset(struct ahci_channel *ch) { struct ahci_controller *ctlr = device_get_softc(device_get_parent(ch->dev)); int i; xpt_freeze_simq(ch->sim, 1); if (bootverbose) device_printf(ch->dev, "AHCI reset...\n"); /* Forget about previous reset. */ if (ch->resetting) { ch->resetting = 0; callout_stop(&ch->reset_timer); xpt_release_simq(ch->sim, TRUE); } /* Requeue freezed command. */ if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } /* Kill the engine and requeue all running commands. */ ahci_stop(ch); for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; /* XXX; Commands in loading state. 
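
ahci_stop(), ahci_clo() and ahci_stop_fr() all use the same poll-until-clear idiom: spin in 10 us steps on a PxCMD bit, giving up after 50000 iterations (about 500 ms). Factored out, it looks like the sketch below; read_reg() and delay_us() are hypothetical stand-ins for ATA_INL() and DELAY().

#include <stdint.h>

extern uint32_t read_reg(void);         // stand-in for ATA_INL(...)
extern void delay_us(int us);           // stand-in for DELAY()

// Returns 0 once 'bit' is clear, -1 on timeout.
static int
wait_bit_clear(uint32_t bit)
{
        int timeout = 0;

        while (read_reg() & bit) {
                delay_us(10);
                if (timeout++ > 50000)  // roughly 500 ms worst case
                        return (-1);
        }
        return (0);
}
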
*/ ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT); } for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } if (ch->toslots != 0) xpt_release_simq(ch->sim, TRUE); ch->eslots = 0; ch->toslots = 0; ch->wrongccs = 0; ch->fatalerr = 0; /* Tell the XPT about the event */ xpt_async(AC_BUS_RESET, ch->path, NULL); /* Disable port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Reset and reconnect PHY, */ if (!ahci_sata_phy_reset(ch)) { if (bootverbose) device_printf(ch->dev, "AHCI reset: device not found\n"); ch->devices = 0; /* Enable wanted port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, (((ch->pm_level != 0) ? AHCI_P_IX_CPD | AHCI_P_IX_MP : 0) | AHCI_P_IX_PRC | AHCI_P_IX_PC)); xpt_release_simq(ch->sim, TRUE); return; } if (bootverbose) device_printf(ch->dev, "AHCI reset: device found\n"); /* Wait for clearing busy status. */ if (ahci_wait_ready(ch, dumping ? 31000 : 0, 0)) { if (dumping) ahci_clo(ch); else ch->resetting = 310; } ch->devices = 1; /* Enable wanted port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, (((ch->pm_level != 0) ? AHCI_P_IX_CPD | AHCI_P_IX_MP : 0) | AHCI_P_IX_TFE | AHCI_P_IX_HBF | AHCI_P_IX_HBD | AHCI_P_IX_IF | AHCI_P_IX_OF | ((ch->pm_level == 0) ? AHCI_P_IX_PRC : 0) | AHCI_P_IX_PC | AHCI_P_IX_DP | AHCI_P_IX_UF | (ctlr->ccc ? 0 : AHCI_P_IX_SDB) | AHCI_P_IX_DS | AHCI_P_IX_PS | (ctlr->ccc ? 0 : AHCI_P_IX_DHR))); if (ch->resetting) callout_reset(&ch->reset_timer, hz / 10, ahci_reset_to, ch); else { ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); } } static int ahci_setup_fis(struct ahci_channel *ch, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag) { u_int8_t *fis = &ctp->cfis[0]; bzero(fis, 20); fis[0] = 0x27; /* host to device */ fis[1] = (ccb->ccb_h.target_id & 0x0f); if (ccb->ccb_h.func_code == XPT_SCSI_IO) { fis[1] |= 0x80; fis[2] = ATA_PACKET_CMD; if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && ch->curr[ccb->ccb_h.target_id].mode >= ATA_DMA) fis[3] = ATA_F_DMA; else { fis[5] = ccb->csio.dxfer_len; fis[6] = ccb->csio.dxfer_len >> 8; } fis[7] = ATA_D_LBA; fis[15] = ATA_A_4BIT; bcopy((ccb->ccb_h.flags & CAM_CDB_POINTER) ? 
ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes, ctp->acmd, ccb->csio.cdb_len); bzero(ctp->acmd + ccb->csio.cdb_len, 32 - ccb->csio.cdb_len); } else if ((ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) == 0) { fis[1] |= 0x80; fis[2] = ccb->ataio.cmd.command; fis[3] = ccb->ataio.cmd.features; fis[4] = ccb->ataio.cmd.lba_low; fis[5] = ccb->ataio.cmd.lba_mid; fis[6] = ccb->ataio.cmd.lba_high; fis[7] = ccb->ataio.cmd.device; fis[8] = ccb->ataio.cmd.lba_low_exp; fis[9] = ccb->ataio.cmd.lba_mid_exp; fis[10] = ccb->ataio.cmd.lba_high_exp; fis[11] = ccb->ataio.cmd.features_exp; if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { fis[12] = tag << 3; } else { fis[12] = ccb->ataio.cmd.sector_count; } fis[13] = ccb->ataio.cmd.sector_count_exp; fis[15] = ATA_A_4BIT; } else { fis[15] = ccb->ataio.cmd.control; } if (ccb->ataio.ata_flags & ATA_FLAG_AUX) { fis[16] = ccb->ataio.aux & 0xff; fis[17] = (ccb->ataio.aux >> 8) & 0xff; fis[18] = (ccb->ataio.aux >> 16) & 0xff; fis[19] = (ccb->ataio.aux >> 24) & 0xff; } return (20); } static int ahci_sata_connect(struct ahci_channel *ch) { u_int32_t status; int timeout, found = 0; /* Wait up to 100ms for "connect well" */ for (timeout = 0; timeout < 1000 ; timeout++) { status = ATA_INL(ch->r_mem, AHCI_P_SSTS); if ((status & ATA_SS_DET_MASK) != ATA_SS_DET_NO_DEVICE) found = 1; if (((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_ONLINE) && ((status & ATA_SS_SPD_MASK) != ATA_SS_SPD_NO_SPEED) && ((status & ATA_SS_IPM_MASK) == ATA_SS_IPM_ACTIVE)) break; if ((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_OFFLINE) { if (bootverbose) { device_printf(ch->dev, "SATA offline status=%08x\n", status); } return (0); } if (found == 0 && timeout >= 100) break; DELAY(100); } if (timeout >= 1000 || !found) { if (bootverbose) { device_printf(ch->dev, "SATA connect timeout time=%dus status=%08x\n", timeout * 100, status); } return (0); } if (bootverbose) { device_printf(ch->dev, "SATA connect time=%dus status=%08x\n", timeout * 100, status); } /* Clear SATA error register */ ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xffffffff); return (1); } static int ahci_sata_phy_reset(struct ahci_channel *ch) { int sata_rev; uint32_t val; if (ch->listening) { val = ATA_INL(ch->r_mem, AHCI_P_CMD); val |= AHCI_P_CMD_SUD; ATA_OUTL(ch->r_mem, AHCI_P_CMD, val); ch->listening = 0; } sata_rev = ch->user[ch->pm_present ? 15 : 0].revision; if (sata_rev == 1) val = ATA_SC_SPD_SPEED_GEN1; else if (sata_rev == 2) val = ATA_SC_SPD_SPEED_GEN2; else if (sata_rev == 3) val = ATA_SC_SPD_SPEED_GEN3; else val = 0; ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_RESET | val | ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER); DELAY(1000); ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_IDLE | val | ((ch->pm_level > 0) ? 0 : (ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER))); if (!ahci_sata_connect(ch)) { if (ch->caps & AHCI_CAP_SSS) { val = ATA_INL(ch->r_mem, AHCI_P_CMD); val &= ~AHCI_P_CMD_SUD; ATA_OUTL(ch->r_mem, AHCI_P_CMD, val); ch->listening = 1; } else if (ch->pm_level > 0) ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE); return (0); } return (1); } static int ahci_check_ids(struct ahci_channel *ch, union ccb *ccb) { if (ccb->ccb_h.target_id > ((ch->caps & AHCI_CAP_SPM) ? 
15 : 0)) { ccb->ccb_h.status = CAM_TID_INVALID; ahci_done(ch, ccb); return (-1); } if (ccb->ccb_h.target_lun != 0) { ccb->ccb_h.status = CAM_LUN_INVALID; ahci_done(ch, ccb); return (-1); } return (0); } static void ahciaction(struct cam_sim *sim, union ccb *ccb) { struct ahci_channel *ch; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("ahciaction func_code=%x\n", ccb->ccb_h.func_code)); ch = (struct ahci_channel *)cam_sim_softc(sim); switch (ccb->ccb_h.func_code) { /* Common cases first */ case XPT_ATA_IO: /* Execute the requested I/O operation */ case XPT_SCSI_IO: if (ahci_check_ids(ch, ccb)) return; if (ch->devices == 0 || (ch->pm_present == 0 && ccb->ccb_h.target_id > 0 && ccb->ccb_h.target_id < 15)) { ccb->ccb_h.status = CAM_SEL_TIMEOUT; break; } ccb->ccb_h.recovery_type = RECOVERY_NONE; /* Check for command collision. */ if (ahci_check_collision(ch, ccb)) { /* Freeze command. */ ch->frozen = ccb; /* We have only one frozen slot, so freeze simq also. */ xpt_freeze_simq(ch->sim, 1); return; } ahci_begin_transaction(ch, ccb); return; case XPT_EN_LUN: /* Enable LUN as a target */ case XPT_TARGET_IO: /* Execute target I/O request */ case XPT_ACCEPT_TARGET_IO: /* Accept Host Target Mode CDB */ case XPT_CONT_TARGET_IO: /* Continue Host Target I/O Connection*/ case XPT_ABORT: /* Abort the specified CCB */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_SET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; if (ahci_check_ids(ch, ccb)) return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else d = &ch->user[ccb->ccb_h.target_id]; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_REVISION) d->revision = cts->xport_specific.sata.revision; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_MODE) d->mode = cts->xport_specific.sata.mode; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_BYTECOUNT) d->bytecount = min(8192, cts->xport_specific.sata.bytecount); if (cts->xport_specific.sata.valid & CTS_SATA_VALID_TAGS) d->tags = min(ch->numslots, cts->xport_specific.sata.tags); if (cts->xport_specific.sata.valid & CTS_SATA_VALID_PM) ch->pm_present = cts->xport_specific.sata.pm_present; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_ATAPI) d->atapi = cts->xport_specific.sata.atapi; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_CAPS) d->caps = cts->xport_specific.sata.caps; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: /* Get default/user set transfer settings for the target */ { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; uint32_t status; if (ahci_check_ids(ch, ccb)) return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else d = &ch->user[ccb->ccb_h.target_id]; cts->protocol = PROTO_UNSPECIFIED; cts->protocol_version = PROTO_VERSION_UNSPECIFIED; cts->transport = XPORT_SATA; cts->transport_version = XPORT_VERSION_UNSPECIFIED; cts->proto_specific.valid = 0; cts->xport_specific.sata.valid = 0; if (cts->type == CTS_TYPE_CURRENT_SETTINGS && (ccb->ccb_h.target_id == 15 || (ccb->ccb_h.target_id == 0 && !ch->pm_present))) { status = ATA_INL(ch->r_mem, AHCI_P_SSTS) & ATA_SS_SPD_MASK; if (status & 0x0f0) { cts->xport_specific.sata.revision = (status & 0x0f0) >> 4; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; } cts->xport_specific.sata.caps = d->caps & CTS_SATA_CAPS_D; if (ch->pm_level) { if (ch->caps & (AHCI_CAP_PSC | AHCI_CAP_SSC)) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_PMREQ; if (ch->caps2 & 
AHCI_CAP2_APST) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_APST; } if ((ch->caps & AHCI_CAP_SNCQ) && (ch->quirks & AHCI_Q_NOAA) == 0) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_DMAAA; cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_AN; cts->xport_specific.sata.caps &= ch->user[ccb->ccb_h.target_id].caps; cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } else { cts->xport_specific.sata.revision = d->revision; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; cts->xport_specific.sata.caps = d->caps; cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } cts->xport_specific.sata.mode = d->mode; cts->xport_specific.sata.valid |= CTS_SATA_VALID_MODE; cts->xport_specific.sata.bytecount = d->bytecount; cts->xport_specific.sata.valid |= CTS_SATA_VALID_BYTECOUNT; cts->xport_specific.sata.pm_present = ch->pm_present; cts->xport_specific.sata.valid |= CTS_SATA_VALID_PM; cts->xport_specific.sata.tags = d->tags; cts->xport_specific.sata.valid |= CTS_SATA_VALID_TAGS; cts->xport_specific.sata.atapi = d->atapi; cts->xport_specific.sata.valid |= CTS_SATA_VALID_ATAPI; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: /* Reset the specified SCSI bus */ case XPT_RESET_DEV: /* Bus Device Reset the specified SCSI device */ ahci_reset(ch); ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_TERM_IO: /* Terminate the I/O process */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_PATH_INQ: /* Path routing inquiry */ { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; /* XXX??? */ cpi->hba_inquiry = PI_SDTR_ABLE; if (ch->caps & AHCI_CAP_SNCQ) cpi->hba_inquiry |= PI_TAG_ABLE; if (ch->caps & AHCI_CAP_SPM) cpi->hba_inquiry |= PI_SATAPM; cpi->target_sprt = 0; cpi->hba_misc = PIM_SEQSCAN | PIM_UNMAPPED | PIM_ATA_EXT; cpi->hba_eng_cnt = 0; if (ch->caps & AHCI_CAP_SPM) cpi->max_target = 15; else cpi->max_target = 0; cpi->max_lun = 0; cpi->initiator_id = 0; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "AHCI", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->transport = XPORT_SATA; cpi->transport_version = XPORT_VERSION_UNSPECIFIED; cpi->protocol = PROTO_ATA; cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; cpi->maxio = MAXPHYS; /* ATI SB600 can't handle 256 sectors with FPDMA (NCQ). */ if (ch->quirks & AHCI_Q_MAXIO_64K) cpi->maxio = min(cpi->maxio, 128 * 512); cpi->hba_vendor = ch->vendorid; cpi->hba_device = ch->deviceid; cpi->hba_subvendor = ch->subvendorid; cpi->hba_subdevice = ch->subdeviceid; cpi->ccb_h.status = CAM_REQ_CMP; break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } ahci_done(ch, ccb); } static void ahcipoll(struct cam_sim *sim) { struct ahci_channel *ch = (struct ahci_channel *)cam_sim_softc(sim); uint32_t istatus; /* Read interrupt statuses and process if any. 
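
The GET_TRAN_SETTINGS path above derives the negotiated SATA revision from the SPD field of PxSSTS: bits 7:4 hold the interface speed (1 = Gen1/1.5 Gbps, 2 = Gen2/3 Gbps, 3 = Gen3/6 Gbps), and zero means no established link. A one-function sketch of that extraction:

#include <stdint.h>

#define SSTS_SPD_MASK 0x0f0     // SPD field of the SStatus register

// Returns 1..3 for an established link, 0 otherwise.
static int
ssts_to_revision(uint32_t ssts)
{
        return ((ssts & SSTS_SPD_MASK) >> 4);
}
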
*/ istatus = ATA_INL(ch->r_mem, AHCI_P_IS); if (istatus != 0) ahci_ch_intr_main(ch, istatus); if (ch->resetting != 0 && (--ch->resetpolldiv <= 0 || !callout_pending(&ch->reset_timer))) { ch->resetpolldiv = 1000; ahci_reset_to(ch); } } MODULE_VERSION(ahci, 1); MODULE_DEPEND(ahci, cam, 1, 1, 1); Index: projects/clang391-import/sys/dev/ahci/ahci_pci.c =================================================================== --- projects/clang391-import/sys/dev/ahci/ahci_pci.c (revision 309262) +++ projects/clang391-import/sys/dev/ahci/ahci_pci.c (revision 309263) @@ -1,641 +1,648 @@ /*- * Copyright (c) 2009-2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ahci.h" static int force_ahci = 1; TUNABLE_INT("hw.ahci.force", &force_ahci); static const struct { uint32_t id; uint8_t rev; const char *name; int quirks; } ahci_ids[] = { {0x43801002, 0x00, "AMD SB600", AHCI_Q_NOMSI | AHCI_Q_ATI_PMP_BUG | AHCI_Q_MAXIO_64K}, {0x43901002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43911002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43921002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43931002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43941002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, /* Not sure SB8x0/SB9x0 needs this quirk. 
Be conservative though */ {0x43951002, 0x00, "AMD SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG}, {0x78001022, 0x00, "AMD Hudson-2", 0}, {0x78011022, 0x00, "AMD Hudson-2", 0}, {0x78021022, 0x00, "AMD Hudson-2", 0}, {0x78031022, 0x00, "AMD Hudson-2", 0}, {0x78041022, 0x00, "AMD Hudson-2", 0}, - {0x06111b21, 0x00, "ASMedia ASM2106", 0}, - {0x06121b21, 0x00, "ASMedia ASM1061", 0}, + {0x06011b21, 0x00, "ASMedia ASM1060", 0}, + {0x06021b21, 0x00, "ASMedia ASM1060", 0}, + {0x06111b21, 0x00, "ASMedia ASM1061", 0}, + {0x06121b21, 0x00, "ASMedia ASM1062", 0}, + {0x06201b21, 0x00, "ASMedia ASM106x", 0}, + {0x06211b21, 0x00, "ASMedia ASM106x", 0}, + {0x06221b21, 0x00, "ASMedia ASM106x", 0}, + {0x06241b21, 0x00, "ASMedia ASM106x", 0}, + {0x06251b21, 0x00, "ASMedia ASM106x", 0}, {0x26528086, 0x00, "Intel ICH6", AHCI_Q_NOFORCE}, {0x26538086, 0x00, "Intel ICH6M", AHCI_Q_NOFORCE}, {0x26818086, 0x00, "Intel ESB2", 0}, {0x26828086, 0x00, "Intel ESB2", 0}, {0x26838086, 0x00, "Intel ESB2", 0}, {0x27c18086, 0x00, "Intel ICH7", 0}, {0x27c38086, 0x00, "Intel ICH7", 0}, {0x27c58086, 0x00, "Intel ICH7M", 0}, {0x27c68086, 0x00, "Intel ICH7M", 0}, {0x28218086, 0x00, "Intel ICH8", 0}, {0x28228086, 0x00, "Intel ICH8", 0}, {0x28248086, 0x00, "Intel ICH8", 0}, {0x28298086, 0x00, "Intel ICH8M", 0}, {0x282a8086, 0x00, "Intel ICH8M", 0}, {0x29228086, 0x00, "Intel ICH9", 0}, {0x29238086, 0x00, "Intel ICH9", 0}, {0x29248086, 0x00, "Intel ICH9", 0}, {0x29258086, 0x00, "Intel ICH9", 0}, {0x29278086, 0x00, "Intel ICH9", 0}, {0x29298086, 0x00, "Intel ICH9M", 0}, {0x292a8086, 0x00, "Intel ICH9M", 0}, {0x292b8086, 0x00, "Intel ICH9M", 0}, {0x292c8086, 0x00, "Intel ICH9M", 0}, {0x292f8086, 0x00, "Intel ICH9M", 0}, {0x294d8086, 0x00, "Intel ICH9", 0}, {0x294e8086, 0x00, "Intel ICH9M", 0}, {0x3a058086, 0x00, "Intel ICH10", 0}, {0x3a228086, 0x00, "Intel ICH10", 0}, {0x3a258086, 0x00, "Intel ICH10", 0}, {0x3b228086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b238086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b258086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b298086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b2c8086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b2f8086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x1c028086, 0x00, "Intel Cougar Point", 0}, {0x1c038086, 0x00, "Intel Cougar Point", 0}, {0x1c048086, 0x00, "Intel Cougar Point", 0}, {0x1c058086, 0x00, "Intel Cougar Point", 0}, {0x1d028086, 0x00, "Intel Patsburg", 0}, {0x1d048086, 0x00, "Intel Patsburg", 0}, {0x1d068086, 0x00, "Intel Patsburg", 0}, {0x28268086, 0x00, "Intel Patsburg (RAID)", 0}, {0x1e028086, 0x00, "Intel Panther Point", 0}, {0x1e038086, 0x00, "Intel Panther Point", 0}, {0x1e048086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e058086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e068086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e078086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e0e8086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e0f8086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1f228086, 0x00, "Intel Avoton", 0}, {0x1f238086, 0x00, "Intel Avoton", 0}, {0x1f248086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f258086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f268086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f278086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f2e8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f2f8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f328086, 0x00, "Intel Avoton", 0}, {0x1f338086, 0x00, "Intel Avoton", 0}, {0x1f348086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f358086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f368086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f378086, 0x00, "Intel 
Avoton (RAID)", 0}, {0x1f3e8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f3f8086, 0x00, "Intel Avoton (RAID)", 0}, {0x23a38086, 0x00, "Intel Coleto Creek", 0}, {0x28238086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x28278086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8c028086, 0x00, "Intel Lynx Point", 0}, {0x8c038086, 0x00, "Intel Lynx Point", 0}, {0x8c048086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c058086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c068086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c078086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c0e8086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c0f8086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c828086, 0x00, "Intel Wildcat Point", 0}, {0x8c838086, 0x00, "Intel Wildcat Point", 0}, {0x8c848086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c858086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c868086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c878086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c8e8086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c8f8086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8d028086, 0x00, "Intel Wellsburg", 0}, {0x8d048086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d068086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d628086, 0x00, "Intel Wellsburg", 0}, {0x8d648086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d668086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d6e8086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x9c028086, 0x00, "Intel Lynx Point-LP", 0}, {0x9c038086, 0x00, "Intel Lynx Point-LP", 0}, {0x9c048086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c058086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c068086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c078086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c0e8086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c0f8086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9d038086, 0x00, "Intel Sunrise Point-LP", 0}, {0x9d058086, 0x00, "Intel Sunrise Point-LP (RAID)", 0}, {0x9d078086, 0x00, "Intel Sunrise Point-LP (RAID)", 0}, {0xa1028086, 0x00, "Intel Sunrise Point", 0}, {0xa1038086, 0x00, "Intel Sunrise Point", 0}, {0xa1058086, 0x00, "Intel Sunrise Point (RAID)", 0}, {0xa1068086, 0x00, "Intel Sunrise Point (RAID)", 0}, {0xa1078086, 0x00, "Intel Sunrise Point (RAID)", 0}, {0xa10f8086, 0x00, "Intel Sunrise Point (RAID)", 0}, {0x23238086, 0x00, "Intel DH89xxCC", 0}, {0x2360197b, 0x00, "JMicron JMB360", 0}, {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE | AHCI_Q_1CH}, {0x2362197b, 0x00, "JMicron JMB362", 0}, {0x2363197b, 0x00, "JMicron JMB363", AHCI_Q_NOFORCE}, {0x2365197b, 0x00, "JMicron JMB365", AHCI_Q_NOFORCE}, {0x2366197b, 0x00, "JMicron JMB366", AHCI_Q_NOFORCE}, {0x2368197b, 0x00, "JMicron JMB368", AHCI_Q_NOFORCE}, {0x611111ab, 0x00, "Marvell 88SE6111", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_1CH | AHCI_Q_EDGEIS}, {0x612111ab, 0x00, "Marvell 88SE6121", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_2CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x614111ab, 0x00, "Marvell 88SE6141", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_4CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x614511ab, 0x00, "Marvell 88SE6145", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_4CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x91201b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS}, {0x91231b4b, 0x11, "Marvell 88SE912x", AHCI_Q_ALTSIG}, {0x91231b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS|AHCI_Q_SATA2}, {0x91251b4b, 0x00, "Marvell 88SE9125", 0}, {0x91281b4b, 0x00, "Marvell 88SE9128", AHCI_Q_ALTSIG}, {0x91301b4b, 0x00, "Marvell 88SE9130", AHCI_Q_ALTSIG}, {0x91721b4b, 0x00, "Marvell 88SE9172", 0}, {0x91821b4b, 0x00, 
"Marvell 88SE9182", 0}, {0x91831b4b, 0x00, "Marvell 88SS9183", 0}, {0x91a01b4b, 0x00, "Marvell 88SE91Ax", 0}, {0x92151b4b, 0x00, "Marvell 88SE9215", 0}, {0x92201b4b, 0x00, "Marvell 88SE9220", AHCI_Q_ALTSIG}, {0x92301b4b, 0x00, "Marvell 88SE9230", AHCI_Q_ALTSIG}, {0x92351b4b, 0x00, "Marvell 88SE9235", 0}, {0x06201103, 0x00, "HighPoint RocketRAID 620", 0}, {0x06201b4b, 0x00, "HighPoint RocketRAID 620", 0}, {0x06221103, 0x00, "HighPoint RocketRAID 622", 0}, {0x06221b4b, 0x00, "HighPoint RocketRAID 622", 0}, {0x06401103, 0x00, "HighPoint RocketRAID 640", 0}, {0x06401b4b, 0x00, "HighPoint RocketRAID 640", 0}, {0x06441103, 0x00, "HighPoint RocketRAID 644", 0}, {0x06441b4b, 0x00, "HighPoint RocketRAID 644", 0}, {0x06411103, 0x00, "HighPoint RocketRAID 640L", 0}, {0x06421103, 0x00, "HighPoint RocketRAID 642L", 0}, {0x06451103, 0x00, "HighPoint RocketRAID 644L", 0}, {0x044c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x055010de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055110de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055210de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055310de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055510de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055610de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055710de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055810de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055910de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055A10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055B10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x058410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x07f010de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f110de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f210de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f310de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f410de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f510de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f610de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f710de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f810de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f910de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07fa10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07fb10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x0ad010de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad110de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad210de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad310de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad410de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad510de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad610de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad710de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad810de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad910de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ada10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0adb10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ab410de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab510de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab610de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab710de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab810de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab910de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0aba10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abb10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abc10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abd10de, 0x00, "NVIDIA MCP79", 
AHCI_Q_NOAA}, {0x0abe10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abf10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0d8410de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8510de, 0x00, "NVIDIA MCP89", AHCI_Q_NOFORCE|AHCI_Q_NOAA}, {0x0d8610de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8710de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8810de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8910de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8a10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8b10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8c10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8d10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8e10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8f10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x3781105a, 0x00, "Promise TX8660", 0}, {0x33491106, 0x00, "VIA VT8251", AHCI_Q_NOPMP|AHCI_Q_NONCQ}, {0x62871106, 0x00, "VIA VT8251", AHCI_Q_NOPMP|AHCI_Q_NONCQ}, {0x11841039, 0x00, "SiS 966", 0}, {0x11851039, 0x00, "SiS 968", 0}, {0x01861039, 0x00, "SiS 968", 0}, {0xa01c177d, 0x00, "ThunderX", AHCI_Q_ABAR0|AHCI_Q_1MSI}, {0x00311c36, 0x00, "Annapurna", AHCI_Q_FORCE_PI|AHCI_Q_RESTORE_CAP|AHCI_Q_NOMSIX}, {0x00000000, 0x00, NULL, 0} }; static int ahci_pci_ctlr_reset(device_t dev) { if (pci_read_config(dev, PCIR_DEVVENDOR, 4) == 0x28298086 && (pci_read_config(dev, 0x92, 1) & 0xfe) == 0x04) pci_write_config(dev, 0x92, 0x01, 1); return ahci_ctlr_reset(dev); } static int ahci_probe(device_t dev) { char buf[64]; int i, valid = 0; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); /* * Ensure it is not a PCI bridge (some vendors use * the same PID and VID in PCI bridge and AHCI cards). */ if (pci_get_class(dev) == PCIC_BRIDGE) return (ENXIO); /* Is this a possible AHCI candidate? */ if (pci_get_class(dev) == PCIC_STORAGE && pci_get_subclass(dev) == PCIS_STORAGE_SATA && pci_get_progif(dev) == PCIP_STORAGE_SATA_AHCI_1_0) valid = 1; else if (pci_get_class(dev) == PCIC_STORAGE && pci_get_subclass(dev) == PCIS_STORAGE_RAID) valid = 2; /* Is this a known AHCI chip? */ for (i = 0; ahci_ids[i].id != 0; i++) { if (ahci_ids[i].id == devid && ahci_ids[i].rev <= revid && (valid || (force_ahci == 1 && !(ahci_ids[i].quirks & AHCI_Q_NOFORCE)))) { /* Do not attach JMicrons with single PCI function. */ if (pci_get_vendor(dev) == 0x197b && (pci_read_config(dev, 0xdf, 1) & 0x40) == 0) return (ENXIO); snprintf(buf, sizeof(buf), "%s AHCI SATA controller", ahci_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } } if (valid != 1) return (ENXIO); device_set_desc_copy(dev, "AHCI SATA controller"); return (BUS_PROBE_DEFAULT); } static int ahci_ata_probe(device_t dev) { char buf[64]; int i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); if ((intptr_t)device_get_ivars(dev) >= 0) return (ENXIO); /* Is this a known AHCI chip? 
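
The ahci_ids[] matching in both probe routines relies on how pci_get_devid() packs its result: the PCI device ID occupies the upper 16 bits and the vendor ID the lower 16. So a table entry such as 0x43801002 means device 0x4380 from vendor 0x1002 (AMD/ATI). A runnable illustration:

#include <stdint.h>
#include <stdio.h>

static unsigned devid_device(uint32_t id) { return (id >> 16); }
static unsigned devid_vendor(uint32_t id) { return (id & 0xffff); }

int
main(void)
{
        uint32_t id = 0x43801002;       // the AMD SB600 entry above

        printf("vendor 0x%04x device 0x%04x\n",
            devid_vendor(id), devid_device(id));
        return (0);
}
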
*/ for (i = 0; ahci_ids[i].id != 0; i++) { if (ahci_ids[i].id == devid && ahci_ids[i].rev <= revid) { snprintf(buf, sizeof(buf), "%s AHCI SATA controller", ahci_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } } device_set_desc_copy(dev, "AHCI SATA controller"); return (BUS_PROBE_DEFAULT); } static int ahci_pci_read_msix_bars(device_t dev, uint8_t *table_bar, uint8_t *pba_bar) { int cap_offset = 0, ret; uint32_t val; if ((table_bar == NULL) || (pba_bar == NULL)) return (EINVAL); ret = pci_find_cap(dev, PCIY_MSIX, &cap_offset); if (ret != 0) return (EINVAL); val = pci_read_config(dev, cap_offset + PCIR_MSIX_TABLE, 4); *table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK); val = pci_read_config(dev, cap_offset + PCIR_MSIX_PBA, 4); *pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK); return (0); } static int ahci_pci_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int error, i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); int msi_count, msix_count; uint8_t table_bar = 0, pba_bar = 0; msi_count = pci_msi_count(dev); msix_count = pci_msix_count(dev); i = 0; while (ahci_ids[i].id != 0 && (ahci_ids[i].id != devid || ahci_ids[i].rev > revid)) i++; ctlr->quirks = ahci_ids[i].quirks; /* Limit speed for my onboard JMicron external port. * It is not eSATA really, limit to SATA 1 */ if (pci_get_devid(dev) == 0x2363197b && pci_get_subvendor(dev) == 0x1043 && pci_get_subdevice(dev) == 0x81e4) ctlr->quirks |= AHCI_Q_SATA1_UNIT0; resource_int_value(device_get_name(dev), device_get_unit(dev), "quirks", &ctlr->quirks); ctlr->vendorid = pci_get_vendor(dev); ctlr->deviceid = pci_get_device(dev); ctlr->subvendorid = pci_get_subvendor(dev); ctlr->subdeviceid = pci_get_subdevice(dev); /* Default AHCI Base Address is BAR(5), Cavium uses BAR(0) */ if (ctlr->quirks & AHCI_Q_ABAR0) ctlr->r_rid = PCIR_BAR(0); else ctlr->r_rid = PCIR_BAR(5); if (!(ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_rid, RF_ACTIVE))) return ENXIO; if (ctlr->quirks & AHCI_Q_NOMSIX) msix_count = 0; /* Read MSI-x BAR IDs if supported */ if (msix_count > 0) { error = ahci_pci_read_msix_bars(dev, &table_bar, &pba_bar); if (error == 0) { ctlr->r_msix_tab_rid = table_bar; ctlr->r_msix_pba_rid = pba_bar; } else { /* Failed to read BARs, disable MSI-x */ msix_count = 0; } } /* Allocate resources for MSI-x table and PBA */ if (msix_count > 0) { /* * Allocate new MSI-x table only if not * allocated before. */ ctlr->r_msix_table = NULL; if (ctlr->r_msix_tab_rid != ctlr->r_rid) { /* Separate BAR for MSI-x */ ctlr->r_msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_msix_tab_rid, RF_ACTIVE); if (ctlr->r_msix_table == NULL) { ahci_free_mem(dev); return (ENXIO); } } /* * Allocate new PBA table only if not * allocated before. */ ctlr->r_msix_pba = NULL; if ((ctlr->r_msix_pba_rid != ctlr->r_msix_tab_rid) && (ctlr->r_msix_pba_rid != ctlr->r_rid)) { /* Separate BAR for PBA */ ctlr->r_msix_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_msix_pba_rid, RF_ACTIVE); if (ctlr->r_msix_pba == NULL) { ahci_free_mem(dev); return (ENXIO); } } } pci_enable_busmaster(dev); /* Reset controller */ if ((error = ahci_pci_ctlr_reset(dev)) != 0) { ahci_free_mem(dev); return (error); } /* Setup interrupts. */ /* Setup MSI register parameters */ /* Process hints. 
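
ahci_pci_read_msix_bars() above decodes the standard MSI-X capability layout: the dword at the capability offset plus PCIR_MSIX_TABLE (and likewise PCIR_MSIX_PBA) carries the structure's offset together with a 3-bit BAR Indicator Register (BIR) in its low bits, and the BIR selects which BAR holds the structure. A sketch of just that decoding, with BIR_MASK and BAR_REG() as local stand-ins for PCIM_MSIX_BIR_MASK and PCIR_BAR():

#include <stdint.h>

#define BIR_MASK   0x7
#define BAR_REG(x) (0x10 + (x) * 4)     // config-space offset of BAR x

// Given the raw MSI-X table register value, return the config-space
// offset of the BAR that holds the vector table.
static uint32_t
msix_table_bar_reg(uint32_t table_reg)
{
        return (BAR_REG(table_reg & BIR_MASK));
}
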
*/ if (ctlr->quirks & AHCI_Q_NOMSI) ctlr->msi = 0; else if (ctlr->quirks & AHCI_Q_1MSI) ctlr->msi = 1; else ctlr->msi = 2; resource_int_value(device_get_name(dev), device_get_unit(dev), "msi", &ctlr->msi); ctlr->numirqs = 1; if (msi_count == 0 && msix_count == 0) ctlr->msi = 0; if (ctlr->msi < 0) ctlr->msi = 0; else if (ctlr->msi == 1) { msi_count = min(1, msi_count); msix_count = min(1, msix_count); } else if (ctlr->msi > 1) ctlr->msi = 2; /* Allocate MSI/MSI-x if needed/present. */ if (ctlr->msi > 0) { error = ENXIO; /* Try to allocate MSI-x first */ if (msix_count > 0) { error = pci_alloc_msix(dev, &msix_count); if (error == 0) ctlr->numirqs = msix_count; } /* * Try to allocate MSI if msi_count is greater than 0 * and if MSI-x allocation failed. */ if ((error != 0) && (msi_count > 0)) { error = pci_alloc_msi(dev, &msi_count); if (error == 0) ctlr->numirqs = msi_count; } /* Both MSI and MSI-x allocations failed */ if (error != 0) { ctlr->msi = 0; device_printf(dev, "Failed to allocate MSI/MSI-x, " "falling back to INTx\n"); } } error = ahci_attach(dev); if (error != 0) { if (ctlr->msi > 0) pci_release_msi(dev); ahci_free_mem(dev); } return error; } static int ahci_pci_detach(device_t dev) { ahci_detach(dev); pci_release_msi(dev); return (0); } static int ahci_pci_suspend(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); bus_generic_suspend(dev); /* Disable interupts, so the state change(s) doesn't trigger */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, ATA_INL(ctlr->r_mem, AHCI_GHC) & (~AHCI_GHC_IE)); return 0; } static int ahci_pci_resume(device_t dev) { int res; if ((res = ahci_pci_ctlr_reset(dev)) != 0) return (res); ahci_ctlr_setup(dev); return (bus_generic_resume(dev)); } devclass_t ahci_devclass; static device_method_t ahci_methods[] = { DEVMETHOD(device_probe, ahci_probe), DEVMETHOD(device_attach, ahci_pci_attach), DEVMETHOD(device_detach, ahci_pci_detach), DEVMETHOD(device_suspend, ahci_pci_suspend), DEVMETHOD(device_resume, ahci_pci_resume), DEVMETHOD(bus_print_child, ahci_print_child), DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), DEVMETHOD(bus_release_resource, ahci_release_resource), DEVMETHOD(bus_setup_intr, ahci_setup_intr), DEVMETHOD(bus_teardown_intr,ahci_teardown_intr), DEVMETHOD(bus_child_location_str, ahci_child_location_str), DEVMETHOD(bus_get_dma_tag, ahci_get_dma_tag), DEVMETHOD_END }; static driver_t ahci_driver = { "ahci", ahci_methods, sizeof(struct ahci_controller) }; DRIVER_MODULE(ahci, pci, ahci_driver, ahci_devclass, NULL, NULL); static device_method_t ahci_ata_methods[] = { DEVMETHOD(device_probe, ahci_ata_probe), DEVMETHOD(device_attach, ahci_pci_attach), DEVMETHOD(device_detach, ahci_pci_detach), DEVMETHOD(device_suspend, ahci_pci_suspend), DEVMETHOD(device_resume, ahci_pci_resume), DEVMETHOD(bus_print_child, ahci_print_child), DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), DEVMETHOD(bus_release_resource, ahci_release_resource), DEVMETHOD(bus_setup_intr, ahci_setup_intr), DEVMETHOD(bus_teardown_intr,ahci_teardown_intr), DEVMETHOD(bus_child_location_str, ahci_child_location_str), DEVMETHOD_END }; static driver_t ahci_ata_driver = { "ahci", ahci_ata_methods, sizeof(struct ahci_controller) }; DRIVER_MODULE(ahci, atapci, ahci_ata_driver, ahci_devclass, NULL, NULL); Index: projects/clang391-import/sys/dev/ath/if_ath.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath.c (revision 309263) @@ -1,6671 +1,6707 @@ /*- 
* Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif #ifdef ATH_DEBUG_ALQ #include #endif /* * Only enable this if you're working on PS-POLL support. */ #define ATH_SW_PSQ /* * ATH_BCBUF determines the number of vap's that can transmit * beacons and also (currently) the number of vap's that can * have unique mac addresses/bssid. When staggering beacons * 4 is probably a good max as otherwise the beacons become * very closely spaced and there is limited time for cab q traffic * to go out. You can burst beacons instead but that is not good * for stations in power save and at some point you really want * another radio (and channel). * * The limit on the number of mac addresses is tied to our use of * the U/L bit and tracking addresses in a byte; it would be * worthwhile to allow more for applications like proxy sta. 
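
The U/L-bit scheme the comment alludes to derives extra BSSIDs from one hardware address by setting the locally administered bit (bit 1 of the first octet) and folding a small vap index into the address. The sketch below shows the general idea only; the exact byte and shift the driver uses are not asserted here.

#include <stdint.h>

#define ETHER_ADDR_LEN 6

static void
derive_vap_mac(uint8_t mac[ETHER_ADDR_LEN], int vap_id)
{
        mac[0] |= 0x02;                         // mark locally administered
        mac[0] ^= (uint8_t)(vap_id << 2);       // hypothetical index encoding
}
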
*/ CTASSERT(ATH_BCBUF <= 8); static struct ieee80211vap *ath_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void ath_vap_delete(struct ieee80211vap *); static int ath_init(struct ath_softc *); static void ath_stop(struct ath_softc *); static int ath_reset_vap(struct ieee80211vap *, u_long); static int ath_transmit(struct ieee80211com *, struct mbuf *); static int ath_media_change(struct ifnet *); static void ath_watchdog(void *); static void ath_parent(struct ieee80211com *); static void ath_fatal_proc(void *, int); static void ath_bmiss_vap(struct ieee80211vap *); static void ath_bmiss_proc(void *, int); static void ath_key_update_begin(struct ieee80211vap *); static void ath_key_update_end(struct ieee80211vap *); static void ath_update_mcast_hw(struct ath_softc *); static void ath_update_mcast(struct ieee80211com *); static void ath_update_promisc(struct ieee80211com *); static void ath_updateslot(struct ieee80211com *); static void ath_bstuck_proc(void *, int); static void ath_reset_proc(void *, int); static int ath_desc_alloc(struct ath_softc *); static void ath_desc_free(struct ath_softc *); static struct ieee80211_node *ath_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void ath_node_cleanup(struct ieee80211_node *); static void ath_node_free(struct ieee80211_node *); static void ath_node_getsignal(const struct ieee80211_node *, int8_t *, int8_t *); static void ath_txq_init(struct ath_softc *sc, struct ath_txq *, int); static struct ath_txq *ath_txq_setup(struct ath_softc*, int qtype, int subtype); static int ath_tx_setup(struct ath_softc *, int, int); static void ath_tx_cleanupq(struct ath_softc *, struct ath_txq *); static void ath_tx_cleanup(struct ath_softc *); static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched); static void ath_tx_proc_q0(void *, int); static void ath_tx_proc_q0123(void *, int); static void ath_tx_proc(void *, int); static void ath_txq_sched_tasklet(void *, int); static int ath_chan_set(struct ath_softc *, struct ieee80211_channel *); static void ath_chan_change(struct ath_softc *, struct ieee80211_channel *); static void ath_scan_start(struct ieee80211com *); static void ath_scan_end(struct ieee80211com *); static void ath_set_channel(struct ieee80211com *); #ifdef ATH_ENABLE_11N static void ath_update_chw(struct ieee80211com *); #endif /* ATH_ENABLE_11N */ static void ath_calibrate(void *); static int ath_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void ath_setup_stationkey(struct ieee80211_node *); static void ath_newassoc(struct ieee80211_node *, int); static int ath_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel []); static void ath_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int ath_getchannels(struct ath_softc *); static int ath_rate_setup(struct ath_softc *, u_int mode); static void ath_setcurmode(struct ath_softc *, enum ieee80211_phymode); static void ath_announce(struct ath_softc *); static void ath_dfs_tasklet(void *, int); static void ath_node_powersave(struct ieee80211_node *, int); static int ath_node_set_tim(struct ieee80211_node *, int); static void ath_node_recv_pspoll(struct ieee80211_node *, struct mbuf *); #ifdef IEEE80211_SUPPORT_TDMA #include #endif SYSCTL_DECL(_hw_ath); /* XXX validate sysctl values */ static int ath_longcalinterval = 30; /* long 
cals every 30 secs */ SYSCTL_INT(_hw_ath, OID_AUTO, longcal, CTLFLAG_RW, &ath_longcalinterval, 0, "long chip calibration interval (secs)"); static int ath_shortcalinterval = 100; /* short cals every 100 ms */ SYSCTL_INT(_hw_ath, OID_AUTO, shortcal, CTLFLAG_RW, &ath_shortcalinterval, 0, "short chip calibration interval (msecs)"); static int ath_resetcalinterval = 20*60; /* reset cal state 20 mins */ SYSCTL_INT(_hw_ath, OID_AUTO, resetcal, CTLFLAG_RW, &ath_resetcalinterval, 0, "reset chip calibration results (secs)"); static int ath_anicalinterval = 100; /* ANI calibration - 100 msec */ SYSCTL_INT(_hw_ath, OID_AUTO, anical, CTLFLAG_RW, &ath_anicalinterval, 0, "ANI calibration (msecs)"); int ath_rxbuf = ATH_RXBUF; /* # rx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, rxbuf, CTLFLAG_RWTUN, &ath_rxbuf, 0, "rx buffers allocated"); int ath_txbuf = ATH_TXBUF; /* # tx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, txbuf, CTLFLAG_RWTUN, &ath_txbuf, 0, "tx buffers allocated"); int ath_txbuf_mgmt = ATH_MGMT_TXBUF; /* # mgmt tx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, txbuf_mgmt, CTLFLAG_RWTUN, &ath_txbuf_mgmt, 0, "tx (mgmt) buffers allocated"); int ath_bstuck_threshold = 4; /* max missed beacons */ SYSCTL_INT(_hw_ath, OID_AUTO, bstuck, CTLFLAG_RW, &ath_bstuck_threshold, 0, "max missed beacon xmits before chip reset"); MALLOC_DEFINE(M_ATHDEV, "athdev", "ath driver dma buffers"); void ath_legacy_attach_comp_func(struct ath_softc *sc) { /* * Special case certain configurations. Note the * CAB queue is handled by these specially so don't * include them when checking the txq setup mask. */ switch (sc->sc_txqsetup &~ (1<sc_cabq->axq_qnum)) { case 0x01: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc_q0, sc); break; case 0x0f: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc_q0123, sc); break; default: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc, sc); break; } } /* * Set the target power mode. * * If this is called during a point in time where * the hardware is being programmed elsewhere, it will * simply store it away and update it when all current * uses of the hardware are completed. + * + * If the chip is going into network sleep or power off, then + * we will wait until all uses of the chip are done before + * going into network sleep or power off. + * + * If the chip is being programmed full-awake, then immediately + * program it full-awake so we can actually stay awake rather than + * the chip potentially going to sleep underneath us. */ void -_ath_power_setpower(struct ath_softc *sc, int power_state, const char *file, int line) +_ath_power_setpower(struct ath_softc *sc, int power_state, int selfgen, + const char *file, int line) { ATH_LOCK_ASSERT(sc); - sc->sc_target_powerstate = power_state; - - DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", + DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d, target=%d, cur=%d\n", __func__, file, line, power_state, - sc->sc_powersave_refcnt); + sc->sc_powersave_refcnt, + sc->sc_target_powerstate, + sc->sc_cur_powerstate); - if (sc->sc_powersave_refcnt == 0 && + sc->sc_target_powerstate = power_state; + + /* + * Don't program the chip into network sleep if the chip + * is being programmed elsewhere. + * + * However, if the chip is being programmed /awake/, force + * the chip awake so we stay awake. 
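
The rule this change adds can be stated as one predicate: defer sleep transitions while any reference is held, but apply an AWAKE transition immediately, and skip the register write entirely when the chip is already in the requested state. A stand-alone sketch, with the PM_* values as stand-ins for the HAL_PM_* constants:

enum pm { PM_AWAKE, PM_NETWORK_SLEEP, PM_FULL_SLEEP };

// Returns nonzero when the hardware power state should be
// reprogrammed now rather than deferred.
static int
should_program_power(int refcnt, enum pm target, enum pm cur)
{
        return ((refcnt == 0 || target == PM_AWAKE) && target != cur);
}
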
*/ + if ((sc->sc_powersave_refcnt == 0 || power_state == HAL_PM_AWAKE) && power_state != sc->sc_cur_powerstate) { sc->sc_cur_powerstate = power_state; ath_hal_setpower(sc->sc_ah, power_state); /* * If the NIC is force-awake, then set the * self-gen frame state appropriately. * * If the nic is in network sleep or full-sleep, * we let the above call leave the self-gen * state as "sleep". */ - if (sc->sc_cur_powerstate == HAL_PM_AWAKE && + if (selfgen && + sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } } } /* * Set the current self-generated frames state. * * This is separate from the target power mode. The chip may be * awake but the desired state is "sleep", so frames sent to the * destination have PWRMGT=1 in the 802.11 header. The NIC also * needs to know to set PWRMGT=1 in self-generated frames. */ void _ath_power_set_selfgen(struct ath_softc *sc, int power_state, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", __func__, file, line, power_state, sc->sc_target_selfgen_state); sc->sc_target_selfgen_state = power_state; /* * If the NIC is force-awake, then set the power state. * Network-state and full-sleep will already transition it to * mark self-gen frames as sleeping - and we can't * guarantee the NIC is awake to program the self-gen frame * setting anyway. */ if (sc->sc_cur_powerstate == HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, power_state); } } /* * Set the hardware power mode and take a reference. * * This doesn't update the target power mode in the driver; * it just updates the hardware power state. * * XXX it should only ever force the hardware awake; it should * never be called to set it asleep. */ void _ath_power_set_power_state(struct ath_softc *sc, int power_state, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", __func__, file, line, power_state, sc->sc_powersave_refcnt); sc->sc_powersave_refcnt++; + /* + * Only do the power state change if we're not programming + * it elsewhere. + */ if (power_state != sc->sc_cur_powerstate) { ath_hal_setpower(sc->sc_ah, power_state); sc->sc_cur_powerstate = power_state; - /* * Adjust the self-gen powerstate if appropriate. */ if (sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } - } } /* * Restore the power save mode to what it once was. * * This will decrement the reference counter and once it hits * zero, it'll restore the powersave state. */ void _ath_power_restore_power_state(struct ath_softc *sc, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) refcnt=%d, target state=%d\n", __func__, file, line, sc->sc_powersave_refcnt, sc->sc_target_powerstate); if (sc->sc_powersave_refcnt == 0) device_printf(sc->sc_dev, "%s: refcnt=0?\n", __func__); else sc->sc_powersave_refcnt--; if (sc->sc_powersave_refcnt == 0 && sc->sc_target_powerstate != sc->sc_cur_powerstate) { sc->sc_cur_powerstate = sc->sc_target_powerstate; ath_hal_setpower(sc->sc_ah, sc->sc_target_powerstate); } /* * Adjust the self-gen powerstate if appropriate.
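 *
 * For reference, a minimal sketch of how callers are expected to use
 * this wake/restore pair around hardware access (the same bracket
 * ath_intr() and ath_bmiss_vap() below use):
 *
 *	ATH_LOCK(sc);
 *	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 *	ATH_UNLOCK(sc);
 *
 *	... program the hardware ...
 *
 *	ATH_LOCK(sc);
 *	ath_power_restore_power_state(sc);
 *	ATH_UNLOCK(sc);
 *
 * The reference count makes nested wakeups cheap; only the final
 * restore drops the chip back to sc_target_powerstate.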
*/ if (sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } } /* * Configure the initial HAL configuration values based on bus * specific parameters. * * Some PCI IDs and other information may need tweaking. * * XXX TODO: ath9k and the Atheros HAL only program comm2g_switch_enable * if BT antenna diversity isn't enabled. * * So, let's also figure out how to enable BT diversity for AR9485. */ static void ath_setup_hal_config(struct ath_softc *sc, HAL_OPS_CONFIG *ah_config) { /* XXX TODO: only for PCI devices? */ if (sc->sc_pci_devinfo & (ATH_PCI_CUS198 | ATH_PCI_CUS230)) { ah_config->ath_hal_ext_lna_ctl_gpio = 0x200; /* bit 9 */ ah_config->ath_hal_ext_atten_margin_cfg = AH_TRUE; ah_config->ath_hal_min_gainidx = AH_TRUE; ah_config->ath_hal_ant_ctrl_comm2g_switch_enable = 0x000bbb88; /* XXX low_rssi_thresh */ /* XXX fast_div_bias */ device_printf(sc->sc_dev, "configuring for %s\n", (sc->sc_pci_devinfo & ATH_PCI_CUS198) ? "CUS198" : "CUS230"); } if (sc->sc_pci_devinfo & ATH_PCI_CUS217) device_printf(sc->sc_dev, "CUS217 card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_CUS252) device_printf(sc->sc_dev, "CUS252 card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_AR9565_1ANT) device_printf(sc->sc_dev, "WB335 1-ANT card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_AR9565_2ANT) device_printf(sc->sc_dev, "WB335 2-ANT card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_BT_ANT_DIV) device_printf(sc->sc_dev, "Bluetooth Antenna Diversity card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_KILLER) device_printf(sc->sc_dev, "Killer Wireless card detected\n"); #if 0 /* * Some WB335 cards do not support antenna diversity. Since * we use a hardcoded value for AR9565 instead of using the * EEPROM/OTP data, remove the combining feature from * the HW capabilities bitmap. */ if (sc->sc_pci_devinfo & (ATH9K_PCI_AR9565_1ANT | ATH9K_PCI_AR9565_2ANT)) { if (!(sc->sc_pci_devinfo & ATH9K_PCI_BT_ANT_DIV)) pCap->hw_caps &= ~ATH9K_HW_CAP_ANT_DIV_COMB; } if (sc->sc_pci_devinfo & ATH9K_PCI_BT_ANT_DIV) { pCap->hw_caps |= ATH9K_HW_CAP_BT_ANT_DIV; device_printf(sc->sc_dev, "Set BT/WLAN RX diversity capability\n"); } #endif if (sc->sc_pci_devinfo & ATH_PCI_D3_L1_WAR) { ah_config->ath_hal_pcie_waen = 0x0040473b; device_printf(sc->sc_dev, "Enable WAR for ASPM D3/L1\n"); } #if 0 if (sc->sc_pci_devinfo & ATH9K_PCI_NO_PLL_PWRSAVE) { ah->config.no_pll_pwrsave = true; device_printf(sc->sc_dev, "Disable PLL PowerSave\n"); } #endif } /* * Attempt to fetch the MAC address from the kernel environment. * * Returns 0, macaddr in macaddr if successful; -1 otherwise. */ static int ath_fetch_mac_kenv(struct ath_softc *sc, uint8_t *macaddr) { char devid_str[32]; int local_mac = 0; char *local_macstr; /* * Fetch from the kenv rather than using hints. * * Hints would be nice but the transition to dynamic * hints/kenv doesn't happen early enough for this * to work reliably (eg on anything embedded.) */ snprintf(devid_str, 32, "hint.%s.%d.macaddr", device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev)); if ((local_macstr = kern_getenv(devid_str)) != NULL) { uint32_t tmpmac[ETHER_ADDR_LEN]; int count; int i; /* Have a MAC address; should use it */ device_printf(sc->sc_dev, "Overriding MAC address from environment: '%s'\n", local_macstr); /* Extract out the MAC address */ count = sscanf(local_macstr, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", &tmpmac[0], &tmpmac[1], &tmpmac[2], &tmpmac[3], &tmpmac[4], &tmpmac[5]); if (count == 6) { /* Valid! 
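 * As a worked example (hypothetical address), a loader.conf entry of
 *
 *	hint.ath.0.macaddr="00:03:7f:11:22:33"
 *
 * makes the kern_getenv() lookup above return that string for the
 * devid_str "hint.ath.0.macaddr", and the sscanf() then yields all
 * six octets, i.e. count == 6.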
*/ local_mac = 1; for (i = 0; i < ETHER_ADDR_LEN; i++) macaddr[i] = tmpmac[i]; } /* Done! */ freeenv(local_macstr); local_macstr = NULL; } if (local_mac) return (0); return (-1); } #define HAL_MODE_HT20 (HAL_MODE_11NG_HT20 | HAL_MODE_11NA_HT20) #define HAL_MODE_HT40 \ (HAL_MODE_11NG_HT40PLUS | HAL_MODE_11NG_HT40MINUS | \ HAL_MODE_11NA_HT40PLUS | HAL_MODE_11NA_HT40MINUS) int ath_attach(u_int16_t devid, struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = NULL; HAL_STATUS status; int error = 0, i; u_int wmodes; int rx_chainmask, tx_chainmask; HAL_OPS_CONFIG ah_config; DPRINTF(sc, ATH_DEBUG_ANY, "%s: devid 0x%x\n", __func__, devid); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); /* * Configure the initial configuration data. * * This is stuff that may be needed early during attach * rather than done via configuration calls later. */ bzero(&ah_config, sizeof(ah_config)); ath_setup_hal_config(sc, &ah_config); ah = ath_hal_attach(devid, sc, sc->sc_st, sc->sc_sh, sc->sc_eepromdata, &ah_config, &status); if (ah == NULL) { device_printf(sc->sc_dev, "unable to attach hardware; HAL status %u\n", status); error = ENXIO; goto bad; } sc->sc_ah = ah; sc->sc_invalid = 0; /* ready to go, enable interrupt handling */ #ifdef ATH_DEBUG sc->sc_debug = ath_debug; #endif /* * Setup the DMA/EDMA functions based on the current * hardware support. * * This is required before the descriptors are allocated. */ if (ath_hal_hasedma(sc->sc_ah)) { sc->sc_isedma = 1; ath_recv_setup_edma(sc); ath_xmit_setup_edma(sc); } else { ath_recv_setup_legacy(sc); ath_xmit_setup_legacy(sc); } if (ath_hal_hasmybeacon(sc->sc_ah)) { sc->sc_do_mybeacon = 1; } /* * Check if the MAC has multi-rate retry support. * We do this by trying to setup a fake extended * descriptor. MAC's that don't have support will * return false w/o doing anything. MAC's that do * support it will return true w/o doing anything. */ sc->sc_mrretry = ath_hal_setupxtxdesc(ah, NULL, 0,0, 0,0, 0,0); /* * Check if the device has hardware counters for PHY * errors. If so we need to enable the MIB interrupt * so we can act on stat triggers. */ if (ath_hal_hwphycounters(ah)) sc->sc_needmib = 1; /* * Get the hardware key cache size. */ sc->sc_keymax = ath_hal_keycachesize(ah); if (sc->sc_keymax > ATH_KEYMAX) { device_printf(sc->sc_dev, "Warning, using only %u of %u key cache slots\n", ATH_KEYMAX, sc->sc_keymax); sc->sc_keymax = ATH_KEYMAX; } /* * Reset the key cache since some parts do not * reset the contents on initial power up. */ for (i = 0; i < sc->sc_keymax; i++) ath_hal_keyreset(ah, i); /* * Collect the default channel list. */ error = ath_getchannels(sc); if (error != 0) goto bad; /* * Setup rate tables for all potential media types. */ ath_rate_setup(sc, IEEE80211_MODE_11A); ath_rate_setup(sc, IEEE80211_MODE_11B); ath_rate_setup(sc, IEEE80211_MODE_11G); ath_rate_setup(sc, IEEE80211_MODE_TURBO_A); ath_rate_setup(sc, IEEE80211_MODE_TURBO_G); ath_rate_setup(sc, IEEE80211_MODE_STURBO_A); ath_rate_setup(sc, IEEE80211_MODE_11NA); ath_rate_setup(sc, IEEE80211_MODE_11NG); ath_rate_setup(sc, IEEE80211_MODE_HALF); ath_rate_setup(sc, IEEE80211_MODE_QUARTER); /* NB: setup here so ath_rate_update is happy */ ath_setcurmode(sc, IEEE80211_MODE_11A); /* * Allocate TX descriptors and populate the lists. 
*/ error = ath_desc_alloc(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate TX descriptors: %d\n", error); goto bad; } error = ath_txdma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate TX descriptors: %d\n", error); goto bad; } /* * Allocate RX descriptors and populate the lists. */ error = ath_rxdma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate RX descriptors: %d\n", error); goto bad; } callout_init_mtx(&sc->sc_cal_ch, &sc->sc_mtx, 0); callout_init_mtx(&sc->sc_wd_ch, &sc->sc_mtx, 0); ATH_TXBUF_LOCK_INIT(sc); sc->sc_tq = taskqueue_create("ath_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->sc_tq); taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); TASK_INIT(&sc->sc_rxtask, 0, sc->sc_rx.recv_tasklet, sc); TASK_INIT(&sc->sc_bmisstask, 0, ath_bmiss_proc, sc); TASK_INIT(&sc->sc_bstucktask,0, ath_bstuck_proc, sc); TASK_INIT(&sc->sc_resettask,0, ath_reset_proc, sc); TASK_INIT(&sc->sc_txqtask, 0, ath_txq_sched_tasklet, sc); TASK_INIT(&sc->sc_fataltask, 0, ath_fatal_proc, sc); /* * Allocate hardware transmit queues: one queue for * beacon frames and one data queue for each QoS * priority. Note that the hal handles resetting * these queues at the needed time. * * XXX PS-Poll */ sc->sc_bhalq = ath_beaconq_setup(sc); if (sc->sc_bhalq == (u_int) -1) { device_printf(sc->sc_dev, "unable to setup a beacon xmit queue!\n"); error = EIO; goto bad2; } sc->sc_cabq = ath_txq_setup(sc, HAL_TX_QUEUE_CAB, 0); if (sc->sc_cabq == NULL) { device_printf(sc->sc_dev, "unable to setup CAB xmit queue!\n"); error = EIO; goto bad2; } /* NB: insure BK queue is the lowest priority h/w queue */ if (!ath_tx_setup(sc, WME_AC_BK, HAL_WME_AC_BK)) { device_printf(sc->sc_dev, "unable to setup xmit queue for %s traffic!\n", ieee80211_wme_acnames[WME_AC_BK]); error = EIO; goto bad2; } if (!ath_tx_setup(sc, WME_AC_BE, HAL_WME_AC_BE) || !ath_tx_setup(sc, WME_AC_VI, HAL_WME_AC_VI) || !ath_tx_setup(sc, WME_AC_VO, HAL_WME_AC_VO)) { /* * Not enough hardware tx queues to properly do WME; * just punt and assign them all to the same h/w queue. * We could do a better job of this if, for example, * we allocate queues when we switch from station to * AP mode. */ if (sc->sc_ac2q[WME_AC_VI] != NULL) ath_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_VI]); if (sc->sc_ac2q[WME_AC_BE] != NULL) ath_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_BE]); sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VI] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VO] = sc->sc_ac2q[WME_AC_BK]; } /* * Attach the TX completion function. * * The non-EDMA chips may have some special case optimisations; * this method gives everyone a chance to attach cleanly. */ sc->sc_tx.xmit_attach_comp_func(sc); /* * Setup rate control. Some rate control modules * call back to change the antenna state so expose * the necessary entry points. * XXX maybe belongs in struct ath_ratectrl? */ sc->sc_setdefantenna = ath_setdefantenna; sc->sc_rc = ath_rate_attach(sc); if (sc->sc_rc == NULL) { error = EIO; goto bad2; } /* Attach DFS module */ if (!
ath_dfs_attach(sc)) { device_printf(sc->sc_dev, "%s: unable to attach DFS\n", __func__); error = EIO; goto bad2; } /* Attach spectral module */ if (ath_spectral_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach spectral\n", __func__); error = EIO; goto bad2; } /* Attach bluetooth coexistence module */ if (ath_btcoex_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach bluetooth coexistence\n", __func__); error = EIO; goto bad2; } /* Attach LNA diversity module */ if (ath_lna_div_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach LNA diversity\n", __func__); error = EIO; goto bad2; } /* Start DFS processing tasklet */ TASK_INIT(&sc->sc_dfstask, 0, ath_dfs_tasklet, sc); /* Configure LED state */ sc->sc_blinking = 0; sc->sc_ledstate = 1; sc->sc_ledon = 0; /* low true */ sc->sc_ledidle = (2700*hz)/1000; /* 2.7sec */ callout_init(&sc->sc_ledtimer, 1); /* * Don't setup hardware-based blinking. * * Although some NICs may have this configured in the * default reset register values, the user may wish * to alter which pins have which function. * * The reference driver attaches the MAC network LED to GPIO1 and * the MAC power LED to GPIO2. However, the DWA-552 cardbus * NIC has these reversed. */ sc->sc_hardled = (1 == 0); sc->sc_led_net_pin = -1; sc->sc_led_pwr_pin = -1; /* * Auto-enable soft led processing for IBM cards and for * 5211 minipci cards. Users can also manually enable/disable * support with a sysctl. */ sc->sc_softled = (devid == AR5212_DEVID_IBM || devid == AR5211_DEVID); ath_led_config(sc); ath_hal_setledstate(ah, HAL_LED_INIT); /* XXX not right but it's not used anywhere important */ ic->ic_phytype = IEEE80211_T_OFDM; ic->ic_opmode = IEEE80211_M_STA; ic->ic_caps = IEEE80211_C_STA /* station mode */ | IEEE80211_C_IBSS /* ibss, nee adhoc, mode */ | IEEE80211_C_HOSTAP /* hostap mode */ | IEEE80211_C_MONITOR /* monitor mode */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ | IEEE80211_C_WDS /* 4-address traffic works */ | IEEE80211_C_MBSS /* mesh point link mode */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WPA /* capable of WPA1+WPA2 */ #ifndef ATH_ENABLE_11N | IEEE80211_C_BGSCAN /* capable of bg scanning */ #endif | IEEE80211_C_TXFRAG /* handle tx frags */ #ifdef ATH_ENABLE_DFS | IEEE80211_C_DFS /* Enable radar detection */ #endif | IEEE80211_C_PMGT /* Station side power mgmt */ | IEEE80211_C_SWSLEEP ; /* * Query the hal to figure out h/w crypto support. */ if (ath_hal_ciphersupported(ah, HAL_CIPHER_WEP)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP; if (ath_hal_ciphersupported(ah, HAL_CIPHER_AES_OCB)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_AES_OCB; if (ath_hal_ciphersupported(ah, HAL_CIPHER_AES_CCM)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_AES_CCM; if (ath_hal_ciphersupported(ah, HAL_CIPHER_CKIP)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_CKIP; if (ath_hal_ciphersupported(ah, HAL_CIPHER_TKIP)) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIP; /* * Check if h/w does the MIC and/or whether the * separate key cache entries are required to * handle both tx+rx MIC keys. */ if (ath_hal_ciphersupported(ah, HAL_CIPHER_MIC)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIPMIC; /* * If the h/w supports storing tx+rx MIC keys * in one cache slot automatically enable use. 
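 *
 * As a concrete illustration of the slot accounting: with split MIC
 * in use, global key 0 occupies key cache slots 0 and 64 plus the
 * MIC slots 32 and 96, which is why the keymap setup below reserves
 * i, i+32, i+64 and i+32+64 for each of the IEEE80211_WEP_NKID keys.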
*/ if (ath_hal_hastkipsplit(ah) || !ath_hal_settkipsplit(ah, AH_FALSE)) sc->sc_splitmic = 1; /* * If the h/w can do TKIP MIC together with WME then * we use it; otherwise we force the MIC to be done * in software by the net80211 layer. */ if (ath_hal_haswmetkipmic(ah)) sc->sc_wmetkipmic = 1; } sc->sc_hasclrkey = ath_hal_ciphersupported(ah, HAL_CIPHER_CLR); /* * Check for multicast key search support. */ if (ath_hal_hasmcastkeysearch(sc->sc_ah) && !ath_hal_getmcastkeysearch(sc->sc_ah)) { ath_hal_setmcastkeysearch(sc->sc_ah, 1); } sc->sc_mcastkey = ath_hal_getmcastkeysearch(ah); /* * Mark key cache slots associated with global keys * as in use. If we knew TKIP was not to be used we * could leave the +32, +64, and +32+64 slots free. */ for (i = 0; i < IEEE80211_WEP_NKID; i++) { setbit(sc->sc_keymap, i); setbit(sc->sc_keymap, i+64); if (sc->sc_splitmic) { setbit(sc->sc_keymap, i+32); setbit(sc->sc_keymap, i+32+64); } } /* * TPC support can be done either with a global cap or * per-packet support. The latter is not available on * all parts. We're a bit pedantic here as all parts * support a global cap. */ if (ath_hal_hastpc(ah) || ath_hal_hastxpowlimit(ah)) ic->ic_caps |= IEEE80211_C_TXPMGT; /* * Mark WME capability only if we have sufficient * hardware queues to do proper priority scheduling. */ if (sc->sc_ac2q[WME_AC_BE] != sc->sc_ac2q[WME_AC_BK]) ic->ic_caps |= IEEE80211_C_WME; /* * Check for misc other capabilities. */ if (ath_hal_hasbursting(ah)) ic->ic_caps |= IEEE80211_C_BURST; sc->sc_hasbmask = ath_hal_hasbssidmask(ah); sc->sc_hasbmatch = ath_hal_hasbssidmatch(ah); sc->sc_hastsfadd = ath_hal_hastsfadjust(ah); sc->sc_rxslink = ath_hal_self_linked_final_rxdesc(ah); /* XXX TODO: just make this a "store tx/rx timestamp length" operation */ if (ath_hal_get_rx_tsf_prec(ah, &i)) { if (i == 32) { sc->sc_rxtsf32 = 1; } if (bootverbose) device_printf(sc->sc_dev, "RX timestamp: %d bits\n", i); } if (ath_hal_get_tx_tsf_prec(ah, &i)) { if (bootverbose) device_printf(sc->sc_dev, "TX timestamp: %d bits\n", i); } sc->sc_hasenforcetxop = ath_hal_hasenforcetxop(ah); sc->sc_rx_lnamixer = ath_hal_hasrxlnamixer(ah); sc->sc_hasdivcomb = ath_hal_hasdivantcomb(ah); if (ath_hal_hasfastframes(ah)) ic->ic_caps |= IEEE80211_C_FF; wmodes = ath_hal_getwirelessmodes(ah); if (wmodes & (HAL_MODE_108G|HAL_MODE_TURBO)) ic->ic_caps |= IEEE80211_C_TURBOP; #ifdef IEEE80211_SUPPORT_TDMA if (ath_hal_macversion(ah) > 0x78) { ic->ic_caps |= IEEE80211_C_TDMA; /* capable of TDMA */ ic->ic_tdma_update = ath_tdma_update; } #endif /* * TODO: enforce that at least this many frames are available * in the txbuf list before allowing data frames (raw or * otherwise) to be transmitted. */ sc->sc_txq_data_minfree = 10; /* * Leave this as default to maintain legacy behaviour. * Shortening the cabq/mcastq may end up causing some * undesirable behaviour. */ sc->sc_txq_mcastq_maxdepth = ath_txbuf; /* * How deep can the node software TX queue get whilst it's asleep. */ sc->sc_txq_node_psq_maxdepth = 16; /* * Default the maximum queue depth for a given node * to 1/4'th the TX buffers, or 64, whichever * is larger. */ sc->sc_txq_node_maxdepth = MAX(64, ath_txbuf / 4); /* Enable CABQ by default */ sc->sc_cabq_enable = 1; /* * Allow the TX and RX chainmasks to be overridden by * environment variables and/or device.hints. * * This must be done early - before the hardware is * calibrated or before the 802.11n stream calculation * is done. 
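 *
 * For example (hypothetical values), a /boot/device.hints entry of
 *
 *	hint.ath.0.rx_chainmask="0x3"
 *	hint.ath.0.tx_chainmask="0x3"
 *
 * is picked up by the resource_int_value() calls below and pushed
 * into the HAL before the stream counts are derived.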
*/ if (resource_int_value(device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev), "rx_chainmask", &rx_chainmask) == 0) { device_printf(sc->sc_dev, "Setting RX chainmask to 0x%x\n", rx_chainmask); (void) ath_hal_setrxchainmask(sc->sc_ah, rx_chainmask); } if (resource_int_value(device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev), "tx_chainmask", &tx_chainmask) == 0) { device_printf(sc->sc_dev, "Setting TX chainmask to 0x%x\n", tx_chainmask); (void) ath_hal_settxchainmask(sc->sc_ah, tx_chainmask); } /* * Query the TX/RX chainmask configuration. * * This is only relevant for 11n devices. */ ath_hal_getrxchainmask(ah, &sc->sc_rxchainmask); ath_hal_gettxchainmask(ah, &sc->sc_txchainmask); /* * Disable MRR with protected frames by default. * Only 802.11n series NICs can handle this. */ sc->sc_mrrprot = 0; /* XXX should be a capability */ /* * Query the enterprise mode information from the HAL. */ if (ath_hal_getcapability(ah, HAL_CAP_ENTERPRISE_MODE, 0, &sc->sc_ent_cfg) == HAL_OK) sc->sc_use_ent = 1; #ifdef ATH_ENABLE_11N /* * Query HT capabilities */ if (ath_hal_getcapability(ah, HAL_CAP_HT, 0, NULL) == HAL_OK && (wmodes & (HAL_MODE_HT20 | HAL_MODE_HT40))) { uint32_t rxs, txs; uint32_t ldpc; device_printf(sc->sc_dev, "[HT] enabling HT modes\n"); sc->sc_mrrprot = 1; /* XXX should be a capability */ ic->ic_htcaps = IEEE80211_HTC_HT /* HT operation */ | IEEE80211_HTC_AMPDU /* A-MPDU tx/rx */ | IEEE80211_HTC_AMSDU /* A-MSDU tx/rx */ | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ | IEEE80211_HTCAP_SMPS_OFF; /* SM power save off */ /* * Enable short-GI for HT20 only if the hardware * advertises support. * Notably, anything earlier than the AR9287 doesn't. */ if ((ath_hal_getcapability(ah, HAL_CAP_HT20_SGI, 0, NULL) == HAL_OK) && (wmodes & HAL_MODE_HT20)) { device_printf(sc->sc_dev, "[HT] enabling short-GI in 20MHz mode\n"); ic->ic_htcaps |= IEEE80211_HTCAP_SHORTGI20; } if (wmodes & HAL_MODE_HT40) ic->ic_htcaps |= IEEE80211_HTCAP_CHWIDTH40 | IEEE80211_HTCAP_SHORTGI40; /* * TX/RX streams need to be taken into account when * negotiating which MCS rates it'll receive and * what MCS rates are available for TX. */ (void) ath_hal_getcapability(ah, HAL_CAP_STREAMS, 0, &txs); (void) ath_hal_getcapability(ah, HAL_CAP_STREAMS, 1, &rxs); ic->ic_txstream = txs; ic->ic_rxstream = rxs; /* * Setup TX and RX STBC based on what the HAL allows and * the currently configured chainmask set. * Ie - don't enable STBC TX if only one chain is enabled. * STBC RX is fine on a single RX chain; it just won't * provide any real benefit. */ if (ath_hal_getcapability(ah, HAL_CAP_RX_STBC, 0, NULL) == HAL_OK) { sc->sc_rx_stbc = 1; device_printf(sc->sc_dev, "[HT] 1 stream STBC receive enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_RXSTBC_1STREAM; } if (txs > 1 && ath_hal_getcapability(ah, HAL_CAP_TX_STBC, 0, NULL) == HAL_OK) { sc->sc_tx_stbc = 1; device_printf(sc->sc_dev, "[HT] 1 stream STBC transmit enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_TXSTBC; } (void) ath_hal_getcapability(ah, HAL_CAP_RTS_AGGR_LIMIT, 1, &sc->sc_rts_aggr_limit); if (sc->sc_rts_aggr_limit != (64 * 1024)) device_printf(sc->sc_dev, "[HT] RTS aggregates limited to %d KiB\n", sc->sc_rts_aggr_limit / 1024); /* * LDPC */ if ((ath_hal_getcapability(ah, HAL_CAP_LDPC, 0, &ldpc)) == HAL_OK && (ldpc == 1)) { sc->sc_has_ldpc = 1; device_printf(sc->sc_dev, "[HT] LDPC transmit/receive enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_LDPC; } device_printf(sc->sc_dev, "[HT] %d RX streams; %d TX streams\n", rxs, txs); } #endif /* * Initial aggregation settings.
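 *
 * As a worked example of the RTS aggregate limit queried above: a
 * (hypothetical) chip reporting HAL_CAP_RTS_AGGR_LIMIT of 8192 would
 * print "[HT] RTS aggregates limited to 8 KiB" and cap RTS-protected
 * aggregates at 8192 bytes instead of the default 64 KiB.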
*/ sc->sc_hwq_limit_aggr = ATH_AGGR_MIN_QDEPTH; sc->sc_hwq_limit_nonaggr = ATH_NONAGGR_MIN_QDEPTH; sc->sc_tid_hwq_lo = ATH_AGGR_SCHED_LOW; sc->sc_tid_hwq_hi = ATH_AGGR_SCHED_HIGH; sc->sc_aggr_limit = ATH_AGGR_MAXSIZE; sc->sc_delim_min_pad = 0; /* * Check if the hardware requires PCI register serialisation. * Some of the Owl based MACs require this. */ if (mp_ncpus > 1 && ath_hal_getcapability(ah, HAL_CAP_SERIALISE_WAR, 0, NULL) == HAL_OK) { sc->sc_ah->ah_config.ah_serialise_reg_war = 1; device_printf(sc->sc_dev, "Enabling register serialisation\n"); } /* * Initialise the deferred completed RX buffer list. */ TAILQ_INIT(&sc->sc_rx_rxlist[HAL_RX_QUEUE_HP]); TAILQ_INIT(&sc->sc_rx_rxlist[HAL_RX_QUEUE_LP]); /* * Indicate we need the 802.11 header padded to a * 32-bit boundary for 4-address and QoS frames. */ ic->ic_flags |= IEEE80211_F_DATAPAD; /* * Query the hal about antenna support. */ sc->sc_defant = ath_hal_getdefantenna(ah); /* * Not all chips have the VEOL support we want to * use with IBSS beacons; check here for it. */ sc->sc_hasveol = ath_hal_hasveol(ah); /* get mac address from kenv first, then hardware */ if (ath_fetch_mac_kenv(sc, ic->ic_macaddr) == 0) { /* Tell the HAL now about the new MAC */ ath_hal_setmac(ah, ic->ic_macaddr); } else { ath_hal_getmac(ah, ic->ic_macaddr); } if (sc->sc_hasbmask) ath_hal_getbssidmask(ah, sc->sc_hwbssidmask); /* NB: used to size node table key mapping array */ ic->ic_max_keyix = sc->sc_keymax; /* call MI attach routine. */ ieee80211_ifattach(ic); ic->ic_setregdomain = ath_setregdomain; ic->ic_getradiocaps = ath_getradiocaps; sc->sc_opmode = HAL_M_STA; /* override default methods */ ic->ic_ioctl = ath_ioctl; ic->ic_parent = ath_parent; ic->ic_transmit = ath_transmit; ic->ic_newassoc = ath_newassoc; ic->ic_updateslot = ath_updateslot; ic->ic_wme.wme_update = ath_wme_update; ic->ic_vap_create = ath_vap_create; ic->ic_vap_delete = ath_vap_delete; ic->ic_raw_xmit = ath_raw_xmit; ic->ic_update_mcast = ath_update_mcast; ic->ic_update_promisc = ath_update_promisc; ic->ic_node_alloc = ath_node_alloc; sc->sc_node_free = ic->ic_node_free; ic->ic_node_free = ath_node_free; sc->sc_node_cleanup = ic->ic_node_cleanup; ic->ic_node_cleanup = ath_node_cleanup; ic->ic_node_getsignal = ath_node_getsignal; ic->ic_scan_start = ath_scan_start; ic->ic_scan_end = ath_scan_end; ic->ic_set_channel = ath_set_channel; #ifdef ATH_ENABLE_11N /* 802.11n specific - but just override anyway */ sc->sc_addba_request = ic->ic_addba_request; sc->sc_addba_response = ic->ic_addba_response; sc->sc_addba_stop = ic->ic_addba_stop; sc->sc_bar_response = ic->ic_bar_response; sc->sc_addba_response_timeout = ic->ic_addba_response_timeout; ic->ic_addba_request = ath_addba_request; ic->ic_addba_response = ath_addba_response; ic->ic_addba_response_timeout = ath_addba_response_timeout; ic->ic_addba_stop = ath_addba_stop; ic->ic_bar_response = ath_bar_response; ic->ic_update_chw = ath_update_chw; #endif /* ATH_ENABLE_11N */ #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * There's one vendor bitmap entry in the RX radiotap * header; make sure that's taken into account. */ ieee80211_radiotap_attachv(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), 0, ATH_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), 1, ATH_RX_RADIOTAP_PRESENT); #else /* * No vendor bitmap/extensions are present. 
*/ ieee80211_radiotap_attach(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), ATH_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), ATH_RX_RADIOTAP_PRESENT); #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ /* * Setup the ALQ logging if required */ #ifdef ATH_DEBUG_ALQ if_ath_alq_init(&sc->sc_alq, device_get_nameunit(sc->sc_dev)); if_ath_alq_setcfg(&sc->sc_alq, sc->sc_ah->ah_macVersion, sc->sc_ah->ah_macRev, sc->sc_ah->ah_phyRev, sc->sc_ah->ah_magic); #endif /* * Setup dynamic sysctl's now that country code and * regdomain are available from the hal. */ ath_sysctlattach(sc); ath_sysctl_stats_attach(sc); ath_sysctl_hal_attach(sc); if (bootverbose) ieee80211_announce(ic); ath_announce(sc); /* * Put it to sleep for now. */ ATH_LOCK(sc); - ath_power_setpower(sc, HAL_PM_FULL_SLEEP); + ath_power_setpower(sc, HAL_PM_FULL_SLEEP, 1); ATH_UNLOCK(sc); return 0; bad2: ath_tx_cleanup(sc); ath_desc_free(sc); ath_txdma_teardown(sc); ath_rxdma_teardown(sc); bad: if (ah) ath_hal_detach(ah); sc->sc_invalid = 1; return error; } int ath_detach(struct ath_softc *sc) { /* * NB: the order of these is important: * o stop the chip so no more interrupts will fire * o call the 802.11 layer before detaching the hal to * insure callbacks into the driver to delete global * key cache entries can be handled * o free the taskqueue which drains any pending tasks * o reclaim the tx queue data structures after calling * the 802.11 layer as we'll get called back to reclaim * node state and potentially want to use them * o to cleanup the tx queues the hal is called, so detach * it last * Other than that, it's straightforward... */ /* * XXX Wake the hardware up first. ath_stop() will still * wake it up first, but I'd rather do it here just to * ensure it's awake. */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); /* * Stop things cleanly. */ ath_stop(sc); ATH_UNLOCK(sc); ieee80211_ifdetach(&sc->sc_ic); taskqueue_free(sc->sc_tq); #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->detach(sc->sc_tx99); #endif ath_rate_detach(sc->sc_rc); #ifdef ATH_DEBUG_ALQ if_ath_alq_tidyup(&sc->sc_alq); #endif ath_lna_div_detach(sc); ath_btcoex_detach(sc); ath_spectral_detach(sc); ath_dfs_detach(sc); ath_desc_free(sc); ath_txdma_teardown(sc); ath_rxdma_teardown(sc); ath_tx_cleanup(sc); ath_hal_detach(sc->sc_ah); /* NB: sets chip in full sleep */ return 0; } /* * MAC address handling for multiple BSS on the same radio. * The first vap uses the MAC address from the EEPROM. For * subsequent vap's we set the U/L bit (bit 1) in the MAC * address and use the next six bits as an index. */ static void assign_address(struct ath_softc *sc, uint8_t mac[IEEE80211_ADDR_LEN], int clone) { int i; if (clone && sc->sc_hasbmask) { /* NB: we only do this if h/w supports multiple bssid */ for (i = 0; i < 8; i++) if ((sc->sc_bssidmask & (1<<i)) == 0) break; mac[0] |= (i << 2)|0x2; } else i = 0; sc->sc_bssidmask |= 1<<i; sc->sc_hwbssidmask[0] &= ~mac[0]; if (i == 0) sc->sc_nbssid0++; } static void reclaim_address(struct ath_softc *sc, const uint8_t mac[IEEE80211_ADDR_LEN]) { int i = mac[0] >> 2; uint8_t mask; if (i != 0 || --sc->sc_nbssid0 == 0) { sc->sc_bssidmask &= ~(1<<i); mask = 0xff; for (i = 1; i < 8; i++) if (sc->sc_bssidmask & (1<<i)) mask &= ~((i<<2)|0x2); sc->sc_hwbssidmask[0] |= mask; } } /* * Assign a beacon xmit slot. We try to space out * assignments so when beacons are staggered the * traffic coming out of the cab q has maximal time * to go out before the next beacon is scheduled.
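 *
 * Worked example, assuming ATH_BCBUF is 4: if only slot 0 is taken,
 * the search below settles on slot 2, since both of its neighbours
 * ((2+1)%4 and (2-1)%4) are free; that spreads the staggered beacons
 * as far apart as possible and gives the CAB queue the most time to
 * drain.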
*/ static int assign_bslot(struct ath_softc *sc) { u_int slot, free; free = 0; for (slot = 0; slot < ATH_BCBUF; slot++) if (sc->sc_bslot[slot] == NULL) { if (sc->sc_bslot[(slot+1)%ATH_BCBUF] == NULL && sc->sc_bslot[(slot-1)%ATH_BCBUF] == NULL) return slot; free = slot; /* NB: keep looking for a double slot */ } return free; } static struct ieee80211vap * ath_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac0[IEEE80211_ADDR_LEN]) { struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp; struct ieee80211vap *vap; uint8_t mac[IEEE80211_ADDR_LEN]; int needbeacon, error; enum ieee80211_opmode ic_opmode; avp = malloc(sizeof(struct ath_vap), M_80211_VAP, M_WAITOK | M_ZERO); needbeacon = 0; IEEE80211_ADDR_COPY(mac, mac0); ATH_LOCK(sc); ic_opmode = opmode; /* default to opmode of new vap */ switch (opmode) { case IEEE80211_M_STA: if (sc->sc_nstavaps != 0) { /* XXX only 1 for now */ device_printf(sc->sc_dev, "only 1 sta vap supported\n"); goto bad; } if (sc->sc_nvaps) { /* * With multiple vaps we must fall back * to s/w beacon miss handling. */ flags |= IEEE80211_CLONE_NOBEACONS; } if (flags & IEEE80211_CLONE_NOBEACONS) { /* * Station mode w/o beacons are implemented w/ AP mode. */ ic_opmode = IEEE80211_M_HOSTAP; } break; case IEEE80211_M_IBSS: if (sc->sc_nvaps != 0) { /* XXX only 1 for now */ device_printf(sc->sc_dev, "only 1 ibss vap supported\n"); goto bad; } needbeacon = 1; break; case IEEE80211_M_AHDEMO: #ifdef IEEE80211_SUPPORT_TDMA if (flags & IEEE80211_CLONE_TDMA) { if (sc->sc_nvaps != 0) { device_printf(sc->sc_dev, "only 1 tdma vap supported\n"); goto bad; } needbeacon = 1; flags |= IEEE80211_CLONE_NOBEACONS; } /* fall thru... */ #endif case IEEE80211_M_MONITOR: if (sc->sc_nvaps != 0 && ic->ic_opmode != opmode) { /* * Adopt existing mode. Adding a monitor or ahdemo * vap to an existing configuration is of dubious * value but should be ok. */ /* XXX not right for monitor mode */ ic_opmode = ic->ic_opmode; } break; case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: needbeacon = 1; break; case IEEE80211_M_WDS: if (sc->sc_nvaps != 0 && ic->ic_opmode == IEEE80211_M_STA) { device_printf(sc->sc_dev, "wds not supported in sta mode\n"); goto bad; } /* * Silently remove any request for a unique * bssid; WDS vap's always share the local * mac address. */ flags &= ~IEEE80211_CLONE_BSSID; if (sc->sc_nvaps == 0) ic_opmode = IEEE80211_M_HOSTAP; else ic_opmode = ic->ic_opmode; break; default: device_printf(sc->sc_dev, "unknown opmode %d\n", opmode); goto bad; } /* * Check that a beacon buffer is available; the code below assumes it. */ if (needbeacon & TAILQ_EMPTY(&sc->sc_bbuf)) { device_printf(sc->sc_dev, "no beacon buffer available\n"); goto bad; } /* STA, AHDEMO? 
*/ if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) { assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID); ath_hal_setbssidmask(sc->sc_ah, sc->sc_hwbssidmask); } vap = &avp->av_vap; /* XXX can't hold mutex across if_alloc */ ATH_UNLOCK(sc); error = ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); ATH_LOCK(sc); if (error != 0) { device_printf(sc->sc_dev, "%s: error %d creating vap\n", __func__, error); goto bad2; } /* h/w crypto support */ vap->iv_key_alloc = ath_key_alloc; vap->iv_key_delete = ath_key_delete; vap->iv_key_set = ath_key_set; vap->iv_key_update_begin = ath_key_update_begin; vap->iv_key_update_end = ath_key_update_end; /* override various methods */ avp->av_recv_mgmt = vap->iv_recv_mgmt; vap->iv_recv_mgmt = ath_recv_mgmt; vap->iv_reset = ath_reset_vap; vap->iv_update_beacon = ath_beacon_update; avp->av_newstate = vap->iv_newstate; vap->iv_newstate = ath_newstate; avp->av_bmiss = vap->iv_bmiss; vap->iv_bmiss = ath_bmiss_vap; avp->av_node_ps = vap->iv_node_ps; vap->iv_node_ps = ath_node_powersave; avp->av_set_tim = vap->iv_set_tim; vap->iv_set_tim = ath_node_set_tim; avp->av_recv_pspoll = vap->iv_recv_pspoll; vap->iv_recv_pspoll = ath_node_recv_pspoll; /* Set default parameters */ /* * Anything earlier than some AR9300 series MACs doesn't * support a smaller MPDU density. */ vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_8; /* * All NICs can handle the maximum size, however * AR5416 based MACs can only TX aggregates w/ RTS * protection when the total aggregate size is <= 8k. * However, for now that's enforced by the TX path. */ vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K; avp->av_bslot = -1; if (needbeacon) { /* * Allocate beacon state and setup the q for buffered * multicast frames. We know a beacon buffer is * available because we checked above. */ avp->av_bcbuf = TAILQ_FIRST(&sc->sc_bbuf); TAILQ_REMOVE(&sc->sc_bbuf, avp->av_bcbuf, bf_list); if (opmode != IEEE80211_M_IBSS || !sc->sc_hasveol) { /* * Assign the vap to a beacon xmit slot. As above * this cannot fail to find a free one. */ avp->av_bslot = assign_bslot(sc); KASSERT(sc->sc_bslot[avp->av_bslot] == NULL, ("beacon slot %u not empty", avp->av_bslot)); sc->sc_bslot[avp->av_bslot] = vap; sc->sc_nbcnvaps++; } if (sc->sc_hastsfadd && sc->sc_nbcnvaps > 0) { /* * Multiple vaps are to transmit beacons and we * have h/w support for TSF adjusting; enable * use of staggered beacons. */ sc->sc_stagbeacons = 1; } ath_txq_init(sc, &avp->av_mcastq, ATH_TXQ_SWQ); } ic->ic_opmode = ic_opmode; if (opmode != IEEE80211_M_WDS) { sc->sc_nvaps++; if (opmode == IEEE80211_M_STA) sc->sc_nstavaps++; if (opmode == IEEE80211_M_MBSS) sc->sc_nmeshvaps++; } switch (ic_opmode) { case IEEE80211_M_IBSS: sc->sc_opmode = HAL_M_IBSS; break; case IEEE80211_M_STA: sc->sc_opmode = HAL_M_STA; break; case IEEE80211_M_AHDEMO: #ifdef IEEE80211_SUPPORT_TDMA if (vap->iv_caps & IEEE80211_C_TDMA) { sc->sc_tdma = 1; /* NB: disable tsf adjust */ sc->sc_stagbeacons = 0; } /* * NB: adhoc demo mode is a pseudo mode; to the hal it's * just ap mode. */ /* fall thru... */ #endif case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: sc->sc_opmode = HAL_M_HOSTAP; break; case IEEE80211_M_MONITOR: sc->sc_opmode = HAL_M_MONITOR; break; default: /* XXX should not happen */ break; } if (sc->sc_hastsfadd) { /* * Configure whether or not TSF adjust should be done. */ ath_hal_settsfadjust(sc->sc_ah, sc->sc_stagbeacons); } if (flags & IEEE80211_CLONE_NOBEACONS) { /* * Enable s/w beacon miss handling.
*/ sc->sc_swbmiss = 1; } ATH_UNLOCK(sc); /* complete setup */ ieee80211_vap_attach(vap, ath_media_change, ieee80211_media_status, mac); return vap; bad2: reclaim_address(sc, mac); ath_hal_setbssidmask(sc->sc_ah, sc->sc_hwbssidmask); bad: free(avp, M_80211_VAP); ATH_UNLOCK(sc); return NULL; } static void ath_vap_delete(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; struct ath_vap *avp = ATH_VAP(vap); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__); if (sc->sc_running) { /* * Quiesce the hardware while we remove the vap. In * particular we need to reclaim all references to * the vap state by any frames pending on the tx queues. */ ath_hal_intrset(ah, 0); /* disable interrupts */ /* XXX Do all frames from all vaps/nodes need draining here? */ ath_stoprecv(sc, 1); /* stop recv side */ ath_draintxq(sc, ATH_RESET_DEFAULT); /* stop hw xmit side */ } /* .. leave the hardware awake for now. */ ieee80211_vap_detach(vap); /* * XXX Danger Will Robinson! Danger! * * Because ieee80211_vap_detach() can queue a frame (the station * disassociate message?) after we've drained the TXQ and * flushed the software TXQ, we will end up with a frame queued * to a node whose vap is about to be freed. * * To work around this, flush the hardware/software again. * This may be racy - the ath task may be running and the packet * may be being scheduled between sw->hw txq. Tsk. * * TODO: figure out why a new node gets allocated somewhere around * here (after the ath_tx_swq() call; and after an ath_stop() * call!) */ ath_draintxq(sc, ATH_RESET_DEFAULT); ATH_LOCK(sc); /* * Reclaim beacon state. Note this must be done before * the vap instance is reclaimed as we may have a reference * to it in the buffer for the beacon frame. */ if (avp->av_bcbuf != NULL) { if (avp->av_bslot != -1) { sc->sc_bslot[avp->av_bslot] = NULL; sc->sc_nbcnvaps--; } ath_beacon_return(sc, avp->av_bcbuf); avp->av_bcbuf = NULL; if (sc->sc_nbcnvaps == 0) { sc->sc_stagbeacons = 0; if (sc->sc_hastsfadd) ath_hal_settsfadjust(sc->sc_ah, 0); } /* * Reclaim any pending mcast frames for the vap. */ ath_tx_draintxq(sc, &avp->av_mcastq); } /* * Update bookkeeping. */ if (vap->iv_opmode == IEEE80211_M_STA) { sc->sc_nstavaps--; if (sc->sc_nstavaps == 0 && sc->sc_swbmiss) sc->sc_swbmiss = 0; } else if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) { reclaim_address(sc, vap->iv_myaddr); ath_hal_setbssidmask(ah, sc->sc_hwbssidmask); if (vap->iv_opmode == IEEE80211_M_MBSS) sc->sc_nmeshvaps--; } if (vap->iv_opmode != IEEE80211_M_WDS) sc->sc_nvaps--; #ifdef IEEE80211_SUPPORT_TDMA /* TDMA operation ceases when the last vap is destroyed */ if (sc->sc_tdma && sc->sc_nvaps == 0) { sc->sc_tdma = 0; sc->sc_swbmiss = 0; } #endif free(avp, M_80211_VAP); if (sc->sc_running) { /* * Restart rx+tx machines if still running (RUNNING will * be reset if we just destroyed the last vap). */ if (ath_startrecv(sc) != 0) device_printf(sc->sc_dev, "%s: unable to restart recv logic\n", __func__); if (sc->sc_beacons) { /* restart beacons */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) ath_tdma_config(sc, NULL); else #endif ath_beacon_config(sc, NULL); } ath_hal_intrset(ah, sc->sc_imask); } /* Ok, let the hardware sleep.
*/ ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } void ath_suspend(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; sc->sc_resume_up = ic->ic_nrunning != 0; ieee80211_suspend_all(ic); /* * NB: don't worry about putting the chip in low power * mode; pci will power off our socket on suspend and * CardBus detaches the device. * * XXX TODO: well, that's great, except for non-cardbus * devices! */ /* * XXX This doesn't wait until all pending taskqueue * items and parallel transmit/receive/other threads * have finished running! */ ath_hal_intrset(sc->sc_ah, 0); taskqueue_block(sc->sc_tq); ATH_LOCK(sc); callout_stop(&sc->sc_cal_ch); ATH_UNLOCK(sc); /* * XXX ensure sc_invalid is 1 */ /* Disable the PCIe PHY, complete with workarounds */ ath_hal_enablepcie(sc->sc_ah, 1, 1); } /* * Reset the key cache since some parts do not reset the * contents on resume. First we clear all entries, then * re-load keys that the 802.11 layer assumes are setup * in h/w. */ static void ath_reset_keycache(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; int i; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); for (i = 0; i < sc->sc_keymax; i++) ath_hal_keyreset(ah, i); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ieee80211_crypto_reload_keys(ic); } /* * Fetch the current chainmask configuration based on the current * operating channel and options. */ static void ath_update_chainmasks(struct ath_softc *sc, struct ieee80211_channel *chan) { /* * Set TX chainmask to the currently configured chainmask; * the TX chainmask depends upon the current operating mode. */ sc->sc_cur_rxchainmask = sc->sc_rxchainmask; if (IEEE80211_IS_CHAN_HT(chan)) { sc->sc_cur_txchainmask = sc->sc_txchainmask; } else { sc->sc_cur_txchainmask = 1; } DPRINTF(sc, ATH_DEBUG_RESET, "%s: TX chainmask is now 0x%x, RX is now 0x%x\n", __func__, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); } void ath_resume(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; ath_hal_enablepcie(ah, 0, 0); /* * Must reset the chip before we reload the * keycache as we were powered down on suspend. */ ath_update_chainmasks(sc, sc->sc_curchan != NULL ? sc->sc_curchan : ic->ic_curchan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); /* Ensure we set the current power state to on */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); ath_power_set_power_state(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); ATH_UNLOCK(sc); ath_hal_reset(ah, sc->sc_opmode, sc->sc_curchan != NULL ? sc->sc_curchan : ic->ic_curchan, AH_FALSE, HAL_RESET_NORMAL, &status); ath_reset_keycache(sc); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, ic->ic_curchan); /* Let spectral at it in case spectral is enabled */ ath_spectral_enable(sc, ic->ic_curchan); /* * Let bluetooth coexistence at it in case it's needed for this channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips that * support it.
*/ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); /* Restore the LED configuration */ ath_led_config(sc); ath_hal_setledstate(ah, HAL_LED_INIT); if (sc->sc_resume_up) ieee80211_resume_all(ic); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); /* XXX beacons ? */ } void ath_shutdown(struct ath_softc *sc) { ATH_LOCK(sc); ath_stop(sc); ATH_UNLOCK(sc); /* NB: no point powering down chip as we're about to reboot */ } /* * Interrupt handler. Most of the actual processing is deferred. */ void ath_intr(void *arg) { struct ath_softc *sc = arg; struct ath_hal *ah = sc->sc_ah; HAL_INT status = 0; uint32_t txqs; /* * If we're inside a reset path, just print a warning and * clear the ISR. The reset routine will finish it for us. */ ATH_PCU_LOCK(sc); if (sc->sc_inreset_cnt) { HAL_INT status; ath_hal_getisr(ah, &status); /* clear ISR */ ath_hal_intrset(ah, 0); /* disable further intr's */ DPRINTF(sc, ATH_DEBUG_ANY, "%s: in reset, ignoring: status=0x%x\n", __func__, status); ATH_PCU_UNLOCK(sc); return; } if (sc->sc_invalid) { /* * The hardware is not ready/present, don't touch anything. * Note this can happen early on if the IRQ is shared. */ DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid; ignored\n", __func__); ATH_PCU_UNLOCK(sc); return; } if (!ath_hal_intrpend(ah)) { /* shared irq, not for us */ ATH_PCU_UNLOCK(sc); return; } ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); if (sc->sc_ic.ic_nrunning == 0 && sc->sc_running == 0) { HAL_INT status; DPRINTF(sc, ATH_DEBUG_ANY, "%s: ic_nrunning %d sc_running %d\n", __func__, sc->sc_ic.ic_nrunning, sc->sc_running); ath_hal_getisr(ah, &status); /* clear ISR */ ath_hal_intrset(ah, 0); /* disable further intr's */ ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return; } /* * Figure out the reason(s) for the interrupt. Note * that the hal returns a pseudo-ISR that may include * bits we haven't explicitly enabled so we mask the * value to insure we only process bits we requested. */ ath_hal_getisr(ah, &status); /* NB: clears ISR too */ DPRINTF(sc, ATH_DEBUG_INTR, "%s: status 0x%x\n", __func__, status); ATH_KTR(sc, ATH_KTR_INTERRUPTS, 1, "ath_intr: mask=0x%.8x", status); #ifdef ATH_DEBUG_ALQ if_ath_alq_post_intr(&sc->sc_alq, status, ah->ah_intrstate, ah->ah_syncstate); #endif /* ATH_DEBUG_ALQ */ #ifdef ATH_KTR_INTR_DEBUG ATH_KTR(sc, ATH_KTR_INTERRUPTS, 5, "ath_intr: ISR=0x%.8x, ISR_S0=0x%.8x, ISR_S1=0x%.8x, ISR_S2=0x%.8x, ISR_S5=0x%.8x", ah->ah_intrstate[0], ah->ah_intrstate[1], ah->ah_intrstate[2], ah->ah_intrstate[3], ah->ah_intrstate[6]); #endif /* Squirrel away SYNC interrupt debugging */ if (ah->ah_syncstate != 0) { int i; for (i = 0; i < 32; i++) if (ah->ah_syncstate & (1 << i)) sc->sc_intr_stats.sync_intr[i]++; } status &= sc->sc_imask; /* discard unasked for bits */ /* Short-circuit un-handled interrupts */ if (status == 0x0) { ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return; } /* * Take a note that we're inside the interrupt handler, so * the reset routines know to wait. */ sc->sc_intr_cnt++; ATH_PCU_UNLOCK(sc); /* * Handle the interrupt. We won't run concurrent with the reset * or channel change routines as they'll wait for sc_intr_cnt * to be 0 before continuing. */
*/ if (status & HAL_INT_FATAL) { sc->sc_stats.ast_hardware++; ath_hal_intrset(ah, 0); /* disable intr's until reset */ taskqueue_enqueue(sc->sc_tq, &sc->sc_fataltask); } else { if (status & HAL_INT_SWBA) { /* * Software beacon alert--time to send a beacon. * Handle beacon transmission directly; deferring * this is too slow to meet timing constraints * under load. */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) { if (sc->sc_tdmaswba == 0) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ath_tdma_beacon_send(sc, vap); sc->sc_tdmaswba = vap->iv_tdma->tdma_bintval; } else sc->sc_tdmaswba--; } else #endif { ath_beacon_proc(sc, 0); #ifdef IEEE80211_SUPPORT_SUPERG /* * Schedule the rx taskq in case there's no * traffic so any frames held on the staging * queue are aged and potentially flushed. */ sc->sc_rx.recv_sched(sc, 1); #endif } } if (status & HAL_INT_RXEOL) { int imask; ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_intr: RXEOL"); if (! sc->sc_isedma) { ATH_PCU_LOCK(sc); /* * NB: the hardware should re-read the link when * RXE bit is written, but it doesn't work at * least on older hardware revs. */ sc->sc_stats.ast_rxeol++; /* * Disable RXEOL/RXORN - prevent an interrupt * storm until the PCU logic can be reset. * In case the interface is reset some other * way before "sc_kickpcu" is called, don't * modify sc_imask - that way if it is reset * by a call to ath_reset() somehow, the * interrupt mask will be correctly reprogrammed. */ imask = sc->sc_imask; imask &= ~(HAL_INT_RXEOL | HAL_INT_RXORN); ath_hal_intrset(ah, imask); /* * Only blank sc_rxlink if we've not yet kicked * the PCU. * * This isn't entirely correct - the correct solution * would be to have a PCU lock and engage that for * the duration of the PCU fiddling; which would include * running the RX process. Otherwise we could end up * messing up the RX descriptor chain and making the * RX desc list much shorter. */ if (! sc->sc_kickpcu) sc->sc_rxlink = NULL; sc->sc_kickpcu = 1; ATH_PCU_UNLOCK(sc); } /* * Enqueue an RX proc to handle whatever * is in the RX queue. * This will then kick the PCU if required. */ sc->sc_rx.recv_sched(sc, 1); } if (status & HAL_INT_TXURN) { sc->sc_stats.ast_txurn++; /* bump tx trigger level */ ath_hal_updatetxtriglevel(ah, AH_TRUE); } /* * Handle both the legacy and RX EDMA interrupt bits. * Note that HAL_INT_RXLP is also HAL_INT_RXDESC. */ if (status & (HAL_INT_RX | HAL_INT_RXHP | HAL_INT_RXLP)) { sc->sc_stats.ast_rx_intr++; sc->sc_rx.recv_sched(sc, 1); } if (status & HAL_INT_TX) { sc->sc_stats.ast_tx_intr++; /* * Grab all the currently set bits in the HAL txq bitmap * and blank them. This is the only place we should be * doing this. */ if (! sc->sc_isedma) { ATH_PCU_LOCK(sc); txqs = 0xffffffff; ath_hal_gettxintrtxqs(sc->sc_ah, &txqs); ATH_KTR(sc, ATH_KTR_INTERRUPTS, 3, "ath_intr: TX; txqs=0x%08x, txq_active was 0x%08x, now 0x%08x", txqs, sc->sc_txq_active, sc->sc_txq_active | txqs); sc->sc_txq_active |= txqs; ATH_PCU_UNLOCK(sc); } taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask); } if (status & HAL_INT_BMISS) { sc->sc_stats.ast_bmiss++; taskqueue_enqueue(sc->sc_tq, &sc->sc_bmisstask); } if (status & HAL_INT_GTT) sc->sc_stats.ast_tx_timeout++; if (status & HAL_INT_CST) sc->sc_stats.ast_tx_cst++; if (status & HAL_INT_MIB) { sc->sc_stats.ast_mib++; ATH_PCU_LOCK(sc); /* * Disable interrupts until we service the MIB * interrupt; otherwise it will continue to fire. */ ath_hal_intrset(ah, 0); /* * Let the hal handle the event. 
We assume it will * clear whatever condition caused the interrupt. */ ath_hal_mibevent(ah, &sc->sc_halstats); /* * Don't reset the interrupt if we've just * kicked the PCU, or we may get a nested * RXEOL before the rxproc has had a chance * to run. */ if (sc->sc_kickpcu == 0) ath_hal_intrset(ah, sc->sc_imask); ATH_PCU_UNLOCK(sc); } if (status & HAL_INT_RXORN) { /* NB: hal marks HAL_INT_FATAL when RXORN is fatal */ ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_intr: RXORN"); sc->sc_stats.ast_rxorn++; } if (status & HAL_INT_TSFOOR) { + /* out of range beacon - wake the chip up, + * but don't modify self-gen frame config */ device_printf(sc->sc_dev, "%s: TSFOOR\n", __func__); sc->sc_syncbeacon = 1; + ATH_LOCK(sc); + ath_power_setpower(sc, HAL_PM_AWAKE, 0); + ATH_UNLOCK(sc); } if (status & HAL_INT_MCI) { ath_btcoex_mci_intr(sc); } } ATH_PCU_LOCK(sc); sc->sc_intr_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } static void ath_fatal_proc(void *arg, int pending) { struct ath_softc *sc = arg; u_int32_t *state; u_int32_t len; void *sp; if (sc->sc_invalid) return; device_printf(sc->sc_dev, "hardware error; resetting\n"); /* * Fatal errors are unrecoverable. Typically these * are caused by DMA errors. Collect h/w state from * the hal so we can diagnose what's going on. */ if (ath_hal_getfatalstate(sc->sc_ah, &sp, &len)) { KASSERT(len >= 6*sizeof(u_int32_t), ("len %u bytes", len)); state = sp; device_printf(sc->sc_dev, "0x%08x 0x%08x 0x%08x, 0x%08x 0x%08x 0x%08x\n", state[0], state[1] , state[2], state[3], state[4], state[5]); } ath_reset(sc, ATH_RESET_NOLOSS); } static void ath_bmiss_vap(struct ieee80211vap *vap) { struct ath_softc *sc = vap->iv_ic->ic_softc; /* * Workaround phantom bmiss interrupts by sanity-checking * the time of our last rx'd frame. If it is within the * beacon miss interval then ignore the interrupt. If it's * truly a bmiss we'll get another interrupt soon and that'll * be dispatched up for processing. Note this applies only * for h/w beacon miss events. */ /* * XXX TODO: Just read the TSF during the interrupt path; * that way we don't have to wake up again just to read it * again. */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); if ((vap->iv_flags_ext & IEEE80211_FEXT_SWBMISS) == 0) { u_int64_t lastrx = sc->sc_lastrx; u_int64_t tsf = ath_hal_gettsf64(sc->sc_ah); /* XXX should take a locked ref to iv_bss */ u_int bmisstimeout = vap->iv_bmissthreshold * vap->iv_bss->ni_intval * 1024; DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf %llu lastrx %lld (%llu) bmiss %u\n", __func__, (unsigned long long) tsf, (unsigned long long)(tsf - lastrx), (unsigned long long) lastrx, bmisstimeout); if (tsf - lastrx <= bmisstimeout) { sc->sc_stats.ast_bmiss_phantom++; ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return; } } /* - * There's no need to keep the hardware awake during the call - * to av_bmiss(). + * Keep the hardware awake if it's asleep (and leave self-gen + * frame config alone) until the next beacon, so we can resync + * against the next beacon. + * + * This handles three common beacon miss cases in STA powersave mode - + * (a) the beacon TBTT isn't a multiple of bintval; + * (b) the beacon was missed; and + * (c) the beacons are being delayed because the AP is busy and + * isn't reliably able to meet its TBTT.
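+ *
+ * For scale, with typical (hypothetical) numbers - iv_bmissthreshold
+ * of 7 and a 100 TU beacon interval - the phantom-bmiss window
+ * computed earlier is 7 * 100 * 1024 = 716800 usec of TSF time, so
+ * roughly 0.7 seconds must elapse without a received frame before
+ * the miss is treated as real and handed to av_bmiss().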
*/ ATH_LOCK(sc); + ath_power_setpower(sc, HAL_PM_AWAKE, 0); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); + DPRINTF(sc, ATH_DEBUG_BEACON, + "%s: forced awake; force syncbeacon=1\n", __func__); /* * Attempt to force a beacon resync. */ sc->sc_syncbeacon = 1; ATH_VAP(vap)->av_bmiss(vap); } /* XXX this needs a force wakeup! */ int ath_hal_gethangstate(struct ath_hal *ah, uint32_t mask, uint32_t *hangs) { uint32_t rsize; void *sp; if (!ath_hal_getdiagstate(ah, HAL_DIAG_CHECK_HANGS, &mask, sizeof(mask), &sp, &rsize)) return 0; KASSERT(rsize == sizeof(uint32_t), ("resultsize %u", rsize)); *hangs = *(uint32_t *)sp; return 1; } static void ath_bmiss_proc(void *arg, int pending) { struct ath_softc *sc = arg; uint32_t hangs; DPRINTF(sc, ATH_DEBUG_ANY, "%s: pending %u\n", __func__, pending); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ath_beacon_miss(sc); /* * Do a reset upon any beacon miss event. * * It may be a non-recognised RX clear hang which needs a reset * to clear. */ if (ath_hal_gethangstate(sc->sc_ah, 0xff, &hangs) && hangs != 0) { ath_reset(sc, ATH_RESET_NOLOSS); device_printf(sc->sc_dev, "bb hang detected (0x%x), resetting\n", hangs); } else { ath_reset(sc, ATH_RESET_NOLOSS); ieee80211_beacon_miss(&sc->sc_ic); } /* Force a beacon resync, in case they've drifted */ sc->sc_syncbeacon = 1; ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } /* * Handle TKIP MIC setup to deal with hardware that doesn't do MIC * calcs together with WME. If necessary disable the crypto * hardware and mark the 802.11 state so keys will be setup * with the MIC work done in software. */ static void ath_settkipmic(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; if ((ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP) && !sc->sc_wmetkipmic) { if (ic->ic_flags & IEEE80211_F_WME) { ath_hal_settkipmic(sc->sc_ah, AH_FALSE); ic->ic_cryptocaps &= ~IEEE80211_CRYPTO_TKIPMIC; } else { ath_hal_settkipmic(sc->sc_ah, AH_TRUE); ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIPMIC; } } } static int ath_init(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; ATH_LOCK_ASSERT(sc); /* * Force the sleep state awake. */ ath_power_setselfgen(sc, HAL_PM_AWAKE); ath_power_set_power_state(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); /* * Stop anything previously setup. This is safe * whether this is the first time through or not. */ ath_stop(sc); /* * The basic interface to setting the hardware in a good * state is ``reset''. On return the hardware is known to * be powered up and with interrupts disabled. This must * be followed by initialization of the appropriate bits * and then setup of the interrupt mask.
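 *
 * In outline, the sequence below is:
 *
 *	ath_settkipmic()	- TKIP MIC handling for this config
 *	ath_hal_reset()		- power up and reset the chip
 *	ath_chan_change()	- per-channel driver state
 *	ath_startrecv()		- restart the receive machinery
 *	ath_hal_intrset()	- finally unmask interrupts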
*/ ath_settkipmic(sc); ath_update_chainmasks(sc, ic->ic_curchan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_FALSE, HAL_RESET_NORMAL, &status)) { device_printf(sc->sc_dev, "unable to reset hardware; hal status %u\n", status); return (ENODEV); } ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); ath_chan_change(sc, ic->ic_curchan); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, ic->ic_curchan); /* Let spectral at it in case spectral is enabled */ ath_spectral_enable(sc, ic->ic_curchan); /* * Let bluetooth coexistence at it in case it's needed for this channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips that * support it. */ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); /* * Likewise this is set during reset so update * state cached in the driver. */ sc->sc_diversity = ath_hal_getdiversity(ah); sc->sc_lastlongcal = ticks; sc->sc_resetcal = 1; sc->sc_lastcalreset = 0; sc->sc_lastani = ticks; sc->sc_lastshortcal = ticks; sc->sc_doresetcal = AH_FALSE; /* * Beacon timers were cleared here; give ath_newstate() * a hint that the beacon timers should be poked when * things transition to the RUN state. */ sc->sc_beacons = 0; /* * Setup the hardware after reset: the key cache * is filled as needed and the receive engine is * set going. Frame transmit is handled entirely * in the frame output path; there's nothing to do * here except setup the interrupt mask. */ if (ath_startrecv(sc) != 0) { device_printf(sc->sc_dev, "unable to start recv logic\n"); ath_power_restore_power_state(sc); return (ENODEV); } /* * Enable interrupts. */ sc->sc_imask = HAL_INT_RX | HAL_INT_TX | HAL_INT_RXORN | HAL_INT_TXURN | HAL_INT_FATAL | HAL_INT_GLOBAL; /* * Enable RX EDMA bits. Note these overlap with * HAL_INT_RX and HAL_INT_RXDESC respectively. */ if (sc->sc_isedma) sc->sc_imask |= (HAL_INT_RXHP | HAL_INT_RXLP); /* * If we're an EDMA NIC, we don't care about RXEOL. * Writing a new descriptor in will simply restart * RX DMA. */ if (! sc->sc_isedma) sc->sc_imask |= HAL_INT_RXEOL; /* * Enable MCI interrupt for MCI devices. */ if (sc->sc_btcoex_mci) sc->sc_imask |= HAL_INT_MCI; /* * Enable MIB interrupts when there are hardware phy counters. * Note we only do this (at the moment) for station mode. */ if (sc->sc_needmib && ic->ic_opmode == IEEE80211_M_STA) sc->sc_imask |= HAL_INT_MIB; /* * XXX add capability for this. * * If we're in STA mode (and maybe IBSS?) then register for * TSFOOR interrupts. */ if (ic->ic_opmode == IEEE80211_M_STA) sc->sc_imask |= HAL_INT_TSFOOR; /* Enable global TX timeout and carrier sense timeout if available */ if (ath_hal_gtxto_supported(ah)) sc->sc_imask |= HAL_INT_GTT; DPRINTF(sc, ATH_DEBUG_RESET, "%s: imask=0x%x\n", __func__, sc->sc_imask); sc->sc_running = 1; callout_reset(&sc->sc_wd_ch, hz, ath_watchdog, sc); ath_hal_intrset(ah, sc->sc_imask); ath_power_restore_power_state(sc); return (0); } static void ath_stop(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; ATH_LOCK_ASSERT(sc); /* * Wake the hardware up before fiddling with it.
*/ ath_power_set_power_state(sc, HAL_PM_AWAKE); if (sc->sc_running) { /* * Shutdown the hardware and driver: * reset 802.11 state machine * turn off timers * disable interrupts * turn off the radio * clear transmit machinery * clear receive machinery * drain and release tx queues * reclaim beacon resources * power down hardware * * Note that some of this work is not possible if the * hardware is gone (invalid). */ #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->stop(sc->sc_tx99); #endif callout_stop(&sc->sc_wd_ch); sc->sc_wd_timer = 0; sc->sc_running = 0; if (!sc->sc_invalid) { if (sc->sc_softled) { callout_stop(&sc->sc_ledtimer); ath_hal_gpioset(ah, sc->sc_ledpin, !sc->sc_ledon); sc->sc_blinking = 0; } ath_hal_intrset(ah, 0); } /* XXX we should stop RX regardless of whether it's valid */ if (!sc->sc_invalid) { ath_stoprecv(sc, 1); ath_hal_phydisable(ah); } else sc->sc_rxlink = NULL; ath_draintxq(sc, ATH_RESET_DEFAULT); ath_beacon_free(sc); /* XXX not needed */ } /* And now, restore the current power state */ ath_power_restore_power_state(sc); } /* * Wait until all pending TX/RX has completed. * * This waits until all existing transmit, receive and interrupts * have completed. It's assumed that the caller has first * grabbed the reset lock so it doesn't try to do overlapping * chip resets. */ #define MAX_TXRX_ITERATIONS 100 static void ath_txrx_stop_locked(struct ath_softc *sc) { int i = MAX_TXRX_ITERATIONS; ATH_UNLOCK_ASSERT(sc); ATH_PCU_LOCK_ASSERT(sc); /* * Sleep until all the pending operations have completed. * * The caller must ensure that reset has been incremented * or the pending operations may continue being queued. */ while (sc->sc_rxproc_cnt || sc->sc_txproc_cnt || sc->sc_txstart_cnt || sc->sc_intr_cnt) { if (i <= 0) break; msleep(sc, &sc->sc_pcu_mtx, 0, "ath_txrx_stop", msecs_to_ticks(10)); i--; } if (i <= 0) device_printf(sc->sc_dev, "%s: didn't finish after %d iterations\n", __func__, MAX_TXRX_ITERATIONS); } #undef MAX_TXRX_ITERATIONS #if 0 static void ath_txrx_stop(struct ath_softc *sc) { ATH_UNLOCK_ASSERT(sc); ATH_PCU_UNLOCK_ASSERT(sc); ATH_PCU_LOCK(sc); ath_txrx_stop_locked(sc); ATH_PCU_UNLOCK(sc); } #endif static void ath_txrx_start(struct ath_softc *sc) { taskqueue_unblock(sc->sc_tq); } /* * Grab the reset lock, and wait around until no one else * is trying to do anything with it. * * This is totally horrible but we can't hold this lock for * long enough to do TX/RX or we end up with net80211/ip stack * LORs and eventual deadlock. * * "dowait" signals whether to spin, waiting for the reset * lock count to reach 0. This should (for now) only be used * during the reset path, as the rest of the code may not * be locking-reentrant enough to behave correctly. * * Another, cleaner way should be found to serialise all of * these operations. */ #define MAX_RESET_ITERATIONS 25 static int ath_reset_grablock(struct ath_softc *sc, int dowait) { int w = 0; int i = MAX_RESET_ITERATIONS; ATH_PCU_LOCK_ASSERT(sc); do { if (sc->sc_inreset_cnt == 0) { w = 1; break; } if (dowait == 0) { w = 0; break; } ATH_PCU_UNLOCK(sc); /* * 1 tick is likely not enough time for long calibrations * to complete. So we should wait quite a while. */ pause("ath_reset_grablock", msecs_to_ticks(100)); i--; ATH_PCU_LOCK(sc); } while (i > 0); /* * We always increment the refcounter, regardless * of whether we succeeded to get it in an exclusive * way. 
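To make the grab-lock semantics described above concrete, here is a minimal userland sketch of the same refcount-gated pattern. The names and the pthread mutex are hypothetical stand-ins (the driver uses ATH_PCU_LOCK and pause()); this illustrates the idea only, not the driver's actual code.

#include <pthread.h>
#include <unistd.h>

struct sketch_softc {
	pthread_mutex_t pcu_mtx;	/* stands in for ATH_PCU_LOCK */
	int inreset_cnt;
};

/*
 * Returns 1 if we won exclusive access, 0 on a recursive/concurrent
 * entry.  The counter is bumped either way, so the caller must
 * decrement it once the reset is done.
 */
static int
sketch_reset_grablock(struct sketch_softc *sc, int dowait)
{
	int i = 25, w = 0;

	pthread_mutex_lock(&sc->pcu_mtx);
	do {
		if (sc->inreset_cnt == 0) {
			w = 1;
			break;
		}
		if (dowait == 0)
			break;
		/* Drop the lock while sleeping, as the driver does. */
		pthread_mutex_unlock(&sc->pcu_mtx);
		usleep(100 * 1000);	/* give long calibrations time */
		pthread_mutex_lock(&sc->pcu_mtx);
	} while (--i > 0);
	sc->inreset_cnt++;		/* always incremented */
	pthread_mutex_unlock(&sc->pcu_mtx);
	return (w);
}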
*/ sc->sc_inreset_cnt++; if (i <= 0) device_printf(sc->sc_dev, "%s: didn't finish after %d iterations\n", __func__, MAX_RESET_ITERATIONS); if (w == 0) device_printf(sc->sc_dev, "%s: warning, recursive reset path!\n", __func__); return w; } #undef MAX_RESET_ITERATIONS /* * Reset the hardware w/o losing operational state. This is * basically a more efficient way of doing ath_stop, ath_init, * followed by state transitions to the current 802.11 * operational state. Used to recover from various errors and * to reset or reload hardware state. */ int ath_reset(struct ath_softc *sc, ATH_RESET_TYPE reset_type) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; int i; DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__); /* Ensure ATH_LOCK isn't held; ath_rx_proc can't be locked */ ATH_PCU_UNLOCK_ASSERT(sc); ATH_UNLOCK_ASSERT(sc); /* Try to stop any further TX/RX from occurring */ taskqueue_block(sc->sc_tq); /* * Wake the hardware up. */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); /* * Grab the reset lock before TX/RX is stopped. * * This is needed to ensure that when the TX/RX actually does finish, * no further TX/RX/reset runs in parallel with this. */ if (ath_reset_grablock(sc, 1) == 0) { device_printf(sc->sc_dev, "%s: concurrent reset! Danger!\n", __func__); } /* disable interrupts */ ath_hal_intrset(ah, 0); /* * Now, ensure that any in progress TX/RX completes before we * continue. */ ath_txrx_stop_locked(sc); ATH_PCU_UNLOCK(sc); /* * Regardless of whether we're doing a no-loss flush or * not, stop the PCU and handle what's in the RX queue. * That way frames aren't dropped which shouldn't be. */ ath_stoprecv(sc, (reset_type != ATH_RESET_NOLOSS)); ath_rx_flush(sc); /* * Should now wait for pending TX/RX to complete * and block future ones from occurring. This needs to be * done before the TX queue is drained. */ ath_draintxq(sc, reset_type); /* stop xmit side */ ath_settkipmic(sc); /* configure TKIP MIC handling */ /* NB: indicate channel change so we do a full reset */ ath_update_chainmasks(sc, ic->ic_curchan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_TRUE, HAL_RESET_NORMAL, &status)) device_printf(sc->sc_dev, "%s: unable to reset hardware; hal status %u\n", __func__, status); sc->sc_diversity = ath_hal_getdiversity(ah); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, ic->ic_curchan); /* Let spectral at in case spectral is enabled */ ath_spectral_enable(sc, ic->ic_curchan); /* * Let bluetooth coexistence at in case it's needed for this channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips that * support it. */ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); if (ath_startrecv(sc) != 0) /* restart recv */ device_printf(sc->sc_dev, "%s: unable to start recv logic\n", __func__); /* * We may be doing a reset in response to an ioctl * that changes the channel so update any state that * might change as a result. */ ath_chan_change(sc, ic->ic_curchan); if (sc->sc_beacons) { /* restart beacons */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) ath_tdma_config(sc, NULL); else #endif ath_beacon_config(sc, NULL); } /* * Release the reset lock and re-enable interrupts here.
* If an interrupt was being processed in ath_intr(), * it would disable interrupts at this point. So we have * to atomically enable interrupts and decrement the * reset counter - this way ath_intr() doesn't end up * disabling interrupts without a corresponding enable * in the reset or channel change path. * * Grab the TX reference in case we need to transmit. * That way a parallel transmit doesn't. */ ATH_PCU_LOCK(sc); sc->sc_inreset_cnt--; sc->sc_txstart_cnt++; /* XXX only do this if sc_inreset_cnt == 0? */ ath_hal_intrset(ah, sc->sc_imask); ATH_PCU_UNLOCK(sc); /* * TX and RX can be started here. If it were started with * sc_inreset_cnt > 0, the TX and RX path would abort. * Thus if this is a nested call through the reset or * channel change code, TX completion will occur but * RX completion and ath_start / ath_tx_start will not * run. */ /* Restart TX/RX as needed */ ath_txrx_start(sc); /* XXX TODO: we need to hold the tx refcount here! */ /* Restart TX completion and pending TX */ if (reset_type == ATH_RESET_NOLOSS) { for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); ath_txq_restart_dma(sc, &sc->sc_txq[i]); ATH_TXQ_UNLOCK(&sc->sc_txq[i]); ATH_TX_LOCK(sc); ath_txq_sched(sc, &sc->sc_txq[i]); ATH_TX_UNLOCK(sc); } } } ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); sc->sc_txstart_cnt--; ATH_PCU_UNLOCK(sc); /* Handle any frames in the TX queue */ /* * XXX should this be done by the caller, rather than * ath_reset() ? */ ath_tx_kick(sc); /* restart xmit */ return 0; } static int ath_reset_vap(struct ieee80211vap *vap, u_long cmd) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; switch (cmd) { case IEEE80211_IOC_TXPOWER: /* * If per-packet TPC is enabled, then we have nothing * to do; otherwise we need to force the global limit. * All this can happen directly; no need to reset. */ if (!ath_hal_gettpc(ah)) ath_hal_settxpowlimit(ah, ic->ic_txpowlimit); return 0; } /* XXX? Full or NOLOSS? */ return ath_reset(sc, ATH_RESET_FULL); } struct ath_buf * _ath_getbuf_locked(struct ath_softc *sc, ath_buf_type_t btype) { struct ath_buf *bf; ATH_TXBUF_LOCK_ASSERT(sc); if (btype == ATH_BUFTYPE_MGMT) bf = TAILQ_FIRST(&sc->sc_txbuf_mgmt); else bf = TAILQ_FIRST(&sc->sc_txbuf); if (bf == NULL) { sc->sc_stats.ast_tx_getnobuf++; } else { if (bf->bf_flags & ATH_BUF_BUSY) { sc->sc_stats.ast_tx_getbusybuf++; bf = NULL; } } if (bf != NULL && (bf->bf_flags & ATH_BUF_BUSY) == 0) { if (btype == ATH_BUFTYPE_MGMT) TAILQ_REMOVE(&sc->sc_txbuf_mgmt, bf, bf_list); else { TAILQ_REMOVE(&sc->sc_txbuf, bf, bf_list); sc->sc_txbuf_cnt--; /* * This shouldn't happen; however just to be * safe print a warning and fudge the txbuf * count. */ if (sc->sc_txbuf_cnt < 0) { device_printf(sc->sc_dev, "%s: sc_txbuf_cnt < 0?\n", __func__); sc->sc_txbuf_cnt = 0; } } } else bf = NULL; if (bf == NULL) { /* XXX should check which list, mgmt or otherwise */ DPRINTF(sc, ATH_DEBUG_XMIT, "%s: %s\n", __func__, TAILQ_FIRST(&sc->sc_txbuf) == NULL ?
"out of xmit buffers" : "xmit buffer busy"); return NULL; } /* XXX TODO: should do this at buffer list initialisation */ /* XXX (then, ensure the buffer has the right flag set) */ bf->bf_flags = 0; if (btype == ATH_BUFTYPE_MGMT) bf->bf_flags |= ATH_BUF_MGMT; else bf->bf_flags &= (~ATH_BUF_MGMT); /* Valid bf here; clear some basic fields */ bf->bf_next = NULL; /* XXX just to be sure */ bf->bf_last = NULL; /* XXX again, just to be sure */ bf->bf_comp = NULL; /* XXX again, just to be sure */ bzero(&bf->bf_state, sizeof(bf->bf_state)); /* * Track the descriptor ID only if doing EDMA */ if (sc->sc_isedma) { bf->bf_descid = sc->sc_txbuf_descid; sc->sc_txbuf_descid++; } return bf; } /* * When retrying a software frame, buffers marked ATH_BUF_BUSY * can't be thrown back on the queue as they could still be * in use by the hardware. * * This duplicates the buffer, or returns NULL. * * The descriptor is also copied but the link pointers and * the DMA segments aren't copied; this frame should thus * be again passed through the descriptor setup/chain routines * so the link is correct. * * The caller must free the buffer using ath_freebuf(). */ struct ath_buf * ath_buf_clone(struct ath_softc *sc, struct ath_buf *bf) { struct ath_buf *tbf; tbf = ath_getbuf(sc, (bf->bf_flags & ATH_BUF_MGMT) ? ATH_BUFTYPE_MGMT : ATH_BUFTYPE_NORMAL); if (tbf == NULL) return NULL; /* XXX failure? Why? */ /* Copy basics */ tbf->bf_next = NULL; tbf->bf_nseg = bf->bf_nseg; tbf->bf_flags = bf->bf_flags & ATH_BUF_FLAGS_CLONE; tbf->bf_status = bf->bf_status; tbf->bf_m = bf->bf_m; tbf->bf_node = bf->bf_node; KASSERT((bf->bf_node != NULL), ("%s: bf_node=NULL!", __func__)); /* will be setup by the chain/setup function */ tbf->bf_lastds = NULL; /* for now, last == self */ tbf->bf_last = tbf; tbf->bf_comp = bf->bf_comp; /* NOTE: DMA segments will be setup by the setup/chain functions */ /* The caller has to re-init the descriptor + links */ /* * Free the DMA mapping here, before we NULL the mbuf. * We must only call bus_dmamap_unload() once per mbuf chain * or behaviour is undefined. */ if (bf->bf_m != NULL) { /* * XXX is this POSTWRITE call required? */ bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); } bf->bf_m = NULL; bf->bf_node = NULL; /* Copy state */ memcpy(&tbf->bf_state, &bf->bf_state, sizeof(bf->bf_state)); return tbf; } struct ath_buf * ath_getbuf(struct ath_softc *sc, ath_buf_type_t btype) { struct ath_buf *bf; ATH_TXBUF_LOCK(sc); bf = _ath_getbuf_locked(sc, btype); /* * If a mgmt buffer was requested but we're out of those, * try requesting a normal one. */ if (bf == NULL && btype == ATH_BUFTYPE_MGMT) bf = _ath_getbuf_locked(sc, ATH_BUFTYPE_NORMAL); ATH_TXBUF_UNLOCK(sc); if (bf == NULL) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: stop queue\n", __func__); sc->sc_stats.ast_tx_qstop++; } return bf; } /* * Transmit a single frame. * * net80211 will free the node reference if the transmit * fails, so don't free the node reference here. */ static int ath_transmit(struct ieee80211com *ic, struct mbuf *m) { struct ath_softc *sc = ic->ic_softc; struct ieee80211_node *ni; struct mbuf *next; struct ath_buf *bf; ath_bufhead frags; int retval = 0; /* * Tell the reset path that we're currently transmitting. 
*/ ATH_PCU_LOCK(sc); if (sc->sc_inreset_cnt > 0) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: sc_inreset_cnt > 0; bailing\n", __func__); ATH_PCU_UNLOCK(sc); sc->sc_stats.ast_tx_qstop++; ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_task: OACTIVE, finish"); return (ENOBUFS); /* XXX should be EINVAL or? */ } sc->sc_txstart_cnt++; ATH_PCU_UNLOCK(sc); /* Wake the hardware up already */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TX, 0, "ath_transmit: start"); /* * Grab the TX lock - it's ok to do this here; we haven't * yet started transmitting. */ ATH_TX_LOCK(sc); /* * Node reference, if there's one. */ ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; /* * Enforce how deep a node queue can get. * * XXX it would be nicer if we kept an mbuf queue per * node and only whacked them into ath_bufs when we * are ready to schedule some traffic from them. * .. that may come later. * * XXX we should also track the per-node hardware queue * depth so it is easy to limit the _SUM_ of the swq and * hwq frames. Since we only schedule two HWQ frames * at a time, this should be OK for now. */ if ((!(m->m_flags & M_EAPOL)) && (ATH_NODE(ni)->an_swq_depth > sc->sc_txq_node_maxdepth)) { sc->sc_stats.ast_tx_nodeq_overflow++; retval = ENOBUFS; goto finish; } /* * Check how many TX buffers are available. * * If this is for non-EAPOL traffic, just leave some * space free in order for buffer cloning and raw * frame transmission to occur. * * If it's for EAPOL traffic, ignore this for now. * Management traffic will be sent via the raw transmit * method which bypasses this check. * * This is needed to ensure that EAPOL frames during * (re) keying have a chance to go out. * * See kern/138379 for more information. */ if ((!(m->m_flags & M_EAPOL)) && (sc->sc_txbuf_cnt <= sc->sc_txq_data_minfree)) { sc->sc_stats.ast_tx_nobuf++; retval = ENOBUFS; goto finish; } /* * Grab a TX buffer and associated resources. * * If it's an EAPOL frame, allocate a MGMT ath_buf. * That way temporary buffer exhaustion due to * the data path doesn't leave us without the ability * to transmit management frames. * * Otherwise allocate a normal buffer. */ if (m->m_flags & M_EAPOL) bf = ath_getbuf(sc, ATH_BUFTYPE_MGMT); else bf = ath_getbuf(sc, ATH_BUFTYPE_NORMAL); if (bf == NULL) { /* * If we failed to allocate a buffer, fail. * * We shouldn't fail normally, due to the check * above. */ sc->sc_stats.ast_tx_nobuf++; retval = ENOBUFS; goto finish; } /* * At this point we have a buffer; so we need to free it * if we hit any error conditions. */ /* * Check for fragmentation. If this frame * has been broken up verify we have enough * buffers to send all the fragments so all * go out or none... */ TAILQ_INIT(&frags); if ((m->m_flags & M_FRAG) && !ath_txfrag_setup(sc, &frags, m, ni)) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: out of txfrag buffers\n", __func__); sc->sc_stats.ast_tx_nofrag++; if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); /* * XXXGL: is mbuf valid after ath_txfrag_setup? If yes, * we shouldn't free it but return back. */ ieee80211_free_mbuf(m); m = NULL; goto bad; } /* * At this point if we have any TX fragments, then we will * have bumped the node reference once for each of those. */ /* * XXX Is there anything actually _enforcing_ that the * fragments are being transmitted in one hit, rather than * being interleaved with other transmissions on that * hardware queue? * * The ATH TX output lock is the only thing serialising this * right now.
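The two admission checks above boil down to a small reservation policy. A compact, self-contained sketch of that policy, with hypothetical names rather than the driver's API:

#include <errno.h>
#include <stdbool.h>

struct sketch_txpool {
	int txbuf_cnt;		/* free data buffers */
	int data_minfree;	/* headroom reserved for EAPOL/cloning */
	int swq_depth;		/* this node's software queue depth */
	int node_maxdepth;	/* per-node software queue limit */
};

static int
sketch_admit(const struct sketch_txpool *p, bool is_eapol)
{
	/* EAPOL frames bypass both checks and draw from the MGMT pool. */
	if (is_eapol)
		return (0);
	if (p->swq_depth > p->node_maxdepth)
		return (ENOBUFS);	/* node queue too deep */
	if (p->txbuf_cnt <= p->data_minfree)
		return (ENOBUFS);	/* keep headroom so rekeying works */
	return (0);
}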
*/ /* * Calculate the "next fragment" length field in ath_buf * in order to let the transmit path know enough about * what to next write to the hardware. */ if (m->m_flags & M_FRAG) { struct ath_buf *fbf = bf; struct ath_buf *n_fbf = NULL; struct mbuf *fm = m->m_nextpkt; /* * We need to walk the list of fragments and set * the next size to the following buffer. * However, the first buffer isn't in the frag * list, so we have to do some gymnastics here. */ TAILQ_FOREACH(n_fbf, &frags, bf_list) { fbf->bf_nextfraglen = fm->m_pkthdr.len; fbf = n_fbf; fm = fm->m_nextpkt; } } nextfrag: /* * Pass the frame to the h/w for transmission. * Fragmented frames have each frag chained together * with m_nextpkt. We know there are sufficient ath_buf's * to send all the frags because of work done by * ath_txfrag_setup. We leave m_nextpkt set while * calling ath_tx_start so it can use it to extend the * tx duration to cover the subsequent frag and * so it can reclaim all the mbufs in case of an error; * ath_tx_start clears m_nextpkt once it commits to * handing the frame to the hardware. * * Note: if this fails, then the mbufs are freed but * not the node reference. * * So, we now have to free the node reference ourselves here * and return OK up to the stack. */ next = m->m_nextpkt; if (ath_tx_start(sc, ni, bf, m)) { bad: if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); reclaim: bf->bf_m = NULL; bf->bf_node = NULL; ATH_TXBUF_LOCK(sc); ath_returnbuf_head(sc, bf); /* * Free the rest of the node references and * buffers for the fragment list. */ ath_txfrag_cleanup(sc, &frags, ni); ATH_TXBUF_UNLOCK(sc); /* * XXX: And free the node/return OK; ath_tx_start() may have * modified the buffer. We currently have no way to * signify that the mbuf was freed but there was an error. */ ieee80211_free_node(ni); retval = 0; goto finish; } /* * Check here if the node is in power save state. */ ath_tx_update_tim(sc, ni, 1); if (next != NULL) { /* * Beware of state changing between frags. * XXX check sta power-save state? */ if (ni->ni_vap->iv_state != IEEE80211_S_RUN) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: flush fragmented packet, state %s\n", __func__, ieee80211_state_name[ni->ni_vap->iv_state]); /* XXX dmamap */ ieee80211_free_mbuf(next); goto reclaim; } m = next; bf = TAILQ_FIRST(&frags); KASSERT(bf != NULL, ("no buf for txfrag")); TAILQ_REMOVE(&frags, bf, bf_list); goto nextfrag; } /* * Bump watchdog timer. */ sc->sc_wd_timer = 5; finish: ATH_TX_UNLOCK(sc); /* * Finished transmitting! */ ATH_PCU_LOCK(sc); sc->sc_txstart_cnt--; ATH_PCU_UNLOCK(sc); /* Sleep the hardware if required */ ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TX, 0, "ath_transmit: finished"); return (retval); } static int ath_media_change(struct ifnet *ifp) { int error = ieee80211_media_change(ifp); /* NB: only the fixed rate can change and that doesn't need a reset */ return (error == ENETRESET ? 0 : error); } /* * Block/unblock tx+rx processing while a key change is done. * We assume the caller serializes key management operations * so we only need to worry about synchronization with other * uses that originate in the driver.
*/ static void ath_key_update_begin(struct ieee80211vap *vap) { struct ath_softc *sc = vap->iv_ic->ic_softc; DPRINTF(sc, ATH_DEBUG_KEYCACHE, "%s:\n", __func__); taskqueue_block(sc->sc_tq); } static void ath_key_update_end(struct ieee80211vap *vap) { struct ath_softc *sc = vap->iv_ic->ic_softc; DPRINTF(sc, ATH_DEBUG_KEYCACHE, "%s:\n", __func__); taskqueue_unblock(sc->sc_tq); } static void ath_update_promisc(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; u_int32_t rfilt; /* configure rx filter */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); rfilt = ath_calcrxfilter(sc); ath_hal_setrxfilter(sc->sc_ah, rfilt); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_MODE, "%s: RX filter 0x%x\n", __func__, rfilt); } /* * Driver-internal mcast update call. * * Assumes the hardware is already awake. */ static void ath_update_mcast_hw(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; u_int32_t mfilt[2]; /* calculate and install multicast filter */ if (ic->ic_allmulti == 0) { struct ieee80211vap *vap; struct ifnet *ifp; struct ifmultiaddr *ifma; /* * Merge multicast addresses to form the hardware filter. */ mfilt[0] = mfilt[1] = 0; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { caddr_t dl; uint32_t val; uint8_t pos; /* calculate XOR of eight 6bit values */ dl = LLADDR((struct sockaddr_dl *) ifma->ifma_addr); val = le32dec(dl + 0); pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; val = le32dec(dl + 3); pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; pos &= 0x3f; mfilt[pos / 32] |= (1 << (pos % 32)); } if_maddr_runlock(ifp); } } else mfilt[0] = mfilt[1] = ~0; ath_hal_setmcastfilter(sc->sc_ah, mfilt[0], mfilt[1]); DPRINTF(sc, ATH_DEBUG_MODE, "%s: MC filter %08x:%08x\n", __func__, mfilt[0], mfilt[1]); } /* * Called from the net80211 layer - force the hardware * awake before operating. */ static void ath_update_mcast(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ath_update_mcast_hw(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } void ath_mode_init(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; /* configure rx filter */ rfilt = ath_calcrxfilter(sc); ath_hal_setrxfilter(ah, rfilt); /* configure operational mode */ ath_hal_setopmode(ah); /* handle any link-level address change */ ath_hal_setmac(ah, ic->ic_macaddr); /* calculate and install multicast filter */ ath_update_mcast_hw(sc); } /* * Set the slot time based on the current setting. */ void ath_setslottime(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; u_int usec; if (IEEE80211_IS_CHAN_HALF(ic->ic_curchan)) usec = 13; else if (IEEE80211_IS_CHAN_QUARTER(ic->ic_curchan)) usec = 21; else if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) { /* honor short/long slot time only in 11g */ /* XXX shouldn't honor on pure g or turbo g channel */ if (ic->ic_flags & IEEE80211_F_SHSLOT) usec = HAL_SLOT_TIME_9; else usec = HAL_SLOT_TIME_20; } else usec = HAL_SLOT_TIME_9; DPRINTF(sc, ATH_DEBUG_RESET, "%s: chan %u MHz flags 0x%x %s slot, %u usec\n", __func__, ic->ic_curchan->ic_freq, ic->ic_curchan->ic_flags, ic->ic_flags & IEEE80211_F_SHSLOT ? 
"short" : "long", usec); /* Wake up the hardware first before updating the slot time */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ath_hal_setslottime(ah, usec); ath_power_restore_power_state(sc); sc->sc_updateslot = OK; ATH_UNLOCK(sc); } /* * Callback from the 802.11 layer to update the * slot time based on the current setting. */ static void ath_updateslot(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; /* * When not coordinating the BSS, change the hardware * immediately. For other operation we defer the change * until beacon updates have propagated to the stations. * * XXX sc_updateslot isn't changed behind a lock? */ if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) sc->sc_updateslot = UPDATE; else ath_setslottime(sc); } /* * Append the contents of src to dst; both queues * are assumed to be locked. */ void ath_txqmove(struct ath_txq *dst, struct ath_txq *src) { ATH_TXQ_LOCK_ASSERT(src); ATH_TXQ_LOCK_ASSERT(dst); TAILQ_CONCAT(&dst->axq_q, &src->axq_q, bf_list); dst->axq_link = src->axq_link; src->axq_link = NULL; dst->axq_depth += src->axq_depth; dst->axq_aggr_depth += src->axq_aggr_depth; src->axq_depth = 0; src->axq_aggr_depth = 0; } /* * Reset the hardware, with no loss. * * This can't be used for a general case reset. */ static void ath_reset_proc(void *arg, int pending) { struct ath_softc *sc = arg; #if 0 device_printf(sc->sc_dev, "%s: resetting\n", __func__); #endif ath_reset(sc, ATH_RESET_NOLOSS); } /* * Reset the hardware after detecting beacons have stopped. */ static void ath_bstuck_proc(void *arg, int pending) { struct ath_softc *sc = arg; uint32_t hangs = 0; if (ath_hal_gethangstate(sc->sc_ah, 0xff, &hangs) && hangs != 0) device_printf(sc->sc_dev, "bb hang detected (0x%x)\n", hangs); #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_STUCK_BEACON)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_STUCK_BEACON, 0, NULL); #endif device_printf(sc->sc_dev, "stuck beacon; resetting (bmiss count %u)\n", sc->sc_bmisscount); sc->sc_stats.ast_bstuck++; /* * This assumes that there's no simultaneous channel mode change * occurring. */ ath_reset(sc, ATH_RESET_NOLOSS); } static int ath_desc_alloc(struct ath_softc *sc) { int error; error = ath_descdma_setup(sc, &sc->sc_txdma, &sc->sc_txbuf, "tx", sc->sc_tx_desclen, ath_txbuf, ATH_MAX_SCATTER); if (error != 0) { return error; } sc->sc_txbuf_cnt = ath_txbuf; error = ath_descdma_setup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt, "tx_mgmt", sc->sc_tx_desclen, ath_txbuf_mgmt, ATH_TXDESC); if (error != 0) { ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); return error; } /* * XXX mark txbuf_mgmt frames with ATH_BUF_MGMT, so the * flag doesn't have to be set in ath_getbuf_locked(). 
*/ error = ath_descdma_setup(sc, &sc->sc_bdma, &sc->sc_bbuf, "beacon", sc->sc_tx_desclen, ATH_BCBUF, 1); if (error != 0) { ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); ath_descdma_cleanup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt); return error; } return 0; } static void ath_desc_free(struct ath_softc *sc) { if (sc->sc_bdma.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_bdma, &sc->sc_bbuf); if (sc->sc_txdma.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); if (sc->sc_txdma_mgmt.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt); } static struct ieee80211_node * ath_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; const size_t space = sizeof(struct ath_node) + sc->sc_rc->arc_space; struct ath_node *an; an = malloc(space, M_80211_NODE, M_NOWAIT|M_ZERO); if (an == NULL) { /* XXX stat+msg */ return NULL; } ath_rate_node_init(sc, an); /* Setup the mutex - there's no associd yet so set the name to NULL */ snprintf(an->an_name, sizeof(an->an_name), "%s: node %p", device_get_nameunit(sc->sc_dev), an); mtx_init(&an->an_mtx, an->an_name, NULL, MTX_DEF); /* XXX setup ath_tid */ ath_tx_tid_init(sc, an); DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, mac, ":", an); return &an->an_node; } static void ath_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, ni->ni_macaddr, ":", ATH_NODE(ni)); /* Cleanup ath_tid, free unused bufs, unlink bufs in TXQ */ ath_tx_node_flush(sc, ATH_NODE(ni)); ath_rate_node_cleanup(sc, ATH_NODE(ni)); sc->sc_node_cleanup(ni); } static void ath_node_free(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, ni->ni_macaddr, ":", ATH_NODE(ni)); mtx_destroy(&ATH_NODE(ni)->an_mtx); sc->sc_node_free(ni); } static void ath_node_getsignal(const struct ieee80211_node *ni, int8_t *rssi, int8_t *noise) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; *rssi = ic->ic_node_getrssi(ni); if (ni->ni_chan != IEEE80211_CHAN_ANYC) *noise = ath_hal_getchannoise(ah, ni->ni_chan); else *noise = -95; /* nominally correct */ } /* * Set the default antenna. */ void ath_setdefantenna(struct ath_softc *sc, u_int antenna) { struct ath_hal *ah = sc->sc_ah; /* XXX block beacon interrupts */ ath_hal_setdefantenna(ah, antenna); if (sc->sc_defant != antenna) sc->sc_stats.ast_ant_defswitch++; sc->sc_defant = antenna; sc->sc_rxotherant = 0; } static void ath_txq_init(struct ath_softc *sc, struct ath_txq *txq, int qnum) { txq->axq_qnum = qnum; txq->axq_ac = 0; txq->axq_depth = 0; txq->axq_aggr_depth = 0; txq->axq_intrcnt = 0; txq->axq_link = NULL; txq->axq_softc = sc; TAILQ_INIT(&txq->axq_q); TAILQ_INIT(&txq->axq_tidq); TAILQ_INIT(&txq->fifo.axq_q); ATH_TXQ_LOCK_INIT(sc, txq); } /* * Setup a h/w transmit queue. */ static struct ath_txq * ath_txq_setup(struct ath_softc *sc, int qtype, int subtype) { struct ath_hal *ah = sc->sc_ah; HAL_TXQ_INFO qi; int qnum; memset(&qi, 0, sizeof(qi)); qi.tqi_subtype = subtype; qi.tqi_aifs = HAL_TXQ_USEDEFAULT; qi.tqi_cwmin = HAL_TXQ_USEDEFAULT; qi.tqi_cwmax = HAL_TXQ_USEDEFAULT; /* * Enable interrupts only for EOL and DESC conditions. 
* We mark tx descriptors to receive a DESC interrupt * when a tx queue gets deep; otherwise waiting for the * EOL to reap descriptors. Note that this is done to * reduce interrupt load and this only defers reaping * descriptors, never transmitting frames. Aside from * reducing interrupts this also permits more concurrency. * The only potential downside is if the tx queue backs * up in which case the top half of the kernel may back up * due to a lack of tx descriptors. */ if (sc->sc_isedma) qi.tqi_qflags = HAL_TXQ_TXEOLINT_ENABLE | HAL_TXQ_TXOKINT_ENABLE; else qi.tqi_qflags = HAL_TXQ_TXEOLINT_ENABLE | HAL_TXQ_TXDESCINT_ENABLE; qnum = ath_hal_setuptxqueue(ah, qtype, &qi); if (qnum == -1) { /* * NB: don't print a message, this happens * normally on parts with too few tx queues */ return NULL; } if (qnum >= nitems(sc->sc_txq)) { device_printf(sc->sc_dev, "hal qnum %u out of range, max %zu!\n", qnum, nitems(sc->sc_txq)); ath_hal_releasetxqueue(ah, qnum); return NULL; } if (!ATH_TXQ_SETUP(sc, qnum)) { ath_txq_init(sc, &sc->sc_txq[qnum], qnum); sc->sc_txqsetup |= 1<<qnum; } return &sc->sc_txq[qnum]; } /* * Setup a hardware data transmit queue for the specified * access control. The hal may not support all requested * queues in which case it will return a reference to a * previously setup queue. We record the mapping from ac's * to h/w queues for use by ath_tx_start and also track * the set of h/w queues being used to optimize work in the * transmit interrupt handler and related routines. */ static int ath_tx_setup(struct ath_softc *sc, int ac, int haltype) { struct ath_txq *txq; if (ac >= nitems(sc->sc_ac2q)) { device_printf(sc->sc_dev, "AC %u out of range, max %zu!\n", ac, nitems(sc->sc_ac2q)); return 0; } txq = ath_txq_setup(sc, HAL_TX_QUEUE_DATA, haltype); if (txq != NULL) { txq->axq_ac = ac; sc->sc_ac2q[ac] = txq; return 1; } else return 0; } /* * Update WME parameters for a transmit queue. */ static int ath_txq_update(struct ath_softc *sc, int ac) { #define ATH_EXPONENT_TO_VALUE(v) ((1<<v)-1) struct ieee80211com *ic = &sc->sc_ic; struct ath_txq *txq = sc->sc_ac2q[ac]; struct wmeParams *wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[ac]; struct ath_hal *ah = sc->sc_ah; HAL_TXQ_INFO qi; ath_hal_gettxqueueprops(ah, txq->axq_qnum, &qi); #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) { /* * AIFS is zero so there's no pre-transmit wait. The * burst time defines the slot duration and is configured * through net80211. The QCU is setup to not do post-xmit * back off, lockout all lower-priority QCU's, and fire * off the DMA beacon alert timer which is setup based * on the slot configuration. */ qi.tqi_qflags = HAL_TXQ_TXOKINT_ENABLE | HAL_TXQ_TXERRINT_ENABLE | HAL_TXQ_TXURNINT_ENABLE | HAL_TXQ_TXEOLINT_ENABLE | HAL_TXQ_DBA_GATED | HAL_TXQ_BACKOFF_DISABLE | HAL_TXQ_ARB_LOCKOUT_GLOBAL ; qi.tqi_aifs = 0; /* XXX +dbaprep? */ qi.tqi_readyTime = sc->sc_tdmaslotlen; qi.tqi_burstTime = qi.tqi_readyTime; } else { #endif /* * XXX shouldn't this just use the default flags * used in the previous queue setup?
*/ qi.tqi_qflags = HAL_TXQ_TXOKINT_ENABLE | HAL_TXQ_TXERRINT_ENABLE | HAL_TXQ_TXDESCINT_ENABLE | HAL_TXQ_TXURNINT_ENABLE | HAL_TXQ_TXEOLINT_ENABLE ; qi.tqi_aifs = wmep->wmep_aifsn; qi.tqi_cwmin = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); qi.tqi_cwmax = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); qi.tqi_readyTime = 0; qi.tqi_burstTime = IEEE80211_TXOP_TO_US(wmep->wmep_txopLimit); #ifdef IEEE80211_SUPPORT_TDMA } #endif DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%u qflags 0x%x aifs %u cwmin %u cwmax %u burstTime %u\n", __func__, txq->axq_qnum, qi.tqi_qflags, qi.tqi_aifs, qi.tqi_cwmin, qi.tqi_cwmax, qi.tqi_burstTime); if (!ath_hal_settxqueueprops(ah, txq->axq_qnum, &qi)) { device_printf(sc->sc_dev, "unable to update hardware queue " "parameters for %s traffic!\n", ieee80211_wme_acnames[ac]); return 0; } else { ath_hal_resettxqueue(ah, txq->axq_qnum); /* push to h/w */ return 1; } #undef ATH_EXPONENT_TO_VALUE } /* * Callback from the 802.11 layer to update WME parameters. */ int ath_wme_update(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; return !ath_txq_update(sc, WME_AC_BE) || !ath_txq_update(sc, WME_AC_BK) || !ath_txq_update(sc, WME_AC_VI) || !ath_txq_update(sc, WME_AC_VO) ? EIO : 0; } /* * Reclaim resources for a setup queue. */ static void ath_tx_cleanupq(struct ath_softc *sc, struct ath_txq *txq) { ath_hal_releasetxqueue(sc->sc_ah, txq->axq_qnum); sc->sc_txqsetup &= ~(1<<txq->axq_qnum); ATH_TXQ_LOCK_DESTROY(txq); } /* * Reclaim all tx queue resources. */ static void ath_tx_cleanup(struct ath_softc *sc) { int i; ATH_TXBUF_LOCK_DESTROY(sc); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) if (ATH_TXQ_SETUP(sc, i)) ath_tx_cleanupq(sc, &sc->sc_txq[i]); } /* * Return h/w rate index for an IEEE rate (w/o basic rate bit) * using the current rates in sc_rixmap. */ int ath_tx_findrix(const struct ath_softc *sc, uint8_t rate) { int rix = sc->sc_rixmap[rate]; /* NB: return lowest rix for invalid rate */ return (rix == 0xff ? 0 : rix); } static void ath_tx_update_stats(struct ath_softc *sc, struct ath_tx_status *ts, struct ath_buf *bf) { struct ieee80211_node *ni = bf->bf_node; struct ieee80211com *ic = &sc->sc_ic; int sr, lr, pri; if (ts->ts_status == 0) { u_int8_t txant = ts->ts_antenna; sc->sc_stats.ast_ant_tx[txant]++; sc->sc_ant_tx[txant]++; if (ts->ts_finaltsi != 0) sc->sc_stats.ast_tx_altrate++; pri = M_WME_GETAC(bf->bf_m); if (pri >= WME_AC_VO) ic->ic_wme.wme_hipri_traffic++; if ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) ni->ni_inact = ni->ni_inact_reload; } else { if (ts->ts_status & HAL_TXERR_XRETRY) sc->sc_stats.ast_tx_xretries++; if (ts->ts_status & HAL_TXERR_FIFO) sc->sc_stats.ast_tx_fifoerr++; if (ts->ts_status & HAL_TXERR_FILT) sc->sc_stats.ast_tx_filtered++; if (ts->ts_status & HAL_TXERR_XTXOP) sc->sc_stats.ast_tx_xtxop++; if (ts->ts_status & HAL_TXERR_TIMER_EXPIRED) sc->sc_stats.ast_tx_timerexpired++; if (bf->bf_m->m_flags & M_FF) sc->sc_stats.ast_ff_txerr++; } /* XXX when is this valid? */ if (ts->ts_flags & HAL_TX_DESC_CFG_ERR) sc->sc_stats.ast_tx_desccfgerr++; /* * This can be valid for successful frame transmission! * If there's a TX FIFO underrun during aggregate transmission, * the MAC will pad the rest of the aggregate with delimiters. * If a BA is returned, the frame is marked as "OK" and it's up * to the TX completion code to notice which frames weren't * successfully transmitted.
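For reference, the ATH_EXPONENT_TO_VALUE() conversion used in ath_txq_update() above is just ((1 << v) - 1): WME stores contention windows as exponents and the hardware wants the window value itself. A tiny worked example (names here are illustrative, not the driver's):

#include <assert.h>

#define SKETCH_EXP_TO_VALUE(v)	((1 << (v)) - 1)

int
main(void)
{
	assert(SKETCH_EXP_TO_VALUE(4) == 15);		/* exponent 4  -> cw 15 */
	assert(SKETCH_EXP_TO_VALUE(10) == 1023);	/* exponent 10 -> cw 1023 */
	return (0);
}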
*/ if (ts->ts_flags & HAL_TX_DATA_UNDERRUN) sc->sc_stats.ast_tx_data_underrun++; if (ts->ts_flags & HAL_TX_DELIM_UNDERRUN) sc->sc_stats.ast_tx_delim_underrun++; sr = ts->ts_shortretry; lr = ts->ts_longretry; sc->sc_stats.ast_tx_shortretry += sr; sc->sc_stats.ast_tx_longretry += lr; } /* * The default completion. If fail is 1, this means * "please don't retry the frame, and just return -1 status * to the net80211 stack". */ void ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail) { struct ath_tx_status *ts = &bf->bf_status.ds_txstat; int st; if (fail == 1) st = -1; else st = ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) ? ts->ts_status : HAL_TXERR_XRETRY; #if 0 if (bf->bf_state.bfs_dobaw) device_printf(sc->sc_dev, "%s: bf %p: seqno %d: dobaw should've been cleared!\n", __func__, bf, SEQNO(bf->bf_state.bfs_seqno)); #endif if (bf->bf_next != NULL) device_printf(sc->sc_dev, "%s: bf %p: seqno %d: bf_next not NULL!\n", __func__, bf, SEQNO(bf->bf_state.bfs_seqno)); /* * Check if the node software queue is empty; if so * then clear the TIM. * * This needs to be done before the buffer is freed as * otherwise the node reference will have been released * and the node may not actually exist any longer. * * XXX I don't like this belonging here, but it's cleaner * to do it here right now than in all the other places * where ath_tx_default_comp() is called. * * XXX TODO: during drain, ensure that the callback is * being called so we get a chance to update the TIM. */ if (bf->bf_node) { ATH_TX_LOCK(sc); ath_tx_update_tim(sc, bf->bf_node, 0); ATH_TX_UNLOCK(sc); } /* * Do any tx complete callback. Note this must * be done before releasing the node reference. * This will free the mbuf, release the net80211 * node and recycle the ath_buf. */ ath_tx_freebuf(sc, bf, st); } /* * Update rate control with the given completion status. */ void ath_tx_update_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_rc_series *rc, struct ath_tx_status *ts, int frmlen, int nframes, int nbad) { struct ath_node *an; /* Only for unicast frames */ if (ni == NULL) return; an = ATH_NODE(ni); ATH_NODE_UNLOCK_ASSERT(an); if ((ts->ts_status & HAL_TXERR_FILT) == 0) { ATH_NODE_LOCK(an); ath_rate_tx_complete(sc, an, rc, ts, frmlen, nframes, nbad); ATH_NODE_UNLOCK(an); } } /* * Process the completion of the given buffer. * * This calls the rate control update and then the buffer completion. * This will either free the buffer or requeue it. In any case, the * bf pointer should be treated as invalid after this function is called. */ void ath_tx_process_buf_completion(struct ath_softc *sc, struct ath_txq *txq, struct ath_tx_status *ts, struct ath_buf *bf) { struct ieee80211_node *ni = bf->bf_node; ATH_TX_UNLOCK_ASSERT(sc); ATH_TXQ_UNLOCK_ASSERT(txq); /* If unicast frame, update general statistics */ if (ni != NULL) { /* update statistics */ ath_tx_update_stats(sc, ts, bf); } /* * Call the completion handler. * The completion handler is responsible for * calling the rate control code. * * Frames with no completion handler get the * rate control code called here. */ if (bf->bf_comp == NULL) { if ((ts->ts_status & HAL_TXERR_FILT) == 0 && (bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) { /* * XXX assume this isn't an aggregate * frame. */ ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc, ts, bf->bf_state.bfs_pktlen, 1, (ts->ts_status == 0 ? 0 : 1)); } ath_tx_default_comp(sc, bf, 0); } else bf->bf_comp(sc, bf, 0); } /* * Process completed xmit descriptors from the specified queue.
* Kick the packet scheduler if needed. This can occur from this * particular task. */ static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; struct ath_desc *ds; struct ath_tx_status *ts; struct ieee80211_node *ni; #ifdef IEEE80211_SUPPORT_SUPERG struct ieee80211com *ic = &sc->sc_ic; #endif /* IEEE80211_SUPPORT_SUPERG */ int nacked; HAL_STATUS status; DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: tx queue %u head %p link %p\n", __func__, txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(sc->sc_ah, txq->axq_qnum), txq->axq_link); ATH_KTR(sc, ATH_KTR_TXCOMP, 4, "ath_tx_processq: txq=%u head %p link %p depth %p", txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(sc->sc_ah, txq->axq_qnum), txq->axq_link, txq->axq_depth); nacked = 0; for (;;) { ATH_TXQ_LOCK(txq); txq->axq_intrcnt = 0; /* reset periodic desc intr count */ bf = TAILQ_FIRST(&txq->axq_q); if (bf == NULL) { ATH_TXQ_UNLOCK(txq); break; } ds = bf->bf_lastds; /* XXX must be setup correctly! */ ts = &bf->bf_status.ds_txstat; status = ath_hal_txprocdesc(ah, ds, ts); #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_XMIT_DESC) ath_printtxbuf(sc, bf, txq->axq_qnum, 0, status == HAL_OK); else if ((sc->sc_debug & ATH_DEBUG_RESET) && (dosched == 0)) ath_printtxbuf(sc, bf, txq->axq_qnum, 0, status == HAL_OK); #endif #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) { if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS, sc->sc_tx_statuslen, (char *) ds); } #endif if (status == HAL_EINPROGRESS) { ATH_KTR(sc, ATH_KTR_TXCOMP, 3, "ath_tx_processq: txq=%u, bf=%p ds=%p, HAL_EINPROGRESS", txq->axq_qnum, bf, ds); ATH_TXQ_UNLOCK(txq); break; } ATH_TXQ_REMOVE(txq, bf, bf_list); /* * Sanity check. */ if (txq->axq_qnum != bf->bf_state.bfs_tx_queue) { device_printf(sc->sc_dev, "%s: TXQ=%d: bf=%p, bfs_tx_queue=%d\n", __func__, txq->axq_qnum, bf, bf->bf_state.bfs_tx_queue); } if (txq->axq_qnum != bf->bf_last->bf_state.bfs_tx_queue) { device_printf(sc->sc_dev, "%s: TXQ=%d: bf_last=%p, bfs_tx_queue=%d\n", __func__, txq->axq_qnum, bf->bf_last, bf->bf_last->bf_state.bfs_tx_queue); } #if 0 if (txq->axq_depth > 0) { /* * More frames follow. Mark the buffer busy * so it's not re-used while the hardware may * still re-read the link field in the descriptor. * * Use the last buffer in an aggregate as that * is where the hardware may be - intermediate * descriptors won't be "busy". */ bf->bf_last->bf_flags |= ATH_BUF_BUSY; } else txq->axq_link = NULL; #else bf->bf_last->bf_flags |= ATH_BUF_BUSY; #endif if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth--; ni = bf->bf_node; ATH_KTR(sc, ATH_KTR_TXCOMP, 5, "ath_tx_processq: txq=%u, bf=%p, ds=%p, ni=%p, ts_status=0x%08x", txq->axq_qnum, bf, ds, ni, ts->ts_status); /* * If unicast frame was ack'd update RSSI, * including the last rx time used to * workaround phantom bmiss interrupts. */ if (ni != NULL && ts->ts_status == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) { nacked++; sc->sc_stats.ast_tx_rssi = ts->ts_rssi; ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi, ts->ts_rssi); } ATH_TXQ_UNLOCK(txq); /* * Update statistics and call completion */ ath_tx_process_buf_completion(sc, txq, ts, bf); /* XXX at this point, bf and ni may be totally invalid */ } #ifdef IEEE80211_SUPPORT_SUPERG /* * Flush fast-frame staging queue when traffic slows. 
*/ if (txq->axq_depth <= 1) ieee80211_ff_flush(ic, txq->axq_ac); #endif /* Kick the software TXQ scheduler */ if (dosched) { ATH_TX_LOCK(sc); ath_txq_sched(sc, txq); ATH_TX_UNLOCK(sc); } ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_processq: txq=%u: done", txq->axq_qnum); return nacked; } #define TXQACTIVE(t, q) ( (t) & (1 << (q))) /* * Deferred processing of transmit interrupt; special-cased * for a single hardware transmit queue (e.g. 5210 and 5211). */ static void ath_tx_proc_q0(void *arg, int npending) { struct ath_softc *sc = arg; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc_q0: txqs=0x%08x", txqs); if (TXQACTIVE(txqs, 0) && ath_tx_processq(sc, &sc->sc_txq[0], 1)) /* XXX why is lastrx updated in tx code? */ sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); if (TXQACTIVE(txqs, sc->sc_cabq->axq_qnum)) ath_tx_processq(sc, sc->sc_cabq, 1); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } /* * Deferred processing of transmit interrupt; special-cased * for four hardware queues, 0-3 (e.g. 5212 w/ WME support). */ static void ath_tx_proc_q0123(void *arg, int npending) { struct ath_softc *sc = arg; int nacked; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc_q0123: txqs=0x%08x", txqs); /* * Process each active queue. */ nacked = 0; if (TXQACTIVE(txqs, 0)) nacked += ath_tx_processq(sc, &sc->sc_txq[0], 1); if (TXQACTIVE(txqs, 1)) nacked += ath_tx_processq(sc, &sc->sc_txq[1], 1); if (TXQACTIVE(txqs, 2)) nacked += ath_tx_processq(sc, &sc->sc_txq[2], 1); if (TXQACTIVE(txqs, 3)) nacked += ath_tx_processq(sc, &sc->sc_txq[3], 1); if (TXQACTIVE(txqs, sc->sc_cabq->axq_qnum)) ath_tx_processq(sc, sc->sc_cabq, 1); if (nacked) sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } /* * Deferred processing of transmit interrupt. */ static void ath_tx_proc(void *arg, int npending) { struct ath_softc *sc = arg; int i, nacked; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc: txqs=0x%08x", txqs); /* * Process each active queue. */ nacked = 0; for (i = 0; i < HAL_NUM_TX_QUEUES; i++) if (ATH_TXQ_SETUP(sc, i) && TXQACTIVE(txqs, i)) nacked += ath_tx_processq(sc, &sc->sc_txq[i], 1); if (nacked) sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } #undef TXQACTIVE /* * Deferred processing of TXQ rescheduling. */ static void ath_txq_sched_tasklet(void *arg, int npending) { struct ath_softc *sc = arg; int i; /* XXX is skipping ok? 
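The common shape of the three ath_tx_proc* handlers above is: latch and clear the active-queue bitmask under the PCU lock, then process whichever bits were set. A minimal sketch of that latch, with hypothetical names and a pthread mutex standing in for ATH_PCU_LOCK:

#include <stdint.h>
#include <pthread.h>

#define SKETCH_TXQACTIVE(t, q)	((t) & (1u << (q)))

struct sketch_sc {
	pthread_mutex_t pcu_mtx;
	uint32_t txq_active;	/* bits set by the interrupt path */
};

static uint32_t
sketch_latch_txqs(struct sketch_sc *sc)
{
	uint32_t txqs;

	pthread_mutex_lock(&sc->pcu_mtx);
	txqs = sc->txq_active;		/* snapshot what the ISR flagged */
	sc->txq_active &= ~txqs;	/* clear; new IRQs re-arm the task */
	pthread_mutex_unlock(&sc->pcu_mtx);
	return (txqs);
}

/* The caller then processes each queue q with SKETCH_TXQACTIVE(txqs, q) set. */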
*/ ATH_PCU_LOCK(sc); #if 0 if (sc->sc_inreset_cnt > 0) { device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; skipping\n", __func__); ATH_PCU_UNLOCK(sc); return; } #endif sc->sc_txproc_cnt++; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_TX_LOCK(sc); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ath_txq_sched(sc, &sc->sc_txq[i]); } } ATH_TX_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); } void ath_returnbuf_tail(struct ath_softc *sc, struct ath_buf *bf) { ATH_TXBUF_LOCK_ASSERT(sc); if (bf->bf_flags & ATH_BUF_MGMT) TAILQ_INSERT_TAIL(&sc->sc_txbuf_mgmt, bf, bf_list); else { TAILQ_INSERT_TAIL(&sc->sc_txbuf, bf, bf_list); sc->sc_txbuf_cnt++; if (sc->sc_txbuf_cnt > ath_txbuf) { device_printf(sc->sc_dev, "%s: sc_txbuf_cnt > %d?\n", __func__, ath_txbuf); sc->sc_txbuf_cnt = ath_txbuf; } } } void ath_returnbuf_head(struct ath_softc *sc, struct ath_buf *bf) { ATH_TXBUF_LOCK_ASSERT(sc); if (bf->bf_flags & ATH_BUF_MGMT) TAILQ_INSERT_HEAD(&sc->sc_txbuf_mgmt, bf, bf_list); else { TAILQ_INSERT_HEAD(&sc->sc_txbuf, bf, bf_list); sc->sc_txbuf_cnt++; if (sc->sc_txbuf_cnt > ATH_TXBUF) { device_printf(sc->sc_dev, "%s: sc_txbuf_cnt > %d?\n", __func__, ATH_TXBUF); sc->sc_txbuf_cnt = ATH_TXBUF; } } } /* * Free the holding buffer if it exists */ void ath_txq_freeholdingbuf(struct ath_softc *sc, struct ath_txq *txq) { ATH_TXBUF_UNLOCK_ASSERT(sc); ATH_TXQ_LOCK_ASSERT(txq); if (txq->axq_holdingbf == NULL) return; txq->axq_holdingbf->bf_flags &= ~ATH_BUF_BUSY; ATH_TXBUF_LOCK(sc); ath_returnbuf_tail(sc, txq->axq_holdingbf); ATH_TXBUF_UNLOCK(sc); txq->axq_holdingbf = NULL; } /* * Add this buffer to the holding queue, freeing the previous * one if it exists. */ static void ath_txq_addholdingbuf(struct ath_softc *sc, struct ath_buf *bf) { struct ath_txq *txq; txq = &sc->sc_txq[bf->bf_state.bfs_tx_queue]; ATH_TXBUF_UNLOCK_ASSERT(sc); ATH_TXQ_LOCK_ASSERT(txq); /* XXX assert ATH_BUF_BUSY is set */ /* XXX assert the tx queue is under the max number */ if (bf->bf_state.bfs_tx_queue > HAL_NUM_TX_QUEUES) { device_printf(sc->sc_dev, "%s: bf=%p: invalid tx queue (%d)\n", __func__, bf, bf->bf_state.bfs_tx_queue); bf->bf_flags &= ~ATH_BUF_BUSY; ath_returnbuf_tail(sc, bf); return; } ath_txq_freeholdingbuf(sc, txq); txq->axq_holdingbf = bf; } /* * Return a buffer to the pool and update the 'busy' flag on the * previous 'tail' entry. * * This _must_ only be called when the buffer is involved in a completed * TX. The logic is that if it was part of an active TX, the previous * buffer on the list is now not involved in a halted TX DMA queue, waiting * for restart (eg for TDMA.) * * The caller must free the mbuf and recycle the node reference. * * XXX This method of handling busy / holding buffers is insanely stupid. * It requires bf_state.bfs_tx_queue to be correctly assigned. It would * be much nicer if buffers in the processq() methods would instead be * always completed there (pushed onto a txq or ath_bufhead) so we knew * exactly what hardware queue they came from in the first place. */ void ath_freebuf(struct ath_softc *sc, struct ath_buf *bf) { struct ath_txq *txq; txq = &sc->sc_txq[bf->bf_state.bfs_tx_queue]; KASSERT((bf->bf_node == NULL), ("%s: bf->bf_node != NULL\n", __func__)); KASSERT((bf->bf_m == NULL), ("%s: bf->bf_m != NULL\n", __func__)); /* * If this buffer is busy, push it onto the holding queue. 
*/ if (bf->bf_flags & ATH_BUF_BUSY) { ATH_TXQ_LOCK(txq); ath_txq_addholdingbuf(sc, bf); ATH_TXQ_UNLOCK(txq); return; } /* * Not a busy buffer, so free normally */ ATH_TXBUF_LOCK(sc); ath_returnbuf_tail(sc, bf); ATH_TXBUF_UNLOCK(sc); } /* * This is currently used by ath_tx_draintxq() and * ath_tx_tid_free_pkts(). * * It recycles a single ath_buf. */ void ath_tx_freebuf(struct ath_softc *sc, struct ath_buf *bf, int status) { struct ieee80211_node *ni = bf->bf_node; struct mbuf *m0 = bf->bf_m; /* * Make sure that we only sync/unload if there's an mbuf. * If not (eg we cloned a buffer), the unload will have already * occurred. */ if (bf->bf_m != NULL) { bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); } bf->bf_node = NULL; bf->bf_m = NULL; /* Free the buffer, it's not needed any longer */ ath_freebuf(sc, bf); /* Pass the buffer back to net80211 - completing it */ ieee80211_tx_complete(ni, m0, status); } static struct ath_buf * ath_tx_draintxq_get_one(struct ath_softc *sc, struct ath_txq *txq) { struct ath_buf *bf; ATH_TXQ_LOCK_ASSERT(txq); /* * Drain the FIFO queue first, then if it's * empty, move to the normal frame queue. */ bf = TAILQ_FIRST(&txq->fifo.axq_q); if (bf != NULL) { /* * Is it the last buffer in this set? * Decrement the FIFO counter. */ if (bf->bf_flags & ATH_BUF_FIFOEND) { if (txq->axq_fifo_depth == 0) { device_printf(sc->sc_dev, "%s: Q%d: fifo_depth=0, fifo.axq_depth=%d?\n", __func__, txq->axq_qnum, txq->fifo.axq_depth); } else txq->axq_fifo_depth--; } ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list); return (bf); } /* * Debugging! */ if (txq->axq_fifo_depth != 0 || txq->fifo.axq_depth != 0) { device_printf(sc->sc_dev, "%s: Q%d: fifo_depth=%d, fifo.axq_depth=%d\n", __func__, txq->axq_qnum, txq->axq_fifo_depth, txq->fifo.axq_depth); } /* * Now drain the pending queue. */ bf = TAILQ_FIRST(&txq->axq_q); if (bf == NULL) { txq->axq_link = NULL; return (NULL); } ATH_TXQ_REMOVE(txq, bf, bf_list); return (bf); } void ath_tx_draintxq(struct ath_softc *sc, struct ath_txq *txq) { #ifdef ATH_DEBUG struct ath_hal *ah = sc->sc_ah; #endif struct ath_buf *bf; u_int ix; /* * NB: this assumes output has been stopped and * we do not need to block ath_tx_proc */ for (ix = 0;; ix++) { ATH_TXQ_LOCK(txq); bf = ath_tx_draintxq_get_one(sc, txq); if (bf == NULL) { ATH_TXQ_UNLOCK(txq); break; } if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth--; #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) { struct ieee80211com *ic = &sc->sc_ic; int status = 0; /* * EDMA operation has a TX completion FIFO * separate from the TX descriptor, so this * method of checking the "completion" status * is wrong. */ if (! sc->sc_isedma) { status = (ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); } ath_printtxbuf(sc, bf, txq->axq_qnum, ix, status); ieee80211_dump_pkt(ic, mtod(bf->bf_m, const uint8_t *), bf->bf_m->m_len, 0, -1); } #endif /* ATH_DEBUG */ /* * Since we're now doing magic in the completion * functions, we -must- call it for aggregation * destinations or BAW tracking will get upset. */ /* * Clear ATH_BUF_BUSY; the completion handler * will free the buffer. */ ATH_TXQ_UNLOCK(txq); bf->bf_flags &= ~ATH_BUF_BUSY; if (bf->bf_comp) bf->bf_comp(sc, bf, 1); else ath_tx_default_comp(sc, bf, 1); } /* * Free the holding buffer if it exists */ ATH_TXQ_LOCK(txq); ath_txq_freeholdingbuf(sc, txq); ATH_TXQ_UNLOCK(txq); /* * Drain software queued frames which are on * active TIDs. 
*/ ath_tx_txq_drain(sc, txq); } static void ath_tx_stopdma(struct ath_softc *sc, struct ath_txq *txq) { struct ath_hal *ah = sc->sc_ah; ATH_TXQ_LOCK_ASSERT(txq); DPRINTF(sc, ATH_DEBUG_RESET, "%s: tx queue [%u] %p, active=%d, hwpending=%d, flags 0x%08x, " "link %p, holdingbf=%p\n", __func__, txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(ah, txq->axq_qnum), (int) (!! ath_hal_txqenabled(ah, txq->axq_qnum)), (int) ath_hal_numtxpending(ah, txq->axq_qnum), txq->axq_flags, txq->axq_link, txq->axq_holdingbf); (void) ath_hal_stoptxdma(ah, txq->axq_qnum); /* We've stopped TX DMA, so mark this as stopped. */ txq->axq_flags &= ~ATH_TXQ_PUTRUNNING; #ifdef ATH_DEBUG if ((sc->sc_debug & ATH_DEBUG_RESET) && (txq->axq_holdingbf != NULL)) { ath_printtxbuf(sc, txq->axq_holdingbf, txq->axq_qnum, 0, 0); } #endif } int ath_stoptxdma(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; int i; /* XXX return value */ if (sc->sc_invalid) return 0; if (!sc->sc_invalid) { /* don't touch the hardware if marked invalid */ DPRINTF(sc, ATH_DEBUG_RESET, "%s: tx queue [%u] %p, link %p\n", __func__, sc->sc_bhalq, (caddr_t)(uintptr_t) ath_hal_gettxbuf(ah, sc->sc_bhalq), NULL); /* stop the beacon queue */ (void) ath_hal_stoptxdma(ah, sc->sc_bhalq); /* Stop the data queues */ for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); ath_tx_stopdma(sc, &sc->sc_txq[i]); ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } } } return 1; } #ifdef ATH_DEBUG void ath_tx_dump(struct ath_softc *sc, struct ath_txq *txq) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; int i = 0; if (! (sc->sc_debug & ATH_DEBUG_RESET)) return; device_printf(sc->sc_dev, "%s: Q%d: begin\n", __func__, txq->axq_qnum); TAILQ_FOREACH(bf, &txq->axq_q, bf_list) { ath_printtxbuf(sc, bf, txq->axq_qnum, i, ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); i++; } device_printf(sc->sc_dev, "%s: Q%d: end\n", __func__, txq->axq_qnum); } #endif /* ATH_DEBUG */ /* * Drain the transmit queues and reclaim resources. */ void ath_legacy_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf_last; int i; (void) ath_stoptxdma(sc); /* * Dump the queue contents */ for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { /* * XXX TODO: should we just handle the completed TX frames * here, whether or not the reset is a full one or not? */ if (ATH_TXQ_SETUP(sc, i)) { #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) ath_tx_dump(sc, &sc->sc_txq[i]); #endif /* ATH_DEBUG */ if (reset_type == ATH_RESET_NOLOSS) { ath_tx_processq(sc, &sc->sc_txq[i], 0); ATH_TXQ_LOCK(&sc->sc_txq[i]); /* * Free the holding buffer; DMA is now * stopped. */ ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]); /* * Setup the link pointer to be the * _last_ buffer/descriptor in the list. * If there's nothing in the list, set it * to NULL. 
*/ bf_last = ATH_TXQ_LAST(&sc->sc_txq[i], axq_q_s); if (bf_last != NULL) { ath_hal_gettxdesclinkptr(ah, bf_last->bf_lastds, &sc->sc_txq[i].axq_link); } else { sc->sc_txq[i].axq_link = NULL; } ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } else ath_tx_draintxq(sc, &sc->sc_txq[i]); } } #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) { struct ath_buf *bf = TAILQ_FIRST(&sc->sc_bbuf); if (bf != NULL && bf->bf_m != NULL) { ath_printtxbuf(sc, bf, sc->sc_bhalq, 0, ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); ieee80211_dump_pkt(&sc->sc_ic, mtod(bf->bf_m, const uint8_t *), bf->bf_m->m_len, 0, -1); } } #endif /* ATH_DEBUG */ sc->sc_wd_timer = 0; } /* * Update internal state after a channel change. */ static void ath_chan_change(struct ath_softc *sc, struct ieee80211_channel *chan) { enum ieee80211_phymode mode; /* * Change channels and update the h/w rate map * if we're switching; e.g. 11a to 11b/g. */ mode = ieee80211_chan2mode(chan); if (mode != sc->sc_curmode) ath_setcurmode(sc, mode); sc->sc_curchan = chan; } /* * Set/change channels. If the channel is really being changed, * it's done by resetting the chip. To accomplish this we must * first cleanup any pending DMA, then restart stuff after a la * ath_init. */ static int ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; int ret = 0; /* Treat this as an interface reset */ ATH_PCU_UNLOCK_ASSERT(sc); ATH_UNLOCK_ASSERT(sc); /* (Try to) stop TX/RX from occurring */ taskqueue_block(sc->sc_tq); ATH_PCU_LOCK(sc); /* Disable interrupts */ ath_hal_intrset(ah, 0); /* Stop new RX/TX/interrupt completion */ if (ath_reset_grablock(sc, 1) == 0) { device_printf(sc->sc_dev, "%s: concurrent reset! Danger!\n", __func__); } /* Stop pending RX/TX completion */ ath_txrx_stop_locked(sc); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_RESET, "%s: %u (%u MHz, flags 0x%x)\n", __func__, ieee80211_chan2ieee(ic, chan), chan->ic_freq, chan->ic_flags); if (chan != sc->sc_curchan) { HAL_STATUS status; /* * To switch channels clear any pending DMA operations; * wait long enough for the RX fifo to drain, reset the * hardware at the new frequency, and then re-enable * the relevant bits of the h/w. */ #if 0 ath_hal_intrset(ah, 0); /* disable interrupts */ #endif ath_stoprecv(sc, 1); /* turn off frame recv */ /* * First, handle completed TX/RX frames. */ ath_rx_flush(sc); ath_draintxq(sc, ATH_RESET_NOLOSS); /* * Next, flush the non-scheduled frames. */ ath_draintxq(sc, ATH_RESET_FULL); /* clear pending tx frames */ ath_update_chainmasks(sc, chan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); if (!ath_hal_reset(ah, sc->sc_opmode, chan, AH_TRUE, HAL_RESET_NORMAL, &status)) { device_printf(sc->sc_dev, "%s: unable to reset " "channel %u (%u MHz, flags 0x%x), hal status %u\n", __func__, ieee80211_chan2ieee(ic, chan), chan->ic_freq, chan->ic_flags, status); ret = EIO; goto finish; } sc->sc_diversity = ath_hal_getdiversity(ah); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, chan); /* Let spectral at in case spectral is enabled */ ath_spectral_enable(sc, chan); /* * Let bluetooth coexistence at in case it's needed for this * channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips * that support it. 
*/ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); /* * Re-enable rx framework. */ if (ath_startrecv(sc) != 0) { device_printf(sc->sc_dev, "%s: unable to restart recv logic\n", __func__); ret = EIO; goto finish; } /* * Change channels and update the h/w rate map * if we're switching; e.g. 11a to 11b/g. */ ath_chan_change(sc, chan); /* * Reset clears the beacon timers; reset them * here if needed. */ if (sc->sc_beacons) { /* restart beacons */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) ath_tdma_config(sc, NULL); else #endif ath_beacon_config(sc, NULL); } /* * Re-enable interrupts. */ #if 0 ath_hal_intrset(ah, sc->sc_imask); #endif } finish: ATH_PCU_LOCK(sc); sc->sc_inreset_cnt--; /* XXX only do this if sc_inreset_cnt == 0? */ ath_hal_intrset(ah, sc->sc_imask); ATH_PCU_UNLOCK(sc); ath_txrx_start(sc); /* XXX ath_start? */ return ret; } /* * Periodically recalibrate the PHY to account * for temperature/environment changes. */ static void ath_calibrate(void *arg) { struct ath_softc *sc = arg; struct ath_hal *ah = sc->sc_ah; struct ieee80211com *ic = &sc->sc_ic; HAL_BOOL longCal, isCalDone = AH_TRUE; HAL_BOOL aniCal, shortCal = AH_FALSE; int nextcal; ATH_LOCK_ASSERT(sc); /* * Force the hardware awake for ANI work. */ ath_power_set_power_state(sc, HAL_PM_AWAKE); /* Skip trying to do this if we're in reset */ if (sc->sc_inreset_cnt) goto restart; if (ic->ic_flags & IEEE80211_F_SCAN) /* defer, off channel */ goto restart; longCal = (ticks - sc->sc_lastlongcal >= ath_longcalinterval*hz); aniCal = (ticks - sc->sc_lastani >= ath_anicalinterval*hz/1000); if (sc->sc_doresetcal) shortCal = (ticks - sc->sc_lastshortcal >= ath_shortcalinterval*hz/1000); DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: shortCal=%d; longCal=%d; aniCal=%d\n", __func__, shortCal, longCal, aniCal); if (aniCal) { sc->sc_stats.ast_ani_cal++; sc->sc_lastani = ticks; ath_hal_ani_poll(ah, sc->sc_curchan); } if (longCal) { sc->sc_stats.ast_per_cal++; sc->sc_lastlongcal = ticks; if (ath_hal_getrfgain(ah) == HAL_RFGAIN_NEED_CHANGE) { /* * Rfgain is out of bounds, reset the chip * to load new gain values. */ DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: rfgain change\n", __func__); sc->sc_stats.ast_per_rfgain++; sc->sc_resetcal = 0; sc->sc_doresetcal = AH_TRUE; taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask); callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc); ath_power_restore_power_state(sc); return; } /* * If this long cal is after an idle period, then * reset the data collection state so we start fresh. */ if (sc->sc_resetcal) { (void) ath_hal_calreset(ah, sc->sc_curchan); sc->sc_lastcalreset = ticks; sc->sc_lastshortcal = ticks; sc->sc_resetcal = 0; sc->sc_doresetcal = AH_TRUE; } } /* Only call if we're doing a short/long cal, not for ANI calibration */ if (shortCal || longCal) { isCalDone = AH_FALSE; if (ath_hal_calibrateN(ah, sc->sc_curchan, longCal, &isCalDone)) { if (longCal) { /* * Calibrate noise floor data again in case of change. */ ath_hal_process_noisefloor(ah); } } else { DPRINTF(sc, ATH_DEBUG_ANY, "%s: calibration of channel %u failed\n", __func__, sc->sc_curchan->ic_freq); sc->sc_stats.ast_per_calfail++; } if (shortCal) sc->sc_lastshortcal = ticks; } if (!isCalDone) { restart: /* * Use a shorter interval to potentially collect multiple * data samples required to complete calibration. Once * we're told the work is done we drop back to a longer * interval between requests. 
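 * (Worked example, assuming the driver's usual defaults of ath_shortcalinterval=100 ms and hz=1000: nextcal = 100*1000/1000 = 100 ticks, stretched 10x to 1000 ticks when not operating as HOSTAP.)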
We're more aggressive doing * work when operating as an AP to improve operation right * after startup. */ sc->sc_lastshortcal = ticks; nextcal = ath_shortcalinterval*hz/1000; if (sc->sc_opmode != HAL_M_HOSTAP) nextcal *= 10; sc->sc_doresetcal = AH_TRUE; } else { /* nextcal should be the shortest time for next event */ nextcal = ath_longcalinterval*hz; if (sc->sc_lastcalreset == 0) sc->sc_lastcalreset = sc->sc_lastlongcal; else if (ticks - sc->sc_lastcalreset >= ath_resetcalinterval*hz) sc->sc_resetcal = 1; /* setup reset next trip */ sc->sc_doresetcal = AH_FALSE; } /* ANI calibration may occur more often than short/long/resetcal */ if (ath_anicalinterval > 0) nextcal = MIN(nextcal, ath_anicalinterval*hz/1000); if (nextcal != 0) { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: next +%u (%sisCalDone)\n", __func__, nextcal, isCalDone ? "" : "!"); callout_reset(&sc->sc_cal_ch, nextcal, ath_calibrate, sc); } else { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: calibration disabled\n", __func__); /* NB: don't rearm timer */ } /* * Restore power state now that we're done. */ ath_power_restore_power_state(sc); } static void ath_scan_start(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; /* XXX calibration timer? */ /* XXXGL: is constant ieee80211broadcastaddr a correct choice? */ ATH_LOCK(sc); sc->sc_scanning = 1; sc->sc_syncbeacon = 0; rfilt = ath_calcrxfilter(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); ath_hal_setrxfilter(ah, rfilt); ath_hal_setassocid(ah, ieee80211broadcastaddr, 0); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0\n", __func__, rfilt, ether_sprintf(ieee80211broadcastaddr)); } static void ath_scan_end(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; ATH_LOCK(sc); sc->sc_scanning = 0; rfilt = ath_calcrxfilter(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); ath_hal_setrxfilter(ah, rfilt); ath_hal_setassocid(ah, sc->sc_curbssid, sc->sc_curaid); ath_hal_process_noisefloor(ah); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0x%x\n", __func__, rfilt, ether_sprintf(sc->sc_curbssid), sc->sc_curaid); } #ifdef ATH_ENABLE_11N /* * For now, just do a channel change. * * Later, we'll go through the hard slog of suspending tx/rx, changing rate * control state and resetting the hardware without dropping frames out * of the queue. * * The unfortunate trouble here is making absolutely sure that the * channel width change has propagated enough so the hardware * absolutely isn't handed bogus frames for it's current operating * mode. (Eg, 40MHz frames in 20MHz mode.) Since TX and RX can and * does occur in parallel, we need to make certain we've blocked * any further ongoing TX (and RX, that can cause raw TX) * before we do this. */ static void ath_update_chw(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_STATE, "%s: called\n", __func__); ath_set_channel(ic); } #endif /* ATH_ENABLE_11N */ static void ath_set_channel(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); (void) ath_chan_set(sc, ic->ic_curchan); /* * If we are returning to our bss channel then mark state * so the next recv'd beacon's tsf will be used to sync the * beacon timers. Note that since we only hear beacons in * sta/ibss mode this has no effect in other operating modes. 
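 * (Illustrative, not from this diff: the RX path consumes the flag roughly as if (sc->sc_syncbeacon) { ath_beacon_config(sc, vap); sc->sc_syncbeacon = 0; } when the next beacon from our BSS is heard.)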
*/ ATH_LOCK(sc); if (!sc->sc_scanning && ic->ic_curchan == ic->ic_bsschan) sc->sc_syncbeacon = 1; ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } /* * Walk the vap list and check if there are any vaps in RUN state. */ static int ath_isanyrunningvaps(struct ieee80211vap *this) { struct ieee80211com *ic = this->iv_ic; struct ieee80211vap *vap; IEEE80211_LOCK_ASSERT(ic); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { if (vap != this && vap->iv_state >= IEEE80211_S_RUN) return 1; } return 0; } static int ath_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp = ATH_VAP(vap); struct ath_hal *ah = sc->sc_ah; struct ieee80211_node *ni = NULL; int i, error, stamode; u_int32_t rfilt; int csa_run_transition = 0; enum ieee80211_state ostate = vap->iv_state; static const HAL_LED_STATE leds[] = { HAL_LED_INIT, /* IEEE80211_S_INIT */ HAL_LED_SCAN, /* IEEE80211_S_SCAN */ HAL_LED_AUTH, /* IEEE80211_S_AUTH */ HAL_LED_ASSOC, /* IEEE80211_S_ASSOC */ HAL_LED_RUN, /* IEEE80211_S_CAC */ HAL_LED_RUN, /* IEEE80211_S_RUN */ HAL_LED_RUN, /* IEEE80211_S_CSA */ HAL_LED_RUN, /* IEEE80211_S_SLEEP */ }; DPRINTF(sc, ATH_DEBUG_STATE, "%s: %s -> %s\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); /* * net80211 _should_ have the comlock asserted at this point. * There are some comments around the calls to vap->iv_newstate * which indicate that it (newstate) may end up dropping the * lock. This and the subsequent lock assert check after newstate * are an attempt to catch these and figure out how/why. */ IEEE80211_LOCK_ASSERT(ic); /* Before we touch the hardware - wake it up */ ATH_LOCK(sc); /* * If the NIC is in anything other than SLEEP state, * we need to ensure that self-generated frames are * set for PWRMGT=0. Otherwise we may end up with * strange situations. * * XXX TODO: is this actually the case? :-) */ if (nstate != IEEE80211_S_SLEEP) ath_power_setselfgen(sc, HAL_PM_AWAKE); /* * Now, wake the thing up. */ ath_power_set_power_state(sc, HAL_PM_AWAKE); /* * And stop the calibration callout whilst we have * ATH_LOCK held. */ callout_stop(&sc->sc_cal_ch); ATH_UNLOCK(sc); if (ostate == IEEE80211_S_CSA && nstate == IEEE80211_S_RUN) csa_run_transition = 1; ath_hal_setledstate(ah, leds[nstate]); /* set LED */ if (nstate == IEEE80211_S_SCAN) { /* * Scanning: turn off beacon miss and don't beacon. * Mark beacon state so when we reach RUN state we'll * [re]setup beacons. Unblock the task q thread so * deferred interrupt processing is done. */ /* Ensure we stay awake during scan */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); ATH_UNLOCK(sc); ath_hal_intrset(ah, sc->sc_imask &~ (HAL_INT_SWBA | HAL_INT_BMISS)); sc->sc_imask &= ~(HAL_INT_SWBA | HAL_INT_BMISS); sc->sc_beacons = 0; taskqueue_unblock(sc->sc_tq); } ni = ieee80211_ref_node(vap->iv_bss); rfilt = ath_calcrxfilter(sc); stamode = (vap->iv_opmode == IEEE80211_M_STA || vap->iv_opmode == IEEE80211_M_AHDEMO || vap->iv_opmode == IEEE80211_M_IBSS); /* * XXX Don't need to do this (and others) if we've transitioned * from SLEEP->RUN.
*/ if (stamode && nstate == IEEE80211_S_RUN) { sc->sc_curaid = ni->ni_associd; IEEE80211_ADDR_COPY(sc->sc_curbssid, ni->ni_bssid); ath_hal_setassocid(ah, sc->sc_curbssid, sc->sc_curaid); } DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0x%x\n", __func__, rfilt, ether_sprintf(sc->sc_curbssid), sc->sc_curaid); ath_hal_setrxfilter(ah, rfilt); /* XXX is this to restore keycache on resume? */ if (vap->iv_opmode != IEEE80211_M_STA && (vap->iv_flags & IEEE80211_F_PRIVACY)) { for (i = 0; i < IEEE80211_WEP_NKID; i++) if (ath_hal_keyisvalid(ah, i)) ath_hal_keysetmac(ah, i, ni->ni_bssid); } /* * Invoke the parent method to do net80211 work. */ error = avp->av_newstate(vap, nstate, arg); if (error != 0) goto bad; /* * See above: ensure av_newstate() doesn't drop the lock * on us. */ IEEE80211_LOCK_ASSERT(ic); if (nstate == IEEE80211_S_RUN) { /* NB: collect bss node again, it may have changed */ ieee80211_free_node(ni); ni = ieee80211_ref_node(vap->iv_bss); DPRINTF(sc, ATH_DEBUG_STATE, "%s(RUN): iv_flags 0x%08x bintvl %d bssid %s " "capinfo 0x%04x chan %d\n", __func__, vap->iv_flags, ni->ni_intval, ether_sprintf(ni->ni_bssid), ni->ni_capinfo, ieee80211_chan2ieee(ic, ic->ic_curchan)); switch (vap->iv_opmode) { #ifdef IEEE80211_SUPPORT_TDMA case IEEE80211_M_AHDEMO: if ((vap->iv_caps & IEEE80211_C_TDMA) == 0) break; /* fall thru... */ #endif case IEEE80211_M_HOSTAP: case IEEE80211_M_IBSS: case IEEE80211_M_MBSS: /* * Allocate and setup the beacon frame. * * Stop any previous beacon DMA. This may be * necessary, for example, when an ibss merge * causes reconfiguration; there will be a state * transition from RUN->RUN that means we may * be called with beacon transmission active. */ ath_hal_stoptxdma(ah, sc->sc_bhalq); error = ath_beacon_alloc(sc, ni); if (error != 0) goto bad; /* * If joining an adhoc network defer beacon timer * configuration to the next beacon frame so we * have a current TSF to use. Otherwise we're * starting an ibss/bss so there's no need to delay; * if this is the first vap moving to RUN state, then * beacon state needs to be [re]configured. */ if (vap->iv_opmode == IEEE80211_M_IBSS && ni->ni_tstamp.tsf != 0) { sc->sc_syncbeacon = 1; } else if (!sc->sc_beacons) { #ifdef IEEE80211_SUPPORT_TDMA if (vap->iv_caps & IEEE80211_C_TDMA) ath_tdma_config(sc, vap); else #endif ath_beacon_config(sc, vap); sc->sc_beacons = 1; } break; case IEEE80211_M_STA: /* * Defer beacon timer configuration to the next * beacon frame so we have a current TSF to use * (any TSF collected when scanning is likely old). * However if it's due to a CSA -> RUN transition, * force a beacon update so we pick up a lack of * beacons from an AP in CAC and thus force a * scan. * * And, there's also corner cases here where * after a scan, the AP may have disappeared. * In that case, we may not receive an actual * beacon to update the beacon timer and thus we * won't get notified of the missing beacons. */ if (ostate != IEEE80211_S_RUN && ostate != IEEE80211_S_SLEEP) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: STA; syncbeacon=1\n", __func__); sc->sc_syncbeacon = 1; if (csa_run_transition) ath_beacon_config(sc, vap); /* * PR: kern/175227 * * Reconfigure beacons during reset; as otherwise * we won't get the beacon timers reprogrammed * after a reset and thus we won't pick up a * beacon miss interrupt. * * Hopefully we'll see a beacon before the BMISS * timer fires (too often), leading to a STA * disassociation. 
*/ sc->sc_beacons = 1; } break; case IEEE80211_M_MONITOR: /* * Monitor mode vaps have only INIT->RUN and RUN->RUN * transitions so we must re-enable interrupts here to * handle the case of a single monitor mode vap. */ ath_hal_intrset(ah, sc->sc_imask); break; case IEEE80211_M_WDS: break; default: break; } /* * Let the hal process statistics collected during a * scan so it can provide calibrated noise floor data. */ ath_hal_process_noisefloor(ah); /* * Reset rssi stats; maybe not the best place... */ sc->sc_halstats.ns_avgbrssi = ATH_RSSI_DUMMY_MARKER; sc->sc_halstats.ns_avgrssi = ATH_RSSI_DUMMY_MARKER; sc->sc_halstats.ns_avgtxrssi = ATH_RSSI_DUMMY_MARKER; /* * Force awake for RUN mode. */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); - ath_power_setpower(sc, HAL_PM_AWAKE); + ath_power_setpower(sc, HAL_PM_AWAKE, 1); /* * Finally, start any timers and the task q thread * (in case we didn't go through SCAN state). */ if (ath_longcalinterval != 0) { /* start periodic recalibration timer */ callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc); } else { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: calibration disabled\n", __func__); } ATH_UNLOCK(sc); taskqueue_unblock(sc->sc_tq); } else if (nstate == IEEE80211_S_INIT) { /* * If there are no vaps left in RUN state then * shutdown host/driver operation: * o disable interrupts * o disable the task queue thread * o mark beacon processing as stopped */ if (!ath_isanyrunningvaps(vap)) { sc->sc_imask &= ~(HAL_INT_SWBA | HAL_INT_BMISS); /* disable interrupts */ ath_hal_intrset(ah, sc->sc_imask &~ HAL_INT_GLOBAL); taskqueue_block(sc->sc_tq); sc->sc_beacons = 0; } #ifdef IEEE80211_SUPPORT_TDMA ath_hal_setcca(ah, AH_TRUE); #endif } else if (nstate == IEEE80211_S_SLEEP) { /* We're going to sleep, so transition appropriately */ /* For now, only do this if we're a single STA vap */ if (sc->sc_nvaps == 1 && vap->iv_opmode == IEEE80211_M_STA) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: syncbeacon=%d\n", __func__, sc->sc_syncbeacon); ATH_LOCK(sc); /* * Always at least set the self-generated * frame config to set PWRMGT=1. */ ath_power_setselfgen(sc, HAL_PM_NETWORK_SLEEP); /* * If we're not syncing beacons, transition * to NETWORK_SLEEP. * * We stay awake if syncbeacon > 0 in case * we need to listen for some beacons otherwise * our beacon timer config may be wrong. */ if (sc->sc_syncbeacon == 0) { - ath_power_setpower(sc, HAL_PM_NETWORK_SLEEP); + ath_power_setpower(sc, HAL_PM_NETWORK_SLEEP, 1); } ATH_UNLOCK(sc); } } bad: ieee80211_free_node(ni); /* * Restore the power state - either to what it was, or * to network_sleep if it's alright. */ ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return error; } /* * Allocate a key cache slot to the station so we can * setup a mapping from key index to node. The key cache * slot is needed for managing antenna state and for * compression when stations do not use crypto. We do * it uniliaterally here; if crypto is employed this slot * will be reassigned. */ static void ath_setup_stationkey(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ath_softc *sc = vap->iv_ic->ic_softc; ieee80211_keyix keyix, rxkeyix; /* XXX should take a locked ref to vap->iv_bss */ if (!ath_key_alloc(vap, &ni->ni_ucastkey, &keyix, &rxkeyix)) { /* * Key cache is full; we'll fall back to doing * the more expensive lookup in software. Note * this also means no h/w compression. */ /* XXX msg+statistic */ } else { /* XXX locking? 
*/ ni->ni_ucastkey.wk_keyix = keyix; ni->ni_ucastkey.wk_rxkeyix = rxkeyix; /* NB: must mark device key to get called back on delete */ ni->ni_ucastkey.wk_flags |= IEEE80211_KEY_DEVKEY; IEEE80211_ADDR_COPY(ni->ni_ucastkey.wk_macaddr, ni->ni_macaddr); /* NB: this will create a pass-thru key entry */ ath_keyset(sc, vap, &ni->ni_ucastkey, vap->iv_bss); } } /* * Setup driver-specific state for a newly associated node. * Note that we're called also on a re-associate, the isnew * param tells us if this is the first time or not. */ static void ath_newassoc(struct ieee80211_node *ni, int isnew) { struct ath_node *an = ATH_NODE(ni); struct ieee80211vap *vap = ni->ni_vap; struct ath_softc *sc = vap->iv_ic->ic_softc; const struct ieee80211_txparam *tp = ni->ni_txparms; an->an_mcastrix = ath_tx_findrix(sc, tp->mcastrate); an->an_mgmtrix = ath_tx_findrix(sc, tp->mgmtrate); DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: reassoc; isnew=%d, is_powersave=%d\n", __func__, ni->ni_macaddr, ":", isnew, an->an_is_powersave); ATH_NODE_LOCK(an); ath_rate_newassoc(sc, an, isnew); ATH_NODE_UNLOCK(an); if (isnew && (vap->iv_flags & IEEE80211_F_PRIVACY) == 0 && sc->sc_hasclrkey && ni->ni_ucastkey.wk_keyix == IEEE80211_KEYIX_NONE) ath_setup_stationkey(ni); /* * If we're reassociating, make sure that any paused queues * get unpaused. * * Now, we may have frames in the hardware queue for this node. * So if we are reassociating and there are frames in the queue, * we need to go through the cleanup path to ensure that they're * marked as non-aggregate. */ if (! isnew) { DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: reassoc; is_powersave=%d\n", __func__, ni->ni_macaddr, ":", an->an_is_powersave); /* XXX for now, we can't hold the lock across assoc */ ath_tx_node_reassoc(sc, an); /* XXX for now, we can't hold the lock across wakeup */ if (an->an_is_powersave) ath_tx_node_wakeup(sc, an); } } static int ath_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *reg, int nchans, struct ieee80211_channel chans[]) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: rd %u cc %u location %c%s\n", __func__, reg->regdomain, reg->country, reg->location, reg->ecm ? " ecm" : ""); status = ath_hal_set_channels(ah, chans, nchans, reg->country, reg->regdomain); if (status != HAL_OK) { DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: failed, status %u\n", __func__, status); return EINVAL; /* XXX */ } return 0; } static void ath_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: use rd %u cc %d\n", __func__, SKU_DEBUG, CTRY_DEFAULT); /* XXX check return */ (void) ath_hal_getchannels(ah, chans, maxchans, nchans, HAL_MODE_ALL, CTRY_DEFAULT, SKU_DEBUG, AH_TRUE); } static int ath_getchannels(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; /* * Collect channel set based on EEPROM contents. 
*/ status = ath_hal_init_channels(ah, ic->ic_channels, IEEE80211_CHAN_MAX, &ic->ic_nchans, HAL_MODE_ALL, CTRY_DEFAULT, SKU_NONE, AH_TRUE); if (status != HAL_OK) { device_printf(sc->sc_dev, "%s: unable to collect channel list from hal, status %d\n", __func__, status); return EINVAL; } (void) ath_hal_getregdomain(ah, &sc->sc_eerd); ath_hal_getcountrycode(ah, &sc->sc_eecc); /* NB: cannot fail */ /* XXX map Atheros sku's to net80211 SKU's */ /* XXX net80211 types too small */ ic->ic_regdomain.regdomain = (uint16_t) sc->sc_eerd; ic->ic_regdomain.country = (uint16_t) sc->sc_eecc; ic->ic_regdomain.isocc[0] = ' '; /* XXX don't know */ ic->ic_regdomain.isocc[1] = ' '; ic->ic_regdomain.ecm = 1; ic->ic_regdomain.location = 'I'; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: eeprom rd %u cc %u (mapped rd %u cc %u) location %c%s\n", __func__, sc->sc_eerd, sc->sc_eecc, ic->ic_regdomain.regdomain, ic->ic_regdomain.country, ic->ic_regdomain.location, ic->ic_regdomain.ecm ? " ecm" : ""); return 0; } static int ath_rate_setup(struct ath_softc *sc, u_int mode) { struct ath_hal *ah = sc->sc_ah; const HAL_RATE_TABLE *rt; switch (mode) { case IEEE80211_MODE_11A: rt = ath_hal_getratetable(ah, HAL_MODE_11A); break; case IEEE80211_MODE_HALF: rt = ath_hal_getratetable(ah, HAL_MODE_11A_HALF_RATE); break; case IEEE80211_MODE_QUARTER: rt = ath_hal_getratetable(ah, HAL_MODE_11A_QUARTER_RATE); break; case IEEE80211_MODE_11B: rt = ath_hal_getratetable(ah, HAL_MODE_11B); break; case IEEE80211_MODE_11G: rt = ath_hal_getratetable(ah, HAL_MODE_11G); break; case IEEE80211_MODE_TURBO_A: rt = ath_hal_getratetable(ah, HAL_MODE_108A); break; case IEEE80211_MODE_TURBO_G: rt = ath_hal_getratetable(ah, HAL_MODE_108G); break; case IEEE80211_MODE_STURBO_A: rt = ath_hal_getratetable(ah, HAL_MODE_TURBO); break; case IEEE80211_MODE_11NA: rt = ath_hal_getratetable(ah, HAL_MODE_11NA_HT20); break; case IEEE80211_MODE_11NG: rt = ath_hal_getratetable(ah, HAL_MODE_11NG_HT20); break; default: DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid mode %u\n", __func__, mode); return 0; } sc->sc_rates[mode] = rt; return (rt != NULL); } static void ath_setcurmode(struct ath_softc *sc, enum ieee80211_phymode mode) { /* NB: on/off times from the Atheros NDIS driver, w/ permission */ static const struct { u_int rate; /* tx/rx 802.11 rate */ u_int16_t timeOn; /* LED on time (ms) */ u_int16_t timeOff; /* LED off time (ms) */ } blinkrates[] = { { 108, 40, 10 }, { 96, 44, 11 }, { 72, 50, 13 }, { 48, 57, 14 }, { 36, 67, 16 }, { 24, 80, 20 }, { 22, 100, 25 }, { 18, 133, 34 }, { 12, 160, 40 }, { 10, 200, 50 }, { 6, 240, 58 }, { 4, 267, 66 }, { 2, 400, 100 }, { 0, 500, 130 }, /* XXX half/quarter rates */ }; const HAL_RATE_TABLE *rt; int i, j; memset(sc->sc_rixmap, 0xff, sizeof(sc->sc_rixmap)); rt = sc->sc_rates[mode]; KASSERT(rt != NULL, ("no h/w rate set for phy mode %u", mode)); for (i = 0; i < rt->rateCount; i++) { uint8_t ieeerate = rt->info[i].dot11Rate & IEEE80211_RATE_VAL; if (rt->info[i].phy != IEEE80211_T_HT) sc->sc_rixmap[ieeerate] = i; else sc->sc_rixmap[ieeerate | IEEE80211_RATE_MCS] = i; } memset(sc->sc_hwmap, 0, sizeof(sc->sc_hwmap)); for (i = 0; i < nitems(sc->sc_hwmap); i++) { if (i >= rt->rateCount) { sc->sc_hwmap[i].ledon = (500 * hz) / 1000; sc->sc_hwmap[i].ledoff = (130 * hz) / 1000; continue; } sc->sc_hwmap[i].ieeerate = rt->info[i].dot11Rate & IEEE80211_RATE_VAL; if (rt->info[i].phy == IEEE80211_T_HT) sc->sc_hwmap[i].ieeerate |= IEEE80211_RATE_MCS; sc->sc_hwmap[i].txflags = IEEE80211_RADIOTAP_F_DATAPAD; if (rt->info[i].shortPreamble || rt->info[i].phy == 
IEEE80211_T_OFDM) sc->sc_hwmap[i].txflags |= IEEE80211_RADIOTAP_F_SHORTPRE; sc->sc_hwmap[i].rxflags = sc->sc_hwmap[i].txflags; for (j = 0; j < nitems(blinkrates)-1; j++) if (blinkrates[j].rate == sc->sc_hwmap[i].ieeerate) break; /* NB: this uses the last entry if the rate isn't found */ /* XXX beware of overlow */ sc->sc_hwmap[i].ledon = (blinkrates[j].timeOn * hz) / 1000; sc->sc_hwmap[i].ledoff = (blinkrates[j].timeOff * hz) / 1000; } sc->sc_currates = rt; sc->sc_curmode = mode; /* * All protection frames are transmitted at 2Mb/s for * 11g, otherwise at 1Mb/s. */ if (mode == IEEE80211_MODE_11G) sc->sc_protrix = ath_tx_findrix(sc, 2*2); else sc->sc_protrix = ath_tx_findrix(sc, 2*1); /* NB: caller is responsible for resetting rate control state */ } static void ath_watchdog(void *arg) { struct ath_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; int do_reset = 0; ATH_LOCK_ASSERT(sc); if (sc->sc_wd_timer != 0 && --sc->sc_wd_timer == 0) { uint32_t hangs; ath_power_set_power_state(sc, HAL_PM_AWAKE); if (ath_hal_gethangstate(sc->sc_ah, 0xffff, &hangs) && hangs != 0) { device_printf(sc->sc_dev, "%s hang detected (0x%x)\n", hangs & 0xff ? "bb" : "mac", hangs); } else device_printf(sc->sc_dev, "device timeout\n"); do_reset = 1; counter_u64_add(ic->ic_oerrors, 1); sc->sc_stats.ast_watchdog++; ath_power_restore_power_state(sc); } /* * We can't hold the lock across the ath_reset() call. * * And since this routine can't hold a lock and sleep, * do the reset deferred. */ if (do_reset) { taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask); } callout_schedule(&sc->sc_wd_ch, hz); } static void ath_parent(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; int error = EDOOFUS; ATH_LOCK(sc); if (ic->ic_nrunning > 0) { /* * To avoid rescanning another access point, * do not call ath_init() here. Instead, * only reflect promisc mode settings. */ if (sc->sc_running) { ath_power_set_power_state(sc, HAL_PM_AWAKE); ath_mode_init(sc); ath_power_restore_power_state(sc); } else if (!sc->sc_invalid) { /* * Beware of being called during attach/detach * to reset promiscuous mode. In that case we * will still be marked UP but not RUNNING. * However trying to re-init the interface * is the wrong thing to do as we've already * torn down much of our state. There's * probably a better way to deal with this. */ error = ath_init(sc); } } else { ath_stop(sc); if (!sc->sc_invalid) - ath_power_setpower(sc, HAL_PM_FULL_SLEEP); + ath_power_setpower(sc, HAL_PM_FULL_SLEEP, 1); } ATH_UNLOCK(sc); if (error == 0) { #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->start(sc->sc_tx99); else #endif ieee80211_start_all(ic); } } /* * Announce various information on device/driver attach. 
*/ static void ath_announce(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; device_printf(sc->sc_dev, "%s mac %d.%d RF%s phy %d.%d\n", ath_hal_mac_name(ah), ah->ah_macVersion, ah->ah_macRev, ath_hal_rf_name(ah), ah->ah_phyRev >> 4, ah->ah_phyRev & 0xf); device_printf(sc->sc_dev, "2GHz radio: 0x%.4x; 5GHz radio: 0x%.4x\n", ah->ah_analog2GhzRev, ah->ah_analog5GhzRev); if (bootverbose) { int i; for (i = 0; i <= WME_AC_VO; i++) { struct ath_txq *txq = sc->sc_ac2q[i]; device_printf(sc->sc_dev, "Use hw queue %u for %s traffic\n", txq->axq_qnum, ieee80211_wme_acnames[i]); } device_printf(sc->sc_dev, "Use hw queue %u for CAB traffic\n", sc->sc_cabq->axq_qnum); device_printf(sc->sc_dev, "Use hw queue %u for beacons\n", sc->sc_bhalq); } if (ath_rxbuf != ATH_RXBUF) device_printf(sc->sc_dev, "using %u rx buffers\n", ath_rxbuf); if (ath_txbuf != ATH_TXBUF) device_printf(sc->sc_dev, "using %u tx buffers\n", ath_txbuf); if (sc->sc_mcastkey && bootverbose) device_printf(sc->sc_dev, "using multicast key search\n"); } static void ath_dfs_tasklet(void *p, int npending) { struct ath_softc *sc = (struct ath_softc *) p; struct ieee80211com *ic = &sc->sc_ic; /* * If previous processing has found a radar event, * signal this to the net80211 layer to begin DFS * processing. */ if (ath_dfs_process_radar_event(sc, sc->sc_curchan)) { /* DFS event found, initiate channel change */ /* * XXX doesn't currently tell us whether the event * XXX was found in the primary or extension * XXX channel! */ IEEE80211_LOCK(ic); ieee80211_dfs_notify_radar(ic, sc->sc_curchan); IEEE80211_UNLOCK(ic); } } /* * Enable/disable power save. This must be called with * no TX driver locks currently held, so it should only * be called from the RX path (which doesn't hold any * TX driver locks.) */ static void ath_node_powersave(struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ath_node *an = ATH_NODE(ni); struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* XXX and no TXQ locks should be held here */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d\n", __func__, ni->ni_macaddr, ":", !! enable); /* Suspend or resume software queue handling */ if (enable) ath_tx_node_sleep(sc, an); else ath_tx_node_wakeup(sc, an); /* Update net80211 state */ avp->av_node_ps(ni, enable); #else struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* Update net80211 state */ avp->av_node_ps(ni, enable); #endif/* ATH_SW_PSQ */ } /* * Notification from net80211 that the powersave queue state has * changed. * * Since the software queue also may have some frames: * * + if the node software queue has frames and the TID state * is 0, we set the TIM; * + if the node and the stack are both empty, we clear the TIM bit. * + If the stack tries to set the bit, always set it. * + If the stack tries to clear the bit, only clear it if the * software queue in question is also cleared. * * TODO: this is called during node teardown; so let's ensure this * is all correctly handled and that the TIM bit is cleared. * It may be that the node flush is called _AFTER_ the net80211 * stack clears the TIM. * * Here is the racy part. Since it's possible >1 concurrent, * overlapping TXes will appear complete with a TX completion in * another thread, it's possible that the concurrent TIM calls will * clash. We can't hold the node lock here because setting the * TIM grabs the net80211 comlock and this may cause a LOR. 
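 * (Editorial illustration of the race: thread A completes the node's last queued frame and goes to clear the TIM while thread B queues a new frame and goes to set it; if B's av_set_tim() runs before A's, the bit is left clear with traffic still pending.)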
* The solution is either to totally serialise _everything_ at * this point (ie, all TX, completion and any reset/flush go into * one taskqueue) or a new "ath TIM lock" needs to be created that * just wraps the driver state change and this call to avp->av_set_tim(). * * The same race exists in the net80211 power save queue handling * as well. Since multiple transmitting threads may queue frames * into the driver, as well as ps-poll and the driver transmitting * frames (and thus clearing the psq), it's quite possible that * a packet entering the PSQ and a ps-poll being handled will * race, causing the TIM to be cleared and not re-set. */ static int ath_node_set_tim(struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_node *an = ATH_NODE(ni); struct ath_vap *avp = ATH_VAP(ni->ni_vap); int changed = 0; ATH_TX_LOCK(sc); an->an_stack_psq = enable; /* * This will get called for all operating modes, * even if avp->av_set_tim is unset. * It's currently set for hostap/ibss modes; but * the same infrastructure is used for both STA * and AP/IBSS node power save. */ if (avp->av_set_tim == NULL) { ATH_TX_UNLOCK(sc); return (0); } /* * If setting the bit, always set it here. * If clearing the bit, only clear it if the * software queue is also empty. * * If the node has left power save, just clear the TIM * bit regardless of the state of the power save queue. * * XXX TODO: although atomics are used, it's quite possible * that a race will occur between this and setting/clearing * in another thread. TX completion will occur always in * one thread, however setting/clearing the TIM bit can come * from a variety of different process contexts! */ if (enable && an->an_tim_set == 1) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, tim_set=1, ignoring\n", __func__, ni->ni_macaddr, ":", enable); ATH_TX_UNLOCK(sc); } else if (enable) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, enabling TIM\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 1; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else if (an->an_swq_depth == 0) { /* disable */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_swq_depth == 0, disabling\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 0; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else if (! an->an_is_powersave) { /* * disable regardless; the node isn't in powersave now */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_pwrsave=0, disabling\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 0; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else { /* * psq disable, node is currently in powersave, node * software queue isn't empty, so don't clear the TIM bit * for now. */ ATH_TX_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_swq_depth > 0, ignoring\n", __func__, ni->ni_macaddr, ":", enable); changed = 0; } return (changed); #else struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* * Some operating modes don't set av_set_tim(), so don't * update it here. */ if (avp->av_set_tim == NULL) return (0); return (avp->av_set_tim(ni, enable)); #endif /* ATH_SW_PSQ */ } /* * Set or update the TIM from the software queue. * * Check the software queue depth before attempting to do lock * anything; that avoids trying to obtain the lock. Then, * re-check afterwards to ensure nothing has changed in the * meantime. 
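 * (Illustrative shape of that pattern: do a cheap unlocked peek — if (an->an_swq_depth != 0) return; — then, with the TX lock held, re-test an_swq_depth == 0 before calling av_set_tim(ni, 0); the real logic follows below.)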
* * set: This is designed to be called from the TX path, after * a frame has been queued; to see if the swq > 0. * * clear: This is designed to be called from the buffer completion point * (right now it's ath_tx_default_comp()) where the state of * a software queue has changed. * * It makes sense to place it at buffer free / completion rather * than after each software queue operation, as there's no real * point in churning the TIM bit as the last frames in the software * queue are transmitted. If they fail and we retry them, we'd * just be setting the TIM bit again anyway. */ void ath_tx_update_tim(struct ath_softc *sc, struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ath_node *an; struct ath_vap *avp; /* Don't do this for broadcast/etc frames */ if (ni == NULL) return; an = ATH_NODE(ni); avp = ATH_VAP(ni->ni_vap); /* * And for operating modes without the TIM handler set, let's * just skip those. */ if (avp->av_set_tim == NULL) return; ATH_TX_LOCK_ASSERT(sc); if (enable) { if (an->an_is_powersave && an->an_tim_set == 0 && an->an_swq_depth != 0) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: swq_depth>0, tim_set=0, set!\n", __func__, ni->ni_macaddr, ":"); an->an_tim_set = 1; (void) avp->av_set_tim(ni, 1); } } else { /* * Don't bother grabbing the lock unless the queue is empty. */ if (an->an_swq_depth != 0) return; if (an->an_is_powersave && an->an_stack_psq == 0 && an->an_tim_set == 1 && an->an_swq_depth == 0) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: swq_depth=0, tim_set=1, psq_set=0," " clear!\n", __func__, ni->ni_macaddr, ":"); an->an_tim_set = 0; (void) avp->av_set_tim(ni, 0); } } #else return; #endif /* ATH_SW_PSQ */ } /* * Received a ps-poll frame from net80211. * * Here we get a chance to serve out a software-queued frame ourselves * before we punt it to net80211 to transmit us one itself - either * because there's traffic in the net80211 psq, or a NULL frame to * indicate there's nothing else. */ static void ath_node_recv_pspoll(struct ieee80211_node *ni, struct mbuf *m) { #ifdef ATH_SW_PSQ struct ath_node *an; struct ath_vap *avp; struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; int tid; /* Just paranoia */ if (ni == NULL) return; /* * Unassociated (temporary node) station. */ if (ni->ni_associd == 0) return; /* * We do have an active node, so let's begin looking into it. */ an = ATH_NODE(ni); avp = ATH_VAP(ni->ni_vap); /* * For now, we just call the original ps-poll method. * Once we're ready to flip this on: * * + Set leak to 1, as no matter what we're going to have * to send a frame; * + Check the software queue and if there's something in it, * schedule the highest TID that has traffic from this node. * Then make sure we schedule the software scheduler to * run so it picks up said frame. * * That way whatever happens, we'll at least send _a_ frame * to the given node. * * Again, yes, it's crappy QoS if the node has multiple * TIDs worth of traffic - but let's get it working first * before we optimise it. * * Also yes, there's definitely latency here - we're not * direct dispatching to the hardware in this path (and * we're likely being called from the packet receive path, * so going back into TX may be a little hairy!) but again * I'd like to get this working first before optimising * turn-around time. */ ATH_TX_LOCK(sc); /* * Legacy - we're called and the node isn't asleep. * Immediately punt. */ if (!
an->an_is_powersave) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: not in powersave?\n", __func__, ni->ni_macaddr, ":"); ATH_TX_UNLOCK(sc); avp->av_recv_pspoll(ni, m); return; } /* * We're in powersave. * * Leak a frame. */ an->an_leak_count = 1; /* * Now, if there's no frames in the node, just punt to * recv_pspoll. * * Don't bother checking if the TIM bit is set, we really * only care if there are any frames here! */ if (an->an_swq_depth == 0) { ATH_TX_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: SWQ empty; punting to net80211\n", __func__, ni->ni_macaddr, ":"); avp->av_recv_pspoll(ni, m); return; } /* * Ok, let's schedule the highest TID that has traffic * and then schedule something. */ for (tid = IEEE80211_TID_SIZE - 1; tid >= 0; tid--) { struct ath_tid *atid = &an->an_tid[tid]; /* * No frames? Skip. */ if (atid->axq_depth == 0) continue; ath_tx_tid_sched(sc, atid); /* * XXX we could do a direct call to the TXQ * scheduler code here to optimise latency * at the expense of a REALLY deep callstack. */ ATH_TX_UNLOCK(sc); taskqueue_enqueue(sc->sc_tq, &sc->sc_txqtask); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: leaking frame to TID %d\n", __func__, ni->ni_macaddr, ":", tid); return; } ATH_TX_UNLOCK(sc); /* * XXX nothing in the TIDs at this point? Eek. */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: TIDs empty, but ath_node showed traffic?!\n", __func__, ni->ni_macaddr, ":"); avp->av_recv_pspoll(ni, m); #else avp->av_recv_pspoll(ni, m); #endif /* ATH_SW_PSQ */ } MODULE_VERSION(if_ath, 1); MODULE_DEPEND(if_ath, wlan, 1, 1, 1); /* 802.11 media layer */ #if defined(IEEE80211_ALQ) || defined(AH_DEBUG_ALQ) || defined(ATH_DEBUG_ALQ) MODULE_DEPEND(if_ath, alq, 1, 1, 1); #endif Index: projects/clang391-import/sys/dev/ath/if_ath_beacon.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_beacon.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_beacon.c (revision 309263) @@ -1,1188 +1,1205 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. 
*/ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #include #ifdef INET #include #include #endif #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif /* * Setup a h/w transmit queue for beacons. */ int ath_beaconq_setup(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; HAL_TXQ_INFO qi; memset(&qi, 0, sizeof(qi)); qi.tqi_aifs = HAL_TXQ_USEDEFAULT; qi.tqi_cwmin = HAL_TXQ_USEDEFAULT; qi.tqi_cwmax = HAL_TXQ_USEDEFAULT; /* NB: for dynamic turbo, don't enable any other interrupts */ qi.tqi_qflags = HAL_TXQ_TXDESCINT_ENABLE; if (sc->sc_isedma) qi.tqi_qflags |= HAL_TXQ_TXOKINT_ENABLE | HAL_TXQ_TXERRINT_ENABLE; return ath_hal_setuptxqueue(ah, HAL_TX_QUEUE_BEACON, &qi); } /* * Setup the transmit queue parameters for the beacon queue. */ int ath_beaconq_config(struct ath_softc *sc) { #define ATH_EXPONENT_TO_VALUE(v) ((1<<(v))-1) struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_TXQ_INFO qi; ath_hal_gettxqueueprops(ah, sc->sc_bhalq, &qi); if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) { /* * Always burst out beacon and CAB traffic. */ qi.tqi_aifs = ATH_BEACON_AIFS_DEFAULT; qi.tqi_cwmin = ATH_BEACON_CWMIN_DEFAULT; qi.tqi_cwmax = ATH_BEACON_CWMAX_DEFAULT; } else { struct wmeParams *wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[WME_AC_BE]; /* * Adhoc mode; important thing is to use 2x cwmin. */ qi.tqi_aifs = wmep->wmep_aifsn; qi.tqi_cwmin = 2*ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); qi.tqi_cwmax = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); } if (!ath_hal_settxqueueprops(ah, sc->sc_bhalq, &qi)) { device_printf(sc->sc_dev, "unable to update parameters for " "beacon hardware queue!\n"); return 0; } else { ath_hal_resettxqueue(ah, sc->sc_bhalq); /* push to h/w */ return 1; } #undef ATH_EXPONENT_TO_VALUE } /* * Allocate and setup an initial beacon frame. */ int ath_beacon_alloc(struct ath_softc *sc, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ath_vap *avp = ATH_VAP(vap); struct ath_buf *bf; struct mbuf *m; int error; bf = avp->av_bcbuf; DPRINTF(sc, ATH_DEBUG_NODE, "%s: bf_m=%p, bf_node=%p\n", __func__, bf->bf_m, bf->bf_node); if (bf->bf_m != NULL) { bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); m_freem(bf->bf_m); bf->bf_m = NULL; } if (bf->bf_node != NULL) { ieee80211_free_node(bf->bf_node); bf->bf_node = NULL; } /* * NB: the beacon data buffer must be 32-bit aligned; * we assume the mbuf routines will return us something * with this alignment (perhaps should assert). 
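 * (Editorial sketch of such an assert, not present in the code: KASSERT((mtod(m, uintptr_t) & 3) == 0, ("beacon frame not 32-bit aligned"));)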
*/ m = ieee80211_beacon_alloc(ni); if (m == NULL) { device_printf(sc->sc_dev, "%s: cannot get mbuf\n", __func__); sc->sc_stats.ast_be_nombuf++; return ENOMEM; } error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "%s: cannot map mbuf, bus_dmamap_load_mbuf_sg returns %d\n", __func__, error); m_freem(m); return error; } /* * Calculate a TSF adjustment factor required for staggered * beacons. Note that we assume the format of the beacon * frame leaves the tstamp field immediately following the * header. */ if (sc->sc_stagbeacons && avp->av_bslot > 0) { uint64_t tsfadjust; struct ieee80211_frame *wh; /* * The beacon interval is in TU's; the TSF is in usecs. * We figure out how many TU's to add to align the timestamp * then convert to TSF units and handle byte swapping before * inserting it in the frame. The hardware will then add this * each time a beacon frame is sent. Note that we align vaps * 1..N and leave vap 0 untouched. This means vap 0 has a * timestamp in one beacon interval while the others get a * timestamp aligned to the next interval. */ tsfadjust = ni->ni_intval * (ATH_BCBUF - avp->av_bslot) / ATH_BCBUF; tsfadjust = htole64(tsfadjust << 10); /* TU -> TSF */ DPRINTF(sc, ATH_DEBUG_BEACON, "%s: %s beacons bslot %d intval %u tsfadjust %llu\n", __func__, sc->sc_stagbeacons ? "stagger" : "burst", avp->av_bslot, ni->ni_intval, (long long unsigned) le64toh(tsfadjust)); wh = mtod(m, struct ieee80211_frame *); memcpy(&wh[1], &tsfadjust, sizeof(tsfadjust)); } bf->bf_m = m; bf->bf_node = ieee80211_ref_node(ni); return 0; } /* * Setup the beacon frame for transmit. */ static void ath_beacon_setup(struct ath_softc *sc, struct ath_buf *bf) { #define USE_SHPREAMBLE(_ic) \ (((_ic)->ic_flags & (IEEE80211_F_SHPREAMBLE | IEEE80211_F_USEBARKER))\ == IEEE80211_F_SHPREAMBLE) struct ieee80211_node *ni = bf->bf_node; struct ieee80211com *ic = ni->ni_ic; struct mbuf *m = bf->bf_m; struct ath_hal *ah = sc->sc_ah; struct ath_desc *ds; int flags, antenna; const HAL_RATE_TABLE *rt; u_int8_t rix, rate; HAL_DMA_ADDR bufAddrList[4]; uint32_t segLenList[4]; HAL_11N_RATE_SERIES rc[4]; DPRINTF(sc, ATH_DEBUG_BEACON_PROC, "%s: m %p len %u\n", __func__, m, m->m_len); /* setup descriptors */ ds = bf->bf_desc; bf->bf_last = bf; bf->bf_lastds = ds; flags = HAL_TXDESC_NOACK; if (ic->ic_opmode == IEEE80211_M_IBSS && sc->sc_hasveol) { /* self-linked descriptor */ ath_hal_settxdesclink(sc->sc_ah, ds, bf->bf_daddr); flags |= HAL_TXDESC_VEOL; /* * Let hardware handle antenna switching. */ antenna = sc->sc_txantenna; } else { ath_hal_settxdesclink(sc->sc_ah, ds, 0); /* * Switch antenna every 4 beacons. * XXX assumes two antennas */ if (sc->sc_txantenna != 0) antenna = sc->sc_txantenna; else if (sc->sc_stagbeacons && sc->sc_nbcnvaps != 0) antenna = ((sc->sc_stats.ast_be_xmit / sc->sc_nbcnvaps) & 4 ? 2 : 1); else antenna = (sc->sc_stats.ast_be_xmit & 4 ? 2 : 1); } KASSERT(bf->bf_nseg == 1, ("multi-segment beacon frame; nseg %u", bf->bf_nseg)); /* * Calculate rate code.
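 * (Editorial note: rix 0 selects the first, lowest entry of the current rate table — e.g. 1Mb/s CCK for 11b or 6Mb/s OFDM for 11a; the exact rate is table-dependent.)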
* XXX everything at min xmit rate */ rix = 0; rt = sc->sc_currates; rate = rt->info[rix].rateCode; if (USE_SHPREAMBLE(ic)) rate |= rt->info[rix].shortPreamble; ath_hal_setuptxdesc(ah, ds , m->m_len + IEEE80211_CRC_LEN /* frame length */ , sizeof(struct ieee80211_frame)/* header length */ , HAL_PKT_TYPE_BEACON /* Atheros packet type */ , ieee80211_get_node_txpower(ni) /* txpower XXX */ , rate, 1 /* series 0 rate/tries */ , HAL_TXKEYIX_INVALID /* no encryption */ , antenna /* antenna mode */ , flags /* no ack, veol for beacons */ , 0 /* rts/cts rate */ , 0 /* rts/cts duration */ ); /* * The EDMA HAL currently assumes that _all_ rate control * settings are done in ath_hal_set11nratescenario(), rather * than in ath_hal_setuptxdesc(). */ if (sc->sc_isedma) { memset(&rc, 0, sizeof(rc)); rc[0].ChSel = sc->sc_txchainmask; rc[0].Tries = 1; rc[0].Rate = rt->info[rix].rateCode; rc[0].RateIndex = rix; rc[0].tx_power_cap = 0x3f; rc[0].PktDuration = ath_hal_computetxtime(ah, rt, roundup(m->m_len, 4), rix, 0, AH_TRUE); ath_hal_set11nratescenario(ah, ds, 0, 0, rc, 4, flags); } /* NB: beacon's BufLen must be a multiple of 4 bytes */ segLenList[0] = roundup(m->m_len, 4); segLenList[1] = segLenList[2] = segLenList[3] = 0; bufAddrList[0] = bf->bf_segs[0].ds_addr; bufAddrList[1] = bufAddrList[2] = bufAddrList[3] = 0; ath_hal_filltxdesc(ah, ds , bufAddrList , segLenList , 0 /* XXX desc id */ , sc->sc_bhalq /* hardware TXQ */ , AH_TRUE /* first segment */ , AH_TRUE /* last segment */ , ds /* first descriptor */ ); #if 0 ath_desc_swap(ds); #endif #undef USE_SHPREAMBLE } void ath_beacon_update(struct ieee80211vap *vap, int item) { struct ieee80211_beacon_offsets *bo = &vap->iv_bcn_off; setbit(bo->bo_flags, item); } /* * Handle a beacon miss. */ void ath_beacon_miss(struct ath_softc *sc) { HAL_SURVEY_SAMPLE hs; HAL_BOOL ret; uint32_t hangs; bzero(&hs, sizeof(hs)); ret = ath_hal_get_mib_cycle_counts(sc->sc_ah, &hs); if (ath_hal_gethangstate(sc->sc_ah, 0xffff, &hangs) && hangs != 0) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: hang=0x%08x\n", __func__, hangs); } #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_MISSED_BEACON)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_MISSED_BEACON, 0, NULL); #endif DPRINTF(sc, ATH_DEBUG_BEACON, "%s: valid=%d, txbusy=%u, rxbusy=%u, chanbusy=%u, " "extchanbusy=%u, cyclecount=%u\n", __func__, ret, hs.tx_busy, hs.rx_busy, hs.chan_busy, hs.ext_chan_busy, hs.cycle_count); } /* * Transmit a beacon frame at SWBA. Dynamic updates to the * frame contents are done as needed and the slot time is * also adjusted based on current state. */ void ath_beacon_proc(void *arg, int pending) { struct ath_softc *sc = arg; struct ath_hal *ah = sc->sc_ah; struct ieee80211vap *vap; struct ath_buf *bf; int slot, otherant; uint32_t bfaddr; DPRINTF(sc, ATH_DEBUG_BEACON_PROC, "%s: pending %u\n", __func__, pending); /* * Check if the previous beacon has gone out. If * not don't try to post another, skip this period * and wait for the next. Missed beacons indicate * a problem and should not occur. If we miss too * many consecutive beacons reset the device. 
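 * (Concretely, in the code below: sc_bmisscount counts consecutive SWBA periods that still had TX pending, and once it reaches ath_bstuck_threshold the bstuck task is queued to reset the chip.)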
*/ if (ath_hal_numtxpending(ah, sc->sc_bhalq) != 0) { sc->sc_bmisscount++; sc->sc_stats.ast_be_missed++; ath_beacon_miss(sc); DPRINTF(sc, ATH_DEBUG_BEACON, "%s: missed %u consecutive beacons\n", __func__, sc->sc_bmisscount); if (sc->sc_bmisscount >= ath_bstuck_threshold) taskqueue_enqueue(sc->sc_tq, &sc->sc_bstucktask); return; } if (sc->sc_bmisscount != 0) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: resume beacon xmit after %u misses\n", __func__, sc->sc_bmisscount); sc->sc_bmisscount = 0; #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_RESUME_BEACON)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_RESUME_BEACON, 0, NULL); #endif } if (sc->sc_stagbeacons) { /* staggered beacons */ struct ieee80211com *ic = &sc->sc_ic; uint32_t tsftu; tsftu = ath_hal_gettsf32(ah) >> 10; /* XXX lintval */ slot = ((tsftu % ic->ic_lintval) * ATH_BCBUF) / ic->ic_lintval; vap = sc->sc_bslot[(slot+1) % ATH_BCBUF]; bfaddr = 0; if (vap != NULL && vap->iv_state >= IEEE80211_S_RUN) { bf = ath_beacon_generate(sc, vap); if (bf != NULL) bfaddr = bf->bf_daddr; } } else { /* burst'd beacons */ uint32_t *bflink = &bfaddr; for (slot = 0; slot < ATH_BCBUF; slot++) { vap = sc->sc_bslot[slot]; if (vap != NULL && vap->iv_state >= IEEE80211_S_RUN) { bf = ath_beacon_generate(sc, vap); /* * XXX TODO: this should use settxdesclinkptr() * otherwise it won't work for EDMA chipsets! */ if (bf != NULL) { /* XXX should do this using the ds */ *bflink = bf->bf_daddr; ath_hal_gettxdesclinkptr(sc->sc_ah, bf->bf_desc, &bflink); } } } /* * XXX TODO: this should use settxdesclinkptr() * otherwise it won't work for EDMA chipsets! */ *bflink = 0; /* terminate list */ } /* * Handle slot time change when a non-ERP station joins/leaves * an 11g network. The 802.11 layer notifies us via callback, * we mark updateslot, then wait one beacon before effecting * the change. This gives associated stations at least one * beacon interval to note the state change. */ /* XXX locking */ if (sc->sc_updateslot == UPDATE) { sc->sc_updateslot = COMMIT; /* commit next beacon */ sc->sc_slotupdate = slot; } else if (sc->sc_updateslot == COMMIT && sc->sc_slotupdate == slot) ath_setslottime(sc); /* commit change to h/w */ /* * Check recent per-antenna transmit statistics and flip * the default antenna if noticeably more frames went out * on the non-default antenna. * XXX assumes 2 antennae */ if (!sc->sc_diversity && (!sc->sc_stagbeacons || slot == 0)) { otherant = sc->sc_defant & 1 ? 2 : 1; if (sc->sc_ant_tx[otherant] > sc->sc_ant_tx[sc->sc_defant] + 2) ath_setdefantenna(sc, otherant); sc->sc_ant_tx[1] = sc->sc_ant_tx[2] = 0; } /* Program the CABQ with the contents of the CABQ txq and start it */ ATH_TXQ_LOCK(sc->sc_cabq); ath_beacon_cabq_start(sc); ATH_TXQ_UNLOCK(sc->sc_cabq); /* Program the new beacon frame if we have one for this interval */ if (bfaddr != 0) { /* * Stop any current dma and put the new frame on the queue. * This should never fail since we check above that no frames * are still pending on the queue. */ if (!
sc->sc_isedma) { if (!ath_hal_stoptxdma(ah, sc->sc_bhalq)) { DPRINTF(sc, ATH_DEBUG_ANY, "%s: beacon queue %u did not stop?\n", __func__, sc->sc_bhalq); } } /* NB: cabq traffic should already be queued and primed */ ath_hal_puttxbuf(ah, sc->sc_bhalq, bfaddr); ath_hal_txstart(ah, sc->sc_bhalq); sc->sc_stats.ast_be_xmit++; } } static void ath_beacon_cabq_start_edma(struct ath_softc *sc) { struct ath_buf *bf, *bf_last; struct ath_txq *cabq = sc->sc_cabq; #if 0 struct ath_buf *bfi; int i = 0; #endif ATH_TXQ_LOCK_ASSERT(cabq); if (TAILQ_EMPTY(&cabq->axq_q)) return; bf = TAILQ_FIRST(&cabq->axq_q); bf_last = TAILQ_LAST(&cabq->axq_q, axq_q_s); /* * This is a dirty, dirty hack to push the contents of * the cabq staging queue into the FIFO. * * This ideally should live in the EDMA code file * and only push things into the CABQ if there's a FIFO * slot. * * We can't treat this like a normal TX queue because * in the case of multi-VAP traffic, we may have to flush * the CABQ each new (staggered) beacon that goes out. * But for non-staggered beacons, we could in theory * handle multicast traffic for all VAPs in one FIFO * push. Just keep all of this in mind if you're wondering * how to correctly/better handle multi-VAP CABQ traffic * with EDMA. */ /* * Is the CABQ FIFO free? If not, complain loudly and * don't queue anything. Maybe we'll flush the CABQ * traffic, maybe we won't. But that'll happen next * beacon interval. */ if (cabq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) { device_printf(sc->sc_dev, "%s: Q%d: CAB FIFO queue=%d?\n", __func__, cabq->axq_qnum, cabq->axq_fifo_depth); return; } /* * Ok, so here's the gymnastics required to make this * all sensible. */ /* * Tag the first/last buffer appropriately. */ bf->bf_flags |= ATH_BUF_FIFOPTR; bf_last->bf_flags |= ATH_BUF_FIFOEND; #if 0 i = 0; TAILQ_FOREACH(bfi, &cabq->axq_q, bf_list) { ath_printtxbuf(sc, bf, cabq->axq_qnum, i, 0); i++; } #endif /* * We now need to push this set of frames onto the tail * of the FIFO queue. We don't adjust the aggregate * count, only the queue depth counter(s). * We also need to blank the link pointer now. */ TAILQ_CONCAT(&cabq->fifo.axq_q, &cabq->axq_q, bf_list); cabq->axq_link = NULL; cabq->fifo.axq_depth += cabq->axq_depth; cabq->axq_depth = 0; /* Bump FIFO queue */ cabq->axq_fifo_depth++; /* Push the first entry into the hardware */ ath_hal_puttxbuf(sc->sc_ah, cabq->axq_qnum, bf->bf_daddr); cabq->axq_flags |= ATH_TXQ_PUTRUNNING; /* NB: gated by beacon so safe to start here */ ath_hal_txstart(sc->sc_ah, cabq->axq_qnum); } static void ath_beacon_cabq_start_legacy(struct ath_softc *sc) { struct ath_buf *bf; struct ath_txq *cabq = sc->sc_cabq; ATH_TXQ_LOCK_ASSERT(cabq); if (TAILQ_EMPTY(&cabq->axq_q)) return; bf = TAILQ_FIRST(&cabq->axq_q); /* Push the first entry into the hardware */ ath_hal_puttxbuf(sc->sc_ah, cabq->axq_qnum, bf->bf_daddr); cabq->axq_flags |= ATH_TXQ_PUTRUNNING; /* NB: gated by beacon so safe to start here */ ath_hal_txstart(sc->sc_ah, cabq->axq_qnum); } /* * Start CABQ transmission - this assumes that all frames are prepped * and ready in the CABQ.
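 * (The EDMA and legacy starters above differ only in FIFO bookkeeping; both end by handing the head buffer to the hardware with ath_hal_puttxbuf() and kicking ath_hal_txstart() on the CAB queue.)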
*/ void ath_beacon_cabq_start(struct ath_softc *sc) { struct ath_txq *cabq = sc->sc_cabq; ATH_TXQ_LOCK_ASSERT(cabq); if (TAILQ_EMPTY(&cabq->axq_q)) return; if (sc->sc_isedma) ath_beacon_cabq_start_edma(sc); else ath_beacon_cabq_start_legacy(sc); } struct ath_buf * ath_beacon_generate(struct ath_softc *sc, struct ieee80211vap *vap) { struct ath_vap *avp = ATH_VAP(vap); struct ath_txq *cabq = sc->sc_cabq; struct ath_buf *bf; struct mbuf *m; int nmcastq, error; KASSERT(vap->iv_state >= IEEE80211_S_RUN, ("not running, state %d", vap->iv_state)); KASSERT(avp->av_bcbuf != NULL, ("no beacon buffer")); /* * Update dynamic beacon contents. If this returns * non-zero then we need to remap the memory because * the beacon frame changed size (probably because * of the TIM bitmap). */ bf = avp->av_bcbuf; m = bf->bf_m; /* XXX lock mcastq? */ nmcastq = avp->av_mcastq.axq_depth; if (ieee80211_beacon_update(bf->bf_node, m, nmcastq)) { /* XXX too conservative? */ bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { if_printf(vap->iv_ifp, "%s: bus_dmamap_load_mbuf_sg failed, error %u\n", __func__, error); return NULL; } } if ((vap->iv_bcn_off.bo_tim[4] & 1) && cabq->axq_depth) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: cabq did not drain, mcastq %u cabq %u\n", __func__, nmcastq, cabq->axq_depth); sc->sc_stats.ast_cabq_busy++; if (sc->sc_nvaps > 1 && sc->sc_stagbeacons) { /* * CABQ traffic from a previous vap is still pending. * We must drain the q before this beacon frame goes * out as otherwise this vap's stations will get cab * frames from a different vap. * XXX could be slow causing us to miss DBA */ /* * XXX TODO: this doesn't stop CABQ DMA - it assumes * that since we're about to transmit a beacon, we've * already stopped transmitting on the CABQ. But this * doesn't at all mean that the CABQ DMA QCU will * accept a new TXDP! So what, should we do a DMA * stop? What if it fails? * * More thought is required here. */ /* * XXX can we even stop TX DMA here? Check what the * reference driver does for cabq for beacons, given * that stopping TX requires RX is paused. */ ath_tx_draintxq(sc, cabq); } } ath_beacon_setup(sc, bf); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE); /* * Enable the CAB queue before the beacon queue to * insure cab frames are triggered by this beacon. */ if (vap->iv_bcn_off.bo_tim[4] & 1) { /* NB: only at DTIM */ ATH_TXQ_LOCK(&avp->av_mcastq); if (nmcastq) { struct ath_buf *bfm, *bfc_last; /* * Move frames from the s/w mcast q to the h/w cab q. * * XXX TODO: if we chain together multiple VAPs * worth of CABQ traffic, should we keep the * MORE data bit set on the last frame of each * intermediary VAP (ie, only clear the MORE * bit of the last frame on the last vap?) */ bfm = TAILQ_FIRST(&avp->av_mcastq.axq_q); ATH_TXQ_LOCK(cabq); /* * If there's already a frame on the CABQ, we * need to link to the end of the last frame. * We can't use axq_link here because * EDMA descriptors require some recalculation * (checksum) to occur. */ bfc_last = ATH_TXQ_LAST(cabq, axq_q_s); if (bfc_last != NULL) { ath_hal_settxdesclink(sc->sc_ah, bfc_last->bf_lastds, bfm->bf_daddr); } ath_txqmove(cabq, &avp->av_mcastq); ATH_TXQ_UNLOCK(cabq); /* * XXX not entirely accurate, in case a mcast * queue frame arrived before we grabbed the TX * lock. 
*/ sc->sc_stats.ast_cabq_xmit += nmcastq; } ATH_TXQ_UNLOCK(&avp->av_mcastq); } return bf; } void ath_beacon_start_adhoc(struct ath_softc *sc, struct ieee80211vap *vap) { struct ath_vap *avp = ATH_VAP(vap); struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; struct mbuf *m; int error; KASSERT(avp->av_bcbuf != NULL, ("no beacon buffer")); /* * Update dynamic beacon contents. If this returns * non-zero then we need to remap the memory because * the beacon frame changed size (probably because * of the TIM bitmap). */ bf = avp->av_bcbuf; m = bf->bf_m; if (ieee80211_beacon_update(bf->bf_node, m, 0)) { /* XXX too conservative? */ bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { if_printf(vap->iv_ifp, "%s: bus_dmamap_load_mbuf_sg failed, error %u\n", __func__, error); return; } } ath_beacon_setup(sc, bf); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE); /* NB: caller is known to have already stopped tx dma */ ath_hal_puttxbuf(ah, sc->sc_bhalq, bf->bf_daddr); ath_hal_txstart(ah, sc->sc_bhalq); } /* * Reclaim beacon resources and return buffer to the pool. */ void ath_beacon_return(struct ath_softc *sc, struct ath_buf *bf) { DPRINTF(sc, ATH_DEBUG_NODE, "%s: free bf=%p, bf_m=%p, bf_node=%p\n", __func__, bf, bf->bf_m, bf->bf_node); if (bf->bf_m != NULL) { bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); m_freem(bf->bf_m); bf->bf_m = NULL; } if (bf->bf_node != NULL) { ieee80211_free_node(bf->bf_node); bf->bf_node = NULL; } TAILQ_INSERT_TAIL(&sc->sc_bbuf, bf, bf_list); } /* * Reclaim beacon resources. */ void ath_beacon_free(struct ath_softc *sc) { struct ath_buf *bf; TAILQ_FOREACH(bf, &sc->sc_bbuf, bf_list) { DPRINTF(sc, ATH_DEBUG_NODE, "%s: free bf=%p, bf_m=%p, bf_node=%p\n", __func__, bf, bf->bf_m, bf->bf_node); if (bf->bf_m != NULL) { bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); m_freem(bf->bf_m); bf->bf_m = NULL; } if (bf->bf_node != NULL) { ieee80211_free_node(bf->bf_node); bf->bf_node = NULL; } } } /* * Configure the beacon and sleep timers. * * When operating as an AP this resets the TSF and sets * up the hardware to notify us when we need to issue beacons. * * When operating in station mode this sets up the beacon * timers according to the timestamp of the last received * beacon and the current TSF, configures PCF and DTIM * handling, programs the sleep registers so the hardware * will wakeup in time to receive beacons, and configures * the beacon miss handling so we'll receive a BMISS * interrupt when we stop seeing beacons from the AP * we've associated with. */ void ath_beacon_config(struct ath_softc *sc, struct ieee80211vap *vap) { #define TSF_TO_TU(_h,_l) \ ((((u_int32_t)(_h)) << 22) | (((u_int32_t)(_l)) >> 10)) #define FUDGE 2 struct ath_hal *ah = sc->sc_ah; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; u_int32_t nexttbtt, intval, tsftu; u_int32_t nexttbtt_u8, intval_u8; u_int64_t tsf, tsf_beacon; if (vap == NULL) vap = TAILQ_FIRST(&ic->ic_vaps); /* XXX */ /* * Just ensure that we aren't being called when the last * VAP is destroyed. 
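 *
 * (As a worked example of the TSF_TO_TU() macro defined above: the
 * TSF is a 64-bit microsecond counter and a TU is 1024 microseconds,
 * so TSF_TO_TU(h, l) is the full 64-bit TSF divided by 1024,
 * truncated to 32 bits.  E.g. for a hypothetical tsf = 0x100000400,
 * h = 0x1 and l = 0x400, giving (0x1 << 22) | (0x400 >> 10) =
 * 0x400001 TU.)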
*/ if (vap == NULL) { device_printf(sc->sc_dev, "%s: called with no VAPs\n", __func__); return; } ni = ieee80211_ref_node(vap->iv_bss); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); /* extract tstamp from last beacon and convert to TU */ nexttbtt = TSF_TO_TU(le32dec(ni->ni_tstamp.data + 4), le32dec(ni->ni_tstamp.data)); tsf_beacon = ((uint64_t) le32dec(ni->ni_tstamp.data + 4)) << 32; tsf_beacon |= le32dec(ni->ni_tstamp.data); if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) { /* * For multi-bss ap/mesh support beacons are either staggered * evenly over N slots or burst together. For the former * arrange for the SWBA to be delivered for each slot. * Slots that are not occupied will generate nothing. */ /* NB: the beacon interval is kept internally in TU's */ intval = ni->ni_intval & HAL_BEACON_PERIOD; if (sc->sc_stagbeacons) intval /= ATH_BCBUF; } else { /* NB: the beacon interval is kept internally in TU's */ intval = ni->ni_intval & HAL_BEACON_PERIOD; } + + /* + * Note: rounding up to the next intval can cause problems with + * bad APs when we're in powersave mode. + * + * In STA mode with powersave enabled, beacons are only received + * whenever the beacon timer fires to wake up the hardware. + * Now, if this is rounded up to the next intval, it assumes + * that the AP has started transmitting beacons at TSF values that + * are multiples of intval, versus say being 25 TU off. + * + * The specification (802.11-2012 10.1.3.2 - Beacon Generation in + * Infrastructure Networks) requires APs be beaconing at a + * multiple of intval. So, if bintval=100, then we shouldn't + * get beacons at intervals other than around multiples of 100. + */ if (nexttbtt == 0) /* e.g. for ap mode */ nexttbtt = intval; - else if (intval) /* NB: can be 0 for monitor mode */ + else nexttbtt = roundup(nexttbtt, intval); + DPRINTF(sc, ATH_DEBUG_BEACON, "%s: nexttbtt %u intval %u (%u)\n", __func__, nexttbtt, intval, ni->ni_intval); if (ic->ic_opmode == IEEE80211_M_STA && !sc->sc_swbmiss) { HAL_BEACON_STATE bs; int dtimperiod, dtimcount; int cfpperiod, cfpcount; /* * Setup dtim and cfp parameters according to * last beacon we received (which may be none). */ dtimperiod = ni->ni_dtim_period; if (dtimperiod <= 0) /* NB: 0 if not known */ dtimperiod = 1; dtimcount = ni->ni_dtim_count; if (dtimcount >= dtimperiod) /* NB: sanity check */ dtimcount = 0; /* XXX? */ cfpperiod = 1; /* NB: no PCF support yet */ cfpcount = 0; /* * Pull nexttbtt forward to reflect the current * TSF and calculate dtim+cfp state for the result. */ tsf = ath_hal_gettsf64(ah); tsftu = TSF_TO_TU(tsf>>32, tsf) + FUDGE; DPRINTF(sc, ATH_DEBUG_BEACON, "%s: beacon tsf=%llu, hw tsf=%llu, nexttbtt=%u, tsftu=%u\n", __func__, (unsigned long long) tsf_beacon, (unsigned long long) tsf, nexttbtt, tsftu); DPRINTF(sc, ATH_DEBUG_BEACON, "%s: beacon tsf=%llu, hw tsf=%llu, tsf delta=%lld\n", __func__, (unsigned long long) tsf_beacon, (unsigned long long) tsf, (long long) tsf - (long long) tsf_beacon); DPRINTF(sc, ATH_DEBUG_BEACON, "%s: nexttbtt=%llu, beacon tsf delta=%lld\n", __func__, (unsigned long long) nexttbtt, (long long) ((long long) nexttbtt * 1024LL) - (long long) tsf_beacon); /* XXX cfpcount?
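 *
 * A worked example of the catch-up logic below (hypothetical
 * numbers): with nexttbtt = 300 TU, tsftu = 1234 TU and
 * intval = 100 TU, the remainder is (1234 - 300) % 100 = 34, so
 * nexttbtt becomes 1234 - 34 + 100 = 1300 TU - the next TBTT
 * after the current TSF that preserves the original beacon
 * phase.  countdiff is then (1300 - 300) / 100 = 10 elapsed
 * beacon intervals, which advances the DTIM count modulo
 * dtimperiod.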
*/ if (nexttbtt > tsftu) { uint32_t countdiff, oldtbtt, remainder; oldtbtt = nexttbtt; remainder = (nexttbtt - tsftu) % intval; nexttbtt = tsftu + remainder; countdiff = (oldtbtt - nexttbtt) / intval % dtimperiod; if (dtimcount > countdiff) { dtimcount -= countdiff; } else { dtimcount += dtimperiod - countdiff; } } else { //nexttbtt <= tsftu uint32_t countdiff, oldtbtt, remainder; oldtbtt = nexttbtt; remainder = (tsftu - nexttbtt) % intval; nexttbtt = tsftu - remainder + intval; countdiff = (nexttbtt - oldtbtt) / intval % dtimperiod; if (dtimcount > countdiff) { dtimcount -= countdiff; } else { dtimcount += dtimperiod - countdiff; } } DPRINTF(sc, ATH_DEBUG_BEACON, "%s: adj nexttbtt=%llu, rx tsf delta=%lld\n", __func__, (unsigned long long) nexttbtt, (long long) ((long long)nexttbtt * 1024LL) - (long long)tsf); memset(&bs, 0, sizeof(bs)); bs.bs_intval = intval; bs.bs_nexttbtt = nexttbtt; bs.bs_dtimperiod = dtimperiod*intval; bs.bs_nextdtim = bs.bs_nexttbtt + dtimcount*intval; bs.bs_cfpperiod = cfpperiod*bs.bs_dtimperiod; bs.bs_cfpnext = bs.bs_nextdtim + cfpcount*bs.bs_dtimperiod; bs.bs_cfpmaxduration = 0; #if 0 /* * The 802.11 layer records the offset to the DTIM * bitmap while receiving beacons; use it here to * enable h/w detection of our AID being marked in * the bitmap vector (to indicate frames for us are * pending at the AP). * XXX do DTIM handling in s/w to WAR old h/w bugs * XXX enable based on h/w rev for newer chips */ bs.bs_timoffset = ni->ni_timoff; #endif /* * Calculate the number of consecutive beacons to miss * before taking a BMISS interrupt. * Note that we clamp the result to at most 10 beacons. */ bs.bs_bmissthreshold = vap->iv_bmissthreshold; if (bs.bs_bmissthreshold > 10) bs.bs_bmissthreshold = 10; else if (bs.bs_bmissthreshold <= 0) bs.bs_bmissthreshold = 1; /* * Calculate sleep duration. The configuration is * given in ms. We insure a multiple of the beacon * period is used. Also, if the sleep duration is * greater than the DTIM period then it makes sense * to make it a multiple of that. * * XXX fixed at 100ms */ bs.bs_sleepduration = roundup(IEEE80211_MS_TO_TU(100), bs.bs_intval); if (bs.bs_sleepduration > bs.bs_dtimperiod) bs.bs_sleepduration = roundup(bs.bs_sleepduration, bs.bs_dtimperiod); DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf %ju tsf:tu %u intval %u nexttbtt %u dtim %u " "nextdtim %u bmiss %u sleep %u cfp:period %u " "maxdur %u next %u timoffset %u\n" , __func__ , tsf , tsftu , bs.bs_intval , bs.bs_nexttbtt , bs.bs_dtimperiod , bs.bs_nextdtim , bs.bs_bmissthreshold , bs.bs_sleepduration , bs.bs_cfpperiod , bs.bs_cfpmaxduration , bs.bs_cfpnext , bs.bs_timoffset ); ath_hal_intrset(ah, 0); ath_hal_beacontimers(ah, &bs); sc->sc_imask |= HAL_INT_BMISS; ath_hal_intrset(ah, sc->sc_imask); } else { ath_hal_intrset(ah, 0); if (nexttbtt == intval) intval |= HAL_BEACON_RESET_TSF; if (ic->ic_opmode == IEEE80211_M_IBSS) { /* * In IBSS mode enable the beacon timers but only * enable SWBA interrupts if we need to manually * prepare beacon frames. Otherwise we use a * self-linked tx descriptor and let the hardware * deal with things. */ intval |= HAL_BEACON_ENA; if (!sc->sc_hasveol) sc->sc_imask |= HAL_INT_SWBA; if ((intval & HAL_BEACON_RESET_TSF) == 0) { /* * Pull nexttbtt forward to reflect * the current TSF.
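 *
 * E.g. (hypothetical numbers): with nexttbtt = 100 TU,
 * tsftu = 12345 TU and intval = 100 TU, the loop below steps
 * nexttbtt forward one beacon interval at a time until it
 * reaches 12400 TU, the first TBTT at or after the current TSF.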
*/ tsf = ath_hal_gettsf64(ah); tsftu = TSF_TO_TU(tsf>>32, tsf) + FUDGE; do { nexttbtt += intval; } while (nexttbtt < tsftu); } ath_beaconq_config(sc); } else if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) { /* * In AP/mesh mode we enable the beacon timers * and SWBA interrupts to prepare beacon frames. */ intval |= HAL_BEACON_ENA; sc->sc_imask |= HAL_INT_SWBA; /* beacon prepare */ ath_beaconq_config(sc); } /* * Now dirty things because for now, the EDMA HAL has * nexttbtt and intval is TU/8. */ if (sc->sc_isedma) { nexttbtt_u8 = (nexttbtt << 3); intval_u8 = (intval << 3); if (intval & HAL_BEACON_ENA) intval_u8 |= HAL_BEACON_ENA; if (intval & HAL_BEACON_RESET_TSF) intval_u8 |= HAL_BEACON_RESET_TSF; ath_hal_beaconinit(ah, nexttbtt_u8, intval_u8); } else ath_hal_beaconinit(ah, nexttbtt, intval); sc->sc_bmisscount = 0; ath_hal_intrset(ah, sc->sc_imask); /* * When using a self-linked beacon descriptor in * ibss mode load it once here. */ if (ic->ic_opmode == IEEE80211_M_IBSS && sc->sc_hasveol) ath_beacon_start_adhoc(sc, vap); } ieee80211_free_node(ni); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); #undef FUDGE #undef TSF_TO_TU } Index: projects/clang391-import/sys/dev/ath/if_ath_ioctl.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_ioctl.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_ioctl.c (revision 309263) @@ -1,307 +1,309 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. 
*/ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include /* * ioctl() related pieces. * * Some subsystems (eg spectral, dfs) have their own ioctl method which * we call. */ /* * Fetch the rate control statistics for the given node. */ static int ath_ioctl_ratestats(struct ath_softc *sc, struct ath_rateioctl *rs) { struct ath_node *an; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; int error = 0; /* Perform a lookup on the given node */ ni = ieee80211_find_node(&ic->ic_sta, rs->is_u.macaddr); if (ni == NULL) { error = EINVAL; goto bad; } /* Lock the ath_node */ an = ATH_NODE(ni); ATH_NODE_LOCK(an); /* Fetch the rate control stats for this node */ error = ath_rate_fetch_node_stats(sc, an, rs); /* No matter what happens here, just drop through */ /* Unlock the ath_node */ ATH_NODE_UNLOCK(an); /* Unref the node */ ieee80211_node_decref(ni); bad: return (error); } #ifdef ATH_DIAGAPI /* * Diagnostic interface to the HAL. This is used by various * tools to do things like retrieve register contents for * debugging. The mechanism is intentionally opaque so that * it can change frequently w/o concern for compatibility. */ static int ath_ioctl_diag(struct ath_softc *sc, struct ath_diag *ad) { struct ath_hal *ah = sc->sc_ah; u_int id = ad->ad_id & ATH_DIAG_ID; void *indata = NULL; void *outdata = NULL; u_int32_t insize = ad->ad_in_size; u_int32_t outsize = ad->ad_out_size; int error = 0; if (ad->ad_id & ATH_DIAG_IN) { /* * Copy in data. */ indata = malloc(insize, M_TEMP, M_NOWAIT); if (indata == NULL) { error = ENOMEM; goto bad; } error = copyin(ad->ad_in_data, indata, insize); if (error) goto bad; } if (ad->ad_id & ATH_DIAG_DYN) { /* * Allocate a buffer for the results (otherwise the HAL * returns a pointer to a buffer where we can read the * results). Note that we depend on the HAL leaving this * pointer for us to use below in reclaiming the buffer; * may want to be more defensive. 
*/ outdata = malloc(outsize, M_TEMP, M_NOWAIT); if (outdata == NULL) { error = ENOMEM; goto bad; } } ATH_LOCK(sc); if (id != HAL_DIAG_REGS) ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); if (ath_hal_getdiagstate(ah, id, indata, insize, &outdata, &outsize)) { if (outsize < ad->ad_out_size) ad->ad_out_size = outsize; if (outdata != NULL) error = copyout(outdata, ad->ad_out_data, ad->ad_out_size); } else { error = EINVAL; } ATH_LOCK(sc); if (id != HAL_DIAG_REGS) ath_power_restore_power_state(sc); ATH_UNLOCK(sc); bad: if ((ad->ad_id & ATH_DIAG_IN) && indata != NULL) free(indata, M_TEMP); if ((ad->ad_id & ATH_DIAG_DYN) && outdata != NULL) free(outdata, M_TEMP); return error; } #endif /* ATH_DIAGAPI */ int ath_ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ifreq *ifr = data; struct ath_softc *sc = ic->ic_softc; switch (cmd) { case SIOCGATHSTATS: { struct ieee80211vap *vap; struct ifnet *ifp; const HAL_RATE_TABLE *rt; /* NB: embed these numbers to get a consistent view */ sc->sc_stats.ast_tx_packets = 0; sc->sc_stats.ast_rx_packets = 0; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; sc->sc_stats.ast_tx_packets += ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); sc->sc_stats.ast_rx_packets += ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); } sc->sc_stats.ast_tx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgtxrssi); sc->sc_stats.ast_rx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgrssi); #ifdef IEEE80211_SUPPORT_TDMA sc->sc_stats.ast_tdma_tsfadjp = TDMA_AVG(sc->sc_avgtsfdeltap); sc->sc_stats.ast_tdma_tsfadjm = TDMA_AVG(sc->sc_avgtsfdeltam); #endif rt = sc->sc_currates; sc->sc_stats.ast_tx_rate = rt->info[sc->sc_txrix].dot11Rate &~ IEEE80211_RATE_BASIC; if (rt->info[sc->sc_txrix].phy & IEEE80211_T_HT) sc->sc_stats.ast_tx_rate |= IEEE80211_RATE_MCS; return copyout(&sc->sc_stats, ifr->ifr_data, sizeof (sc->sc_stats)); } case SIOCGATHAGSTATS: return copyout(&sc->sc_aggr_stats, ifr->ifr_data, sizeof (sc->sc_aggr_stats)); case SIOCZATHSTATS: { int error; error = priv_check(curthread, PRIV_DRIVER); if (error == 0) { memset(&sc->sc_stats, 0, sizeof(sc->sc_stats)); memset(&sc->sc_aggr_stats, 0, sizeof(sc->sc_aggr_stats)); memset(&sc->sc_intr_stats, 0, sizeof(sc->sc_intr_stats)); } return (error); } #ifdef ATH_DIAGAPI case SIOCGATHDIAG: return (ath_ioctl_diag(sc, data)); case SIOCGATHPHYERR: return (ath_ioctl_phyerr(sc, data)); #endif case SIOCGATHSPECTRAL: return (ath_ioctl_spectral(sc, data)); case SIOCGATHNODERATESTATS: return (ath_ioctl_ratestats(sc, data)); + case SIOCGATHBTCOEX: + return (ath_btcoex_ioctl(sc, data)); default: /* * This signals the net80211 layer that we didn't handle this * ioctl. */ return (ENOTTY); } } Index: projects/clang391-import/sys/dev/ath/if_ath_misc.h =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_misc.h (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_misc.h (revision 309263) @@ -1,142 +1,150 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ #ifndef __IF_ATH_MISC_H__ #define __IF_ATH_MISC_H__ /* * This is where definitions for "public things" in if_ath.c * will go for the time being. * * Anything in here should eventually be moved out of if_ath.c * and into something else. */ extern int ath_rxbuf; extern int ath_txbuf; extern int ath_txbuf_mgmt; extern int ath_tx_findrix(const struct ath_softc *sc, uint8_t rate); extern struct ath_buf * ath_getbuf(struct ath_softc *sc, ath_buf_type_t btype); extern struct ath_buf * _ath_getbuf_locked(struct ath_softc *sc, ath_buf_type_t btype); extern struct ath_buf * ath_buf_clone(struct ath_softc *sc, struct ath_buf *bf); /* XXX change this to NULL the buffer pointer? */ extern void ath_freebuf(struct ath_softc *sc, struct ath_buf *bf); extern void ath_returnbuf_head(struct ath_softc *sc, struct ath_buf *bf); extern void ath_returnbuf_tail(struct ath_softc *sc, struct ath_buf *bf); extern int ath_reset(struct ath_softc *, ATH_RESET_TYPE); extern void ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail); extern void ath_tx_update_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_rc_series *rc, struct ath_tx_status *ts, int frmlen, int nframes, int nbad); extern int ath_hal_gethangstate(struct ath_hal *ah, uint32_t mask, uint32_t *hangs); extern void ath_tx_freebuf(struct ath_softc *sc, struct ath_buf *bf, int status); extern void ath_txq_freeholdingbuf(struct ath_softc *sc, struct ath_txq *txq); extern void ath_txqmove(struct ath_txq *dst, struct ath_txq *src); extern void ath_mode_init(struct ath_softc *sc); extern void ath_setdefantenna(struct ath_softc *sc, u_int antenna); extern void ath_setslottime(struct ath_softc *sc); extern void ath_legacy_attach_comp_func(struct ath_softc *sc); extern void ath_tx_draintxq(struct ath_softc *sc, struct ath_txq *txq); extern void ath_legacy_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type); extern void ath_tx_process_buf_completion(struct ath_softc *sc, struct ath_txq *txq, struct ath_tx_status *ts, struct ath_buf *bf); extern int ath_stoptxdma(struct ath_softc *sc); extern void ath_tx_update_tim(struct ath_softc *sc, struct ieee80211_node *ni, int enable); /* * This is only here so that the RX proc function can call it. * It's very likely that the "start TX after RX" call should be * done via something in if_ath.c, moving "rx tasklet" into * if_ath.c and do the ath_start() call there. Once that's done, * we can kill this. 
*/ extern void ath_start(struct ifnet *ifp); extern void ath_start_task(void *arg, int npending); extern void ath_tx_dump(struct ath_softc *sc, struct ath_txq *txq); /* * Power state tracking. */ -extern void _ath_power_setpower(struct ath_softc *sc, int power_state, const char *file, int line); -extern void _ath_power_set_selfgen(struct ath_softc *sc, int power_state, const char *file, int line); -extern void _ath_power_set_power_state(struct ath_softc *sc, int power_state, const char *file, int line); -extern void _ath_power_restore_power_state(struct ath_softc *sc, const char *file, int line); +extern void _ath_power_setpower(struct ath_softc *sc, int power_state, + int selfgen, const char *file, int line); +extern void _ath_power_set_selfgen(struct ath_softc *sc, + int power_state, const char *file, int line); +extern void _ath_power_set_power_state(struct ath_softc *sc, + int power_state, const char *file, int line); +extern void _ath_power_restore_power_state(struct ath_softc *sc, + const char *file, int line); -#define ath_power_setpower(sc, ps) _ath_power_setpower(sc, ps, __FILE__, __LINE__) -#define ath_power_setselfgen(sc, ps) _ath_power_set_selfgen(sc, ps, __FILE__, __LINE__) -#define ath_power_set_power_state(sc, ps) _ath_power_set_power_state(sc, ps, __FILE__, __LINE__) -#define ath_power_restore_power_state(sc) _ath_power_restore_power_state(sc, __FILE__, __LINE__) +#define ath_power_setpower(sc, ps, sg) _ath_power_setpower(sc, ps, sg, \ + __FILE__, __LINE__) +#define ath_power_setselfgen(sc, ps) _ath_power_set_selfgen(sc, ps, \ + __FILE__, __LINE__) +#define ath_power_set_power_state(sc, ps) \ + _ath_power_set_power_state(sc, ps, __FILE__, __LINE__) +#define ath_power_restore_power_state(sc) \ + _ath_power_restore_power_state(sc, __FILE__, __LINE__) /* * Kick the frame TX task. */ static inline void ath_tx_kick(struct ath_softc *sc) { /* XXX NULL for now */ } /* * Kick the software TX queue task. */ static inline void ath_tx_swq_kick(struct ath_softc *sc) { taskqueue_enqueue(sc->sc_tq, &sc->sc_txqtask); } #endif Index: projects/clang391-import/sys/dev/ath/if_ath_rx.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_rx.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_rx.c (revision 309263) @@ -1,1508 +1,1513 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif #ifdef ATH_DEBUG_ALQ #include #endif #include /* * Calculate the receive filter according to the * operating mode and state: * * o always accept unicast, broadcast, and multicast traffic * o accept PHY error frames when hardware doesn't have MIB support * to count and we need them for ANI (sta mode only until recently) * and we are not scanning (ANI is disabled) * NB: older hal's add rx filter bits out of sight and we need to * blindly preserve them * o probe request frames are accepted only when operating in * hostap, adhoc, mesh, or monitor modes * o enable promiscuous mode * - when in monitor mode * - if interface marked PROMISC (assumes bridge setting is filtered) * o accept beacons: * - when operating in station mode for collecting rssi data when * the station is otherwise quiet, or * - when operating in adhoc mode so the 802.11 layer creates * node table entries for peers, * - when scanning * - when doing s/w beacon miss (e.g. for ap+sta) * - when operating in ap mode in 11g to detect overlapping bss that * require protection * - when operating in mesh mode to detect neighbors * o accept control frames: * - when in monitor mode * XXX HT protection for 11n */ u_int32_t ath_calcrxfilter(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; u_int32_t rfilt; rfilt = HAL_RX_FILTER_UCAST | HAL_RX_FILTER_BCAST | HAL_RX_FILTER_MCAST; if (!sc->sc_needmib && !sc->sc_scanning) rfilt |= HAL_RX_FILTER_PHYERR; if (ic->ic_opmode != IEEE80211_M_STA) rfilt |= HAL_RX_FILTER_PROBEREQ; /* XXX ic->ic_monvaps != 0? */ if (ic->ic_opmode == IEEE80211_M_MONITOR || ic->ic_promisc > 0) rfilt |= HAL_RX_FILTER_PROM; /* * Only listen to all beacons if we're scanning. * * Otherwise we only really need to hear beacons from * our own BSSID. * * IBSS? software beacon miss? Just receive all beacons. * We need to hear beacons/probe requests from everyone so * we can merge ibss. 
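 *
 * As an illustrative example (hypothetical configuration): an
 * associated, non-scanning STA on a chip with "mybeacon" filter
 * support ends up with roughly
 *
 *	rfilt = HAL_RX_FILTER_UCAST | HAL_RX_FILTER_BCAST |
 *	    HAL_RX_FILTER_MCAST | HAL_RX_FILTER_PHYERR |
 *	    HAL_RX_FILTER_MYBEACON;
 *
 * i.e. it hears only beacons matching its own BSSID rather than
 * every beacon on the channel.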
*/ if (ic->ic_opmode == IEEE80211_M_IBSS || sc->sc_swbmiss) { rfilt |= HAL_RX_FILTER_BEACON; } else if (ic->ic_opmode == IEEE80211_M_STA) { if (sc->sc_do_mybeacon && ! sc->sc_scanning) { rfilt |= HAL_RX_FILTER_MYBEACON; } else { /* scanning, non-mybeacon chips */ rfilt |= HAL_RX_FILTER_BEACON; } } /* * NB: We don't recalculate the rx filter when * ic_protmode changes; otherwise we could do * this only when ic_protmode != NONE. */ if (ic->ic_opmode == IEEE80211_M_HOSTAP && IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) rfilt |= HAL_RX_FILTER_BEACON; /* * Enable hardware PS-POLL RX only for hostap mode; * STA mode sends PS-POLL frames but never * receives them. */ if (ath_hal_getcapability(sc->sc_ah, HAL_CAP_PSPOLL, 0, NULL) == HAL_OK && ic->ic_opmode == IEEE80211_M_HOSTAP) rfilt |= HAL_RX_FILTER_PSPOLL; if (sc->sc_nmeshvaps) { rfilt |= HAL_RX_FILTER_BEACON; if (sc->sc_hasbmatch) rfilt |= HAL_RX_FILTER_BSSID; else rfilt |= HAL_RX_FILTER_PROM; } if (ic->ic_opmode == IEEE80211_M_MONITOR) rfilt |= HAL_RX_FILTER_CONTROL; /* * Enable RX of compressed BAR frames only when doing * 802.11n. Required for A-MPDU. */ if (IEEE80211_IS_CHAN_HT(ic->ic_curchan)) rfilt |= HAL_RX_FILTER_COMPBAR; /* * Enable radar PHY errors if requested by the * DFS module. */ if (sc->sc_dodfs) rfilt |= HAL_RX_FILTER_PHYRADAR; /* * Enable spectral PHY errors if requested by the * spectral module. */ if (sc->sc_dospectral) rfilt |= HAL_RX_FILTER_PHYRADAR; DPRINTF(sc, ATH_DEBUG_MODE, "%s: RX filter 0x%x, %s\n", __func__, rfilt, ieee80211_opmode_name[ic->ic_opmode]); return rfilt; } static int ath_legacy_rxbuf_init(struct ath_softc *sc, struct ath_buf *bf) { struct ath_hal *ah = sc->sc_ah; int error; struct mbuf *m; struct ath_desc *ds; /* XXX TODO: ATH_RX_LOCK_ASSERT(sc); */ m = bf->bf_m; if (m == NULL) { /* * NB: by assigning a page to the rx dma buffer we * implicitly satisfy the Atheros requirement that * this buffer be cache-line-aligned and sized to be * multiple of the cache line size. Not doing this * causes weird stuff to happen (for the 5210 at least). */ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { DPRINTF(sc, ATH_DEBUG_ANY, "%s: no mbuf/cluster\n", __func__); sc->sc_stats.ast_rx_nombuf++; return ENOMEM; } m->m_pkthdr.len = m->m_len = m->m_ext.ext_size; error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { DPRINTF(sc, ATH_DEBUG_ANY, "%s: bus_dmamap_load_mbuf_sg failed; error %d\n", __func__, error); sc->sc_stats.ast_rx_busdma++; m_freem(m); return error; } KASSERT(bf->bf_nseg == 1, ("multi-segment packet; nseg %u", bf->bf_nseg)); bf->bf_m = m; } bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREREAD); /* * Setup descriptors. For receive we always terminate * the descriptor list with a self-linked entry so we'll * not get overrun under high load (as can happen with a * 5212 when ANI processing enables PHY error frames). * * To insure the last descriptor is self-linked we create * each descriptor as self-linked and add it to the end. As * each additional descriptor is added the previous self-linked * entry is ``fixed'' naturally. This should be safe even * if DMA is happening. When processing RX interrupts we * never remove/process the last, self-linked, entry on the * descriptor list. This insures the hardware always has * someplace to write a new frame. */ /* * 11N: we can no longer afford to self link the last descriptor. * MAC acknowledges BA status as long as it copies frames to host * buffer (or rx fifo). 
This can incorrectly acknowledge packets * to a sender if last desc is self-linked. */ ds = bf->bf_desc; if (sc->sc_rxslink) ds->ds_link = bf->bf_daddr; /* link to self */ else ds->ds_link = 0; /* terminate the list */ ds->ds_data = bf->bf_segs[0].ds_addr; ath_hal_setuprxdesc(ah, ds , m->m_len /* buffer size */ , 0 ); if (sc->sc_rxlink != NULL) *sc->sc_rxlink = bf->bf_daddr; sc->sc_rxlink = &ds->ds_link; return 0; } /* * Intercept management frames to collect beacon rssi data * and to do ibss merges. */ void ath_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m, int subtype, const struct ieee80211_rx_stats *rxs, int rssi, int nf) { struct ieee80211vap *vap = ni->ni_vap; struct ath_softc *sc = vap->iv_ic->ic_softc; uint64_t tsf_beacon_old, tsf_beacon; uint64_t nexttbtt; int64_t tsf_delta; int32_t tsf_delta_bmiss; int32_t tsf_remainder; uint64_t tsf_beacon_target; int tsf_intval; tsf_beacon_old = ((uint64_t) le32dec(ni->ni_tstamp.data + 4)) << 32; tsf_beacon_old |= le32dec(ni->ni_tstamp.data); #define TU_TO_TSF(_tu) (((u_int64_t)(_tu)) << 10) tsf_intval = 1; if (ni->ni_intval > 0) { tsf_intval = TU_TO_TSF(ni->ni_intval); } #undef TU_TO_TSF /* * Call up first so subsequent work can use information * potentially stored in the node (e.g. for ibss merge). */ ATH_VAP(vap)->av_recv_mgmt(ni, m, subtype, rxs, rssi, nf); switch (subtype) { case IEEE80211_FC0_SUBTYPE_BEACON: /* * Only do the following processing if it's for * the current BSS. * * In scan and IBSS mode we receive all beacons, * which means we need to filter out stuff * that isn't for us or we'll end up constantly * trying to sync / merge to BSSes that aren't * actually us. */ if (IEEE80211_ADDR_EQ(ni->ni_bssid, vap->iv_bss->ni_bssid)) { /* update rssi statistics for use by the hal */ /* XXX unlocked check against vap->iv_bss? */ ATH_RSSI_LPF(sc->sc_halstats.ns_avgbrssi, rssi); tsf_beacon = ((uint64_t) le32dec(ni->ni_tstamp.data + 4)) << 32; tsf_beacon |= le32dec(ni->ni_tstamp.data); nexttbtt = ath_hal_getnexttbtt(sc->sc_ah); /* * Let's calculate the delta and remainder, so we can see * if the beacon timer from the AP is varying by more than * a few TU. (Which would be a huge, huge problem.) */ tsf_delta = (long long) tsf_beacon - (long long) tsf_beacon_old; tsf_delta_bmiss = tsf_delta / tsf_intval; /* * If our delta is greater than half the beacon interval, * let's round the bmiss value up to the next beacon * interval. Ie, we're running really, really early * on the next beacon. */ if (tsf_delta % tsf_intval > (tsf_intval / 2)) tsf_delta_bmiss ++; tsf_beacon_target = tsf_beacon_old + (((unsigned long long) tsf_delta_bmiss) * (long long) tsf_intval); /* * The remainder using '%' is between 0 .. intval-1. * If we're actually running too fast, then the remainder * will be some large number just under intval-1. * So we need to look at whether we're running * before or after the target beacon interval * and if we are, modify how we do the remainder * calculation. 
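 *
 * A worked example (hypothetical values): with bintval = 100 TU,
 * tsf_intval = 102400 usec.  If tsf_beacon_old = 0 and the new
 * tsf_beacon = 307100, then tsf_delta = 307100; the remainder
 * 307100 % 102400 = 102300 is more than half an interval, so
 * tsf_delta_bmiss rounds up to 3 and tsf_beacon_target = 307200.
 * The beacon arrived before the target, so below we compute
 * tsf_remainder = -(102400 - 102300) = -100: the AP is beaconing
 * 100 usec early.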
*/ if (tsf_beacon < tsf_beacon_target) { tsf_remainder = -(tsf_intval - ((tsf_beacon - tsf_beacon_old) % tsf_intval)); } else { tsf_remainder = (tsf_beacon - tsf_beacon_old) % tsf_intval; } - DPRINTF(sc, ATH_DEBUG_BEACON, "%s: old_tsf=%llu, new_tsf=%llu, target_tsf=%llu, delta=%lld, bmiss=%d, remainder=%d\n", + DPRINTF(sc, ATH_DEBUG_BEACON, "%s: old_tsf=%llu (%u), new_tsf=%llu (%u), target_tsf=%llu (%u), delta=%lld, bmiss=%d, remainder=%d\n", __func__, (unsigned long long) tsf_beacon_old, + (unsigned int) (tsf_beacon_old >> 10), (unsigned long long) tsf_beacon, + (unsigned int ) (tsf_beacon >> 10), (unsigned long long) tsf_beacon_target, + (unsigned int) (tsf_beacon_target >> 10), (long long) tsf_delta, tsf_delta_bmiss, tsf_remainder); - DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf=%llu, nexttbtt=%llu, delta=%d\n", + DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf=%llu (%u), nexttbtt=%llu (%u), delta=%d\n", __func__, (unsigned long long) tsf_beacon, + (unsigned int) (tsf_beacon >> 10), (unsigned long long) nexttbtt, + (unsigned int) (nexttbtt >> 10), (int32_t) tsf_beacon - (int32_t) nexttbtt + tsf_intval); /* We only do syncbeacon on STA VAPs; not on IBSS */ if (vap->iv_opmode == IEEE80211_M_STA && sc->sc_syncbeacon && ni == vap->iv_bss && (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP)) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: syncbeacon=1; syncing\n", __func__); /* * Resync beacon timers using the tsf of the beacon * frame we just received. */ ath_beacon_config(sc, vap); sc->sc_syncbeacon = 0; } } /* fall thru... */ case IEEE80211_FC0_SUBTYPE_PROBE_RESP: if (vap->iv_opmode == IEEE80211_M_IBSS && vap->iv_state == IEEE80211_S_RUN && ieee80211_ibss_merge_check(ni)) { uint32_t rstamp = sc->sc_lastrs->rs_tstamp; uint64_t tsf = ath_extend_tsf(sc, rstamp, ath_hal_gettsf64(sc->sc_ah)); /* * Handle ibss merge as needed; check the tsf on the * frame before attempting the merge. The 802.11 spec * says the station should change its bssid to match * the oldest station with the same ssid, where oldest * is determined by the tsf. Note that hardware * reconfiguration happens through callback to * ath_newstate as the state machine will go from * RUN -> RUN when this happens. */ if (le64toh(ni->ni_tstamp.tsf) >= tsf) { DPRINTF(sc, ATH_DEBUG_STATE, "ibss merge, rstamp %u tsf %ju " "tstamp %ju\n", rstamp, (uintmax_t)tsf, (uintmax_t)ni->ni_tstamp.tsf); (void) ieee80211_ibss_merge(ni); } } break; } } #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT static void ath_rx_tap_vendor(struct ath_softc *sc, struct mbuf *m, const struct ath_rx_status *rs, u_int64_t tsf, int16_t nf) { /* Fill in the extension bitmap */ sc->sc_rx_th.wr_ext_bitmap = htole32(1 << ATH_RADIOTAP_VENDOR_HEADER); /* Fill in the vendor header */ sc->sc_rx_th.wr_vh.vh_oui[0] = 0x7f; sc->sc_rx_th.wr_vh.vh_oui[1] = 0x03; sc->sc_rx_th.wr_vh.vh_oui[2] = 0x00; /* XXX what should this be?
*/ sc->sc_rx_th.wr_vh.vh_sub_ns = 0; sc->sc_rx_th.wr_vh.vh_skip_len = htole16(sizeof(struct ath_radiotap_vendor_hdr)); /* General version info */ sc->sc_rx_th.wr_v.vh_version = 1; sc->sc_rx_th.wr_v.vh_rx_chainmask = sc->sc_rxchainmask; /* rssi */ sc->sc_rx_th.wr_v.rssi_ctl[0] = rs->rs_rssi_ctl[0]; sc->sc_rx_th.wr_v.rssi_ctl[1] = rs->rs_rssi_ctl[1]; sc->sc_rx_th.wr_v.rssi_ctl[2] = rs->rs_rssi_ctl[2]; sc->sc_rx_th.wr_v.rssi_ext[0] = rs->rs_rssi_ext[0]; sc->sc_rx_th.wr_v.rssi_ext[1] = rs->rs_rssi_ext[1]; sc->sc_rx_th.wr_v.rssi_ext[2] = rs->rs_rssi_ext[2]; /* evm */ sc->sc_rx_th.wr_v.evm[0] = rs->rs_evm0; sc->sc_rx_th.wr_v.evm[1] = rs->rs_evm1; sc->sc_rx_th.wr_v.evm[2] = rs->rs_evm2; /* These are only populated from the AR9300 or later */ sc->sc_rx_th.wr_v.evm[3] = rs->rs_evm3; sc->sc_rx_th.wr_v.evm[4] = rs->rs_evm4; /* direction */ sc->sc_rx_th.wr_v.vh_flags = ATH_VENDOR_PKT_RX; /* RX rate */ sc->sc_rx_th.wr_v.vh_rx_hwrate = rs->rs_rate; /* RX flags */ sc->sc_rx_th.wr_v.vh_rs_flags = rs->rs_flags; if (rs->rs_isaggr) sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_ISAGGR; if (rs->rs_moreaggr) sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_MOREAGGR; /* phyerr info */ if (rs->rs_status & HAL_RXERR_PHY) { sc->sc_rx_th.wr_v.vh_phyerr_code = rs->rs_phyerr; sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_RXPHYERR; } else { sc->sc_rx_th.wr_v.vh_phyerr_code = 0xff; } sc->sc_rx_th.wr_v.vh_rs_status = rs->rs_status; sc->sc_rx_th.wr_v.vh_rssi = rs->rs_rssi; } #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ static void ath_rx_tap(struct ath_softc *sc, struct mbuf *m, const struct ath_rx_status *rs, u_int64_t tsf, int16_t nf) { #define CHAN_HT20 htole32(IEEE80211_CHAN_HT20) #define CHAN_HT40U htole32(IEEE80211_CHAN_HT40U) #define CHAN_HT40D htole32(IEEE80211_CHAN_HT40D) #define CHAN_HT (CHAN_HT20|CHAN_HT40U|CHAN_HT40D) const HAL_RATE_TABLE *rt; uint8_t rix; rt = sc->sc_currates; KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode)); rix = rt->rateCodeToIndex[rs->rs_rate]; sc->sc_rx_th.wr_rate = sc->sc_hwmap[rix].ieeerate; sc->sc_rx_th.wr_flags = sc->sc_hwmap[rix].rxflags; #ifdef AH_SUPPORT_AR5416 sc->sc_rx_th.wr_chan_flags &= ~CHAN_HT; if (rs->rs_status & HAL_RXERR_PHY) { /* * PHY error - make sure the channel flags * reflect the actual channel configuration, * not the received frame. 
*/ if (IEEE80211_IS_CHAN_HT40U(sc->sc_curchan)) sc->sc_rx_th.wr_chan_flags |= CHAN_HT40U; else if (IEEE80211_IS_CHAN_HT40D(sc->sc_curchan)) sc->sc_rx_th.wr_chan_flags |= CHAN_HT40D; else if (IEEE80211_IS_CHAN_HT20(sc->sc_curchan)) sc->sc_rx_th.wr_chan_flags |= CHAN_HT20; } else if (sc->sc_rx_th.wr_rate & IEEE80211_RATE_MCS) { /* HT rate */ struct ieee80211com *ic = &sc->sc_ic; if ((rs->rs_flags & HAL_RX_2040) == 0) sc->sc_rx_th.wr_chan_flags |= CHAN_HT20; else if (IEEE80211_IS_CHAN_HT40U(ic->ic_curchan)) sc->sc_rx_th.wr_chan_flags |= CHAN_HT40U; else sc->sc_rx_th.wr_chan_flags |= CHAN_HT40D; if ((rs->rs_flags & HAL_RX_GI) == 0) sc->sc_rx_th.wr_flags |= IEEE80211_RADIOTAP_F_SHORTGI; } #endif sc->sc_rx_th.wr_tsf = htole64(ath_extend_tsf(sc, rs->rs_tstamp, tsf)); if (rs->rs_status & HAL_RXERR_CRC) sc->sc_rx_th.wr_flags |= IEEE80211_RADIOTAP_F_BADFCS; /* XXX propagate other error flags from descriptor */ sc->sc_rx_th.wr_antnoise = nf; sc->sc_rx_th.wr_antsignal = nf + rs->rs_rssi; sc->sc_rx_th.wr_antenna = rs->rs_antenna; #undef CHAN_HT #undef CHAN_HT20 #undef CHAN_HT40U #undef CHAN_HT40D } static void ath_handle_micerror(struct ieee80211com *ic, struct ieee80211_frame *wh, int keyix) { struct ieee80211_node *ni; /* XXX recheck MIC to deal w/ chips that lie */ /* XXX discard MIC errors on !data frames */ ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh); if (ni != NULL) { ieee80211_notify_michael_failure(ni->ni_vap, wh, keyix); ieee80211_free_node(ni); } } /* * Process a single packet. * * The mbuf must already be synced, unmapped and removed from bf->bf_m * by this stage. * * The mbuf must be consumed by this routine - either passed up the * net80211 stack, put on the holding queue, or freed. */ int ath_rx_pkt(struct ath_softc *sc, struct ath_rx_status *rs, HAL_STATUS status, uint64_t tsf, int nf, HAL_RX_QUEUE qtype, struct ath_buf *bf, struct mbuf *m) { uint64_t rstamp; /* XXX TODO: make this an mbuf tag? */ struct ieee80211_rx_stats rxs; int len, type, i; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; int is_good = 0; struct ath_rx_edma *re = &sc->sc_rxedma[qtype]; /* * Calculate the correct 64 bit TSF given * the TSF64 register value and rs_tstamp. 
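 *
 * Conceptually (a sketch only; the valid timestamp width varies
 * by chip generation), ath_extend_tsf() splices the low-order
 * rs_tstamp bits into the current 64-bit TSF and backs off one
 * wrap period if the spliced value would land in the future:
 *
 *	rstamp = (tsf & ~mask) | (rs_tstamp & mask);
 *	if (rstamp > tsf)
 *		rstamp -= mask + 1;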
*/ rstamp = ath_extend_tsf(sc, rs->rs_tstamp, tsf); /* These aren't specifically errors */ #ifdef AH_SUPPORT_AR5416 if (rs->rs_flags & HAL_RX_GI) sc->sc_stats.ast_rx_halfgi++; if (rs->rs_flags & HAL_RX_2040) sc->sc_stats.ast_rx_2040++; if (rs->rs_flags & HAL_RX_DELIM_CRC_PRE) sc->sc_stats.ast_rx_pre_crc_err++; if (rs->rs_flags & HAL_RX_DELIM_CRC_POST) sc->sc_stats.ast_rx_post_crc_err++; if (rs->rs_flags & HAL_RX_DECRYPT_BUSY) sc->sc_stats.ast_rx_decrypt_busy_err++; if (rs->rs_flags & HAL_RX_HI_RX_CHAIN) sc->sc_stats.ast_rx_hi_rx_chain++; if (rs->rs_flags & HAL_RX_STBC) sc->sc_stats.ast_rx_stbc++; #endif /* AH_SUPPORT_AR5416 */ if (rs->rs_status != 0) { if (rs->rs_status & HAL_RXERR_CRC) sc->sc_stats.ast_rx_crcerr++; if (rs->rs_status & HAL_RXERR_FIFO) sc->sc_stats.ast_rx_fifoerr++; if (rs->rs_status & HAL_RXERR_PHY) { sc->sc_stats.ast_rx_phyerr++; /* Process DFS radar events */ if ((rs->rs_phyerr == HAL_PHYERR_RADAR) || (rs->rs_phyerr == HAL_PHYERR_FALSE_RADAR_EXT)) { /* Now pass it to the radar processing code */ ath_dfs_process_phy_err(sc, m, rstamp, rs); } /* Be suitably paranoid about receiving phy errors out of the stats array bounds */ if (rs->rs_phyerr < 64) sc->sc_stats.ast_rx_phy[rs->rs_phyerr]++; goto rx_error; /* NB: don't count in ierrors */ } if (rs->rs_status & HAL_RXERR_DECRYPT) { /* * Decrypt error. If the error occurred * because there was no hardware key, then * let the frame through so the upper layers * can process it. This is necessary for 5210 * parts which have no way to setup a ``clear'' * key cache entry. * * XXX do key cache faulting */ if (rs->rs_keyix == HAL_RXKEYIX_INVALID) goto rx_accept; sc->sc_stats.ast_rx_badcrypt++; } /* * Similar to above - if the failure was a keymiss * just punt it up to the upper layers for now. */ if (rs->rs_status & HAL_RXERR_KEYMISS) { sc->sc_stats.ast_rx_keymiss++; goto rx_accept; } if (rs->rs_status & HAL_RXERR_MIC) { sc->sc_stats.ast_rx_badmic++; /* * Do minimal work required to hand off * the 802.11 header for notification. */ /* XXX frag's and qos frames */ len = rs->rs_datalen; if (len >= sizeof (struct ieee80211_frame)) { ath_handle_micerror(ic, mtod(m, struct ieee80211_frame *), sc->sc_splitmic ? rs->rs_keyix-32 : rs->rs_keyix); } } counter_u64_add(ic->ic_ierrors, 1); rx_error: /* * Cleanup any pending partial frame. */ if (re->m_rxpending != NULL) { m_freem(re->m_rxpending); re->m_rxpending = NULL; } /* * When a tap is present pass error frames * that have been requested. By default we * pass decrypt+mic errors but others may be * interesting (e.g. crc). */ if (ieee80211_radiotap_active(ic) && (rs->rs_status & sc->sc_monpass)) { /* NB: bpf needs the mbuf length setup */ len = rs->rs_datalen; m->m_pkthdr.len = m->m_len = len; ath_rx_tap(sc, m, rs, rstamp, nf); #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT ath_rx_tap_vendor(sc, m, rs, rstamp, nf); #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ ieee80211_radiotap_rx_all(ic, m); } /* XXX pass MIC errors up for s/w recalculation */ m_freem(m); m = NULL; goto rx_next; } rx_accept: len = rs->rs_datalen; m->m_len = len; if (rs->rs_more) { /* * Frame spans multiple descriptors; save * it for the next completed descriptor, it * will be used to construct a jumbogram.
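 *
 * E.g. a 3000-byte frame arriving in two 2KB clusters
 * (hypothetical sizes): the first descriptor completes with
 * rs_more set, so its mbuf is parked in re->m_rxpending; when
 * the second descriptor completes, the code below chains the
 * new mbuf onto it, fixes up m_pkthdr.len, and passes the
 * reassembled frame up as a single packet.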
*/ if (re->m_rxpending != NULL) { /* NB: max frame size is currently 2 clusters */ sc->sc_stats.ast_rx_toobig++; m_freem(re->m_rxpending); } m->m_pkthdr.len = len; re->m_rxpending = m; m = NULL; goto rx_next; } else if (re->m_rxpending != NULL) { /* * This is the second part of a jumbogram, * chain it to the first mbuf, adjust the * frame length, and clear the rxpending state. */ re->m_rxpending->m_next = m; re->m_rxpending->m_pkthdr.len += len; m = re->m_rxpending; re->m_rxpending = NULL; } else { /* * Normal single-descriptor receive; setup packet length. */ m->m_pkthdr.len = len; } /* * Validate rs->rs_antenna. * * Some users w/ AR9285 NICs have reported crashes * here because rs_antenna field is bogusly large. * Let's enforce the maximum antenna limit of 8 * (and it shouldn't be hard coded, but that's a * separate problem) and if there's an issue, print * out an error and adjust rs_antenna to something * sensible. * * This code should be removed once the actual * root cause of the issue has been identified. * For example, it may be that the rs_antenna * field is only valid for the last frame of * an aggregate and it just happens that it is * "mostly" right. (This is a general statement - * the majority of the statistics are only valid * for the last frame in an aggregate. */ if (rs->rs_antenna > 7) { device_printf(sc->sc_dev, "%s: rs_antenna > 7 (%d)\n", __func__, rs->rs_antenna); #ifdef ATH_DEBUG ath_printrxbuf(sc, bf, 0, status == HAL_OK); #endif /* ATH_DEBUG */ rs->rs_antenna = 0; /* XXX better than nothing */ } /* * If this is an AR9285/AR9485, then the receive and LNA * configuration is stored in RSSI[2] / EXTRSSI[2]. * We can extract this out to build a much better * receive antenna profile. * * Yes, this just blurts over the above RX antenna field * for now. It's fine, the AR9285 doesn't really use * that. * * Later on we should store away the fine grained LNA * information and keep separate counters just for * that. It'll help when debugging the AR9285/AR9485 * combined diversity code. */ if (sc->sc_rx_lnamixer) { rs->rs_antenna = 0; /* Bits 0:1 - the LNA configuration used */ rs->rs_antenna |= ((rs->rs_rssi_ctl[2] & HAL_RX_LNA_CFG_USED) >> HAL_RX_LNA_CFG_USED_S); /* Bit 2 - the external RX antenna switch */ if (rs->rs_rssi_ctl[2] & HAL_RX_LNA_EXTCFG) rs->rs_antenna |= 0x4; } sc->sc_stats.ast_ant_rx[rs->rs_antenna]++; /* * Populate the rx status block. When there are bpf * listeners we do the additional work to provide * complete status. Otherwise we fill in only the * material required by ieee80211_input. Note that * noise setting is filled in above. */ if (ieee80211_radiotap_active(ic)) { ath_rx_tap(sc, m, rs, rstamp, nf); #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT ath_rx_tap_vendor(sc, m, rs, rstamp, nf); #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ } /* * From this point on we assume the frame is at least * as large as ieee80211_frame_min; verify that. 
*/ if (len < IEEE80211_MIN_LEN) { if (!ieee80211_radiotap_active(ic)) { DPRINTF(sc, ATH_DEBUG_RECV, "%s: short packet %d\n", __func__, len); sc->sc_stats.ast_rx_tooshort++; } else { /* NB: in particular this captures ack's */ ieee80211_radiotap_rx_all(ic, m); } m_freem(m); m = NULL; goto rx_next; } if (IFF_DUMPPKTS(sc, ATH_DEBUG_RECV)) { const HAL_RATE_TABLE *rt = sc->sc_currates; uint8_t rix = rt->rateCodeToIndex[rs->rs_rate]; ieee80211_dump_pkt(ic, mtod(m, caddr_t), len, sc->sc_hwmap[rix].ieeerate, rs->rs_rssi); } m_adj(m, -IEEE80211_CRC_LEN); /* * Locate the node for sender, track state, and then * pass the (referenced) node up to the 802.11 layer * for its use. */ ni = ieee80211_find_rxnode_withkey(ic, mtod(m, const struct ieee80211_frame_min *), rs->rs_keyix == HAL_RXKEYIX_INVALID ? IEEE80211_KEYIX_NONE : rs->rs_keyix); sc->sc_lastrs = rs; #ifdef AH_SUPPORT_AR5416 if (rs->rs_isaggr) sc->sc_stats.ast_rx_agg++; #endif /* AH_SUPPORT_AR5416 */ /* * Populate the per-chain RSSI values where appropriate. */ bzero(&rxs, sizeof(rxs)); rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI | IEEE80211_R_C_CHAIN | IEEE80211_R_C_NF | IEEE80211_R_C_RSSI | IEEE80211_R_TSF64 | IEEE80211_R_TSF_START; /* XXX TODO: validate */ rxs.c_rssi = rs->rs_rssi; rxs.c_nf = nf; rxs.c_chain = 3; /* XXX TODO: check */ rxs.c_rx_tsf = rstamp; for (i = 0; i < 3; i++) { rxs.c_rssi_ctl[i] = rs->rs_rssi_ctl[i]; rxs.c_rssi_ext[i] = rs->rs_rssi_ext[i]; /* * XXX note: we currently don't track * per-chain noisefloor. */ rxs.c_nf_ctl[i] = nf; rxs.c_nf_ext[i] = nf; } if (ni != NULL) { /* * Only punt packets for ampdu reorder processing for * 11n nodes; net80211 enforces that M_AMPDU is only * set for 11n nodes. */ if (ni->ni_flags & IEEE80211_NODE_HT) m->m_flags |= M_AMPDU; /* * Sending station is known, dispatch directly. */ (void) ieee80211_add_rx_params(m, &rxs); type = ieee80211_input_mimo(ni, m); ieee80211_free_node(ni); m = NULL; /* * Arrange to update the last rx timestamp only for * frames from our ap when operating in station mode. * This assumes the rx key is always setup when * associated. */ if (ic->ic_opmode == IEEE80211_M_STA && rs->rs_keyix != HAL_RXKEYIX_INVALID) is_good = 1; } else { (void) ieee80211_add_rx_params(m, &rxs); type = ieee80211_input_mimo_all(ic, m); m = NULL; } /* * At this point we have passed the frame up the stack; thus * the mbuf is no longer ours. */ /* * Track rx rssi and do any rx antenna management. */ ATH_RSSI_LPF(sc->sc_halstats.ns_avgrssi, rs->rs_rssi); if (sc->sc_diversity) { /* * When using fast diversity, change the default rx * antenna if diversity chooses the other antenna 3 * times in a row. */ if (sc->sc_defant != rs->rs_antenna) { if (++sc->sc_rxotherant >= 3) ath_setdefantenna(sc, rs->rs_antenna); } else sc->sc_rxotherant = 0; } /* Handle slow diversity if enabled */ if (sc->sc_dolnadiv) { ath_lna_rx_comb_scan(sc, rs, ticks, hz); } if (sc->sc_softled) { /* * Blink for any data frame. Otherwise do a * heartbeat-style blink when idle. The latter * is mainly for station mode where we depend on * periodic beacon frames to trigger the poll event. */ if (type == IEEE80211_FC0_TYPE_DATA) { const HAL_RATE_TABLE *rt = sc->sc_currates; ath_led_event(sc, rt->rateCodeToIndex[rs->rs_rate]); } else if (ticks - sc->sc_ledevent >= sc->sc_ledidle) ath_led_event(sc, 0); } rx_next: /* * Debugging - complain if we didn't NULL the mbuf pointer * here. 
*/ if (m != NULL) { device_printf(sc->sc_dev, "%s: mbuf %p should've been freed!\n", __func__, m); } return (is_good); } #define ATH_RX_MAX 128 /* * XXX TODO: break out the "get buffers" from "call ath_rx_pkt()" like * the EDMA code does. * * XXX TODO: then, do all of the RX list management stuff inside * ATH_RX_LOCK() so we don't end up potentially racing. The EDMA * code is doing it right. */ static void ath_rx_proc(struct ath_softc *sc, int resched) { #define PA2DESC(_sc, _pa) \ ((struct ath_desc *)((caddr_t)(_sc)->sc_rxdma.dd_desc + \ ((_pa) - (_sc)->sc_rxdma.dd_desc_paddr))) struct ath_buf *bf; struct ath_hal *ah = sc->sc_ah; #ifdef IEEE80211_SUPPORT_SUPERG struct ieee80211com *ic = &sc->sc_ic; #endif struct ath_desc *ds; struct ath_rx_status *rs; struct mbuf *m; int ngood; HAL_STATUS status; int16_t nf; u_int64_t tsf; int npkts = 0; int kickpcu = 0; int ret; /* XXX we must not hold the ATH_LOCK here */ ATH_UNLOCK_ASSERT(sc); ATH_PCU_UNLOCK_ASSERT(sc); ATH_PCU_LOCK(sc); sc->sc_rxproc_cnt++; kickpcu = sc->sc_kickpcu; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: called\n", __func__); ngood = 0; nf = ath_hal_getchannoise(ah, sc->sc_curchan); sc->sc_stats.ast_rx_noise = nf; tsf = ath_hal_gettsf64(ah); do { /* * Don't process too many packets at a time; give the * TX thread time to also run - otherwise the TX * latency can jump by quite a bit, causing throughput * degradation. */ if (!kickpcu && npkts >= ATH_RX_MAX) break; bf = TAILQ_FIRST(&sc->sc_rxbuf); if (sc->sc_rxslink && bf == NULL) { /* NB: shouldn't happen */ device_printf(sc->sc_dev, "%s: no buffer!\n", __func__); break; } else if (bf == NULL) { /* * End of List: * this can happen for non-self-linked RX chains */ sc->sc_stats.ast_rx_hitqueueend++; break; } m = bf->bf_m; if (m == NULL) { /* NB: shouldn't happen */ /* * If mbuf allocation failed previously there * will be no mbuf; try again to re-populate it. */ /* XXX make debug msg */ device_printf(sc->sc_dev, "%s: no mbuf!\n", __func__); TAILQ_REMOVE(&sc->sc_rxbuf, bf, bf_list); goto rx_proc_next; } ds = bf->bf_desc; if (ds->ds_link == bf->bf_daddr) { /* NB: never process the self-linked entry at the end */ sc->sc_stats.ast_rx_hitqueueend++; break; } /* XXX sync descriptor memory */ /* * Must provide the virtual address of the current * descriptor, the physical address, and the virtual * address of the next descriptor in the h/w chain. * This allows the HAL to look ahead to see if the * hardware is done with a descriptor by checking the * done bit in the following descriptor and the address * of the current descriptor the DMA engine is working * on. All this is necessary because of our use of * a self-linked list to avoid rx overruns. */ rs = &bf->bf_status.ds_rxstat; status = ath_hal_rxprocdesc(ah, ds, bf->bf_daddr, PA2DESC(sc, ds->ds_link), rs); #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RECV_DESC) ath_printrxbuf(sc, bf, 0, status == HAL_OK); #endif #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_RXSTATUS)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_RXSTATUS, sc->sc_rx_statuslen, (char *) ds); #endif /* ATH_DEBUG_ALQ */ if (status == HAL_EINPROGRESS) break; TAILQ_REMOVE(&sc->sc_rxbuf, bf, bf_list); npkts++; /* * Process a single frame.
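 *
 * The hand-off pattern below: sync the DMA map for the CPU,
 * unload it, clear bf->bf_m (ath_rx_pkt() consumes the mbuf one
 * way or another), and count the frame in ngood if ath_rx_pkt()
 * reports it as a good frame from our AP (used to refresh
 * sc_lastrx).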
*/ bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); bf->bf_m = NULL; if (ath_rx_pkt(sc, rs, status, tsf, nf, HAL_RX_QUEUE_HP, bf, m)) ngood++; rx_proc_next: /* * If there's a holding buffer, insert that onto * the RX list; the hardware is now definitely not pointing * to it now. */ ret = 0; if (sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf != NULL) { TAILQ_INSERT_TAIL(&sc->sc_rxbuf, sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf, bf_list); ret = ath_rxbuf_init(sc, sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf); } /* * Next, throw our buffer into the holding entry. The hardware * may use the descriptor to read the link pointer before * DMAing the next descriptor in to write out a packet. */ sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf = bf; } while (ret == 0); /* rx signal state monitoring */ ath_hal_rxmonitor(ah, &sc->sc_halstats, sc->sc_curchan); if (ngood) sc->sc_lastrx = tsf; ATH_KTR(sc, ATH_KTR_RXPROC, 2, "ath_rx_proc: npkts=%d, ngood=%d", npkts, ngood); /* Queue DFS tasklet if needed */ if (resched && ath_dfs_tasklet_needed(sc, sc->sc_curchan)) taskqueue_enqueue(sc->sc_tq, &sc->sc_dfstask); /* * Now that all the RX frames were handled that * need to be handled, kick the PCU if there's * been an RXEOL condition. */ if (resched && kickpcu) { ATH_PCU_LOCK(sc); ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_rx_proc: kickpcu"); device_printf(sc->sc_dev, "%s: kickpcu; handled %d packets\n", __func__, npkts); /* * Go through the process of fully tearing down * the RX buffers and reinitialising them. * * There's a hardware bug that causes the RX FIFO * to get confused under certain conditions and * constantly write over the same frame, leading * the RX driver code here to get heavily confused. */ /* * XXX Has RX DMA stopped enough here to just call * ath_startrecv()? * XXX Do we need to use the holding buffer to restart * RX DMA by appending entries to the final * descriptor? Quite likely. */ #if 1 ath_startrecv(sc); #else /* * Disabled for now - it'd be nice to be able to do * this in order to limit the amount of CPU time spent * reinitialising the RX side (and thus minimise RX * drops) however there's a hardware issue that * causes things to get too far out of whack. */ /* * XXX can we hold the PCU lock here? * Are there any net80211 buffer calls involved? */ bf = TAILQ_FIRST(&sc->sc_rxbuf); ath_hal_putrxbuf(ah, bf->bf_daddr, HAL_RX_QUEUE_HP); ath_hal_rxena(ah); /* enable recv descriptors */ ath_mode_init(sc); /* set filters, etc. */ ath_hal_startpcurecv(ah); /* re-enable PCU/DMA engine */ #endif ath_hal_intrset(ah, sc->sc_imask); sc->sc_kickpcu = 0; ATH_PCU_UNLOCK(sc); } #ifdef IEEE80211_SUPPORT_SUPERG if (resched) ieee80211_ff_age_all(ic, 100); #endif /* * Put the hardware to sleep again if we're done with it. */ ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); /* * If we hit the maximum number of frames in this round, * reschedule for another immediate pass. This gives * the TX and TX completion routines time to run, which * will reduce latency. */ if (npkts >= ATH_RX_MAX) sc->sc_rx.recv_sched(sc, resched); ATH_PCU_LOCK(sc); sc->sc_rxproc_cnt--; ATH_PCU_UNLOCK(sc); } #undef PA2DESC #undef ATH_RX_MAX /* * Only run the RX proc if it's not already running. * Since this may get run as part of the reset/flush path, * the task can't clash with an existing, running tasklet. 
*/ static void ath_legacy_rx_tasklet(void *arg, int npending) { struct ath_softc *sc = arg; ATH_KTR(sc, ATH_KTR_RXPROC, 1, "ath_rx_proc: pending=%d", npending); DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: pending %u\n", __func__, npending); ATH_PCU_LOCK(sc); if (sc->sc_inreset_cnt > 0) { device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; skipping\n", __func__); ATH_PCU_UNLOCK(sc); return; } ATH_PCU_UNLOCK(sc); ath_rx_proc(sc, 1); } static void ath_legacy_flushrecv(struct ath_softc *sc) { ath_rx_proc(sc, 0); } static void ath_legacy_flush_rxpending(struct ath_softc *sc) { /* XXX ATH_RX_LOCK_ASSERT(sc); */ if (sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending != NULL) { m_freem(sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending); sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending = NULL; } if (sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending != NULL) { m_freem(sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending); sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending = NULL; } } static int ath_legacy_flush_rxholdbf(struct ath_softc *sc) { struct ath_buf *bf; /* XXX ATH_RX_LOCK_ASSERT(sc); */ /* * If there are RX holding buffers, free them here and return * them to the list. * * XXX should just verify that bf->bf_m is NULL, as it must * be at this point! */ bf = sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf; if (bf != NULL) { if (bf->bf_m != NULL) m_freem(bf->bf_m); bf->bf_m = NULL; TAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list); (void) ath_rxbuf_init(sc, bf); } sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf = NULL; bf = sc->sc_rxedma[HAL_RX_QUEUE_LP].m_holdbf; if (bf != NULL) { if (bf->bf_m != NULL) m_freem(bf->bf_m); bf->bf_m = NULL; TAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list); (void) ath_rxbuf_init(sc, bf); } sc->sc_rxedma[HAL_RX_QUEUE_LP].m_holdbf = NULL; return (0); } /* * Disable the receive h/w in preparation for a reset. */ static void ath_legacy_stoprecv(struct ath_softc *sc, int dodelay) { #define PA2DESC(_sc, _pa) \ ((struct ath_desc *)((caddr_t)(_sc)->sc_rxdma.dd_desc + \ ((_pa) - (_sc)->sc_rxdma.dd_desc_paddr))) struct ath_hal *ah = sc->sc_ah; ATH_RX_LOCK(sc); ath_hal_stoppcurecv(ah); /* disable PCU */ ath_hal_setrxfilter(ah, 0); /* clear recv filter */ ath_hal_stopdmarecv(ah); /* disable DMA engine */ /* * TODO: see if this particular DELAY() is required; it may be * masking some missing FIFO flush or DMA sync. */ #if 0 if (dodelay) #endif DELAY(3000); /* 3ms is long enough for 1 frame */ #ifdef ATH_DEBUG if (sc->sc_debug & (ATH_DEBUG_RESET | ATH_DEBUG_FATAL)) { struct ath_buf *bf; u_int ix; device_printf(sc->sc_dev, "%s: rx queue %p, link %p\n", __func__, (caddr_t)(uintptr_t) ath_hal_getrxbuf(ah, HAL_RX_QUEUE_HP), sc->sc_rxlink); ix = 0; TAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) { struct ath_desc *ds = bf->bf_desc; struct ath_rx_status *rs = &bf->bf_status.ds_rxstat; HAL_STATUS status = ath_hal_rxprocdesc(ah, ds, bf->bf_daddr, PA2DESC(sc, ds->ds_link), rs); if (status == HAL_OK || (sc->sc_debug & ATH_DEBUG_FATAL)) ath_printrxbuf(sc, bf, ix, status == HAL_OK); ix++; } } #endif (void) ath_legacy_flush_rxpending(sc); (void) ath_legacy_flush_rxholdbf(sc); sc->sc_rxlink = NULL; /* just in case */ ATH_RX_UNLOCK(sc); #undef PA2DESC } /* * XXX TODO: something was calling startrecv without calling * stoprecv. Let's figure out what/why. It was showing up * as a mbuf leak (rxpending) and ath_buf leak (holdbf.) */ /* * Enable the receive h/w following a reset. */ static int ath_legacy_startrecv(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; ATH_RX_LOCK(sc); /* * XXX should verify these are already all NULL! 
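* * (Illustrative: each ath_rxbuf_init() call in the loop below hangs its descriptor off *sc_rxlink, yielding a chain ds[0] -> ds[1] -> ... -> ds[n-1]; on parts that use sc_rxslink the tail descriptor is left pointing at itself, which is the "self-linked entry" that ath_rx_proc() deliberately refuses to process.) 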
*/ sc->sc_rxlink = NULL; (void) ath_legacy_flush_rxpending(sc); (void) ath_legacy_flush_rxholdbf(sc); /* * Re-chain all of the buffers in the RX buffer list. */ TAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) { int error = ath_rxbuf_init(sc, bf); if (error != 0) { DPRINTF(sc, ATH_DEBUG_RECV, "%s: ath_rxbuf_init failed %d\n", __func__, error); return error; } } bf = TAILQ_FIRST(&sc->sc_rxbuf); ath_hal_putrxbuf(ah, bf->bf_daddr, HAL_RX_QUEUE_HP); ath_hal_rxena(ah); /* enable recv descriptors */ ath_mode_init(sc); /* set filters, etc. */ ath_hal_startpcurecv(ah); /* re-enable PCU/DMA engine */ ATH_RX_UNLOCK(sc); return 0; } static int ath_legacy_dma_rxsetup(struct ath_softc *sc) { int error; error = ath_descdma_setup(sc, &sc->sc_rxdma, &sc->sc_rxbuf, "rx", sizeof(struct ath_desc), ath_rxbuf, 1); if (error != 0) return (error); return (0); } static int ath_legacy_dma_rxteardown(struct ath_softc *sc) { if (sc->sc_rxdma.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_rxdma, &sc->sc_rxbuf); return (0); } static void ath_legacy_recv_sched(struct ath_softc *sc, int dosched) { taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask); } static void ath_legacy_recv_sched_queue(struct ath_softc *sc, HAL_RX_QUEUE q, int dosched) { taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask); } void ath_recv_setup_legacy(struct ath_softc *sc) { /* Sensible legacy defaults */ /* * XXX this should be changed to properly support the * exact RX descriptor size for each HAL. */ sc->sc_rx_statuslen = sizeof(struct ath_desc); sc->sc_rx.recv_start = ath_legacy_startrecv; sc->sc_rx.recv_stop = ath_legacy_stoprecv; sc->sc_rx.recv_flush = ath_legacy_flushrecv; sc->sc_rx.recv_tasklet = ath_legacy_rx_tasklet; sc->sc_rx.recv_rxbuf_init = ath_legacy_rxbuf_init; sc->sc_rx.recv_setup = ath_legacy_dma_rxsetup; sc->sc_rx.recv_teardown = ath_legacy_dma_rxteardown; sc->sc_rx.recv_sched = ath_legacy_recv_sched; sc->sc_rx.recv_sched_queue = ath_legacy_recv_sched_queue; } Index: projects/clang391-import/sys/dev/ath/if_ath_tx_edma.c =================================================================== --- projects/clang391-import/sys/dev/ath/if_ath_tx_edma.c (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_ath_tx_edma.c (revision 309263) @@ -1,1042 +1,1061 @@ /*- * Copyright (c) 2012 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif #include #ifdef ATH_DEBUG_ALQ #include #endif /* * some general macros */ #define INCR(_l, _sz) (_l) ++; (_l) &= ((_sz) - 1) #define DECR(_l, _sz) (_l) --; (_l) &= ((_sz) - 1) /* * XXX doesn't belong here, and should be tunable */ #define ATH_TXSTATUS_RING_SIZE 512 MALLOC_DECLARE(M_ATHDEV); static void ath_edma_tx_processq(struct ath_softc *sc, int dosched); #ifdef ATH_DEBUG_ALQ static void ath_tx_alq_edma_push(struct ath_softc *sc, int txq, int nframes, int fifo_depth, int frame_cnt) { struct if_ath_alq_tx_fifo_push aq; aq.txq = htobe32(txq); aq.nframes = htobe32(nframes); aq.fifo_depth = htobe32(fifo_depth); aq.frame_cnt = htobe32(frame_cnt); if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TX_FIFO_PUSH, sizeof(aq), (const char *) &aq); } #endif /* ATH_DEBUG_ALQ */ /* * XXX TODO: push an aggregate as a single FIFO slot, even though * it may not meet the TXOP for say, DBA-gated traffic in TDMA mode. * * The TX completion code handles a TX FIFO slot having multiple frames, * aggregate or otherwise, but it may just make things easier to deal * with. * * XXX TODO: track the number of aggregate subframes and put that in the * push alq message. */ static void ath_tx_edma_push_staging_list(struct ath_softc *sc, struct ath_txq *txq, int limit) { struct ath_buf *bf, *bf_last; struct ath_buf *bfi, *bfp; int i, sqdepth; TAILQ_HEAD(axq_q_f_s, ath_buf) sq; ATH_TXQ_LOCK_ASSERT(txq); /* * Don't bother doing any work if it's full. */ if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) return; if (TAILQ_EMPTY(&txq->axq_q)) return; TAILQ_INIT(&sq); /* * First pass - walk sq, queue up to 'limit' entries, * subtract them from the staging queue. 
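* * (Shape of the routine, for orientation: pass one dequeues up to 'limit' bufs from the txq staging queue into the local sq list; pass two tags the head with ATH_BUF_FIFOPTR and the tail with ATH_BUF_FIFOEND and links each bf_lastds to the next buffer's bf_daddr, so the whole batch is handed to the hardware as a single FIFO slot.) 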
*/ sqdepth = 0; for (i = 0; i < limit; i++) { /* Grab the head entry */ bf = ATH_TXQ_FIRST(txq); if (bf == NULL) break; ATH_TXQ_REMOVE(txq, bf, bf_list); /* Queue it into our staging list */ TAILQ_INSERT_TAIL(&sq, bf, bf_list); /* Ensure the flags are cleared */ bf->bf_flags &= ~(ATH_BUF_FIFOPTR | ATH_BUF_FIFOEND); sqdepth++; } /* * Ok, so now we have a staging list of up to 'limit' * frames from the txq. Now let's wrap that up * into its own list and pass that to the hardware * as one FIFO entry. */ bf = TAILQ_FIRST(&sq); bf_last = TAILQ_LAST(&sq, axq_q_s); /* * Ok, so here's the gymnastics required to make this * all sensible. */ /* * Tag the first/last buffer appropriately. */ bf->bf_flags |= ATH_BUF_FIFOPTR; bf_last->bf_flags |= ATH_BUF_FIFOEND; /* * Walk the descriptor list and link them appropriately. */ bfp = NULL; TAILQ_FOREACH(bfi, &sq, bf_list) { if (bfp != NULL) { ath_hal_settxdesclink(sc->sc_ah, bfp->bf_lastds, bfi->bf_daddr); } bfp = bfi; } i = 0; TAILQ_FOREACH(bfi, &sq, bf_list) { #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_XMIT_DESC) ath_printtxbuf(sc, bfi, txq->axq_qnum, i, 0); #endif /* ATH_DEBUG */ #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC)) ath_tx_alq_post(sc, bfi); #endif /* ATH_DEBUG_ALQ */ i++; } /* * We now need to push this set of frames onto the tail * of the FIFO queue. We don't adjust the aggregate * count, only the queue depth counter(s). * We also need to blank the link pointer now. */ TAILQ_CONCAT(&txq->fifo.axq_q, &sq, bf_list); /* Bump total queue tracking in FIFO queue */ txq->fifo.axq_depth += sqdepth; /* Bump FIFO queue */ txq->axq_fifo_depth++; DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC, "%s: queued %d packets; depth=%d, fifo depth=%d\n", __func__, sqdepth, txq->fifo.axq_depth, txq->axq_fifo_depth); /* Push the first entry into the hardware */ ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr); /* Push start on the DMA if it's not already started */ ath_hal_txstart(sc->sc_ah, txq->axq_qnum); #ifdef ATH_DEBUG_ALQ ath_tx_alq_edma_push(sc, txq->axq_qnum, sqdepth, txq->axq_fifo_depth, txq->fifo.axq_depth); #endif /* ATH_DEBUG_ALQ */ } #define TX_BATCH_SIZE 32 /* * Push some frames into the TX FIFO if we have space. */ static void ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq) { ATH_TXQ_LOCK_ASSERT(txq); DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: called; fifo.depth=%d, fifo depth=%d, depth=%d, aggr_depth=%d\n", __func__, txq->axq_qnum, txq->fifo.axq_depth, txq->axq_fifo_depth, txq->axq_depth, txq->axq_aggr_depth); /* * For now, push up to 32 frames per TX FIFO slot. * If more are in the hardware queue then they'll * get populated when we try to send another frame * or complete a frame - so at most there'll be * 32 non-AMPDU frames per node/TID anyway. * * Note that the hardware staging queue will limit * how many frames in total we will have pushed into * here. * * Later on, we'll want to push fewer frames into * the TX FIFO since we don't want to necessarily * fill tens or hundreds of milliseconds of potential * frames. * * However, we need more frames right now because of * how the MAC implements the frame scheduling policy. * It only ungates a single FIFO entry at a time, * and will run that until CHNTIME expires or the * end of that FIFO entry descriptor list is reached. * So for TDMA we suffer a big performance penalty - * single TX FIFO entries mean the MAC only sends out * one frame per DBA event, which turned out to be * 6ms per TX frame on average. 
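* * (Back-of-envelope: one frame per DBA event at ~6ms per frame is only ~166 frames/sec, i.e. roughly 2Mbit/s of 1500 byte frames - hence the batching below.) 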
* * So, for aggregates it's okay - it'll push two at a * time and this will just do them more efficiently. * For non-aggregates it'll do 4 at a time, up to the * non-aggr limit (non_aggr, which is 32.) They should * be time based rather than a hard count, but I also * do need sleep. */ /* * Do some basic, basic batching to the hardware * queue. * * If we have TX_BATCH_SIZE entries in the staging * queue, then let's try to send them all in one hit. * * Ensure we don't push more than TX_BATCH_SIZE worth * in, otherwise we end up draining 8 slots worth of * 32 frames into the hardware queue and then we don't * attempt to push more frames in until we empty the * FIFO. */ if (txq->axq_depth >= TX_BATCH_SIZE / 2 && txq->fifo.axq_depth <= TX_BATCH_SIZE) { ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE); } /* * Aggregate check: if we have less than two FIFO slots * busy and we have some aggregate frames, queue it. * * Now, ideally we'd just check to see if the scheduler * has given us aggregate frames and push them into the FIFO * as individual slots, as honestly we should just be pushing * a single aggregate in as one FIFO slot. * * Let's do that next once I know this works. */ else if (txq->axq_aggr_depth > 0 && txq->axq_fifo_depth < 2) ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE); /* * * If we have less, and the TXFIFO isn't empty, let's * wait until we've finished sending the FIFO. * * If we have less, and the TXFIFO is empty, then * send them. */ else if (txq->axq_fifo_depth == 0) { ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE); } } /* * Re-initialise the DMA FIFO with the current contents of * said TXQ. * * This should only be called as part of the chip reset path, as it * assumes the FIFO is currently empty. */ static void ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq) { struct ath_buf *bf; int i = 0; int fifostart = 1; int old_fifo_depth; DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n", __func__, txq->axq_qnum); ATH_TXQ_LOCK_ASSERT(txq); /* * Let's log if the tracked FIFO depth doesn't match * what we actually push in. */ old_fifo_depth = txq->axq_fifo_depth; txq->axq_fifo_depth = 0; /* * Walk the FIFO staging list, looking for "head" entries. * Since we may have a partially completed list of frames, * we push the first frame we see into the FIFO and re-mark * it as the head entry. We then skip entries until we see * FIFO end, at which point we get ready to push another * entry into the FIFO. */ TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) { /* * If we're looking for FIFOEND and we haven't found * it, skip. * * If we're looking for FIFOEND and we've found it, * reset for another descriptor. */ #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_XMIT_DESC) ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0); #endif/* ATH_DEBUG */ #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC)) ath_tx_alq_post(sc, bf); #endif /* ATH_DEBUG_ALQ */ if (fifostart == 0) { if (bf->bf_flags & ATH_BUF_FIFOEND) fifostart = 1; continue; } /* Make sure we're not overflowing the FIFO! */ if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) { device_printf(sc->sc_dev, "%s: Q%d: more frames in the queue; FIFO depth=%d?!\n", __func__, txq->axq_qnum, txq->axq_fifo_depth); } #if 0 DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n", __func__, txq->axq_qnum, txq->axq_fifo_depth, bf, !! (bf->bf_flags & ATH_BUF_FIFOPTR), !! 
(bf->bf_flags & ATH_BUF_FIFOEND)); #endif /* * Set this to be the first buffer in the FIFO * list - even if it's also the last buffer in * a FIFO list! */ bf->bf_flags |= ATH_BUF_FIFOPTR; /* Push it into the FIFO and bump the FIFO count */ ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr); txq->axq_fifo_depth++; /* * If this isn't the last entry either, let's * clear fifostart so we continue looking for * said last entry. */ if (! (bf->bf_flags & ATH_BUF_FIFOEND)) fifostart = 0; i++; } /* Only bother starting the queue if there's something in it */ if (i > 0) ath_hal_txstart(sc->sc_ah, txq->axq_qnum); DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n", __func__, txq->axq_qnum, old_fifo_depth, txq->axq_fifo_depth); /* And now, let's check! */ if (txq->axq_fifo_depth != old_fifo_depth) { device_printf(sc->sc_dev, "%s: Q%d: FIFO depth should be %d, is %d\n", __func__, txq->axq_qnum, old_fifo_depth, txq->axq_fifo_depth); } } /* * Hand off this frame to a hardware queue. * * Things are a bit hairy in the EDMA world. The TX FIFO is only * 8 entries deep, so we need to keep track of exactly what we've * pushed into the FIFO and what's just sitting in the TX queue, * waiting to go out. * * So this is split into two halves - frames get appended to the * TXQ; then a scheduler is called to push some frames into the * actual TX FIFO. */ static void ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) { ATH_TXQ_LOCK(txq); KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0, ("%s: busy status 0x%x", __func__, bf->bf_flags)); /* * XXX TODO: write a hard-coded check to ensure that * the queue id in the TX descriptor matches txq->axq_qnum. */ /* Update aggr stats */ if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth++; /* Push and update frame stats */ ATH_TXQ_INSERT_TAIL(txq, bf, bf_list); /* * Finally, call the FIFO schedule routine to schedule some * frames to the FIFO. */ ath_edma_tx_fifo_fill(sc, txq); ATH_TXQ_UNLOCK(txq); } /* * Hand off this frame to a multicast software queue. * * The EDMA TX CABQ will get a list of frames, chained * together using the next pointer. The single head of that * particular queue is pushed to the hardware CABQ. */ static void ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) { ATH_TX_LOCK_ASSERT(sc); KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0, ("%s: busy status 0x%x", __func__, bf->bf_flags)); ATH_TXQ_LOCK(txq); /* * XXX this is mostly duplicated in ath_tx_handoff_mcast(). */ if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) { struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s); struct ieee80211_frame *wh; /* mark previous frame */ wh = mtod(bf_last->bf_m, struct ieee80211_frame *); wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA; /* re-sync buffer to memory */ bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap, BUS_DMASYNC_PREWRITE); /* link descriptor */ ath_hal_settxdesclink(sc->sc_ah, bf_last->bf_lastds, bf->bf_daddr); } #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC)) ath_tx_alq_post(sc, bf); #endif /* ATH_DEBUG_ALQ */ ATH_TXQ_INSERT_TAIL(txq, bf, bf_list); ATH_TXQ_UNLOCK(txq); } /* * Handoff this frame to the hardware. * * For the multicast queue, this will treat it as a software queue * and append it to the list, after updating the MORE_DATA flag * in the previous frame. The cabq processing code will ensure * that the queue contents get transferred over. 
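* * (Illustrative: the MORE_DATA dance matters because a power-save station keeps its receiver enabled for as long as received frames carry IEEE80211_FC1_MORE_DATA in i_fc[1]; only the final CABQ frame of the beacon interval leaves the bit clear, telling the station it may doze again.) 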
* * For the hardware queues, this will queue a frame to the queue * like before, then populate the FIFO from that. Since the * EDMA hardware has 8 FIFO slots per TXQ, this ensures that * frames such as management frames don't get prematurely dropped. * * This does imply that a similar flush-hwq-to-fifoq method will * need to be called from the processq function, before the * per-node software scheduler is called. */ static void ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) { DPRINTF(sc, ATH_DEBUG_XMIT_DESC, "%s: called; bf=%p, txq=%p, qnum=%d\n", __func__, bf, txq, txq->axq_qnum); if (txq->axq_qnum == ATH_TXQ_SWQ) ath_edma_xmit_handoff_mcast(sc, txq, bf); else ath_edma_xmit_handoff_hw(sc, txq, bf); } static int ath_edma_setup_txfifo(struct ath_softc *sc, int qnum) { struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum]; te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH, M_ATHDEV, M_NOWAIT | M_ZERO); if (te->m_fifo == NULL) { device_printf(sc->sc_dev, "%s: malloc failed\n", __func__); return (-ENOMEM); } /* * Set initial "empty" state. */ te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0; return (0); } static int ath_edma_free_txfifo(struct ath_softc *sc, int qnum) { struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum]; /* XXX TODO: actually deref the ath_buf entries? */ free(te->m_fifo, M_ATHDEV); return (0); } static int ath_edma_dma_txsetup(struct ath_softc *sc) { int error; int i; error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma, NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE); if (error != 0) return (error); ath_hal_setuptxstatusring(sc->sc_ah, (void *) sc->sc_txsdma.dd_desc, sc->sc_txsdma.dd_desc_paddr, ATH_TXSTATUS_RING_SIZE); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { ath_edma_setup_txfifo(sc, i); } return (0); } static int ath_edma_dma_txteardown(struct ath_softc *sc) { int i; for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { ath_edma_free_txfifo(sc, i); } ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL); return (0); } /* * Drain all TXQs, potentially after completing the existing completed * frames. */ static void ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type) { int i; DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__); (void) ath_stoptxdma(sc); /* * If reset type is noloss, the TX FIFO needs to be serviced * and those frames need to be handled. * * Otherwise, just toss everything in each TX queue. */ if (reset_type == ATH_RESET_NOLOSS) { ath_edma_tx_processq(sc, 0); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); /* * Free the holding buffer; DMA is now * stopped. */ ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]); /* * Reset the link pointer to NULL; there's * no frames to chain DMA to. */ sc->sc_txq[i].axq_link = NULL; ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } } } else { for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) ath_tx_draintxq(sc, &sc->sc_txq[i]); } } /* XXX dump out the TX completion FIFO contents */ /* XXX dump out the frames */ sc->sc_wd_timer = 0; } /* * TX completion tasklet. 
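* * (The change below brackets the processq call the same way ath_rx_proc() is bracketed - sketched with the macros already used in this driver: ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; ATH_PCU_UNLOCK(sc); then ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); before processing, with the matching decrement and ath_power_restore_power_state() afterwards - so the reset path can wait on sc_txproc_cnt and the chip stays awake for the duration.) 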
*/ static void ath_edma_tx_proc(void *arg, int npending) { struct ath_softc *sc = (struct ath_softc *) arg; + ATH_PCU_LOCK(sc); + sc->sc_txproc_cnt++; + ATH_PCU_UNLOCK(sc); + + ATH_LOCK(sc); + ath_power_set_power_state(sc, HAL_PM_AWAKE); + ATH_UNLOCK(sc); + #if 0 DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n", __func__, npending); #endif ath_edma_tx_processq(sc, 1); + + + ATH_PCU_LOCK(sc); + sc->sc_txproc_cnt--; + ATH_PCU_UNLOCK(sc); + + ATH_LOCK(sc); + ath_power_restore_power_state(sc); + ATH_UNLOCK(sc); + + ath_tx_kick(sc); } /* * Process the TX status queue. */ static void ath_edma_tx_processq(struct ath_softc *sc, int dosched) { struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; struct ath_tx_status ts; struct ath_txq *txq; struct ath_buf *bf; struct ieee80211_node *ni; int nacked = 0; int idx; int i; #ifdef ATH_DEBUG /* XXX */ uint32_t txstatus[32]; #endif for (idx = 0; ; idx++) { bzero(&ts, sizeof(ts)); ATH_TXSTATUS_LOCK(sc); #ifdef ATH_DEBUG ath_hal_gettxrawtxdesc(ah, txstatus); #endif status = ath_hal_txprocdesc(ah, NULL, (void *) &ts); ATH_TXSTATUS_UNLOCK(sc); if (status == HAL_EINPROGRESS) break; #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_TX_PROC) if (ts.ts_queue_id != sc->sc_bhalq) ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id, idx, (status == HAL_OK)); #endif /* * If there is an error with this descriptor, continue * processing. * * XXX TBD: log some statistics? */ if (status == HAL_EIO) { device_printf(sc->sc_dev, "%s: invalid TX status?\n", __func__); break; } #if defined(ATH_DEBUG_ALQ) && defined(ATH_DEBUG) if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) { if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS, sc->sc_tx_statuslen, (char *) txstatus); } #endif /* ATH_DEBUG_ALQ */ /* * At this point we have a valid status descriptor. * The QID and descriptor ID (which currently isn't set) * is part of the status. * * We then assume that the descriptor in question is the * -head- of the given QID. Eventually we should verify * this by using the descriptor ID. */ /* * The beacon queue is not currently a "real" queue. * Frames aren't pushed onto it and the lock isn't setup. * So skip it for now; the beacon handling code will * free and alloc more beacon buffers as appropriate. */ if (ts.ts_queue_id == sc->sc_bhalq) continue; txq = &sc->sc_txq[ts.ts_queue_id]; ATH_TXQ_LOCK(txq); bf = ATH_TXQ_FIRST(&txq->fifo); /* * Work around the situation where I'm seeing notifications * for Q1 when no frames are available. That needs to be * debugged but not by crashing _here_. */ if (bf == NULL) { device_printf(sc->sc_dev, "%s: Q%d: empty?\n", __func__, ts.ts_queue_id); ATH_TXQ_UNLOCK(txq); continue; } DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n", __func__, ts.ts_queue_id, bf, !! (bf->bf_flags & ATH_BUF_FIFOPTR), !! 
(bf->bf_flags & ATH_BUF_FIFOEND)); /* XXX TODO: actually output debugging info about this */ #if 0 /* XXX assert the buffer/descriptor matches the status descid */ if (ts.ts_desc_id != bf->bf_descid) { device_printf(sc->sc_dev, "%s: mismatched descid (qid=%d, tsdescid=%d, " "bfdescid=%d\n", __func__, ts.ts_queue_id, ts.ts_desc_id, bf->bf_descid); } #endif /* This removes the buffer and decrements the queue depth */ ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list); if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth--; /* * If this was the end of a FIFO set, decrement FIFO depth */ if (bf->bf_flags & ATH_BUF_FIFOEND) txq->axq_fifo_depth--; /* * If this isn't the final buffer in a FIFO set, mark * the buffer as busy so it goes onto the holding queue. */ if (! (bf->bf_flags & ATH_BUF_FIFOEND)) bf->bf_flags |= ATH_BUF_BUSY; DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n", __func__, txq->axq_qnum, txq->axq_fifo_depth, txq->fifo.axq_depth); /* XXX assert FIFO depth >= 0 */ ATH_TXQ_UNLOCK(txq); /* * Outside of the TX lock - if the buffer is the end * buffer in this FIFO, we don't need a holding * buffer any longer. */ if (bf->bf_flags & ATH_BUF_FIFOEND) { ATH_TXQ_LOCK(txq); ath_txq_freeholdingbuf(sc, txq); ATH_TXQ_UNLOCK(txq); } /* * First we need to make sure ts_rate is valid. * * Pre-EDMA chips pass the whole TX descriptor to * the proctxdesc function which will then fill out * ts_rate based on the ts_finaltsi (final TX index) * in the TX descriptor. However the TX completion * FIFO doesn't have this information. So here we * do a separate HAL call to populate that information. * * The same problem exists with ts_longretry. * The FreeBSD HAL corrects ts_longretry in the HAL layer; * the AR9380 HAL currently doesn't. So until the HAL * is imported and this can be added, we correct for it * here. */ /* XXX TODO */ /* XXX faked for now. Ew. */ if (ts.ts_finaltsi < 4) { ts.ts_rate = bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode; switch (ts.ts_finaltsi) { case 3: ts.ts_longretry += bf->bf_state.bfs_rc[2].tries; case 2: ts.ts_longretry += bf->bf_state.bfs_rc[1].tries; case 1: ts.ts_longretry += bf->bf_state.bfs_rc[0].tries; } } else { device_printf(sc->sc_dev, "%s: finaltsi=%d\n", __func__, ts.ts_finaltsi); ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode; } /* * XXX This is terrible. * * Right now, some code uses the TX status that is * passed in here, but the completion handlers in the * software TX path also use bf_status.ds_txstat. * Ew. That should all go away. * * XXX It's also possible the rate control completion * routine is called twice. */ memcpy(&bf->bf_status, &ts, sizeof(ts)); ni = bf->bf_node; /* Update RSSI */ /* XXX duplicate from ath_tx_processq */ if (ni != NULL && ts.ts_status == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) { nacked++; sc->sc_stats.ast_tx_rssi = ts.ts_rssi; ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi, ts.ts_rssi); } /* Handle frame completion and rate control update */ ath_tx_process_buf_completion(sc, txq, &ts, bf); /* NB: bf is invalid at this point */ } sc->sc_wd_timer = 0; /* * XXX It's inefficient to do this if the FIFO queue is full, * but there's no easy way right now to only populate * the txq task for _one_ TXQ. This should be fixed. 
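* * (For scale, assuming HAL_NUM_TX_QUEUES is 10 as on current parts: the loop below takes and drops every TXQ lock on each completion pass, i.e. ~10 lock round-trips even when only a single queue saw traffic.) 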
*/ if (dosched) { /* Attempt to schedule more hardware frames to the TX FIFO */ for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); ath_edma_tx_fifo_fill(sc, &sc->sc_txq[i]); ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } } /* Kick software scheduler */ ath_tx_swq_kick(sc); } } static void ath_edma_attach_comp_func(struct ath_softc *sc) { TASK_INIT(&sc->sc_txtask, 0, ath_edma_tx_proc, sc); } void ath_xmit_setup_edma(struct ath_softc *sc) { /* Fetch EDMA field and buffer sizes */ (void) ath_hal_gettxdesclen(sc->sc_ah, &sc->sc_tx_desclen); (void) ath_hal_gettxstatuslen(sc->sc_ah, &sc->sc_tx_statuslen); (void) ath_hal_getntxmaps(sc->sc_ah, &sc->sc_tx_nmaps); if (bootverbose) { device_printf(sc->sc_dev, "TX descriptor length: %d\n", sc->sc_tx_desclen); device_printf(sc->sc_dev, "TX status length: %d\n", sc->sc_tx_statuslen); device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n", sc->sc_tx_nmaps); } sc->sc_tx.xmit_setup = ath_edma_dma_txsetup; sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown; sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func; sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart; sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff; sc->sc_tx.xmit_drain = ath_edma_tx_drain; } Index: projects/clang391-import/sys/dev/ath/if_athioctl.h =================================================================== --- projects/clang391-import/sys/dev/ath/if_athioctl.h (revision 309262) +++ projects/clang391-import/sys/dev/ath/if_athioctl.h (revision 309263) @@ -1,450 +1,455 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ /* * Ioctl-related definitions for the Atheros Wireless LAN controller driver. 
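* * (Illustrative userland usage, not defined in this header: the stats ioctl is issued against a socket with the interface name in an ifreq, e.g. struct ifreq ifr; struct ath_stats stats; strlcpy(ifr.ifr_name, "ath0", sizeof(ifr.ifr_name)); ifr.ifr_data = (caddr_t) &stats; ioctl(s, SIOCGATHSTATS, &ifr); - which is roughly what athstats(8) does.) 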
*/ #ifndef _DEV_ATH_ATHIOCTL_H #define _DEV_ATH_ATHIOCTL_H struct ath_tx_aggr_stats { u_int32_t aggr_pkts[64]; u_int32_t aggr_single_pkt; u_int32_t aggr_nonbaw_pkt; u_int32_t aggr_aggr_pkt; u_int32_t aggr_baw_closed_single_pkt; u_int32_t aggr_low_hwq_single_pkt; u_int32_t aggr_sched_nopkt; u_int32_t aggr_rts_aggr_limited; }; struct ath_intr_stats { u_int32_t sync_intr[32]; }; struct ath_stats { u_int32_t ast_watchdog; /* device reset by watchdog */ u_int32_t ast_hardware; /* fatal hardware error interrupts */ u_int32_t ast_bmiss; /* beacon miss interrupts */ u_int32_t ast_bmiss_phantom;/* beacon miss interrupts */ u_int32_t ast_bstuck; /* beacon stuck interrupts */ u_int32_t ast_rxorn; /* rx overrun interrupts */ u_int32_t ast_rxeol; /* rx eol interrupts */ u_int32_t ast_txurn; /* tx underrun interrupts */ u_int32_t ast_mib; /* mib interrupts */ u_int32_t ast_intrcoal; /* interrupts coalesced */ u_int32_t ast_tx_packets; /* packet sent on the interface */ u_int32_t ast_tx_mgmt; /* management frames transmitted */ u_int32_t ast_tx_discard; /* frames discarded prior to assoc */ u_int32_t ast_tx_qstop; /* output stopped 'cuz no buffer */ u_int32_t ast_tx_encap; /* tx encapsulation failed */ u_int32_t ast_tx_nonode; /* tx failed 'cuz no node */ u_int32_t ast_tx_nombuf; /* tx failed 'cuz no mbuf */ u_int32_t ast_tx_nomcl; /* tx failed 'cuz no cluster */ u_int32_t ast_tx_linear; /* tx linearized to cluster */ u_int32_t ast_tx_nodata; /* tx discarded empty frame */ u_int32_t ast_tx_busdma; /* tx failed for dma resrcs */ u_int32_t ast_tx_xretries;/* tx failed 'cuz too many retries */ u_int32_t ast_tx_fifoerr; /* tx failed 'cuz FIFO underrun */ u_int32_t ast_tx_filtered;/* tx failed 'cuz xmit filtered */ u_int32_t ast_tx_shortretry;/* tx on-chip retries (short) */ u_int32_t ast_tx_longretry;/* tx on-chip retries (long) */ u_int32_t ast_tx_badrate; /* tx failed 'cuz bogus xmit rate */ u_int32_t ast_tx_noack; /* tx frames with no ack marked */ u_int32_t ast_tx_rts; /* tx frames with rts enabled */ u_int32_t ast_tx_cts; /* tx frames with cts enabled */ u_int32_t ast_tx_shortpre;/* tx frames with short preamble */ u_int32_t ast_tx_altrate; /* tx frames with alternate rate */ u_int32_t ast_tx_protect; /* tx frames with protection */ u_int32_t ast_tx_ctsburst;/* tx frames with cts and bursting */ u_int32_t ast_tx_ctsext; /* tx frames with cts extension */ u_int32_t ast_rx_nombuf; /* rx setup failed 'cuz no mbuf */ u_int32_t ast_rx_busdma; /* rx setup failed for dma resrcs */ u_int32_t ast_rx_orn; /* rx failed 'cuz of desc overrun */ u_int32_t ast_rx_crcerr; /* rx failed 'cuz of bad CRC */ u_int32_t ast_rx_fifoerr; /* rx failed 'cuz of FIFO overrun */ u_int32_t ast_rx_badcrypt;/* rx failed 'cuz decryption */ u_int32_t ast_rx_badmic; /* rx failed 'cuz MIC failure */ u_int32_t ast_rx_phyerr; /* rx failed 'cuz of PHY err */ u_int32_t ast_rx_phy[64]; /* rx PHY error per-code counts */ u_int32_t ast_rx_tooshort;/* rx discarded 'cuz frame too short */ u_int32_t ast_rx_toobig; /* rx discarded 'cuz frame too large */ u_int32_t ast_rx_packets; /* packet recv on the interface */ u_int32_t ast_rx_mgt; /* management frames received */ u_int32_t ast_rx_ctl; /* rx discarded 'cuz ctl frame */ int8_t ast_tx_rssi; /* tx rssi of last ack */ int8_t ast_rx_rssi; /* rx rssi from histogram */ u_int8_t ast_tx_rate; /* IEEE rate of last unicast tx */ u_int32_t ast_be_xmit; /* beacons transmitted */ u_int32_t ast_be_nombuf; /* beacon setup failed 'cuz no mbuf */ u_int32_t ast_per_cal; /* periodic calibration calls */ u_int32_t 
ast_per_calfail;/* periodic calibration failed */ u_int32_t ast_per_rfgain; /* periodic calibration rfgain reset */ u_int32_t ast_rate_calls; /* rate control checks */ u_int32_t ast_rate_raise; /* rate control raised xmit rate */ u_int32_t ast_rate_drop; /* rate control dropped xmit rate */ u_int32_t ast_ant_defswitch;/* rx/default antenna switches */ u_int32_t ast_ant_txswitch;/* tx antenna switches */ u_int32_t ast_ant_rx[8]; /* rx frames with antenna */ u_int32_t ast_ant_tx[8]; /* tx frames with antenna */ u_int32_t ast_cabq_xmit; /* cabq frames transmitted */ u_int32_t ast_cabq_busy; /* cabq found busy */ u_int32_t ast_tx_raw; /* tx frames through raw api */ u_int32_t ast_ff_txok; /* fast frames tx'd successfully */ u_int32_t ast_ff_txerr; /* fast frames tx'd w/ error */ u_int32_t ast_ff_rx; /* fast frames rx'd */ u_int32_t ast_ff_flush; /* fast frames flushed from staging q */ u_int32_t ast_tx_qfull; /* tx dropped 'cuz of queue limit */ int8_t ast_rx_noise; /* rx noise floor */ u_int32_t ast_tx_nobuf; /* tx dropped 'cuz no ath buffer */ u_int32_t ast_tdma_update;/* TDMA slot timing updates */ u_int32_t ast_tdma_timers;/* TDMA slot update set beacon timers */ u_int32_t ast_tdma_tsf; /* TDMA slot update set TSF */ u_int16_t ast_tdma_tsfadjp;/* TDMA slot adjust+ (usec, smoothed)*/ u_int16_t ast_tdma_tsfadjm;/* TDMA slot adjust- (usec, smoothed)*/ u_int32_t ast_tdma_ack; /* TDMA tx failed 'cuz ACK required */ u_int32_t ast_tx_raw_fail;/* raw tx failed 'cuz h/w down */ u_int32_t ast_tx_nofrag; /* tx dropped 'cuz no ath frag buffer */ u_int32_t ast_be_missed; /* missed beacons */ u_int32_t ast_ani_cal; /* ANI calibrations performed */ u_int32_t ast_rx_agg; /* number of aggregate frames RX'ed */ u_int32_t ast_rx_halfgi; /* RX half-GI */ u_int32_t ast_rx_2040; /* RX 40mhz frame */ u_int32_t ast_rx_pre_crc_err; /* RX pre-delimiter CRC error */ u_int32_t ast_rx_post_crc_err; /* RX post-delimiter CRC error */ u_int32_t ast_rx_decrypt_busy_err; /* RX decrypt engine busy error */ u_int32_t ast_rx_hi_rx_chain; u_int32_t ast_tx_htprotect; /* HT tx frames with protection */ u_int32_t ast_rx_hitqueueend; /* RX hit descr queue end */ u_int32_t ast_tx_timeout; /* Global TX timeout */ u_int32_t ast_tx_cst; /* Carrier sense timeout */ u_int32_t ast_tx_xtxop; /* tx exceeded TXOP */ u_int32_t ast_tx_timerexpired; /* tx exceeded TX_TIMER */ u_int32_t ast_tx_desccfgerr; /* tx desc cfg error */ u_int32_t ast_tx_swretries; /* software TX retries */ u_int32_t ast_tx_swretrymax; /* software TX retry max limit reach */ u_int32_t ast_tx_data_underrun; u_int32_t ast_tx_delim_underrun; u_int32_t ast_tx_aggr_failall; /* aggregate TX failed in its entirety */ u_int32_t ast_tx_getnobuf; u_int32_t ast_tx_getbusybuf; u_int32_t ast_tx_intr; u_int32_t ast_rx_intr; u_int32_t ast_tx_aggr_ok; /* aggregate TX ok */ u_int32_t ast_tx_aggr_fail; /* aggregate TX failed */ u_int32_t ast_tx_mcastq_overflow; /* multicast queue overflow */ u_int32_t ast_rx_keymiss; u_int32_t ast_tx_swfiltered; u_int32_t ast_tx_node_psq_overflow; u_int32_t ast_rx_stbc; /* RX STBC frame */ u_int32_t ast_tx_nodeq_overflow; /* node sw queue overflow */ u_int32_t ast_tx_ldpc; /* TX LDPC frame */ u_int32_t ast_tx_stbc; /* TX STBC frame */ u_int32_t ast_pad[10]; }; #define SIOCGATHSTATS _IOWR('i', 137, struct ifreq) #define SIOCZATHSTATS _IOWR('i', 139, struct ifreq) #define SIOCGATHAGSTATS _IOWR('i', 141, struct ifreq) struct ath_diag { char ad_name[IFNAMSIZ]; /* if name, e.g. 
"ath0" */ u_int16_t ad_id; #define ATH_DIAG_DYN 0x8000 /* allocate buffer in caller */ #define ATH_DIAG_IN 0x4000 /* copy in parameters */ #define ATH_DIAG_OUT 0x0000 /* copy out results (always) */ #define ATH_DIAG_ID 0x0fff u_int16_t ad_in_size; /* pack to fit, yech */ caddr_t ad_in_data; caddr_t ad_out_data; u_int ad_out_size; }; #define SIOCGATHDIAG _IOWR('i', 138, struct ath_diag) #define SIOCGATHPHYERR _IOWR('i', 140, struct ath_diag) /* * The rate control ioctl has to support multiple potential rate * control classes. For now, instead of trying to support an * abstraction for this in the API, let's just use a TLV * representation for the payload and let userspace sort it out. */ struct ath_rateioctl_tlv { uint16_t tlv_id; uint16_t tlv_len; /* length excluding TLV header */ }; /* * This is purely the six byte MAC address. */ #define ATH_RATE_TLV_MACADDR 0xaab0 /* * The rate control modules may decide to push a mapping table * of rix -> net80211 ratecode as part of the update. */ #define ATH_RATE_TLV_RATETABLE_NENTRIES 64 struct ath_rateioctl_rt { uint16_t nentries; uint16_t pad[1]; uint8_t ratecode[ATH_RATE_TLV_RATETABLE_NENTRIES]; }; #define ATH_RATE_TLV_RATETABLE 0xaab1 /* * This is the sample node statistics structure. * More in ath_rate/sample/sample.h. */ #define ATH_RATE_TLV_SAMPLENODE 0xaab2 struct ath_rateioctl { char if_name[IFNAMSIZ]; /* if name */ union { uint8_t macaddr[IEEE80211_ADDR_LEN]; uint64_t pad; } is_u; uint32_t len; caddr_t buf; }; #define SIOCGATHNODERATESTATS _IOWR('i', 149, struct ath_rateioctl) #define SIOCGATHRATESTATS _IOWR('i', 150, struct ath_rateioctl) /* * Radio capture format. */ #define ATH_RX_RADIOTAP_PRESENT_BASE ( \ (1 << IEEE80211_RADIOTAP_TSFT) | \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_ANTENNA) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTNOISE) | \ (1 << IEEE80211_RADIOTAP_XCHANNEL) | \ 0) #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT #define ATH_RX_RADIOTAP_PRESENT \ (ATH_RX_RADIOTAP_PRESENT_BASE | \ (1 << IEEE80211_RADIOTAP_VENDOREXT) | \ (1 << IEEE80211_RADIOTAP_EXT) | \ 0) #else #define ATH_RX_RADIOTAP_PRESENT ATH_RX_RADIOTAP_PRESENT_BASE #endif /* ATH_ENABLE_RADIOTAP_PRESENT */ #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * This is higher than the vendor bitmap used inside * the Atheros reference codebase. */ /* Bit 8 */ #define ATH_RADIOTAP_VENDOR_HEADER 8 /* * Using four chains makes all the fields in the * per-chain info header be 4-byte aligned. */ #define ATH_RADIOTAP_MAX_CHAINS 4 /* * AR9380 and later chips are 3x3, which requires * 5 EVM DWORDs in HT40 mode. */ #define ATH_RADIOTAP_MAX_EVM 5 /* * The vendor radiotap header data needs to be: * * + Aligned to a 4 byte address * + .. so all internal fields are 4 bytes aligned; * + .. and no 64 bit fields are allowed. * * So padding is required to ensure this is the case. * * Note that because of the lack of alignment with the * vendor header (6 bytes), the first field must be * two bytes so it can be accessed by alignment-strict * platform (eg MIPS.) 
*/ struct ath_radiotap_vendor_hdr { /* 30 bytes */ uint8_t vh_version; /* 1 */ uint8_t vh_rx_chainmask; /* 1 */ /* At this point it should be 4 byte aligned */ uint32_t evm[ATH_RADIOTAP_MAX_EVM]; /* 5 * 4 = 20 */ uint8_t rssi_ctl[ATH_RADIOTAP_MAX_CHAINS]; /* 4 * 4 = 16 */ uint8_t rssi_ext[ATH_RADIOTAP_MAX_CHAINS]; /* 4 * 4 = 16 */ uint8_t vh_phyerr_code; /* Phy error code, or 0xff */ uint8_t vh_rs_status; /* RX status */ uint8_t vh_rssi; /* Raw RSSI */ uint8_t vh_flags; /* General flags */ #define ATH_VENDOR_PKT_RX 0x01 #define ATH_VENDOR_PKT_TX 0x02 #define ATH_VENDOR_PKT_RXPHYERR 0x04 #define ATH_VENDOR_PKT_ISAGGR 0x08 #define ATH_VENDOR_PKT_MOREAGGR 0x10 uint8_t vh_rx_hwrate; /* hardware RX ratecode */ uint8_t vh_rs_flags; /* RX HAL flags */ uint8_t vh_pad[2]; /* pad to DWORD boundary */ } __packed; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ struct ath_rx_radiotap_header { struct ieee80211_radiotap_header wr_ihdr; #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* Vendor extension header bitmap */ uint32_t wr_ext_bitmap; /* 4 */ /* * This padding is needed because: * + the radiotap header is 8 bytes; * + the extension bitmap is 4 bytes; * + the tsf is 8 bytes, so it must start on an 8 byte * boundary. */ uint32_t wr_pad1; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ /* Normal radiotap fields */ u_int64_t wr_tsf; u_int8_t wr_flags; u_int8_t wr_rate; int8_t wr_antsignal; int8_t wr_antnoise; u_int8_t wr_antenna; u_int8_t wr_pad[3]; u_int32_t wr_chan_flags; u_int16_t wr_chan_freq; u_int8_t wr_chan_ieee; int8_t wr_chan_maxpow; #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * Vendor header section, as required by the * presence of the vendor extension bit and bitmap * entry. * * XXX This must be aligned to a 4 byte address? * XXX or 8 byte address? */ struct ieee80211_radiotap_vendor_header wr_vh; /* 6 bytes */ /* * Because of the lack of alignment enforced by the above * header, this vendor section won't be aligned in any * useful way. So, this will include a two-byte version * value which will force the structure to be 4-byte aligned. 
*/ struct ath_radiotap_vendor_hdr wr_v; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ } __packed; #define ATH_TX_RADIOTAP_PRESENT ( \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_DBM_TX_POWER) | \ (1 << IEEE80211_RADIOTAP_ANTENNA) | \ (1 << IEEE80211_RADIOTAP_XCHANNEL) | \ 0) struct ath_tx_radiotap_header { struct ieee80211_radiotap_header wt_ihdr; u_int8_t wt_flags; u_int8_t wt_rate; u_int8_t wt_txpower; u_int8_t wt_antenna; u_int32_t wt_chan_flags; u_int16_t wt_chan_freq; u_int8_t wt_chan_ieee; int8_t wt_chan_maxpow; } __packed; /* * DFS ioctl commands */ #define DFS_SET_THRESH 2 #define DFS_GET_THRESH 3 #define DFS_RADARDETECTS 6 /* * DFS ioctl parameter types */ #define DFS_PARAM_FIRPWR 1 #define DFS_PARAM_RRSSI 2 #define DFS_PARAM_HEIGHT 3 #define DFS_PARAM_PRSSI 4 #define DFS_PARAM_INBAND 5 #define DFS_PARAM_NOL 6 /* XXX not used in FreeBSD */ #define DFS_PARAM_RELSTEP_EN 7 #define DFS_PARAM_RELSTEP 8 #define DFS_PARAM_RELPWR_EN 9 #define DFS_PARAM_RELPWR 10 #define DFS_PARAM_MAXLEN 11 #define DFS_PARAM_USEFIR128 12 #define DFS_PARAM_BLOCKRADAR 13 #define DFS_PARAM_MAXRSSI_EN 14 /* FreeBSD-specific start at 32 */ #define DFS_PARAM_ENABLE 32 #define DFS_PARAM_EN_EXTCH 33 /* * Spectral ioctl parameter types */ #define SPECTRAL_PARAM_FFT_PERIOD 1 #define SPECTRAL_PARAM_SS_PERIOD 2 #define SPECTRAL_PARAM_SS_COUNT 3 #define SPECTRAL_PARAM_SS_SHORT_RPT 4 #define SPECTRAL_PARAM_ENABLED 5 #define SPECTRAL_PARAM_ACTIVE 6 /* * Spectral control parameters */ #define SIOCGATHSPECTRAL _IOWR('i', 151, struct ath_diag) #define SPECTRAL_CONTROL_ENABLE 2 #define SPECTRAL_CONTROL_DISABLE 3 #define SPECTRAL_CONTROL_START 4 #define SPECTRAL_CONTROL_STOP 5 #define SPECTRAL_CONTROL_GET_PARAMS 6 #define SPECTRAL_CONTROL_SET_PARAMS 7 #define SPECTRAL_CONTROL_ENABLE_AT_RESET 8 #define SPECTRAL_CONTROL_DISABLE_AT_RESET 9 +/* + * Bluetooth coexistence control parameters + */ +#define SIOCGATHBTCOEX _IOWR('i', 152, struct ath_diag) + #endif /* _DEV_ATH_ATHIOCTL_H */ Index: projects/clang391-import/sys/dev/hyperv/include/vmbus.h =================================================================== --- projects/clang391-import/sys/dev/hyperv/include/vmbus.h (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/include/vmbus.h (revision 309263) @@ -1,220 +1,223 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMBUS_H_ #define _VMBUS_H_ #include #include /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define VMBUS_VERSION_MAJOR(ver) (((uint32_t)(ver)) >> 16) #define VMBUS_VERSION_MINOR(ver) (((uint32_t)(ver)) & 0xffff) /* * GPA stuff. */ struct vmbus_gpa_range { uint32_t gpa_len; uint32_t gpa_ofs; uint64_t gpa_page[0]; } __packed; /* This is actually vmbus_gpa_range.gpa_page[1] */ struct vmbus_gpa { uint32_t gpa_len; uint32_t gpa_ofs; uint64_t gpa_page; } __packed; #define VMBUS_CHANPKT_SIZE_SHIFT 3 #define VMBUS_CHANPKT_GETLEN(pktlen) \ (((int)(pktlen)) << VMBUS_CHANPKT_SIZE_SHIFT) struct vmbus_chanpkt_hdr { uint16_t cph_type; /* VMBUS_CHANPKT_TYPE_ */ uint16_t cph_hlen; /* header len, in 8 bytes */ uint16_t cph_tlen; /* total len, in 8 bytes */ uint16_t cph_flags; /* VMBUS_CHANPKT_FLAG_ */ uint64_t cph_xactid; } __packed; #define VMBUS_CHANPKT_TYPE_INBAND 0x0006 #define VMBUS_CHANPKT_TYPE_RXBUF 0x0007 #define VMBUS_CHANPKT_TYPE_GPA 0x0009 #define VMBUS_CHANPKT_TYPE_COMP 0x000b #define VMBUS_CHANPKT_FLAG_NONE 0 #define VMBUS_CHANPKT_FLAG_RC 0x0001 /* report completion */ #define VMBUS_CHANPKT_CONST_DATA(pkt) \ (const void *)((const uint8_t *)(pkt) + \ VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) /* Include padding */ #define VMBUS_CHANPKT_DATALEN(pkt) \ (VMBUS_CHANPKT_GETLEN((pkt)->cph_tlen) -\ VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) struct vmbus_rxbuf_desc { uint32_t rb_len; uint32_t rb_ofs; } __packed; struct vmbus_chanpkt_rxbuf { struct vmbus_chanpkt_hdr cp_hdr; uint16_t cp_rxbuf_id; uint16_t cp_rsvd; uint32_t cp_rxbuf_cnt; struct vmbus_rxbuf_desc cp_rxbuf[]; } __packed; struct vmbus_chan_br { void *cbr; bus_addr_t cbr_paddr; int cbr_txsz; int cbr_rxsz; }; struct vmbus_channel; +struct vmbus_xact; struct vmbus_xact_ctx; struct hyperv_guid; struct task; struct taskqueue; typedef void (*vmbus_chan_callback_t)(struct vmbus_channel *, void *); static __inline struct vmbus_channel * vmbus_get_channel(device_t dev) { return device_get_ivars(dev); } /* * vmbus_chan_open_br() * * Return values: * 0 Succeeded. * EISCONN Failed, and the memory passed through 'br' is still * connected. Callers must _not_ free the memory * passed through 'br', if this error happens. * other values Failed. The memory passed through 'br' is no longer * connected. Callers are free to do anything with the * memory passed through 'br'. * * * * vmbus_chan_close_direct() * * NOTE: * Callers of this function _must_ make sure to close all sub-channels before * closing the primary channel. * * Return values: * 0 Succeeded. * EISCONN Failed, and the memory associated with the bufring * is still connected. 
Callers must _not_ free the * memory associated with the bufring, if this error * happens. * other values Failed. The memory associated with the bufring is * no longer connected. Callers are free to do anything * with the memory associated with the bufring. */ int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg); int vmbus_chan_open_br(struct vmbus_channel *chan, const struct vmbus_chan_br *cbr, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg); void vmbus_chan_close(struct vmbus_channel *chan); int vmbus_chan_close_direct(struct vmbus_channel *chan); void vmbus_chan_intr_drain(struct vmbus_channel *chan); void vmbus_chan_run_task(struct vmbus_channel *chan, struct task *task); void vmbus_chan_set_orphan(struct vmbus_channel *chan, struct vmbus_xact_ctx *); void vmbus_chan_unset_orphan(struct vmbus_channel *chan); +const void *vmbus_chan_xact_wait(const struct vmbus_channel *chan, + struct vmbus_xact *xact, size_t *resp_len, bool can_sleep); int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl); int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl); void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu); void vmbus_chan_cpu_rr(struct vmbus_channel *chan); void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on); struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt); void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt); void vmbus_subchan_drain(struct vmbus_channel *pri_chan); int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen, uint64_t *xactid); int vmbus_chan_recv_pkt(struct vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt, int *pktlen); int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid); int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid); int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid); uint32_t vmbus_chan_id(const struct vmbus_channel *chan); uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan); bool vmbus_chan_is_primary(const struct vmbus_channel *chan); bool vmbus_chan_is_revoked(const struct vmbus_channel *chan); const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan); int vmbus_chan_prplist_nelem(int br_size, int prpcnt_max, int dlen_max); bool vmbus_chan_rx_empty(const struct vmbus_channel *chan); bool vmbus_chan_tx_empty(const struct vmbus_channel *chan); struct taskqueue * vmbus_chan_mgmt_tq(const struct vmbus_channel *chan); #endif /* !_VMBUS_H_ */ Index: projects/clang391-import/sys/dev/hyperv/include/vmbus_xact.h =================================================================== --- projects/clang391-import/sys/dev/hyperv/include/vmbus_xact.h (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/include/vmbus_xact.h (revision 309263) @@ -1,63 +1,65 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMBUS_XACT_H_ #define _VMBUS_XACT_H_ #include #include struct vmbus_xact; struct vmbus_xact_ctx; struct vmbus_xact_ctx *vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size, size_t priv_size); void vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx); bool vmbus_xact_ctx_orphan(struct vmbus_xact_ctx *ctx); struct vmbus_xact *vmbus_xact_get(struct vmbus_xact_ctx *ctx, size_t req_len); void vmbus_xact_put(struct vmbus_xact *xact); void *vmbus_xact_req_data(const struct vmbus_xact *xact); bus_addr_t vmbus_xact_req_paddr(const struct vmbus_xact *xact); void *vmbus_xact_priv(const struct vmbus_xact *xact, size_t priv_len); void vmbus_xact_activate(struct vmbus_xact *xact); void vmbus_xact_deactivate(struct vmbus_xact *xact); const void *vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len); const void *vmbus_xact_busywait(struct vmbus_xact *xact, size_t *resp_len); +const void *vmbus_xact_poll(struct vmbus_xact *xact, + size_t *resp_len); void vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen); void vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, const void *data, size_t dlen); #endif /* !_VMBUS_XACT_H_ */ Index: projects/clang391-import/sys/dev/hyperv/netvsc/hn_nvs.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/netvsc/hn_nvs.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/netvsc/hn_nvs.c (revision 309263) @@ -1,706 +1,721 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Network Virtualization Service. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int hn_nvs_conn_chim(struct hn_softc *); static int hn_nvs_conn_rxbuf(struct hn_softc *); -static int hn_nvs_disconn_chim(struct hn_softc *); -static int hn_nvs_disconn_rxbuf(struct hn_softc *); +static void hn_nvs_disconn_chim(struct hn_softc *); +static void hn_nvs_disconn_rxbuf(struct hn_softc *); static int hn_nvs_conf_ndis(struct hn_softc *, int); static int hn_nvs_init_ndis(struct hn_softc *); static int hn_nvs_doinit(struct hn_softc *, uint32_t); static int hn_nvs_init(struct hn_softc *); static const void *hn_nvs_xact_execute(struct hn_softc *, struct vmbus_xact *, void *, int, size_t *, uint32_t); static void hn_nvs_sent_none(struct hn_nvs_sendctx *, struct hn_softc *, struct vmbus_channel *, const void *, int); struct hn_nvs_sendctx hn_nvs_sendctx_none = HN_NVS_SENDCTX_INITIALIZER(hn_nvs_sent_none, NULL); static const uint32_t hn_nvs_version[] = { HN_NVS_VERSION_5, HN_NVS_VERSION_4, HN_NVS_VERSION_2, HN_NVS_VERSION_1 }; static const void * hn_nvs_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, void *req, int reqlen, size_t *resplen0, uint32_t type) { struct hn_nvs_sendctx sndc; size_t resplen, min_resplen = *resplen0; const struct hn_nvs_hdr *hdr; int error; KASSERT(min_resplen >= sizeof(*hdr), ("invalid minimum response len %zu", min_resplen)); /* * Execute the xact setup by the caller. */ hn_nvs_sendctx_init(&sndc, hn_nvs_sent_xact, xact); vmbus_xact_activate(xact); error = hn_nvs_send(sc->hn_prichan, VMBUS_CHANPKT_FLAG_RC, req, reqlen, &sndc); if (error) { vmbus_xact_deactivate(xact); return (NULL); } - if (HN_CAN_SLEEP(sc)) - hdr = vmbus_xact_wait(xact, &resplen); - else - hdr = vmbus_xact_busywait(xact, &resplen); + hdr = vmbus_chan_xact_wait(sc->hn_prichan, xact, &resplen, + HN_CAN_SLEEP(sc)); /* * Check this NVS response message. */ if (resplen < min_resplen) { if_printf(sc->hn_ifp, "invalid NVS resp len %zu\n", resplen); return (NULL); } if (hdr->nvs_type != type) { if_printf(sc->hn_ifp, "unexpected NVS resp 0x%08x, " "expect 0x%08x\n", hdr->nvs_type, type); return (NULL); } /* All pass! */ *resplen0 = resplen; return (hdr); } static __inline int hn_nvs_req_send(struct hn_softc *sc, void *req, int reqlen) { return (hn_nvs_send(sc->hn_prichan, VMBUS_CHANPKT_FLAG_NONE, req, reqlen, &hn_nvs_sendctx_none)); } static int hn_nvs_conn_rxbuf(struct hn_softc *sc) { struct vmbus_xact *xact = NULL; struct hn_nvs_rxbuf_conn *conn; const struct hn_nvs_rxbuf_connresp *resp; size_t resp_len; uint32_t status; int error, rxbuf_size; /* * Limit RXBUF size for old NVS. */ if (sc->hn_nvs_ver <= HN_NVS_VERSION_2) rxbuf_size = HN_RXBUF_SIZE_COMPAT; else rxbuf_size = HN_RXBUF_SIZE; /* * Connect the RXBUF GPADL to the primary channel. * * NOTE: * Only primary channel has RXBUF connected to it. 
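
hn_nvs_xact_execute() above now defers the sleep-versus-spin decision to the new vmbus_chan_xact_wait(). A plausible minimal shape for that wrapper, assuming it simply dispatches on can_sleep the way the removed HN_CAN_SLEEP() branches did (the real helper may do more, e.g. cope with revoked channels):

const void *
vmbus_chan_xact_wait(const struct vmbus_channel *chan __unused,
    struct vmbus_xact *xact, size_t *resp_len, bool can_sleep)
{
	/* Sleepable contexts block; non-sleepable ones spin. */
	if (can_sleep)
		return (vmbus_xact_wait(xact, resp_len));
	return (vmbus_xact_busywait(xact, resp_len));
}
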
Sub-channels * just share this RXBUF. */ error = vmbus_chan_gpadl_connect(sc->hn_prichan, sc->hn_rxbuf_dma.hv_paddr, rxbuf_size, &sc->hn_rxbuf_gpadl); if (error) { if_printf(sc->hn_ifp, "rxbuf gpadl conn failed: %d\n", error); goto cleanup; } /* * Connect RXBUF to NVS. */ xact = vmbus_xact_get(sc->hn_xact, sizeof(*conn)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs rxbuf conn\n"); error = ENXIO; goto cleanup; } conn = vmbus_xact_req_data(xact); conn->nvs_type = HN_NVS_TYPE_RXBUF_CONN; conn->nvs_gpadl = sc->hn_rxbuf_gpadl; conn->nvs_sig = HN_NVS_RXBUF_SIG; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, conn, sizeof(*conn), &resp_len, HN_NVS_TYPE_RXBUF_CONNRESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec nvs rxbuf conn failed\n"); error = EIO; goto cleanup; } status = resp->nvs_status; vmbus_xact_put(xact); xact = NULL; if (status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "nvs rxbuf conn failed: %x\n", status); error = EIO; goto cleanup; } sc->hn_flags |= HN_FLAG_RXBUF_CONNECTED; return (0); cleanup: if (xact != NULL) vmbus_xact_put(xact); hn_nvs_disconn_rxbuf(sc); return (error); } static int hn_nvs_conn_chim(struct hn_softc *sc) { struct vmbus_xact *xact = NULL; struct hn_nvs_chim_conn *chim; const struct hn_nvs_chim_connresp *resp; size_t resp_len; uint32_t status, sectsz; int error; /* * Connect chimney sending buffer GPADL to the primary channel. * * NOTE: * Only primary channel has chimney sending buffer connected to it. * Sub-channels just share this chimney sending buffer. */ error = vmbus_chan_gpadl_connect(sc->hn_prichan, sc->hn_chim_dma.hv_paddr, HN_CHIM_SIZE, &sc->hn_chim_gpadl); if (error) { if_printf(sc->hn_ifp, "chim gpadl conn failed: %d\n", error); goto cleanup; } /* * Connect chimney sending buffer to NVS */ xact = vmbus_xact_get(sc->hn_xact, sizeof(*chim)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs chim conn\n"); error = ENXIO; goto cleanup; } chim = vmbus_xact_req_data(xact); chim->nvs_type = HN_NVS_TYPE_CHIM_CONN; chim->nvs_gpadl = sc->hn_chim_gpadl; chim->nvs_sig = HN_NVS_CHIM_SIG; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, chim, sizeof(*chim), &resp_len, HN_NVS_TYPE_CHIM_CONNRESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec nvs chim conn failed\n"); error = EIO; goto cleanup; } status = resp->nvs_status; sectsz = resp->nvs_sectsz; vmbus_xact_put(xact); xact = NULL; if (status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "nvs chim conn failed: %x\n", status); error = EIO; goto cleanup; } if (sectsz == 0) { + /* + * Can't use chimney sending buffer; done! + */ if_printf(sc->hn_ifp, "zero chimney sending buffer " "section size\n"); + sc->hn_chim_szmax = 0; + sc->hn_chim_cnt = 0; + sc->hn_flags |= HN_FLAG_CHIM_CONNECTED; return (0); } sc->hn_chim_szmax = sectsz; sc->hn_chim_cnt = HN_CHIM_SIZE / sc->hn_chim_szmax; if (HN_CHIM_SIZE % sc->hn_chim_szmax != 0) { if_printf(sc->hn_ifp, "chimney sending sections are " "not properly aligned\n"); } if (sc->hn_chim_cnt % LONG_BIT != 0) { if_printf(sc->hn_ifp, "discard %d chimney sending sections\n", sc->hn_chim_cnt % LONG_BIT); } sc->hn_chim_bmap_cnt = sc->hn_chim_cnt / LONG_BIT; sc->hn_chim_bmap = malloc(sc->hn_chim_bmap_cnt * sizeof(u_long), M_DEVBUF, M_WAITOK | M_ZERO); /* Done! 
*/ sc->hn_flags |= HN_FLAG_CHIM_CONNECTED; if (bootverbose) { if_printf(sc->hn_ifp, "chimney sending buffer %d/%d\n", sc->hn_chim_szmax, sc->hn_chim_cnt); } return (0); cleanup: if (xact != NULL) vmbus_xact_put(xact); hn_nvs_disconn_chim(sc); return (error); } -static int +static void hn_nvs_disconn_rxbuf(struct hn_softc *sc) { int error; if (sc->hn_flags & HN_FLAG_RXBUF_CONNECTED) { struct hn_nvs_rxbuf_disconn disconn; /* * Disconnect RXBUF from NVS. */ memset(&disconn, 0, sizeof(disconn)); disconn.nvs_type = HN_NVS_TYPE_RXBUF_DISCONN; disconn.nvs_sig = HN_NVS_RXBUF_SIG; /* NOTE: No response. */ error = hn_nvs_req_send(sc, &disconn, sizeof(disconn)); if (error) { if_printf(sc->hn_ifp, "send nvs rxbuf disconn failed: %d\n", error); - return (error); + /* + * Fine for a revoked channel, since the hypervisor + * does not drain TX bufring for a revoked channel. + */ + if (!vmbus_chan_is_revoked(sc->hn_prichan)) + sc->hn_flags |= HN_FLAG_RXBUF_REF; } sc->hn_flags &= ~HN_FLAG_RXBUF_CONNECTED; /* * Wait for the hypervisor to receive this NVS request. * * NOTE: * The TX bufring will not be drained by the hypervisor, * if the primary channel is revoked. */ while (!vmbus_chan_tx_empty(sc->hn_prichan) && !vmbus_chan_is_revoked(sc->hn_prichan)) pause("waittx", 1); /* * Linger long enough for NVS to disconnect RXBUF. */ pause("lingtx", (200 * hz) / 1000); } if (sc->hn_rxbuf_gpadl != 0) { /* * Disconnect RXBUF from primary channel. */ error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, sc->hn_rxbuf_gpadl); if (error) { if_printf(sc->hn_ifp, "rxbuf gpadl disconn failed: %d\n", error); - return (error); + sc->hn_flags |= HN_FLAG_RXBUF_REF; } sc->hn_rxbuf_gpadl = 0; } - return (0); } -static int +static void hn_nvs_disconn_chim(struct hn_softc *sc) { int error; if (sc->hn_flags & HN_FLAG_CHIM_CONNECTED) { struct hn_nvs_chim_disconn disconn; /* * Disconnect chimney sending buffer from NVS. */ memset(&disconn, 0, sizeof(disconn)); disconn.nvs_type = HN_NVS_TYPE_CHIM_DISCONN; disconn.nvs_sig = HN_NVS_CHIM_SIG; /* NOTE: No response. */ error = hn_nvs_req_send(sc, &disconn, sizeof(disconn)); if (error) { if_printf(sc->hn_ifp, "send nvs chim disconn failed: %d\n", error); - return (error); + /* + * Fine for a revoked channel, since the hypervisor + * does not drain TX bufring for a revoked channel. + */ + if (!vmbus_chan_is_revoked(sc->hn_prichan)) + sc->hn_flags |= HN_FLAG_CHIM_REF; } sc->hn_flags &= ~HN_FLAG_CHIM_CONNECTED; /* * Wait for the hypervisor to receive this NVS request. * * NOTE: * The TX bufring will not be drained by the hypervisor, * if the primary channel is revoked. */ while (!vmbus_chan_tx_empty(sc->hn_prichan) && !vmbus_chan_is_revoked(sc->hn_prichan)) pause("waittx", 1); /* * Linger long enough for NVS to disconnect chimney * sending buffer. */ pause("lingtx", (200 * hz) / 1000); } if (sc->hn_chim_gpadl != 0) { /* * Disconnect chimney sending buffer from primary channel. 
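
Both disconnect paths above now degrade gracefully instead of failing outright, and they share the same teardown idiom. As a standalone sketch (the helper name is hypothetical; the body mirrors the code above):

static void
example_nvs_teardown_wait(struct hn_softc *sc)
{
	/*
	 * Wait for the host to pick up the disconnect request; a revoked
	 * primary channel never drains its TX bufring, so don't spin on one.
	 */
	while (!vmbus_chan_tx_empty(sc->hn_prichan) &&
	    !vmbus_chan_is_revoked(sc->hn_prichan))
		pause("waittx", 1);
	/* Linger ~200ms so NVS can finish the disconnect on the host side. */
	pause("lingtx", (200 * hz) / 1000);
}
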
*/ error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, sc->hn_chim_gpadl); if (error) { if_printf(sc->hn_ifp, "chim gpadl disconn failed: %d\n", error); - return (error); + sc->hn_flags |= HN_FLAG_CHIM_REF; } sc->hn_chim_gpadl = 0; } if (sc->hn_chim_bmap != NULL) { free(sc->hn_chim_bmap, M_DEVBUF); sc->hn_chim_bmap = NULL; + sc->hn_chim_bmap_cnt = 0; } - return (0); } static int hn_nvs_doinit(struct hn_softc *sc, uint32_t nvs_ver) { struct vmbus_xact *xact; struct hn_nvs_init *init; const struct hn_nvs_init_resp *resp; size_t resp_len; uint32_t status; xact = vmbus_xact_get(sc->hn_xact, sizeof(*init)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs init\n"); return (ENXIO); } init = vmbus_xact_req_data(xact); init->nvs_type = HN_NVS_TYPE_INIT; init->nvs_ver_min = nvs_ver; init->nvs_ver_max = nvs_ver; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, init, sizeof(*init), &resp_len, HN_NVS_TYPE_INIT_RESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec init failed\n"); vmbus_xact_put(xact); return (EIO); } status = resp->nvs_status; vmbus_xact_put(xact); if (status != HN_NVS_STATUS_OK) { if (bootverbose) { /* * Caller may try another NVS version, and will log * error if there are no more NVS versions to try, * so don't bark out loud here. */ if_printf(sc->hn_ifp, "nvs init failed for ver 0x%x\n", nvs_ver); } return (EINVAL); } return (0); } /* * Configure MTU and enable VLAN. */ static int hn_nvs_conf_ndis(struct hn_softc *sc, int mtu) { struct hn_nvs_ndis_conf conf; int error; memset(&conf, 0, sizeof(conf)); conf.nvs_type = HN_NVS_TYPE_NDIS_CONF; conf.nvs_mtu = mtu; conf.nvs_caps = HN_NVS_NDIS_CONF_VLAN; /* NOTE: No response. */ error = hn_nvs_req_send(sc, &conf, sizeof(conf)); if (error) { if_printf(sc->hn_ifp, "send nvs ndis conf failed: %d\n", error); return (error); } if (bootverbose) if_printf(sc->hn_ifp, "nvs ndis conf done\n"); sc->hn_caps |= HN_CAP_MTU | HN_CAP_VLAN; return (0); } static int hn_nvs_init_ndis(struct hn_softc *sc) { struct hn_nvs_ndis_init ndis; int error; memset(&ndis, 0, sizeof(ndis)); ndis.nvs_type = HN_NVS_TYPE_NDIS_INIT; ndis.nvs_ndis_major = HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver); ndis.nvs_ndis_minor = HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver); /* NOTE: No response. */ error = hn_nvs_req_send(sc, &ndis, sizeof(ndis)); if (error) if_printf(sc->hn_ifp, "send nvs ndis init failed: %d\n", error); return (error); } static int hn_nvs_init(struct hn_softc *sc) { int i, error; if (device_is_attached(sc->hn_dev)) { /* * NVS version and NDIS version MUST NOT be changed. */ if (bootverbose) { if_printf(sc->hn_ifp, "reinit NVS version 0x%x, " "NDIS version %u.%u\n", sc->hn_nvs_ver, HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); } error = hn_nvs_doinit(sc, sc->hn_nvs_ver); if (error) { if_printf(sc->hn_ifp, "reinit NVS version 0x%x " "failed: %d\n", sc->hn_nvs_ver, error); return (error); } goto done; } /* * Find the supported NVS version and set NDIS version accordingly. */ for (i = 0; i < nitems(hn_nvs_version); ++i) { error = hn_nvs_doinit(sc, hn_nvs_version[i]); if (!error) { sc->hn_nvs_ver = hn_nvs_version[i]; /* Set NDIS version according to NVS version. 
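 * (With the version table above this works out to: NVS 5 selects
 * NDIS 6.30, while NVS 4, 2 and 1 all fall back to NDIS 6.1.)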
*/ sc->hn_ndis_ver = HN_NDIS_VERSION_6_30; if (sc->hn_nvs_ver <= HN_NVS_VERSION_4) sc->hn_ndis_ver = HN_NDIS_VERSION_6_1; if (bootverbose) { if_printf(sc->hn_ifp, "NVS version 0x%x, " "NDIS version %u.%u\n", sc->hn_nvs_ver, HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); } goto done; } } if_printf(sc->hn_ifp, "no NVS available\n"); return (ENXIO); done: if (sc->hn_nvs_ver >= HN_NVS_VERSION_5) sc->hn_caps |= HN_CAP_HASHVAL; return (0); } int hn_nvs_attach(struct hn_softc *sc, int mtu) { int error; /* * Initialize NVS. */ error = hn_nvs_init(sc); if (error) return (error); if (sc->hn_nvs_ver >= HN_NVS_VERSION_2) { /* * Configure NDIS before initializing it. */ error = hn_nvs_conf_ndis(sc, mtu); if (error) return (error); } /* * Initialize NDIS. */ error = hn_nvs_init_ndis(sc); if (error) return (error); /* * Connect RXBUF. */ error = hn_nvs_conn_rxbuf(sc); if (error) return (error); /* * Connect chimney sending buffer. */ error = hn_nvs_conn_chim(sc); - if (error) + if (error) { + hn_nvs_disconn_rxbuf(sc); return (error); + } return (0); } void hn_nvs_detach(struct hn_softc *sc) { /* NOTE: there are no requests to stop the NVS. */ hn_nvs_disconn_rxbuf(sc); hn_nvs_disconn_chim(sc); } void hn_nvs_sent_xact(struct hn_nvs_sendctx *sndc, struct hn_softc *sc __unused, struct vmbus_channel *chan __unused, const void *data, int dlen) { vmbus_xact_wakeup(sndc->hn_cbarg, data, dlen); } static void hn_nvs_sent_none(struct hn_nvs_sendctx *sndc __unused, struct hn_softc *sc __unused, struct vmbus_channel *chan __unused, const void *data __unused, int dlen __unused) { /* EMPTY */ } int hn_nvs_alloc_subchans(struct hn_softc *sc, int *nsubch0) { struct vmbus_xact *xact; struct hn_nvs_subch_req *req; const struct hn_nvs_subch_resp *resp; int error, nsubch_req; uint32_t nsubch; size_t resp_len; nsubch_req = *nsubch0; KASSERT(nsubch_req > 0, ("invalid # of sub-channels %d", nsubch_req)); xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs subch alloc\n"); return (ENXIO); } req = vmbus_xact_req_data(xact); req->nvs_type = HN_NVS_TYPE_SUBCH_REQ; req->nvs_op = HN_NVS_SUBCH_OP_ALLOC; req->nvs_nsubch = nsubch_req; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, req, sizeof(*req), &resp_len, HN_NVS_TYPE_SUBCH_RESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec nvs subch alloc failed\n"); error = EIO; goto done; } if (resp->nvs_status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "nvs subch alloc failed: %x\n", resp->nvs_status); error = EIO; goto done; } nsubch = resp->nvs_nsubch; if (nsubch > nsubch_req) { if_printf(sc->hn_ifp, "%u subchans are allocated, " "requested %d\n", nsubch, nsubch_req); nsubch = nsubch_req; } *nsubch0 = nsubch; error = 0; done: vmbus_xact_put(xact); return (error); } int hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan, struct hn_nvs_sendctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt) { return hn_nvs_send_rndis_sglist(chan, HN_NVS_RNDIS_MTYPE_CTRL, sndc, gpa, gpa_cnt); } Index: projects/clang391-import/sys/dev/hyperv/netvsc/hn_rndis.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/netvsc/hn_rndis.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/netvsc/hn_rndis.c (revision 309263) @@ -1,996 +1,993 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HN_RNDIS_RID_COMPAT_MASK 0xffff #define HN_RNDIS_RID_COMPAT_MAX HN_RNDIS_RID_COMPAT_MASK #define HN_RNDIS_XFER_SIZE 2048 #define HN_NDIS_TXCSUM_CAP_IP4 \ (NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT) #define HN_NDIS_TXCSUM_CAP_TCP4 \ (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT) #define HN_NDIS_TXCSUM_CAP_TCP6 \ (NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \ NDIS_TXCSUM_CAP_IP6EXT) #define HN_NDIS_TXCSUM_CAP_UDP6 \ (NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT) #define HN_NDIS_LSOV2_CAP_IP6 \ (NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT) static const void *hn_rndis_xact_exec1(struct hn_softc *, struct vmbus_xact *, size_t, struct hn_nvs_sendctx *, size_t *); static const void *hn_rndis_xact_execute(struct hn_softc *, struct vmbus_xact *, uint32_t, size_t, size_t *, uint32_t); static int hn_rndis_query(struct hn_softc *, uint32_t, const void *, size_t, void *, size_t *); static int hn_rndis_query2(struct hn_softc *, uint32_t, const void *, size_t, void *, size_t *, size_t); static int hn_rndis_set(struct hn_softc *, uint32_t, const void *, size_t); static int hn_rndis_init(struct hn_softc *); static int hn_rndis_halt(struct hn_softc *); static int hn_rndis_conf_offload(struct hn_softc *, int); static int hn_rndis_query_hwcaps(struct hn_softc *, struct ndis_offload *); static __inline uint32_t hn_rndis_rid(struct hn_softc *sc) { uint32_t rid; again: rid = atomic_fetchadd_int(&sc->hn_rndis_rid, 1); if (rid == 0) goto again; /* Use upper 16 bits for non-compat RNDIS messages. 
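 * E.g. a counter value of 1 becomes rid 0x00010000, which is safely
 * above HN_RNDIS_RID_COMPAT_MAX (0xffff); the low 16 bits remain
 * reserved for compat requests.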
*/ return ((rid & 0xffff) << 16); } void hn_rndis_rx_ctrl(struct hn_softc *sc, const void *data, int dlen) { const struct rndis_comp_hdr *comp; const struct rndis_msghdr *hdr; KASSERT(dlen >= sizeof(*hdr), ("invalid RNDIS msg\n")); hdr = data; switch (hdr->rm_type) { case REMOTE_NDIS_INITIALIZE_CMPLT: case REMOTE_NDIS_QUERY_CMPLT: case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: /* unused */ if (dlen < sizeof(*comp)) { if_printf(sc->hn_ifp, "invalid RNDIS cmplt\n"); return; } comp = data; KASSERT(comp->rm_rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid RNDIS rid 0x%08x\n", comp->rm_rid)); vmbus_xact_ctx_wakeup(sc->hn_xact, comp, dlen); break; case REMOTE_NDIS_RESET_CMPLT: /* * Reset completed, no rid. * * NOTE: * RESET is not issued by hn(4), so this message should * _not_ be observed. */ if_printf(sc->hn_ifp, "RESET cmplt received\n"); break; default: if_printf(sc->hn_ifp, "unknown RNDIS msg 0x%x\n", hdr->rm_type); break; } } int hn_rndis_get_eaddr(struct hn_softc *sc, uint8_t *eaddr) { size_t eaddr_len; int error; eaddr_len = ETHER_ADDR_LEN; error = hn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, NULL, 0, eaddr, &eaddr_len); if (error) return (error); if (eaddr_len != ETHER_ADDR_LEN) { if_printf(sc->hn_ifp, "invalid eaddr len %zu\n", eaddr_len); return (EINVAL); } return (0); } int hn_rndis_get_linkstatus(struct hn_softc *sc, uint32_t *link_status) { size_t size; int error; size = sizeof(*link_status); error = hn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0, link_status, &size); if (error) return (error); if (size != sizeof(uint32_t)) { if_printf(sc->hn_ifp, "invalid link status len %zu\n", size); return (EINVAL); } return (0); } static const void * hn_rndis_xact_exec1(struct hn_softc *sc, struct vmbus_xact *xact, size_t reqlen, struct hn_nvs_sendctx *sndc, size_t *comp_len) { struct vmbus_gpa gpa[HN_XACT_REQ_PGCNT]; int gpa_cnt, error; bus_addr_t paddr; KASSERT(reqlen <= HN_XACT_REQ_SIZE && reqlen > 0, ("invalid request length %zu", reqlen)); /* * Setup the SG list. */ paddr = vmbus_xact_req_paddr(xact); KASSERT((paddr & PAGE_MASK) == 0, ("vmbus xact request is not page aligned 0x%jx", (uintmax_t)paddr)); for (gpa_cnt = 0; gpa_cnt < HN_XACT_REQ_PGCNT; ++gpa_cnt) { int len = PAGE_SIZE; if (reqlen == 0) break; if (reqlen < len) len = reqlen; gpa[gpa_cnt].gpa_page = atop(paddr) + gpa_cnt; gpa[gpa_cnt].gpa_len = len; gpa[gpa_cnt].gpa_ofs = 0; reqlen -= len; } KASSERT(reqlen == 0, ("still have %zu request data left", reqlen)); /* * Send this RNDIS control message and wait for its completion * message. */ vmbus_xact_activate(xact); error = hn_nvs_send_rndis_ctrl(sc->hn_prichan, sndc, gpa, gpa_cnt); if (error) { vmbus_xact_deactivate(xact); if_printf(sc->hn_ifp, "RNDIS ctrl send failed: %d\n", error); return (NULL); } - if (HN_CAN_SLEEP(sc)) - return (vmbus_xact_wait(xact, comp_len)); - else - return (vmbus_xact_busywait(xact, comp_len)); + return (vmbus_chan_xact_wait(sc->hn_prichan, xact, comp_len, + HN_CAN_SLEEP(sc))); } static const void * hn_rndis_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, uint32_t rid, size_t reqlen, size_t *comp_len0, uint32_t comp_type) { const struct rndis_comp_hdr *comp; size_t comp_len, min_complen = *comp_len0; KASSERT(rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid rid %u\n", rid)); KASSERT(min_complen >= sizeof(*comp), ("invalid minimum complete len %zu", min_complen)); /* * Execute the xact setup by the caller. 
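 * (hn_rndis_xact_exec1() above chops the page-aligned request into
 * per-page vmbus_gpa entries; e.g. a 6000-byte request on 4 KiB pages
 * yields two entries of 4096 and 1904 bytes, both at offset 0.)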
*/ comp = hn_rndis_xact_exec1(sc, xact, reqlen, &hn_nvs_sendctx_none, &comp_len); if (comp == NULL) return (NULL); /* * Check this RNDIS complete message. */ if (comp_len < min_complen) { if (comp_len >= sizeof(*comp)) { /* rm_status field is valid */ if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu, " "status 0x%08x\n", comp_len, comp->rm_status); } else { if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu\n", comp_len); } return (NULL); } if (comp->rm_len < min_complen) { if_printf(sc->hn_ifp, "invalid RNDIS comp msglen %u\n", comp->rm_len); return (NULL); } if (comp->rm_type != comp_type) { if_printf(sc->hn_ifp, "unexpected RNDIS comp 0x%08x, " "expect 0x%08x\n", comp->rm_type, comp_type); return (NULL); } if (comp->rm_rid != rid) { if_printf(sc->hn_ifp, "RNDIS comp rid mismatch %u, " "expect %u\n", comp->rm_rid, rid); return (NULL); } /* All pass! */ *comp_len0 = comp_len; return (comp); } static int hn_rndis_query(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0) { return (hn_rndis_query2(sc, oid, idata, idlen, odata, odlen0, *odlen0)); } static int hn_rndis_query2(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0, size_t min_odlen) { struct rndis_query_req *req; const struct rndis_query_comp *comp; struct vmbus_xact *xact; size_t reqlen, odlen = *odlen0, comp_len; int error, ofs; uint32_t rid; reqlen = sizeof(*req) + idlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS query 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_QUERY_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; /* * XXX * This is _not_ RNDIS Spec conforming: * "This MUST be set to 0 when there is no input data * associated with the OID." * * If this field was set to 0 according to the RNDIS Spec, * Hyper-V would set non-SUCCESS status in the query * completion. */ req->rm_infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET; if (idlen > 0) { req->rm_infobuflen = idlen; /* Input data immediately follows RNDIS query. */ memcpy(req + 1, idata, idlen); } comp_len = sizeof(*comp) + min_odlen; comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_QUERY_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS query 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS query 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } if (comp->rm_infobuflen == 0 || comp->rm_infobufoffset == 0) { /* No output data! */ if_printf(sc->hn_ifp, "RNDIS query 0x%08x, no data\n", oid); *odlen0 = 0; error = 0; goto done; } /* * Check output data length and offset. */ /* ofs is the offset from the beginning of comp. */ ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->rm_infobufoffset); if (ofs < sizeof(*comp) || ofs + comp->rm_infobuflen > comp_len) { if_printf(sc->hn_ifp, "RNDIS query invalid comp ib off/len, " "%u/%u\n", comp->rm_infobufoffset, comp->rm_infobuflen); error = EINVAL; goto done; } /* * Save output data. 
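 * (At most the caller-supplied *odlen0 bytes are copied out; a longer
 * completion is silently truncated, and *odlen0 is updated to the
 * shorter of the two lengths.)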
*/ if (comp->rm_infobuflen < odlen) odlen = comp->rm_infobuflen; memcpy(odata, ((const uint8_t *)comp) + ofs, odlen); *odlen0 = odlen; error = 0; done: vmbus_xact_put(xact); return (error); } int hn_rndis_query_rsscaps(struct hn_softc *sc, int *rxr_cnt0) { struct ndis_rss_caps in, caps; size_t caps_len; int error, indsz, rxr_cnt, hash_fnidx; uint32_t hash_func = 0, hash_types = 0; *rxr_cnt0 = 0; if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_20) return (EOPNOTSUPP); memset(&in, 0, sizeof(in)); in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS; in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2; in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE; caps_len = NDIS_RSS_CAPS_SIZE; error = hn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES, &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0); if (error) return (error); /* * Preliminary verification. */ if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) { if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n", caps.ndis_hdr.ndis_type); return (EINVAL); } if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) { if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n", caps.ndis_hdr.ndis_rev); return (EINVAL); } if (caps.ndis_hdr.ndis_size > caps_len) { if_printf(sc->hn_ifp, "invalid NDIS objsize %u, " "data size %zu\n", caps.ndis_hdr.ndis_size, caps_len); return (EINVAL); } else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) { if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n", caps.ndis_hdr.ndis_size); return (EINVAL); } /* * Save information for later RSS configuration. */ if (caps.ndis_nrxr == 0) { if_printf(sc->hn_ifp, "0 RX rings!?\n"); return (EINVAL); } if (bootverbose) if_printf(sc->hn_ifp, "%u RX rings\n", caps.ndis_nrxr); rxr_cnt = caps.ndis_nrxr; if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE && caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) { if (caps.ndis_nind > NDIS_HASH_INDCNT) { if_printf(sc->hn_ifp, "too many RSS indirect table entries %u\n", caps.ndis_nind); return (EOPNOTSUPP); } if (!powerof2(caps.ndis_nind)) { if_printf(sc->hn_ifp, "RSS indirect table size is not " "power-of-2 %u\n", caps.ndis_nind); } if (bootverbose) { if_printf(sc->hn_ifp, "RSS indirect table size %u\n", caps.ndis_nind); } indsz = caps.ndis_nind; } else { indsz = NDIS_HASH_INDCNT; } if (indsz < rxr_cnt) { if_printf(sc->hn_ifp, "# of RX rings (%d) > " "RSS indirect table size %d\n", rxr_cnt, indsz); rxr_cnt = indsz; } /* * NOTE: * Toeplitz is at the lowest bit, and it is preferred; so ffs(), * instead of fls(), is used here. */ hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK); if (hash_fnidx == 0) { if_printf(sc->hn_ifp, "no hash functions, caps 0x%08x\n", caps.ndis_caps); return (EOPNOTSUPP); } hash_func = 1 << (hash_fnidx - 1); /* ffs is 1-based */ if (caps.ndis_caps & NDIS_RSS_CAP_IPV4) hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4; if (caps.ndis_caps & NDIS_RSS_CAP_IPV6) hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6; if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX) hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX; if (hash_types == 0) { if_printf(sc->hn_ifp, "no hash types, caps 0x%08x\n", caps.ndis_caps); return (EOPNOTSUPP); } /* Commit!
*/ sc->hn_rss_ind_size = indsz; sc->hn_rss_hash = hash_func | hash_types; *rxr_cnt0 = rxr_cnt; return (0); } static int hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen) { struct rndis_set_req *req; const struct rndis_set_comp *comp; struct vmbus_xact *xact; size_t reqlen, comp_len; uint32_t rid; int error; KASSERT(dlen > 0, ("invalid dlen %zu", dlen)); reqlen = sizeof(*req) + dlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS set 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_SET_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; req->rm_infobuflen = dlen; req->rm_infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET; /* Data immediately follows RNDIS set. */ memcpy(req + 1, data, dlen); comp_len = sizeof(*comp); comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_SET_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS set 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS set 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } error = 0; done: vmbus_xact_put(xact); return (error); } static int hn_rndis_conf_offload(struct hn_softc *sc, int mtu) { struct ndis_offload hwcaps; struct ndis_offload_params params; uint32_t caps = 0; size_t paramsz; int error, tso_maxsz, tso_minsg; error = hn_rndis_query_hwcaps(sc, &hwcaps); if (error) { if_printf(sc->hn_ifp, "hwcaps query failed: %d\n", error); return (error); } /* NOTE: 0 means "no change" */ memset(¶ms, 0, sizeof(params)); params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT; if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_30) { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2; paramsz = NDIS_OFFLOAD_PARAMS_SIZE_6_1; } else { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3; paramsz = NDIS_OFFLOAD_PARAMS_SIZE; } params.ndis_hdr.ndis_size = paramsz; /* * TSO4/TSO6 setup. 
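 * (The logic below starts from IP_MAXPACKET and a 2-segment minimum,
 * then tightens both toward what the host advertises; e.g. an
 * advertised ip4_maxsz of 62780 with minsg 2 and MTU 1500 keeps TSO4
 * enabled, since 62780 >= 2 * 1500.)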
*/ tso_maxsz = IP_MAXPACKET; tso_minsg = 2; if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) { caps |= HN_CAP_TSO4; params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON; if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz) tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz; if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg) tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg; } if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) && (hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) == HN_NDIS_LSOV2_CAP_IP6) { caps |= HN_CAP_TSO6; params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON; if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz) tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz; if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg) tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg; } sc->hn_ndis_tso_szmax = 0; sc->hn_ndis_tso_sgmin = 0; if (caps & (HN_CAP_TSO4 | HN_CAP_TSO6)) { KASSERT(tso_maxsz <= IP_MAXPACKET, ("invalid NDIS TSO maxsz %d", tso_maxsz)); KASSERT(tso_minsg >= 2, ("invalid NDIS TSO minsg %d", tso_minsg)); if (tso_maxsz < tso_minsg * mtu) { if_printf(sc->hn_ifp, "invalid NDIS TSO config: " "maxsz %d, minsg %d, mtu %d; " "disable TSO4 and TSO6\n", tso_maxsz, tso_minsg, mtu); caps &= ~(HN_CAP_TSO4 | HN_CAP_TSO6); params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF; params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF; } else { sc->hn_ndis_tso_szmax = tso_maxsz; sc->hn_ndis_tso_sgmin = tso_minsg; if (bootverbose) { if_printf(sc->hn_ifp, "NDIS TSO " "szmax %d sgmin %d\n", sc->hn_ndis_tso_szmax, sc->hn_ndis_tso_sgmin); } } } /* IPv4 checksum */ if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4) == HN_NDIS_TXCSUM_CAP_IP4) { caps |= HN_CAP_IPCS; params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) { if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX; } /* TCP4 checksum */ if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4) == HN_NDIS_TXCSUM_CAP_TCP4) { caps |= HN_CAP_TCP4CS; params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) { if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX; } /* UDP4 checksum */ if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { caps |= HN_CAP_UDP4CS; params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) { if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX; } /* TCP6 checksum */ if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_TCP6) == HN_NDIS_TXCSUM_CAP_TCP6) { caps |= HN_CAP_TCP6CS; params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) { if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX; } /* UDP6 checksum */ if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_UDP6) == HN_NDIS_TXCSUM_CAP_UDP6) { caps |= HN_CAP_UDP6CS; params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX; } if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) { if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX) params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX; else params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX; } if (bootverbose) { if_printf(sc->hn_ifp, 
"offload csum: " "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n", params.ndis_ip4csum, params.ndis_tcp4csum, params.ndis_udp4csum, params.ndis_tcp6csum, params.ndis_udp6csum); if_printf(sc->hn_ifp, "offload lsov2: ip4 %u, ip6 %u\n", params.ndis_lsov2_ip4, params.ndis_lsov2_ip6); } error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, ¶ms, paramsz); if (error) { if_printf(sc->hn_ifp, "offload config failed: %d\n", error); return (error); } if (bootverbose) if_printf(sc->hn_ifp, "offload config done\n"); sc->hn_caps |= caps; return (0); } int hn_rndis_conf_rss(struct hn_softc *sc, uint16_t flags) { struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; struct ndis_rss_params *prm = &rss->rss_params; int error, rss_size; /* * Only NDIS 6.20+ is supported: * We only support 4bytes element in indirect table, which has been * adopted since NDIS 6.20. */ KASSERT(sc->hn_ndis_ver >= HN_NDIS_VERSION_6_20, ("NDIS 6.20+ is required, NDIS version 0x%08x", sc->hn_ndis_ver)); /* XXX only one can be specified through, popcnt? */ KASSERT((sc->hn_rss_hash & NDIS_HASH_FUNCTION_MASK), ("no hash func")); KASSERT((sc->hn_rss_hash & NDIS_HASH_TYPE_MASK), ("no hash types")); KASSERT(sc->hn_rss_ind_size > 0, ("no indirect table size")); if (bootverbose) { if_printf(sc->hn_ifp, "RSS indirect table size %d, " "hash 0x%08x\n", sc->hn_rss_ind_size, sc->hn_rss_hash); } /* * NOTE: * DO NOT whack rss_key and rss_ind, which are setup by the caller. */ memset(prm, 0, sizeof(*prm)); rss_size = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->hn_rss_ind_size); prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS; prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2; prm->ndis_hdr.ndis_size = rss_size; prm->ndis_flags = flags; prm->ndis_hash = sc->hn_rss_hash; prm->ndis_indsize = sizeof(rss->rss_ind[0]) * sc->hn_rss_ind_size; prm->ndis_indoffset = __offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]); prm->ndis_keysize = sizeof(rss->rss_key); prm->ndis_keyoffset = __offsetof(struct ndis_rssprm_toeplitz, rss_key[0]); error = hn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS, rss, rss_size); if (error) { if_printf(sc->hn_ifp, "RSS config failed: %d\n", error); } else { if (bootverbose) if_printf(sc->hn_ifp, "RSS config done\n"); } return (error); } int hn_rndis_set_rxfilter(struct hn_softc *sc, uint32_t filter) { int error; error = hn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER, &filter, sizeof(filter)); if (error) { if_printf(sc->hn_ifp, "set RX filter 0x%08x failed: %d\n", filter, error); } else { if (bootverbose) { if_printf(sc->hn_ifp, "set RX filter 0x%08x done\n", filter); } } return (error); } static int hn_rndis_init(struct hn_softc *sc) { struct rndis_init_req *req; const struct rndis_init_comp *comp; struct vmbus_xact *xact; size_t comp_len; uint32_t rid; int error; xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS init\n"); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_INITIALIZE_MSG; req->rm_len = sizeof(*req); req->rm_rid = rid; req->rm_ver_major = RNDIS_VERSION_MAJOR; req->rm_ver_minor = RNDIS_VERSION_MINOR; req->rm_max_xfersz = HN_RNDIS_XFER_SIZE; comp_len = RNDIS_INIT_COMP_SIZE_MIN; comp = hn_rndis_xact_execute(sc, xact, rid, sizeof(*req), &comp_len, REMOTE_NDIS_INITIALIZE_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS init failed\n"); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS init failed: status 0x%08x\n", comp->rm_status); error = EIO; goto done; } 
sc->hn_rndis_agg_size = comp->rm_pktmaxsz; sc->hn_rndis_agg_pkts = comp->rm_pktmaxcnt; sc->hn_rndis_agg_align = 1U << comp->rm_align; if (bootverbose) { if_printf(sc->hn_ifp, "RNDIS ver %u.%u, pktsz %u, pktcnt %u, " "align %u\n", comp->rm_ver_major, comp->rm_ver_minor, sc->hn_rndis_agg_size, sc->hn_rndis_agg_pkts, sc->hn_rndis_agg_align); } error = 0; done: vmbus_xact_put(xact); return (error); } static int hn_rndis_halt(struct hn_softc *sc) { struct vmbus_xact *xact; struct rndis_halt_req *halt; struct hn_nvs_sendctx sndc; size_t comp_len; xact = vmbus_xact_get(sc->hn_xact, sizeof(*halt)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS halt\n"); return (ENXIO); } halt = vmbus_xact_req_data(xact); halt->rm_type = REMOTE_NDIS_HALT_MSG; halt->rm_len = sizeof(*halt); halt->rm_rid = hn_rndis_rid(sc); /* No RNDIS completion; rely on NVS message send completion */ hn_nvs_sendctx_init(&sndc, hn_nvs_sent_xact, xact); hn_rndis_xact_exec1(sc, xact, sizeof(*halt), &sndc, &comp_len); vmbus_xact_put(xact); if (bootverbose) if_printf(sc->hn_ifp, "RNDIS halt done\n"); return (0); } static int hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps) { struct ndis_offload in; size_t caps_len, size; int error; memset(&in, 0, sizeof(in)); in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD; if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_30) { in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3; size = NDIS_OFFLOAD_SIZE; } else if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_1) { in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2; size = NDIS_OFFLOAD_SIZE_6_1; } else { in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1; size = NDIS_OFFLOAD_SIZE_6_0; } in.ndis_hdr.ndis_size = size; caps_len = NDIS_OFFLOAD_SIZE; error = hn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, &in, size, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0); if (error) return (error); /* * Preliminary verification. */ if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) { if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n", caps->ndis_hdr.ndis_type); return (EINVAL); } if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) { if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n", caps->ndis_hdr.ndis_rev); return (EINVAL); } if (caps->ndis_hdr.ndis_size > caps_len) { if_printf(sc->hn_ifp, "invalid NDIS objsize %u, " "data size %zu\n", caps->ndis_hdr.ndis_size, caps_len); return (EINVAL); } else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) { if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n", caps->ndis_hdr.ndis_size); return (EINVAL); } if (bootverbose) { /* * NOTE: * caps->ndis_hdr.ndis_size MUST be checked before accessing * NDIS 6.1+ specific fields. */ if_printf(sc->hn_ifp, "hwcaps rev %u\n", caps->ndis_hdr.ndis_rev); if_printf(sc->hn_ifp, "hwcaps csum: " "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, " "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n", caps->ndis_csum.ndis_ip4_txcsum, caps->ndis_csum.ndis_ip4_txenc, caps->ndis_csum.ndis_ip4_rxcsum, caps->ndis_csum.ndis_ip4_rxenc, caps->ndis_csum.ndis_ip6_txcsum, caps->ndis_csum.ndis_ip6_txenc, caps->ndis_csum.ndis_ip6_rxcsum, caps->ndis_csum.ndis_ip6_rxenc); if_printf(sc->hn_ifp, "hwcaps lsov2: " "ip4 maxsz %u minsg %u encap 0x%x, " "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n", caps->ndis_lsov2.ndis_ip4_maxsz, caps->ndis_lsov2.ndis_ip4_minsg, caps->ndis_lsov2.ndis_ip4_encap, caps->ndis_lsov2.ndis_ip6_maxsz, caps->ndis_lsov2.ndis_ip6_minsg, caps->ndis_lsov2.ndis_ip6_encap, caps->ndis_lsov2.ndis_ip6_opts); } return (0); } int hn_rndis_attach(struct hn_softc *sc, int mtu) { int error; /* * Initialize RNDIS. 
*/ error = hn_rndis_init(sc); if (error) return (error); /* * Configure NDIS offload settings. - * XXX no offloading, if error happened? */ hn_rndis_conf_offload(sc, mtu); return (0); } void hn_rndis_detach(struct hn_softc *sc) { /* Halt the RNDIS. */ hn_rndis_halt(sc); } Index: projects/clang391-import/sys/dev/hyperv/netvsc/if_hn.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/netvsc/if_hn.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/netvsc/if_hn.c (revision 309263) @@ -1,5321 +1,5387 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmbus_if.h" #define HN_IFSTART_SUPPORT #define HN_RING_CNT_DEF_MAX 8 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_RNDIS_PKT_LEN \ (sizeof(struct rndis_packet_msg) + \ HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE /* -1 for RNDIS packet message */ #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 #define HN_PKTBUF_LEN_DEF (16 * 1024) #define HN_LROENT_CNT_DEF 128 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 #define HN_LOCK_INIT(sc) \ sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) #define HN_LOCK(sc) \ do { \ while (sx_try_xlock(&(sc)->hn_lock) == 0) \ DELAY(1000); \ } while (0) #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) #define HN_CSUM_IP_HWASSIST(sc) \ ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) #define HN_CSUM_IP6_HWASSIST(sc) \ ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) #define HN_PKTSIZE_MIN(align) \ roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ HN_RNDIS_PKT_LEN, (align)) #define HN_PKTSIZE(m, align) \ roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif STAILQ_ENTRY(hn_txdesc) agg_link; /* Aggregated txdescs, in sending order. */ STAILQ_HEAD(, hn_txdesc) agg_list; /* The oldest packet, if transmission aggregation happens. 
*/ struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ struct hn_nvs_sendctx send_ctx; uint32_t chim_index; int chim_size; bus_dmamap_t data_dmap; bus_addr_t rndis_pkt_paddr; struct rndis_packet_msg *rndis_pkt; bus_dmamap_t rndis_pkt_dmap; }; #define HN_TXD_FLAG_ONLIST 0x0001 #define HN_TXD_FLAG_DMAMAP 0x0002 #define HN_TXD_FLAG_ONAGG 0x0004 struct hn_rxinfo { uint32_t vlan_info; uint32_t csum_info; uint32_t hash_info; uint32_t hash_value; }; #define HN_RXINFO_VLAN 0x0001 #define HN_RXINFO_CSUM 0x0002 #define HN_RXINFO_HASHINF 0x0004 #define HN_RXINFO_HASHVAL 0x0008 #define HN_RXINFO_ALL \ (HN_RXINFO_VLAN | \ HN_RXINFO_CSUM | \ HN_RXINFO_HASHINF | \ HN_RXINFO_HASHVAL) #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff #define HN_NDIS_RXCSUM_INFO_INVALID 0 #define HN_NDIS_HASH_INFO_INVALID 0 static int hn_probe(device_t); static int hn_attach(device_t); static int hn_detach(device_t); static int hn_shutdown(device_t); static void hn_chan_callback(struct vmbus_channel *, void *); static void hn_init(void *); static int hn_ioctl(struct ifnet *, u_long, caddr_t); #ifdef HN_IFSTART_SUPPORT static void hn_start(struct ifnet *); #endif static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_ifmedia_upd(struct ifnet *); static void hn_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int hn_rndis_rxinfo(const void *, int, struct hn_rxinfo *); static void hn_rndis_rx_data(struct hn_rx_ring *, const void *, int); static void hn_rndis_rx_status(struct hn_softc *, const void *, int); static void hn_nvs_handle_notify(struct hn_softc *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_handle_comp(struct hn_softc *, struct vmbus_channel *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, struct vmbus_channel *, const struct vmbus_chanpkt_hdr *); static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, struct vmbus_channel *, uint64_t); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); #if __FreeBSD_version < 1100095 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); #else static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); static void hn_stop(struct hn_softc *); static void hn_init_locked(struct hn_softc *); static int hn_chan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_chan_detach(struct hn_softc *, struct vmbus_channel *); static int hn_attach_subchans(struct hn_softc *); static void hn_detach_allchans(struct hn_softc *); static void hn_chan_rollup(struct hn_rx_ring *, struct hn_tx_ring *); static void 
hn_set_ring_inuse(struct hn_softc *, int); static int hn_synth_attach(struct hn_softc *, int); static void hn_synth_detach(struct hn_softc *); static int hn_synth_alloc_subchans(struct hn_softc *, int *); +static bool hn_synth_attachable(const struct hn_softc *); static void hn_suspend(struct hn_softc *); static void hn_suspend_data(struct hn_softc *); static void hn_suspend_mgmt(struct hn_softc *); static void hn_resume(struct hn_softc *); static void hn_resume_data(struct hn_softc *); static void hn_resume_mgmt(struct hn_softc *); static void hn_suspend_mgmt_taskfunc(void *, int); static void hn_chan_drain(struct hn_softc *, struct vmbus_channel *); static void hn_update_link_status(struct hn_softc *); static void hn_change_network(struct hn_softc *); static void hn_link_taskfunc(void *, int); static void hn_netchg_init_taskfunc(void *, int); static void hn_netchg_status_taskfunc(void *, int); static void hn_link_status(struct hn_softc *); static int hn_create_rx_data(struct hn_softc *, int); static void hn_destroy_rx_data(struct hn_softc *); static int hn_check_iplen(const struct mbuf *, int); static int hn_set_rxfilter(struct hn_softc *); static int hn_rss_reconfig(struct hn_softc *); -static void hn_rss_ind_fixup(struct hn_softc *, int); +static void hn_rss_ind_fixup(struct hn_softc *); static int hn_rxpkt(struct hn_rx_ring *, const void *, int, const struct hn_rxinfo *); static int hn_tx_ring_create(struct hn_softc *, int); static void hn_tx_ring_destroy(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_fixup_tx_data(struct hn_softc *); static void hn_destroy_tx_data(struct hn_softc *); static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); static void hn_txdesc_gc(struct hn_tx_ring *, struct hn_txdesc *); static int hn_encap(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *); static void hn_set_chim_size(struct hn_softc *, int); static void hn_set_tso_maxsize(struct hn_softc *, int, int); static bool hn_tx_ring_pending(struct hn_tx_ring *); static void hn_tx_ring_qflush(struct hn_tx_ring *); static void hn_resume_tx(struct hn_softc *, int); static void hn_set_txagg(struct hn_softc *); static void *hn_try_txagg(struct ifnet *, struct hn_tx_ring *, struct hn_txdesc *, int); static int hn_get_txswq_depth(const struct hn_tx_ring *); static void hn_txpkt_done(struct hn_nvs_sendctx *, struct hn_softc *, struct vmbus_channel *, const void *, int); static int hn_txpkt_sglist(struct hn_tx_ring *, struct hn_txdesc *); static int hn_txpkt_chim(struct hn_tx_ring *, struct hn_txdesc *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_txeof_taskfunc(void *, int); #ifdef HN_IFSTART_SUPPORT static int hn_start_locked(struct hn_tx_ring *, int); static void hn_start_taskfunc(void *, int); static void hn_start_txeof(struct hn_tx_ring *); static void hn_start_txeof_taskfunc(void *, int); #endif SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segment verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segment verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagrams verification on host side.
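 * Like the TCP knob above, this is a loader tunable (CTLFLAG_RDTUN);
 * e.g. hw.hn.trust_hostudp="0" in /boot/loader.conf (illustrative)
 * turns it off at boot.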
*/ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packets verification on host side. */ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); /* Limit TSO burst size */ static int hn_tso_maxlen = IP_MAXPACKET; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); /* # of LRO entries per RX ring */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif /* Use shared TX taskqueue */ static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); /* Bind TX taskqueue to the target CPU */ static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); #ifdef HN_IFSTART_SUPPORT /* Use ifnet.if_start instead of ifnet.if_transmit */ static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); #endif /* # of channels to use */ static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); /* # of transmit rings to use */ static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); /* Software TX ring depth */ static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif /* Packet transmission aggregation size limit */ static int hn_tx_agg_size = -1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); /* Packet transmission aggregation count limit */ -static int hn_tx_agg_pkts = 0; +static int hn_tx_agg_pkts = -1; SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); static u_int hn_cpu_index; /* next CPU for channel */ static struct taskqueue *hn_tx_taskq; /* shared TX taskqueue */ static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; static device_method_t hn_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hn_probe), DEVMETHOD(device_attach, hn_attach), DEVMETHOD(device_detach, hn_detach), DEVMETHOD(device_shutdown, hn_shutdown), DEVMETHOD_END }; static driver_t hn_driver = { "hn", hn_methods, sizeof(struct hn_softc) }; static devclass_t hn_devclass; DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && txd->chim_size == 0, ("invalid rndis sglist txd")); return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); } static int hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) { struct hn_nvs_rndis rndis; KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && txd->chim_size > 0, ("invalid rndis chim txd")); rndis.nvs_type = HN_NVS_TYPE_RNDIS; rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; rndis.nvs_chim_idx = txd->chim_index; rndis.nvs_chim_sz = txd->chim_size; return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, &rndis, sizeof(rndis), &txd->send_ctx)); } static __inline uint32_t hn_chim_alloc(struct hn_softc *sc) { int i, bmap_cnt = sc->hn_chim_bmap_cnt; u_long *bmap = sc->hn_chim_bmap; uint32_t ret = HN_NVS_CHIM_IDX_INVALID; for (i = 0; i < bmap_cnt; ++i) { int idx; idx = ffsl(~bmap[i]); if (idx == 0) continue; --idx; /* ffsl is 1-based */ KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, ("invalid i %d and idx %d", i, idx)); if (atomic_testandset_long(&bmap[i], idx)) continue; ret = i * LONG_BIT + idx; break; } return (ret); } static __inline void hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) { u_long mask; uint32_t idx; idx = chim_idx / LONG_BIT; KASSERT(idx < sc->hn_chim_bmap_cnt, ("invalid chimney index 0x%x", chim_idx)); mask = 1UL << (chim_idx % LONG_BIT); KASSERT(sc->hn_chim_bmap[idx] & mask, ("index bitmap 0x%lx, chimney index %u, " "bitmap idx %d, bitmask 0x%lx", sc->hn_chim_bmap[idx], chim_idx, idx, mask)); atomic_clear_long(&sc->hn_chim_bmap[idx], mask); } #if defined(INET6) || defined(INET) /* * NOTE: If this function failed, the m_head would be freed. 
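* (The fixup below pulls the Ethernet/IP/TCP headers into the first mbuf, * zeroes ip_len/ip_sum (or ip6_plen), and presets th_sum to the pseudo * header checksum; this appears to be the form the host's LSOv2 offload * expects.)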
*/ static __inline struct mbuf * hn_tso_fixup(struct mbuf *m_head) { struct ether_vlan_header *evl; struct tcphdr *th; int ehlen; KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); #define PULLUP_HDR(m, len) \ do { \ if (__predict_false((m)->m_len < (len))) { \ (m) = m_pullup((m), (len)); \ if ((m) == NULL) \ return (NULL); \ } \ } while (0) PULLUP_HDR(m_head, sizeof(*evl)); evl = mtod(m_head, struct ether_vlan_header *); if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ehlen = ETHER_HDR_LEN; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip; int iphlen; PULLUP_HDR(m_head, ehlen + sizeof(*ip)); ip = mtodo(m_head, ehlen); iphlen = ip->ip_hl << 2; PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); th = mtodo(m_head, ehlen + iphlen); ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6; PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); ip6 = mtodo(m_head, ehlen); if (ip6->ip6_nxt != IPPROTO_TCP) { m_freem(m_head); return (NULL); } PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); th = mtodo(m_head, ehlen + sizeof(*ip6)); ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif return (m_head); #undef PULLUP_HDR } #endif /* INET6 || INET */ static int hn_set_rxfilter(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; uint32_t filter; int error = 0; HN_LOCK_ASSERT(sc); if (ifp->if_flags & IFF_PROMISC) { filter = NDIS_PACKET_TYPE_PROMISCUOUS; } else { filter = NDIS_PACKET_TYPE_DIRECTED; if (ifp->if_flags & IFF_BROADCAST) filter |= NDIS_PACKET_TYPE_BROADCAST; /* TODO: support multicast list */ if ((ifp->if_flags & IFF_ALLMULTI) || !TAILQ_EMPTY(&ifp->if_multiaddrs)) filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; } if (sc->hn_rx_filter != filter) { error = hn_rndis_set_rxfilter(sc, filter); if (!error) sc->hn_rx_filter = filter; } return (error); } static void hn_set_txagg(struct hn_softc *sc) { uint32_t size, pkts; int i; /* * Setup aggregation size. */ if (sc->hn_agg_size < 0) size = UINT32_MAX; else size = sc->hn_agg_size; if (sc->hn_rndis_agg_size < size) size = sc->hn_rndis_agg_size; + /* NOTE: We only aggregate packets using chimney sending buffers. */ + if (size > (uint32_t)sc->hn_chim_szmax) + size = sc->hn_chim_szmax; + if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { /* Disable */ size = 0; pkts = 0; goto done; } /* NOTE: Type of the per TX ring setting is 'int'. */ if (size > INT_MAX) size = INT_MAX; - /* NOTE: We only aggregate packets using chimney sending buffers. */ - if (size > (uint32_t)sc->hn_chim_szmax) - size = sc->hn_chim_szmax; - /* * Setup aggregation packet count. */ if (sc->hn_agg_pkts < 0) pkts = UINT32_MAX; else pkts = sc->hn_agg_pkts; if (sc->hn_rndis_agg_pkts < pkts) pkts = sc->hn_rndis_agg_pkts; if (pkts <= 1) { /* Disable */ size = 0; pkts = 0; goto done; } /* NOTE: Type of the per TX ring setting is 'short'. */ if (pkts > SHRT_MAX) pkts = SHRT_MAX; done: /* NOTE: Type of the per TX ring setting is 'short'. 
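* Similarly, if the RNDIS aggregation alignment does not fit the per TX * ring 'short' field, aggregation is disabled entirely below.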
*/ if (sc->hn_rndis_agg_align > SHRT_MAX) { /* Disable */ size = 0; pkts = 0; } if (bootverbose) { if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", size, pkts, sc->hn_rndis_agg_align); } for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_agg_szmax = size; txr->hn_agg_pktmax = pkts; txr->hn_agg_align = sc->hn_rndis_agg_align; mtx_unlock(&txr->hn_tx_lock); } } static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_rss_reconfig(struct hn_softc *sc) { int error; HN_LOCK_ASSERT(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) return (ENXIO); /* * Disable RSS first. * * NOTE: * Direct reconfiguration by setting the UNCHG flags does * _not_ work properly. */ if (bootverbose) if_printf(sc->hn_ifp, "disable RSS\n"); error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); if (error) { if_printf(sc->hn_ifp, "RSS disable failed\n"); return (error); } /* * Reenable the RSS w/ the updated RSS key or indirect * table. */ if (bootverbose) if_printf(sc->hn_ifp, "reconfig RSS\n"); error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); if (error) { if_printf(sc->hn_ifp, "RSS reconfig failed\n"); return (error); } return (0); } static void -hn_rss_ind_fixup(struct hn_softc *sc, int nchan) +hn_rss_ind_fixup(struct hn_softc *sc) { struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; - int i; + int i, nchan; + nchan = sc->hn_rx_ring_inuse; KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); /* * Check indirect table to make sure that all channels in it * can be used. */ for (i = 0; i < NDIS_HASH_INDCNT; ++i) { if (rss->rss_ind[i] >= nchan) { if_printf(sc->hn_ifp, "RSS indirect table %d fixup: %u -> %d\n", i, rss->rss_ind[i], nchan - 1); rss->rss_ind[i] = nchan - 1; } } } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const struct hyperv_guid g_net_vsc_device_type = { .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; static int hn_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_net_vsc_device_type) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hn_attach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; uint8_t eaddr[ETHER_ADDR_LEN]; struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); HN_LOCK_INIT(sc); /* * Initialize these tunables once. */ sc->hn_agg_size = hn_tx_agg_size; sc->hn_agg_pkts = hn_tx_agg_pkts; /* * Setup taskqueue for transmission. 
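* If no shared TX taskqueue has been set up (hn_tx_taskq is NULL), a * per-device taskqueue is created; the hw.hn.bind_tx_taskq tunable * optionally pins its thread to a CPU.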
*/ if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } /* * Setup taskqueue for management tasks, e.g. link status. */ sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", device_get_nameunit(dev)); TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, hn_netchg_status_taskfunc, sc); /* * Allocate ifnet and setup its name earlier, so that if_printf * can be used by functions that will be called after * ether_ifattach(). */ ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Initialize ifmedia earlier so that it can be unconditionally * destroyed, if an error happens later on. */ ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is the same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } #endif /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; /* * Create enough TX/RX rings, even if only a limited number of * channels can be allocated. */ error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; error = hn_create_rx_data(sc, ring_cnt); if (error) goto failed; /* * Create transaction context for NVS and RNDIS transactions. */ sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); if (sc->hn_xact == NULL) { error = ENXIO; goto failed; } /* * Install orphan handler for the revocation of this device's * primary channel. * * NOTE: * The processing order is critical here: * Install the orphan handler, _before_ testing whether this * device's primary channel has been revoked or not. */ vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); if (vmbus_chan_is_revoked(sc->hn_prichan)) { error = ENXIO; goto failed; } /* * Attach the synthetic parts, i.e. NVS and RNDIS. */ error = hn_synth_attach(sc, ETHERMTU); if (error) goto failed; error = hn_rndis_get_eaddr(sc, eaddr); if (error) goto failed; #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif /* * Fixup TX stuff after the synthetic parts are attached.
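* (e.g. the chimney sending buffer size and the checksum/TSO assistance * settings are presumably only known once NVS/RNDIS negotiation is done).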
*/ hn_fixup_tx_data(sc); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, &sc->hn_nvs_ver, 0, "NVS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_ndis_version_sysctl, "A", "NDIS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_caps_sysctl, "A", "capabilities"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_hwassist_sysctl, "A", "hwassist"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rxfilter_sysctl, "A", "rxfilter"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rss_hash_sysctl, "A", "RSS hash"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_rss_key_sysctl, "IU", "RSS key"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_rss_ind_sysctl, "IU", "RSS indirect table"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, "RNDIS offered packet transmission aggregation size limit"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, "RNDIS offered packet transmission aggregation count limit"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, "RNDIS packet transmission aggregation alignment"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_txagg_size_sysctl, "I", "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_txagg_pkts_sysctl, "I", "Packet transmission aggregation packets, " "0 -- disable, -1 -- auto"); /* * Setup the ifmedia, which has been initialized earlier. */ ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Setup the ifnet for this interface. */ ifp->if_baudrate = IF_Gbps(10); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_init; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else #endif { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; #endif if (sc->hn_caps & HN_CAP_VLAN) { /* XXX not sure about VLAN_MTU. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; } ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; if (ifp->if_hwassist & HN_CSUM_IP_MASK) ifp->if_capabilities |= IFCAP_TXCSUM; if (ifp->if_hwassist & HN_CSUM_IP6_MASK) ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; if (sc->hn_caps & HN_CAP_TSO4) { ifp->if_capabilities |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_IP_TSO; } if (sc->hn_caps & HN_CAP_TSO6) { ifp->if_capabilities |= IFCAP_TSO6; ifp->if_hwassist |= CSUM_IP6_TSO; } /* Enable all available capabilities by default. */ ifp->if_capenable = ifp->if_capabilities; /* * Disable IPv6 TSO and TXCSUM by default; they can still * be enabled through SIOCSIFCAP. */ ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; } ether_ifattach(ifp, eaddr); if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { if_printf(ifp, "TSO segcnt %u segsz %u\n", ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } /* Inform the upper layer about the long frame support. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* * Kick off link status check. */ sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; hn_update_link_status(sc); return (0); failed: if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) hn_synth_detach(sc); hn_detach(dev); return (error); } static int hn_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->hn_ifp; if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { /* * In case the vmbus missed the orphan handler * installation. */ vmbus_xact_ctx_orphan(sc->hn_xact); } if (device_is_attached(dev)) { HN_LOCK(sc); if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) hn_stop(sc); /* * NOTE: * hn_stop() only suspends data, so management * stuff has to be suspended manually here. */ hn_suspend_mgmt(sc); hn_synth_detach(sc); } HN_UNLOCK(sc); ether_ifdetach(ifp); } ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); taskqueue_free(sc->hn_mgmt_taskq0); if (sc->hn_xact != NULL) { /* * Uninstall the orphan handler _before_ the xact is * destructed. */ vmbus_chan_unset_orphan(sc->hn_prichan); vmbus_xact_ctx_destroy(sc->hn_xact); } if_free(ifp); HN_LOCK_DESTROY(sc); return (0); } static int hn_shutdown(device_t dev) { return (0); } static void hn_link_status(struct hn_softc *sc) { uint32_t link_status; int error; error = hn_rndis_get_linkstatus(sc, &link_status); if (error) { /* XXX what to do? */ return; } if (link_status == NDIS_MEDIA_STATE_CONNECTED) sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; else sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; if_link_state_change(sc->hn_ifp, (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? LINK_STATE_UP : LINK_STATE_DOWN); } static void hn_link_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) return; hn_link_status(sc); } static void hn_netchg_init_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; /* Prevent any link status checks from running. */ sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; /* * Fake up a [link down --> link up] state change; a 5 second * delay is used, which closely simulates the miibus reaction * to a link down event.
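* While HN_LINK_FLAG_NETCHG is set, hn_link_taskfunc() bails out early, * so no regular link status check can race with this fake transition; * the flag is cleared again by hn_netchg_status_taskfunc().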
*/ sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 5 * hz); } static void hn_netchg_status_taskfunc(void *xsc, int pending __unused) { struct hn_softc *sc = xsc; /* Re-allow link status checks. */ sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; hn_link_status(sc); } static void hn_update_link_status(struct hn_softc *sc) { if (sc->hn_mgmt_taskq != NULL) taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); } static void hn_change_network(struct hn_softc *sc) { if (sc->hn_mgmt_taskq != NULL) taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("put an onagg txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; if (!STAILQ_EMPTY(&txd->agg_list)) { struct hn_txdesc *tmp_txd; while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { int freed; KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), ("recursive aggregation on aggregated txdesc")); KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), ("not aggregated txdesc")); KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("aggregated txdesc uses dmamap")); KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("aggregated txdesc consumes " "chimney sending buffer")); KASSERT(tmp_txd->chim_size == 0, ("aggregated txdesc has non-zero " "chimney sending size")); STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; freed = hn_txdesc_put(txr, tmp_txd); KASSERT(freed, ("failed to free aggregated txdesc")); } } if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("chim txd uses dmamap")); hn_chim_free(txr->hn_sc, txd->chim_index); txd->chim_index = HN_NVS_CHIM_IDX_INVALID; txd->chim_size = 0; } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br,
txd); #endif return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && STAILQ_EMPTY(&txd->agg_list) && txd->chim_index == HN_NVS_CHIM_IDX_INVALID && txd->chim_size == 0 && (txd->flags & HN_TXD_FLAG_ONLIST) && (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) { KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("recursive aggregation on aggregating txdesc")); KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, ("already aggregated")); KASSERT(STAILQ_EMPTY(&txd->agg_list), ("recursive aggregation on to-be-aggregated txdesc")); txd->flags |= HN_TXD_FLAG_ONAGG; STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); } static bool hn_tx_ring_pending(struct hn_tx_ring *txr) { bool pending = false; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) pending = true; mtx_unlock_spin(&txr->hn_txlist_spin); #else if (!buf_ring_full(txr->hn_txdesc_br)) pending = true; #endif return (pending); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data __unused, int dlen __unused) { struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", - vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); + vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } static void hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. 
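* (The if_start path uses only one TX ring, while every channel owns an * RX ring, so RX rings beyond the first have no TX ring attached.)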
*/ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } static __inline uint32_t hn_rndis_pktmsg_offset(uint32_t ofs) { KASSERT(ofs >= sizeof(struct rndis_packet_msg), ("invalid RNDIS packet msg offset %u", ofs)); return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); } static __inline void * hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, size_t pi_dlen, uint32_t pi_type) { const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); struct rndis_pktinfo *pi; KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); /* * Per-packet-info does not move; it only grows. * * NOTE: * rm_pktinfooffset in this phase counts from the beginning * of rndis_packet_msg. */ KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, ("%u pktinfo overflows RNDIS packet msg", pi_type)); pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen); pkt->rm_pktinfolen += pi_size; pi->rm_size = pi_size; pi->rm_type = pi_type; pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; /* Data immediately follow per-packet-info. */ pkt->rm_dataoffset += pi_size; /* Update RNDIS packet msg length */ pkt->rm_len += pi_size; return (pi->rm_data); } static __inline int hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) { struct hn_txdesc *txd; struct mbuf *m; int error, pkts; txd = txr->hn_agg_txd; KASSERT(txd != NULL, ("no aggregate txdesc")); /* * Since hn_txpkt() will reset this temporary stat, save * it now, so that oerrors can be updated properly, if * hn_txpkt() ever fails. */ pkts = txr->hn_stat_pkts; /* * Since txd's mbuf will _not_ be freed upon hn_txpkt() * failure, save it for later freeing, if hn_txpkt() ever * fails. */ m = txd->m; error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m is not. */ m_freem(m); txr->hn_flush_failed++; if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); } /* Reset all aggregation states. */ txr->hn_agg_txd = NULL; txr->hn_agg_szleft = 0; txr->hn_agg_pktleft = 0; txr->hn_agg_prevpkt = NULL; return (error); } static void * hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, int pktsize) { void *chim; if (txr->hn_agg_txd != NULL) { if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { struct hn_txdesc *agg_txd = txr->hn_agg_txd; struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; int olen; /* * Update the previous RNDIS packet's total length, * it can be increased due to the mandatory alignment * padding for this RNDIS packet. And update the * aggregating txdesc's chimney sending buffer size * accordingly. * * XXX * Zero-out the padding, as required by the RNDIS spec. */ olen = pkt->rm_len; pkt->rm_len = roundup2(olen, txr->hn_agg_align); agg_txd->chim_size += pkt->rm_len - olen; /* Link this txdesc to the parent. */ hn_txdesc_agg(agg_txd, txd); chim = (uint8_t *)pkt + pkt->rm_len; /* Save the current packet for later fixup. */ txr->hn_agg_prevpkt = chim; txr->hn_agg_pktleft--; txr->hn_agg_szleft -= pktsize; if (txr->hn_agg_szleft <= HN_PKTSIZE_MIN(txr->hn_agg_align)) { /* * Probably can't aggregate more packets, * flush this aggregating txdesc proactively. */ txr->hn_agg_pktleft = 0; } /* Done! 
*/ return (chim); } hn_flush_txagg(ifp, txr); } KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); txr->hn_tx_chimney_tried++; txd->chim_index = hn_chim_alloc(txr->hn_sc); if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) return (NULL); txr->hn_tx_chimney++; chim = txr->hn_sc->hn_chim + (txd->chim_index * txr->hn_sc->hn_chim_szmax); if (txr->hn_agg_pktmax > 1 && txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { txr->hn_agg_txd = txd; txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; txr->hn_agg_prevpkt = chim; } return (chim); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; struct rndis_packet_msg *pkt; uint32_t *pi_data; void *chim = NULL; int pkt_hlen, pkt_size; pkt = txd->rndis_pkt; pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); if (pkt_size < txr->hn_chim_size) { chim = hn_try_txagg(ifp, txr, txd, pkt_size); if (chim != NULL) pkt = chim; } else { if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); } pkt->rm_type = REMOTE_NDIS_PACKET_MSG; pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len; pkt->rm_dataoffset = sizeof(*pkt); pkt->rm_datalen = m_head->m_pkthdr.len; pkt->rm_oobdataoffset = 0; pkt->rm_oobdatalen = 0; pkt->rm_oobdataelements = 0; pkt->rm_pktinfooffset = sizeof(*pkt); pkt->rm_pktinfolen = 0; pkt->rm_vchandle = 0; pkt->rm_reserved = 0; if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. 
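* (The ring's own index, hn_tx_idx, serves as the hash value below.)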
*/ pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); *pi_data = txr->hn_tx_idx; } if (m_head->m_flags & M_VLANTAG) { pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); *pi_data = NDIS_VLAN_INFO_MAKE( EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { #if defined(INET6) || defined(INET) pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, m_head->m_pkthdr.tso_segsz); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, m_head->m_pkthdr.tso_segsz); } #endif #endif /* INET6 || INET */ } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); if (m_head->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP)) { *pi_data = NDIS_TXCSUM_INFO_IPV6; } else { *pi_data = NDIS_TXCSUM_INFO_IPV4; if (m_head->m_pkthdr.csum_flags & CSUM_IP) *pi_data |= NDIS_TXCSUM_INFO_IPCS; } if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) *pi_data |= NDIS_TXCSUM_INFO_TCPCS; else if (m_head->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) *pi_data |= NDIS_TXCSUM_INFO_UDPCS; } pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; /* Convert RNDIS packet message offsets */ pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset); pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); /* * Fast path: Chimney sending. */ if (chim != NULL) { struct hn_txdesc *tgt_txd = txd; if (txr->hn_agg_txd != NULL) { tgt_txd = txr->hn_agg_txd; #ifdef INVARIANTS *m_head0 = NULL; #endif } KASSERT(pkt == chim, ("RNDIS pkt not in chimney sending buffer")); KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, ("chimney sending buffer is not used")); tgt_txd->chim_size += pkt->rm_len; m_copydata(m_head, 0, m_head->m_pkthdr.len, ((uint8_t *)chim) + pkt_hlen); txr->hn_gpa_cnt = 0; txr->hn_sendpkt = hn_txpkt_chim; goto done; } KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("chimney buffer is used")); KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (__predict_false(error)) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; /* +1 RNDIS packet message */ txr->hn_gpa_cnt = nsegs + 1; /* send packet with page buffer */ txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; txr->hn_gpa[0].gpa_len = pkt_hlen; /* * Fill the page buffers with mbuf info after the page * buffer for RNDIS packet message. 
*/ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } txd->chim_index = HN_NVS_CHIM_IDX_INVALID; txd->chim_size = 0; txr->hn_sendpkt = hn_txpkt_sglist; done: txd->m = m_head; /* Set the completion routine */ hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); /* Update temporary stats for later use. */ txr->hn_stat_pkts++; txr->hn_stat_size += m_head->m_pkthdr.len; if (m_head->m_flags & M_MCAST) txr->hn_stat_mcasts++; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that this txd and any aggregated txds are not freed * before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = txr->hn_sendpkt(txr, txd); if (!error) { if (bpf_peers_present(ifp->if_bpf)) { const struct hn_txdesc *tmp_txd; ETHER_BPF_MTAP(ifp, txd->m); STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) ETHER_BPF_MTAP(ifp, tmp_txd->m); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { if_inc_counter(ifp, IFCOUNTER_OBYTES, txr->hn_stat_size); if (txr->hn_stat_mcasts != 0) { if_inc_counter(ifp, IFCOUNTER_OMCASTS, txr->hn_stat_mcasts); } } txr->hn_pkts += txr->hn_stat_pkts; txr->hn_sends++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after setting hn_has_txeof, * in case we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. */ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } /* Reset temporary stats, after this sending is done. */ txr->hn_stat_size = 0; txr->hn_stat_pkts = 0; txr->hn_stat_mcasts = 0; return (error); } /* * Append the specified data to the indicated mbuf chain. * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead.
*/ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif static int hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_rxinfo *info) { struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; int hash_type; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (dlen <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), data, dlen); m_new->m_pkthdr.len = m_new->m_len = dlen; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (dlen > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, dlen, data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { /* IP csum offload */ if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } /* * XXX * As of this writing (Oct 28th, 2016), host side will turn * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so * the do_lro setting here is actually _not_ accurate. We * depend on the RSS hash type check to reset do_lro.
*/ if ((info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (pr == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( NDIS_VLAN_INFO_ID(info->vlan_info), NDIS_VLAN_INFO_PRI(info->vlan_info), NDIS_VLAN_INFO_CFI(info->vlan_info)); m_new->m_flags |= M_VLANTAG; } if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = info->hash_value; hash_type = M_HASHTYPE_OPAQUE_HASH; if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); /* * NOTE: * do_lro is reset if the hash types are not TCP * related. See the comment in the above csum_flags * setup section. */ switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; do_lro = 0; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; do_lro = 0; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; do_lro = 0; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE!
*/ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct hn_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int mask, error = 0; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu > HN_MTU_MAX) { error = EINVAL; break; } HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if ((sc->hn_caps & HN_CAP_MTU) == 0) { /* Can't change MTU */ HN_UNLOCK(sc); error = EOPNOTSUPP; break; } if (ifp->if_mtu == ifr->ifr_mtu) { HN_UNLOCK(sc); break; } /* * Suspend this interface before the synthetic parts * are ripped. */ hn_suspend(sc); /* * Detach the synthetic parts, i.e. NVS and RNDIS. */ hn_synth_detach(sc); /* * Reattach the synthetic parts, i.e. NVS and RNDIS, * with the new MTU setting. */ error = hn_synth_attach(sc, ifr->ifr_mtu); if (error) { HN_UNLOCK(sc); break; } /* * Commit the requested MTU, after the synthetic parts * have been successfully attached. */ ifp->if_mtu = ifr->ifr_mtu; /* * Make sure that various parameters based on MTU are * still valid, after the MTU change. */ if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) hn_set_chim_size(sc, sc->hn_chim_szmax); hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); #endif /* * All done! Resume the interface now. */ hn_resume(sc); HN_UNLOCK(sc); break; case SIOCSIFFLAGS: HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Caller might hold a mutex, e.g. * bpf; use busy-wait for the RNDIS * reply. */ HN_NO_SLEEPING(sc); hn_set_rxfilter(sc); HN_SLEEPING_OK(sc); } else { hn_init_locked(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) hn_stop(sc); } sc->hn_if_flags = ifp->if_flags; HN_UNLOCK(sc); break; case SIOCSIFCAP: HN_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); else ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); else ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); } /* TODO: flip RNDIS offload parameters for RXCSUM. */ if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; #endif if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } HN_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: HN_LOCK(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { HN_UNLOCK(sc); break; } if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Multicast uses mutex; use busy-wait for * the RNDIS reply.
*/ HN_NO_SLEEPING(sc); hn_set_rxfilter(sc); HN_SLEEPING_OK(sc); } HN_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void hn_stop(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; int i; HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); /* Clear RUNNING bit _before_ hn_suspend_data() */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); hn_suspend_data(sc); /* Clear OACTIVE bit. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; } static void hn_init_locked(struct hn_softc *sc) { struct ifnet *ifp = sc->hn_ifp; int i; HN_LOCK_ASSERT(sc); if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) return; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* Configure RX filter */ hn_set_rxfilter(sc); /* Clear OACTIVE bit. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; /* Clear TX 'suspended' bit. */ hn_resume_tx(sc, sc->hn_tx_ring_inuse); /* Everything is ready; unleash! */ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); } static void hn_init(void *xsc) { struct hn_softc *sc = xsc; HN_LOCK(sc); hn_init_locked(sc); HN_UNLOCK(sc); } #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) { HN_UNLOCK(sc); return EINVAL; } hn_set_lro_lenlim(sc, lenlim); HN_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. 
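* For example, a user-visible aggregation limit of 3 ACKs corresponds * to an lro_ackcnt_lim (append count) of 2.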
*/ --ackcnt; HN_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; HN_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } HN_UNLOCK(sc); return 0; } static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chim_size, error; chim_size = sc->hn_tx_ring[0].hn_chim_size; error = sysctl_handle_int(oidp, &chim_size, 0, req); if (error || req->newptr == NULL) return error; if (chim_size > sc->hn_chim_szmax || chim_size <= 0) return EINVAL; HN_LOCK(sc); hn_set_chim_size(sc, chim_size); HN_UNLOCK(sc); return 0; } #if __FreeBSD_version < 1100095 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((int *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((int *)((uint8_t *)rxr + ofs)) = 0; } return 0; } #else static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } #endif static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
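* (Only on a sysctl write; plain reads return above without clearing.)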
*/ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; HN_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } HN_UNLOCK(sc); return 0; } static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, size; size = sc->hn_agg_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); sc->hn_agg_size = size; hn_set_txagg(sc); HN_UNLOCK(sc); return (0); } static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error, pkts; pkts = sc->hn_agg_pkts; error = sysctl_handle_int(oidp, &pkts, 0, req); if (error || req->newptr == NULL) return (error); HN_LOCK(sc); sc->hn_agg_pkts = pkts; hn_set_txagg(sc); HN_UNLOCK(sc); return (0); } static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int pkts; pkts = sc->hn_tx_ring[0].hn_agg_pktmax; return (sysctl_handle_int(oidp, &pkts, 0, req)); } static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int align; align = sc->hn_tx_ring[0].hn_agg_align; return (sysctl_handle_int(oidp, &align, 0, req)); } static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char caps_str[128]; uint32_t caps; HN_LOCK(sc); caps = sc->hn_caps; HN_UNLOCK(sc); snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); } static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char assist_str[128]; uint32_t hwassist; HN_LOCK(sc); hwassist = sc->hn_ifp->if_hwassist; HN_UNLOCK(sc); snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); } static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char filter_str[128]; uint32_t filter; HN_LOCK(sc); filter = sc->hn_rx_filter; HN_UNLOCK(sc); snprintf(filter_str, sizeof(filter_str), "%b", filter, NDIS_PACKET_TYPES); return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); } static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error; HN_LOCK(sc); error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); if (error || req->newptr == NULL) goto back; error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); if (error) goto back; sc->hn_flags |= HN_FLAG_HAS_RSSKEY; if (sc->hn_rx_ring_inuse > 1) { error = hn_rss_reconfig(sc); } else { /* Not RSS capable, at least for now; just save the RSS key. 
*/ error = 0; } back: HN_UNLOCK(sc); return (error); } static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int error; HN_LOCK(sc); error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); if (error || req->newptr == NULL) goto back; /* * Don't allow RSS indirect table change, if this interface is not * RSS capable currently. */ if (sc->hn_rx_ring_inuse == 1) { error = EOPNOTSUPP; goto back; } error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); if (error) goto back; sc->hn_flags |= HN_FLAG_HAS_RSSIND; - hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse); + hn_rss_ind_fixup(sc); error = hn_rss_reconfig(sc); back: HN_UNLOCK(sc); return (error); } static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char hash_str[128]; uint32_t hash; HN_LOCK(sc); hash = sc->hn_rss_hash; HN_UNLOCK(sc); snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is at * least as much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static int hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; /* * Create RXBUF for reception. * * NOTE: * - It is shared by all channels. * - A large enough buffer is allocated, certain versions of NVS * may further limit the usable space.
*/ sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_rxbuf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); return (ENOMEM); } sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_DEVBUF, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; if (bootverbose) device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, &rxr->hn_br_dma, BUS_DMA_WAITOK); if (rxr->hn_br == NULL) { device_printf(dev, "allocate bufring failed\n"); return (ENOMEM); } if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); rxr->hn_rx_idx = i; rxr->hn_rxbuf = sc->hn_rxbuf; /* * Initialize LRO. */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "pktbuf_len", CTLFLAG_RD, &rxr->hn_pktbuf_len, 0, "Temporary channel packet buffer length"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), #if __FreeBSD_version < 1100095 hn_rx_stat_int_sysctl, #else hn_rx_stat_u64_sysctl, #endif "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), #if __FreeBSD_version < 1100095 hn_rx_stat_int_sysctl, #else hn_rx_stat_u64_sysctl, #endif "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), 
hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segment verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_ack_failed), hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); return (0); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rxbuf != NULL) { - hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); + if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) + hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); + else + device_printf(sc->hn_dev, "RXBUF is referenced\n"); sc->hn_rxbuf = NULL; } if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (rxr->hn_br == NULL) continue; - hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); + if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { + hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); + } else { + device_printf(sc->hn_dev, + "%dth channel bufring is referenced\n", i); + } rxr->hn_br = NULL; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_pktbuf, M_DEVBUF); } free(sc->hn_rx_ring, M_DEVBUF); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; }
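The guarded frees above are the heart of this revision: DMA memory that the hypervisor may still reference is deliberately leaked rather than freed. A minimal user-space sketch of the same pattern follows; the types and names are hypothetical stand-ins, not part of the driver:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the driver's per-buffer DMA bookkeeping. */
struct guarded_buf {
	void	*mem;
	bool	 host_ref;	/* does the host still reference this memory? */
};

/*
 * Free the buffer only when the host no longer references it.
 * Freeing memory the host could still DMA into would corrupt
 * whatever reuses those pages, so a deliberate leak (plus a log
 * message) is the lesser evil.
 */
static void
guarded_buf_destroy(struct guarded_buf *gb, const char *name)
{
	if (gb->mem == NULL)
		return;
	if (!gb->host_ref)
		free(gb->mem);
	else
		fprintf(stderr, "%s is referenced; leaking\n", name);
	gb->mem = NULL;
}

int
main(void)
{
	struct guarded_buf rxbuf = { malloc(4096), false };

	guarded_buf_destroy(&rxbuf, "rxbuf");	/* host_ref is false: freed */
	return (0);
}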
static int hn_tx_ring_create(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_DEVBUF, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; #ifdef HN_IFSTART_SUPPORT if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else #endif { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS packet messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_PKT_ALIGN, /* alignment */ HN_RNDIS_PKT_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_PKT_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_PKT_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; txd->chim_index = HN_NVS_CHIM_IDX_INVALID; STAILQ_INIT(&txd->agg_list); /* * Allocate and load RNDIS packet message. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_pkt, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &txd->rndis_pkt_dmap); if (error) { device_printf(dev, "failed to allocate rndis_packet_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap, txd->rndis_pkt, HN_RNDIS_PKT_LEN, hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_packet_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* DMA map for TX data. 
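 * (One such map per txdesc; it is created right below and released * in hn_txdesc_dmamap_destroy().)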
*/ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", CTLFLAG_RW, &txr->hn_sends, "# of sends"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT(txd->refs == 0 || txd->refs == 1, ("invalid txd refs %d", txd->refs)); /* Aggregated txds will be freed by their aggregating txd. */ if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { int freed; freed = hn_txdesc_put(txr, txd); KASSERT(freed, ("can't free txdesc")); } } static void hn_tx_ring_destroy(struct hn_tx_ring *txr) { int i; if (txr->hn_txdesc == NULL) return; /* * NOTE: * Because the freeing of aggregated txds will be deferred * to the aggregating txd, two passes are used here: * - The first pass GCes any pending txds. This GC is necessary, * since if the channels are revoked, hypervisor will not * deliver send-done for all pending txds. * - The second pass frees the busdma stuffs, i.e. after all txds * were freed. 
*/ for (i = 0; i < txr->hn_txdesc_cnt; ++i) hn_txdesc_gc(txr, &txr->hn_txdesc[i]); for (i = 0; i < txr->hn_txdesc_cnt; ++i) hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); #endif free(txr->hn_txdesc, M_DEVBUF); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; /* * Create TXBUF for chimney sending. * * NOTE: It is shared by all channels. */ sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_chim == NULL) { device_printf(sc->hn_dev, "allocate txbuf failed\n"); return (ENOMEM); } sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_DEVBUF, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_tx_ring_create(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_flush_failed), hn_tx_stat_ulong_sysctl, "LU", "# of packet transmission aggregation flush failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_chim_szmax, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, "Applied packet transmission aggregation size"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_txagg_pktmax_sysctl, "I", "Applied packet transmission aggregation packets"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_txagg_align_sysctl, "I", "Applied packet transmission aggregation alignment"); return 0; } static void hn_set_chim_size(struct hn_softc *sc, int chim_size) { int i; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_chim_size = chim_size; } static void hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) { struct ifnet *ifp = sc->hn_ifp; int tso_minlen; if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) return; KASSERT(sc->hn_ndis_tso_sgmin >= 2, ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); tso_minlen = sc->hn_ndis_tso_sgmin * mtu; KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && sc->hn_ndis_tso_szmax <= IP_MAXPACKET, ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); if (tso_maxlen < tso_minlen) tso_maxlen = tso_minlen; else if (tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; if (tso_maxlen > sc->hn_ndis_tso_szmax) tso_maxlen = sc->hn_ndis_tso_szmax; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); if (bootverbose) if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); } static void hn_fixup_tx_data(struct hn_softc *sc) { uint64_t csum_assist; int i; hn_set_chim_size(sc, sc->hn_chim_szmax); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_chim_szmax) hn_set_chim_size(sc, hn_tx_chimney_size); csum_assist = 0; if (sc->hn_caps & HN_CAP_IPCS) csum_assist |= CSUM_IP; if (sc->hn_caps & HN_CAP_TCP4CS) csum_assist |= CSUM_IP_TCP; if (sc->hn_caps & HN_CAP_UDP4CS) csum_assist |= CSUM_IP_UDP; if (sc->hn_caps & HN_CAP_TCP6CS) csum_assist |= CSUM_IP6_TCP; if (sc->hn_caps & HN_CAP_UDP6CS) csum_assist |= CSUM_IP6_UDP; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_csum_assist = csum_assist; if (sc->hn_caps & HN_CAP_HASHVAL) { /* * Support HASHVAL pktinfo on TX path. 
*/ if (bootverbose) if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; } } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_chim != NULL) { - hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); + if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { + hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); + } else { + device_printf(sc->hn_dev, + "chimney sending buffer is referenced\n"); + } sc->hn_chim = NULL; } if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_tx_ring_destroy(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_DEVBUF); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } #ifdef HN_IFSTART_SUPPORT static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; int sched = 0; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); if (__predict_false(txr->hn_suspended)) return (0); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (0); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and the sending of any * follow-up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); sched = 1; break; } #if defined(INET6) || defined(INET) if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { m_head = hn_tso_fixup(m_head); if (__predict_false(m_head == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); continue; } } #endif txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(ifp, txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ KASSERT(txr->hn_agg_txd == NULL, ("encap failed w/ pending aggregating txdesc")); continue; } if (txr->hn_agg_pktleft == 0) { if (txr->hn_agg_txd != NULL) { KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); error = hn_flush_txagg(ifp, txr); if (__predict_false(error)) { atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } else { KASSERT(m_head != NULL, ("mbuf was freed")); error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } } #ifdef INVARIANTS else { KASSERT(txr->hn_agg_txd != NULL, ("no aggregating txdesc")); KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); } #endif } /* Flush pending aggregated transmission.
*/ if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); return (sched); } static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } #endif /* HN_IFSTART_SUPPORT */ static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; int sched = 0; mtx_assert(&txr->hn_tx_lock, MA_OWNED); #ifdef HN_IFSTART_SUPPORT KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); #endif KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); if (__predict_false(txr->hn_suspended)) return (0); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return (0); while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and the sending of any * follow-up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); sched = 1; break; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(ifp, txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ KASSERT(txr->hn_agg_txd == NULL, ("encap failed w/ pending aggregating txdesc")); drbr_advance(ifp, txr->hn_mbuf_br); continue; } if (txr->hn_agg_pktleft == 0) { if (txr->hn_agg_txd != NULL) { KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); error = hn_flush_txagg(ifp, txr); if (__predict_false(error)) { txr->hn_oactive = 1; break; } } else { KASSERT(m_head != NULL, ("mbuf was freed")); error = hn_txpkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } } } #ifdef INVARIANTS else { KASSERT(txr->hn_agg_txd != NULL, ("no aggregating txdesc")); KASSERT(m_head == NULL, ("pending mbuf for aggregating txdesc")); } #endif /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } /* Flush pending aggregated transmission.
*/ if (txr->hn_agg_txd != NULL) hn_flush_txagg(ifp, txr); return (sched); } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; #if defined(INET6) || defined(INET) /* * Perform TSO packet header fixup now, since the TSO * packet header should be cache-hot. */ if (m->m_pkthdr.csum_flags & CSUM_TSO) { m = hn_tso_fixup(m); if (__predict_false(m == NULL)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return EIO; } } #endif /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_tx_ring_qflush(struct hn_tx_ring *txr) { struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static int hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct vmbus_chan_br cbr; struct hn_rx_ring *rxr; struct hn_tx_ring *txr = NULL; int idx, error; idx = vmbus_chan_subidx(chan); /* * Link this channel to RX/TX ring. */ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", idx, vmbus_chan_id(chan)); } } /* Bind this channel to a proper CPU. 
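 * (The modulo below spreads the channels round-robin across the * online CPUs, starting from sc->hn_cpu.)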
*/ vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); /* * Open this channel */ cbr.cbr = rxr->hn_br; cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; cbr.cbr_txsz = HN_TXBR_SIZE; cbr.cbr_rxsz = HN_RXBR_SIZE; error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); if (error) { - if_printf(sc->hn_ifp, "open chan%u failed: %d\n", - vmbus_chan_id(chan), error); - rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; - if (txr != NULL) - txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; + if (error == EISCONN) { + if_printf(sc->hn_ifp, "bufring is connected after " + "chan%u open failure\n", vmbus_chan_id(chan)); + rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; + } else { + if_printf(sc->hn_ifp, "open chan%u failed: %d\n", + vmbus_chan_id(chan), error); + } } return (error); } static void hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; - int idx; + int idx, error; idx = vmbus_chan_subidx(chan); /* * Link this channel to RX/TX ring. */ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), ("RX ring %d is not attached", idx)); rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), ("TX ring %d is not attached", idx)); txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } /* * Close this channel. * * NOTE: * Channel closing does _not_ destroy the target channel. */ - vmbus_chan_close(chan); + error = vmbus_chan_close_direct(chan); + if (error == EISCONN) { + if_printf(sc->hn_ifp, "chan%u bufring is connected " + "after being closed\n", vmbus_chan_id(chan)); + rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; + } else if (error) { + if_printf(sc->hn_ifp, "chan%u close failed: %d\n", + vmbus_chan_id(chan), error); + } } static int hn_attach_subchans(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i, error = 0; - if (subchan_cnt == 0) - return (0); + KASSERT(subchan_cnt > 0, ("no sub-channels")); /* Attach the sub-channels. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) { - error = hn_chan_attach(sc, subchans[i]); - if (error) - break; + int error1; + + error1 = hn_chan_attach(sc, subchans[i]); + if (error1) { + error = error1; + /* Move on; all channels will be detached later. */ + } } vmbus_subchan_rel(subchans, subchan_cnt); if (error) { if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); } else { if (bootverbose) { if_printf(sc->hn_ifp, "%d sub-channels attached\n", subchan_cnt); } } return (error); } static void hn_detach_allchans(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i; if (subchan_cnt == 0) goto back; /* Detach the sub-channels. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) hn_chan_detach(sc, subchans[i]); vmbus_subchan_rel(subchans, subchan_cnt); back: /* * Detach the primary channel, _after_ all sub-channels * are detached. */ hn_chan_detach(sc, sc->hn_prichan); /* Wait for sub-channels to be destroyed, if any.
*/ vmbus_subchan_drain(sc->hn_prichan); #ifdef INVARIANTS for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { KASSERT((sc->hn_rx_ring[i].hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("%dth RX ring is still attached", i)); } for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { KASSERT((sc->hn_tx_ring[i].hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("%dth TX ring is still attached", i)); } #endif } static int hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) { struct vmbus_channel **subchans; int nchan, rxr_cnt, error; nchan = *nsubch + 1; if (nchan == 1) { /* * Multiple RX/TX rings are not requested. */ *nsubch = 0; return (0); } /* * Query RSS capabilities, e.g. # of RX rings, and # of indirect * table entries. */ error = hn_rndis_query_rsscaps(sc, &rxr_cnt); if (error) { /* No RSS; this is benign. */ *nsubch = 0; return (0); } if (bootverbose) { if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", rxr_cnt, nchan); } if (nchan > rxr_cnt) nchan = rxr_cnt; if (nchan == 1) { if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); *nsubch = 0; return (0); } /* * Allocate sub-channels from NVS. */ *nsubch = nchan - 1; error = hn_nvs_alloc_subchans(sc, nsubch); if (error || *nsubch == 0) { /* Failed to allocate sub-channels. */ *nsubch = 0; return (0); } /* * Wait for all sub-channels to become ready before moving on. */ subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); vmbus_subchan_rel(subchans, *nsubch); return (0); } +static bool +hn_synth_attachable(const struct hn_softc *sc) +{ + int i; + + if (sc->hn_flags & HN_FLAG_ERRORS) + return (false); + + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; + + if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) + return (false); + } + return (true); +} + static int hn_synth_attach(struct hn_softc *sc, int mtu) { +#define ATTACHED_NVS 0x0002 +#define ATTACHED_RNDIS 0x0004 + struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; int error, nsubch, nchan, i; - uint32_t old_caps; + uint32_t old_caps, attached = 0; KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, ("synthetic parts were attached")); + if (!hn_synth_attachable(sc)) + return (ENXIO); + /* Save capabilities for later verification. */ old_caps = sc->hn_caps; sc->hn_caps = 0; /* Clear RSS stuffs. */ sc->hn_rss_ind_size = 0; sc->hn_rss_hash = 0; /* * Attach the primary channel _before_ attaching NVS and RNDIS. */ error = hn_chan_attach(sc, sc->hn_prichan); if (error) - return (error); + goto failed; /* * Attach NVS. */ error = hn_nvs_attach(sc, mtu); if (error) - return (error); + goto failed; + attached |= ATTACHED_NVS; /* * Attach RNDIS _after_ NVS is attached. */ error = hn_rndis_attach(sc, mtu); if (error) - return (error); + goto failed; + attached |= ATTACHED_RNDIS; /* * Make sure capabilities are not changed. */ if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", old_caps, sc->hn_caps); - /* Restore old capabilities and abort. */ - sc->hn_caps = old_caps; - return ENXIO; + error = ENXIO; + goto failed; } /* * Allocate sub-channels for multi-TX/RX rings. * * NOTE: * The # of RX rings that can be used is equivalent to the # of * channels to be requested. */ nsubch = sc->hn_rx_ring_cnt - 1; error = hn_synth_alloc_subchans(sc, &nsubch); if (error) - return (error); + goto failed; + /* NOTE: _Full_ synthetic parts detach is required now. 
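+ * (From this point on, a failure must unwind through hn_synth_detach() + * in the failed: path below, not by detaching the pieces one by one.)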
*/ + sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; + /* + * Set the # of TX/RX rings that could be used according to + * the # of channels that NVS offered. + */ nchan = nsubch + 1; + hn_set_ring_inuse(sc, nchan); if (nchan == 1) { /* Only the primary channel can be used; done */ goto back; } /* - * Configure RSS key and indirect table _after_ all sub-channels - * are allocated. + * Attach the sub-channels. + * + * NOTE: hn_set_ring_inuse() _must_ have been called. */ + error = hn_attach_subchans(sc); + if (error) + goto failed; + /* + * Configure RSS key and indirect table _after_ all sub-channels + * are attached. + */ if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { /* * RSS key is not set yet; set it to the default RSS key. */ if (bootverbose) if_printf(sc->hn_ifp, "setup default RSS key\n"); memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); sc->hn_flags |= HN_FLAG_HAS_RSSKEY; } if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { /* * RSS indirect table is not set yet; set it up in round- * robin fashion. */ if (bootverbose) { if_printf(sc->hn_ifp, "setup default RSS indirect " "table\n"); } for (i = 0; i < NDIS_HASH_INDCNT; ++i) rss->rss_ind[i] = i % nchan; sc->hn_flags |= HN_FLAG_HAS_RSSIND; } else { /* * # of usable channels may be changed, so we have to * make sure that all entries in RSS indirect table * are valid. + * + * NOTE: hn_set_ring_inuse() _must_ have been called. */ - hn_rss_ind_fixup(sc, nchan); + hn_rss_ind_fixup(sc); } error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); - if (error) { - /* - * Failed to configure RSS key or indirect table; only - * the primary channel can be used. - */ - nchan = 1; - } + if (error) + goto failed; back: /* - * Set the # of TX/RX rings that could be used according to - * the # of channels that NVS offered. - */ - hn_set_ring_inuse(sc, nchan); - - /* - * Attach the sub-channels, if any. - */ - error = hn_attach_subchans(sc); - if (error) - return (error); - - /* * Fixup transmission aggregation setup. */ hn_set_txagg(sc); - - sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; return (0); + +failed: + if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { + hn_synth_detach(sc); + } else { + if (attached & ATTACHED_RNDIS) + hn_rndis_detach(sc); + if (attached & ATTACHED_NVS) + hn_nvs_detach(sc); + hn_chan_detach(sc, sc->hn_prichan); + /* Restore old capabilities. */ + sc->hn_caps = old_caps; + } + return (error); + +#undef ATTACHED_RNDIS +#undef ATTACHED_NVS } /* * NOTE: * The interface must have been suspended though hn_suspend(), before * this function get called. */ static void hn_synth_detach(struct hn_softc *sc) { - HN_LOCK_ASSERT(sc); KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); /* Detach the RNDIS first. */ hn_rndis_detach(sc); /* Detach NVS. */ hn_nvs_detach(sc); /* Detach all of the channels. */ hn_detach_allchans(sc); sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; } static void hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) { KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, ("invalid ring count %d", ring_cnt)); if (sc->hn_tx_ring_cnt > ring_cnt) sc->hn_tx_ring_inuse = ring_cnt; else sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_rx_ring_inuse = ring_cnt; if (bootverbose) { if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); } } static void hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) { /* * NOTE: * The TX bufring will not be drained by the hypervisor, * if the primary channel is revoked. 
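 * (Hence the loop below skips the TX-empty check once the primary * channel has been revoked.)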
*/ while (!vmbus_chan_rx_empty(chan) || (!vmbus_chan_is_revoked(sc->hn_prichan) && !vmbus_chan_tx_empty(chan))) pause("waitch", 1); vmbus_chan_intr_drain(chan); } static void hn_suspend_data(struct hn_softc *sc) { struct vmbus_channel **subch = NULL; struct hn_tx_ring *txr; int i, nsubch; HN_LOCK_ASSERT(sc); /* * Suspend TX. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_suspended = 1; mtx_unlock(&txr->hn_tx_lock); /* No one is able to send more packets now. */ /* * Wait for all pending sends to finish. * * NOTE: * We will _not_ receive all pending send-done, if the * primary channel is revoked. */ while (hn_tx_ring_pending(txr) && !vmbus_chan_is_revoked(sc->hn_prichan)) pause("hnwtx", 1 /* 1 tick */); } /* * Disable RX by clearing RX filter. */ sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* * Give RNDIS enough time to flush all pending data packets. */ pause("waitrx", (200 * hz) / 1000); /* * Drain RX/TX bufrings and interrupts. */ nsubch = sc->hn_rx_ring_inuse - 1; if (nsubch > 0) subch = vmbus_subchan_get(sc->hn_prichan, nsubch); if (subch != NULL) { for (i = 0; i < nsubch; ++i) hn_chan_drain(sc, subch[i]); } hn_chan_drain(sc, sc->hn_prichan); if (subch != NULL) vmbus_subchan_rel(subch, nsubch); /* * Drain any pending TX tasks. * * NOTE: * The above hn_chan_drain() can dispatch TX tasks, so the TX * tasks will have to be drained _after_ the above hn_chan_drain() * calls. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) { ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; } static void hn_suspend_mgmt(struct hn_softc *sc) { struct task task; HN_LOCK_ASSERT(sc); /* * Make sure that hn_mgmt_taskq0 can no longer be accessed * through hn_mgmt_taskq. */ TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); vmbus_chan_run_task(sc->hn_prichan, &task); /* * Make sure that all pending management tasks are completed. */ taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); taskqueue_drain_all(sc->hn_mgmt_taskq0); } static void hn_suspend(struct hn_softc *sc) { if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) hn_suspend_data(sc); hn_suspend_mgmt(sc); } static void hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) { int i; KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, ("invalid TX ring count %d", tx_ring_cnt)); for (i = 0; i < tx_ring_cnt; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; mtx_lock(&txr->hn_tx_lock); txr->hn_suspended = 0; mtx_unlock(&txr->hn_tx_lock); } } static void hn_resume_data(struct hn_softc *sc) { int i; HN_LOCK_ASSERT(sc); /* * Re-enable RX. */ hn_set_rxfilter(sc); /* * Make sure to clear suspend status on "all" TX rings, * since hn_tx_ring_inuse can be changed after * hn_suspend_data(). */ hn_resume_tx(sc, sc->hn_tx_ring_cnt); #ifdef HN_IFSTART_SUPPORT if (!hn_use_if_start) #endif { /* * Flush unused drbrs, since hn_tx_ring_inuse may be * reduced. */ for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); } /* * Kick start TX. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; /* * Use txeof task, so that any pending oactive can be * cleared properly.
*/ taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_resume_mgmt(struct hn_softc *sc) { sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; /* * Kick off network change detection, if it was pending. * If no network change was pending, start link status * checks, which is more lightweight than network change * detection. */ if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) hn_change_network(sc); else hn_update_link_status(sc); } static void hn_resume(struct hn_softc *sc) { if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) hn_resume_data(sc); hn_resume_mgmt(sc); } static void hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) { const struct rndis_status_msg *msg; int ofs; if (dlen < sizeof(*msg)) { if_printf(sc->hn_ifp, "invalid RNDIS status\n"); return; } msg = data; switch (msg->rm_status) { case RNDIS_STATUS_MEDIA_CONNECT: case RNDIS_STATUS_MEDIA_DISCONNECT: hn_update_link_status(sc); break; case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: /* Not really useful; ignore. */ break; case RNDIS_STATUS_NETWORK_CHANGE: ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); if (dlen < ofs + msg->rm_stbuflen || msg->rm_stbuflen < sizeof(uint32_t)) { if_printf(sc->hn_ifp, "network changed\n"); } else { uint32_t change; memcpy(&change, ((const uint8_t *)msg) + ofs, sizeof(change)); if_printf(sc->hn_ifp, "network changed, change %u\n", change); } hn_change_network(sc); break; default: if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", msg->rm_status); break; } } static int hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) { const struct rndis_pktinfo *pi = info_data; uint32_t mask = 0; while (info_dlen != 0) { const void *data; uint32_t dlen; if (__predict_false(info_dlen < sizeof(*pi))) return (EINVAL); if (__predict_false(info_dlen < pi->rm_size)) return (EINVAL); info_dlen -= pi->rm_size; if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) return (EINVAL); if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) return (EINVAL); dlen = pi->rm_size - pi->rm_pktinfooffset; data = pi->rm_data; switch (pi->rm_type) { case NDIS_PKTINFO_TYPE_VLAN: if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) return (EINVAL); info->vlan_info = *((const uint32_t *)data); mask |= HN_RXINFO_VLAN; break; case NDIS_PKTINFO_TYPE_CSUM: if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) return (EINVAL); info->csum_info = *((const uint32_t *)data); mask |= HN_RXINFO_CSUM; break; case HN_NDIS_PKTINFO_TYPE_HASHVAL: if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) return (EINVAL); info->hash_value = *((const uint32_t *)data); mask |= HN_RXINFO_HASHVAL; break; case HN_NDIS_PKTINFO_TYPE_HASHINF: if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) return (EINVAL); info->hash_info = *((const uint32_t *)data); mask |= HN_RXINFO_HASHINF; break; default: goto next; } if (mask == HN_RXINFO_ALL) { /* All found; done */ break; } next: pi = (const struct rndis_pktinfo *) ((const uint8_t *)pi + pi->rm_size); } /* * Final fixup. * - If there is no hash value, invalidate the hash info. 
*/ if ((mask & HN_RXINFO_HASHVAL) == 0) info->hash_info = HN_NDIS_HASH_INFO_INVALID; return (0); } static __inline bool hn_rndis_check_overlap(int off, int len, int check_off, int check_len) { if (off < check_off) { if (__predict_true(off + len <= check_off)) return (false); } else if (off > check_off) { if (__predict_true(check_off + check_len <= off)) return (false); } return (true); } static void hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_packet_msg *pkt; struct hn_rxinfo info; int data_off, pktinfo_off, data_len, pktinfo_len; /* * Check length. */ if (__predict_false(dlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); return; } pkt = data; if (__predict_false(dlen < pkt->rm_len)) { if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " "dlen %d, msglen %u\n", dlen, pkt->rm_len); return; } if (__predict_false(pkt->rm_len < pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " "msglen %u, data %u, oob %u, pktinfo %u\n", pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, pkt->rm_pktinfolen); return; } if (__predict_false(pkt->rm_datalen == 0)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); return; } /* * Check offsets. */ #define IS_OFFSET_INVALID(ofs) \ ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) /* XXX Hyper-V does not meet data offset alignment requirement */ if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data offset %u\n", pkt->rm_dataoffset); return; } if (__predict_false(pkt->rm_oobdataoffset > 0 && IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob offset %u\n", pkt->rm_oobdataoffset); return; } if (__predict_true(pkt->rm_pktinfooffset > 0) && __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo offset %u\n", pkt->rm_pktinfooffset); return; } #undef IS_OFFSET_INVALID data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); data_len = pkt->rm_datalen; pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); pktinfo_len = pkt->rm_pktinfolen; /* * Check OOB coverage. */ if (__predict_false(pkt->rm_oobdatalen != 0)) { int oob_off, oob_len; if_printf(rxr->hn_ifp, "got oobdata\n"); oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); oob_len = pkt->rm_oobdatalen; if (__predict_false(oob_off + oob_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overflow, msglen %u, oob abs %d len %d\n", pkt->rm_len, oob_off, oob_len); return; } /* * Check against data. */ if (hn_rndis_check_overlap(oob_off, oob_len, data_off, data_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps data, oob abs %d len %d, " "data abs %d len %d\n", oob_off, oob_len, data_off, data_len); return; } /* * Check against pktinfo. */ if (pktinfo_len != 0 && hn_rndis_check_overlap(oob_off, oob_len, pktinfo_off, pktinfo_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps pktinfo, oob abs %d len %d, " "pktinfo abs %d len %d\n", oob_off, oob_len, pktinfo_off, pktinfo_len); return; } } /* * Check per-packet-info coverage and find useful per-packet-info.
*/ info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; info.hash_info = HN_NDIS_HASH_INFO_INVALID; if (__predict_true(pktinfo_len != 0)) { bool overlap; int error; if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overflow, msglen %u, " "pktinfo abs %d len %d\n", pkt->rm_len, pktinfo_off, pktinfo_len); return; } /* * Check packet info coverage. */ overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, data_off, data_len); if (__predict_false(overlap)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overlap data, pktinfo abs %d len %d, " "data abs %d len %d\n", pktinfo_off, pktinfo_len, data_off, data_len); return; } /* * Find useful per-packet-info. */ error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, pktinfo_len, &info); if (__predict_false(error)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " "pktinfo\n"); return; } } if (__predict_false(data_off + data_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data overflow, msglen %u, data abs %d len %d\n", pkt->rm_len, data_off, data_len); return; } hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); } static __inline void hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_msghdr *hdr; if (__predict_false(dlen < sizeof(*hdr))) { if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); return; } hdr = data; if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { /* Hot data path. */ hn_rndis_rx_data(rxr, data, dlen); /* Done! */ return; } if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); else hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); } static void hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) { const struct hn_nvs_hdr *hdr; if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { if_printf(sc->hn_ifp, "invalid nvs notify\n"); return; } hdr = VMBUS_CHANPKT_CONST_DATA(pkt); if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { /* Useless; ignore */ return; } if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); } static void hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt) { struct hn_nvs_sendctx *sndc; sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), VMBUS_CHANPKT_DATALEN(pkt)); /* * NOTE: * 'sndc' CAN NOT be accessed anymore, since it can be freed by * its callback. */ } static void hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkthdr) { const struct vmbus_chanpkt_rxbuf *pkt; const struct hn_nvs_hdr *nvs_hdr; int count, i, hlen; if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); return; } nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); /* Make sure that this is a RNDIS message. 
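 * (HN_NVS_TYPE_RNDIS is the only nvs type expected on this RXBUF * path; anything else is logged and dropped below.)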
*/ if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", nvs_hdr->nvs_type); return; } hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); if (__predict_false(hlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); return; } pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", pkt->cp_rxbuf_id); return; } count = pkt->cp_rxbuf_cnt; if (__predict_false(hlen < __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); return; } /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ for (i = 0; i < count; ++i) { int ofs, len; ofs = pkt->cp_rxbuf[i].rb_ofs; len = pkt->cp_rxbuf[i].rb_len; if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " "ofs %d, len %d\n", i, ofs, len); continue; } hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); } /* * Ack the consumed RXBUF associated w/ this channel packet, * so that this RXBUF can be recycled by the hypervisor. */ hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); } static void hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, uint64_t tid) { struct hn_nvs_rndis_ack ack; int retries, error; ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; ack.nvs_status = HN_NVS_STATUS_OK; retries = 0; again: error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); if (__predict_false(error == EAGAIN)) { /* * NOTE: * This should _not_ happen in real world, since the * consumption of the TX bufring from the TX path is * controlled. */ if (rxr->hn_ack_failed == 0) if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); rxr->hn_ack_failed++; retries++; if (retries < 10) { DELAY(100); goto again; } /* RXBUF leaks! */ if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); } } static void hn_chan_callback(struct vmbus_channel *chan, void *xrxr) { struct hn_rx_ring *rxr = xrxr; struct hn_softc *sc = rxr->hn_ifp->if_softc; for (;;) { struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; int error, pktlen; pktlen = rxr->hn_pktbuf_len; error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); if (__predict_false(error == ENOBUFS)) { void *nbuf; int nlen; /* * Expand channel packet buffer. * * XXX * Use M_WAITOK here, since allocation failure * is fatal. */ nlen = rxr->hn_pktbuf_len * 2; while (nlen < pktlen) nlen *= 2; nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", rxr->hn_pktbuf_len, nlen); free(rxr->hn_pktbuf, M_DEVBUF); rxr->hn_pktbuf = nbuf; rxr->hn_pktbuf_len = nlen; /* Retry! */ continue; } else if (__predict_false(error == EAGAIN)) { /* No more channel packets; done! 
*/ break; } KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); switch (pkt->cph_type) { case VMBUS_CHANPKT_TYPE_COMP: hn_nvs_handle_comp(sc, chan, pkt); break; case VMBUS_CHANPKT_TYPE_RXBUF: hn_nvs_handle_rxbuf(rxr, chan, pkt); break; case VMBUS_CHANPKT_TYPE_INBAND: hn_nvs_handle_notify(sc, pkt); break; default: if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", pkt->cph_type); break; } } hn_chan_rollup(rxr, rxr->hn_txr); } static void hn_tx_taskq_create(void *arg __unused) { if (vm_guest != VM_GUEST_HV) return; if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_tx_taskq_destroy, NULL); Index: projects/clang391-import/sys/dev/hyperv/netvsc/if_hnvar.h =================================================================== --- projects/clang391-import/sys/dev/hyperv/netvsc/if_hnvar.h (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/netvsc/if_hnvar.h (revision 309263) @@ -1,273 +1,278 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _IF_HNVAR_H_ #define _IF_HNVAR_H_ #define HN_USE_TXDESC_BUFRING #define HN_CHIM_SIZE (15 * 1024 * 1024) #define HN_RXBUF_SIZE (16 * 1024 * 1024) #define HN_RXBUF_SIZE_COMPAT (15 * 1024 * 1024) /* Claimed to be 12232B */ #define HN_MTU_MAX (9 * 1024) #define HN_TXBR_SIZE (128 * PAGE_SIZE) #define HN_RXBR_SIZE (128 * PAGE_SIZE) #define HN_XACT_REQ_PGCNT 2 #define HN_XACT_RESP_PGCNT 2 #define HN_XACT_REQ_SIZE (HN_XACT_REQ_PGCNT * PAGE_SIZE) #define HN_XACT_RESP_SIZE (HN_XACT_RESP_PGCNT * PAGE_SIZE) #define HN_GPACNT_MAX 32 struct hn_txdesc; #ifndef HN_USE_TXDESC_BUFRING SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_tx_ring; struct hn_rx_ring { struct ifnet *hn_ifp; struct hn_tx_ring *hn_txr; void *hn_pktbuf; int hn_pktbuf_len; uint8_t *hn_rxbuf; /* shadow sc->hn_rxbuf */ int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; u_long hn_rss_pkts; u_long hn_ack_failed; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; void *hn_br; /* TX/RX bufring */ struct hyperv_dma hn_br_dma; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 -#define HN_RX_FLAG_ATTACHED 0x1 +#define HN_RX_FLAG_ATTACHED 0x0001 +#define HN_RX_FLAG_BR_REF 0x0002 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; #endif int hn_txdesc_cnt; int hn_txdesc_avail; u_short hn_has_txeof; u_short hn_txdone_cnt; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; int hn_tx_flags; struct mtx hn_tx_lock; struct hn_softc *hn_sc; struct vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_chim_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; /* Applied packet transmission aggregation limits. */ int hn_agg_szmax; short hn_agg_pktmax; short hn_agg_align; /* Packet transmission aggregation states. */ struct hn_txdesc *hn_agg_txd; int hn_agg_szleft; short hn_agg_pktleft; struct rndis_packet_msg *hn_agg_prevpkt; /* Temporary stats for each sends. 
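 *
 * (These appear to accumulate while one aggregated send is being
 * built -- total payload size, frame count, multicast count -- and
 * to be folded into the ring-lifetime counters once the aggregate
 * is flushed, along the lines of this assumed sketch:
 *
 *	txr->hn_pkts += txr->hn_stat_pkts;
 *	txr->hn_stat_pkts = 0;
 *	txr->hn_stat_size = 0;
 *	txr->hn_stat_mcasts = 0;
 *
 * i.e. "temporary" here means per-send scratch.)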
*/ int hn_stat_size; short hn_stat_pkts; short hn_stat_mcasts; int (*hn_sendpkt)(struct hn_tx_ring *, struct hn_txdesc *); int hn_suspended; int hn_gpa_cnt; struct vmbus_gpa hn_gpa[HN_GPACNT_MAX]; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney_tried; u_long hn_tx_chimney; u_long hn_pkts; u_long hn_sends; u_long hn_flush_failed; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; } __aligned(CACHE_LINE_SIZE); -#define HN_TX_FLAG_ATTACHED 0x1 -#define HN_TX_FLAG_HASHVAL 0x2 /* support HASHVAL pktinfo */ +#define HN_TX_FLAG_ATTACHED 0x0001 +#define HN_TX_FLAG_HASHVAL 0x0002 /* support HASHVAL pktinfo */ /* * Device-specific softc structure */ struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; int hn_if_flags; struct sx hn_lock; struct vmbus_channel *hn_prichan; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; uint8_t *hn_chim; u_long *hn_chim_bmap; int hn_chim_bmap_cnt; int hn_chim_cnt; int hn_chim_szmax; int hn_cpu; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; struct vmbus_xact_ctx *hn_xact; uint32_t hn_nvs_ver; uint32_t hn_rx_filter; /* Packet transmission aggregation user settings. */ int hn_agg_size; int hn_agg_pkts; struct taskqueue *hn_mgmt_taskq; struct taskqueue *hn_mgmt_taskq0; struct task hn_link_task; struct task hn_netchg_init; struct timeout_task hn_netchg_status; uint32_t hn_link_flags; /* HN_LINK_FLAG_ */ uint32_t hn_caps; /* HN_CAP_ */ uint32_t hn_flags; /* HN_FLAG_ */ void *hn_rxbuf; uint32_t hn_rxbuf_gpadl; struct hyperv_dma hn_rxbuf_dma; uint32_t hn_chim_gpadl; struct hyperv_dma hn_chim_dma; uint32_t hn_rndis_rid; uint32_t hn_ndis_ver; int hn_ndis_tso_szmax; int hn_ndis_tso_sgmin; uint32_t hn_rndis_agg_size; uint32_t hn_rndis_agg_pkts; uint32_t hn_rndis_agg_align; int hn_rss_ind_size; uint32_t hn_rss_hash; /* NDIS_HASH_ */ struct ndis_rssprm_toeplitz hn_rss; }; #define HN_FLAG_RXBUF_CONNECTED 0x0001 #define HN_FLAG_CHIM_CONNECTED 0x0002 #define HN_FLAG_HAS_RSSKEY 0x0004 #define HN_FLAG_HAS_RSSIND 0x0008 #define HN_FLAG_SYNTH_ATTACHED 0x0010 #define HN_FLAG_NO_SLEEPING 0x0020 +#define HN_FLAG_RXBUF_REF 0x0040 +#define HN_FLAG_CHIM_REF 0x0080 + +#define HN_FLAG_ERRORS (HN_FLAG_RXBUF_REF | HN_FLAG_CHIM_REF) #define HN_NO_SLEEPING(sc) \ do { \ (sc)->hn_flags |= HN_FLAG_NO_SLEEPING; \ } while (0) #define HN_SLEEPING_OK(sc) \ do { \ (sc)->hn_flags &= ~HN_FLAG_NO_SLEEPING; \ } while (0) #define HN_CAN_SLEEP(sc) \ (((sc)->hn_flags & HN_FLAG_NO_SLEEPING) == 0) #define HN_CAP_VLAN 0x0001 #define HN_CAP_MTU 0x0002 #define HN_CAP_IPCS 0x0004 #define HN_CAP_TCP4CS 0x0008 #define HN_CAP_TCP6CS 0x0010 #define HN_CAP_UDP4CS 0x0020 #define HN_CAP_UDP6CS 0x0040 #define HN_CAP_TSO4 0x0080 #define HN_CAP_TSO6 0x0100 #define HN_CAP_HASHVAL 0x0200 /* Capability description for use with printf(9) %b identifier. 
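 *
 * A sketch of the intended use (any kernel printf-family function
 * accepting %b will do):
 *
 *	if_printf(sc->hn_ifp, "caps: %b\n", sc->hn_caps, HN_CAP_BITS);
 *
 * With hn_caps = HN_CAP_VLAN | HN_CAP_MTU this prints "3<VLAN,MTU>":
 * the leading \020 selects base-16 output and each \N introduces
 * the name of bit N (1-based).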
*/ #define HN_CAP_BITS \ "\020\1VLAN\2MTU\3IPCS\4TCP4CS\5TCP6CS" \ "\6UDP4CS\7UDP6CS\10TSO4\11TSO6\12HASHVAL" #define HN_LINK_FLAG_LINKUP 0x0001 #define HN_LINK_FLAG_NETCHG 0x0002 #endif /* !_IF_HNVAR_H_ */ Index: projects/clang391-import/sys/dev/hyperv/vmbus/vmbus.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/vmbus/vmbus.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/vmbus/vmbus.c (revision 309263) @@ -1,1462 +1,1483 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "pcib_if.h" #include "vmbus_if.h" #define VMBUS_GPADL_START 0xe1e10 struct vmbus_msghc { struct vmbus_xact *mh_xact; struct hypercall_postmsg_in mh_inprm_save; }; static int vmbus_probe(device_t); static int vmbus_attach(device_t); static int vmbus_detach(device_t); static int vmbus_read_ivar(device_t, device_t, int, uintptr_t *); static int vmbus_child_pnpinfo_str(device_t, device_t, char *, size_t); static struct resource *vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); static int vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs); static int vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs); static int vmbus_alloc_msix(device_t bus, device_t dev, int *irq); static int vmbus_release_msix(device_t bus, device_t dev, int irq); static int vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr, uint32_t *data); static uint32_t vmbus_get_version_method(device_t, device_t); static int vmbus_probe_guid_method(device_t, device_t, const struct hyperv_guid *); static uint32_t vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu); static int vmbus_init(struct vmbus_softc *); static int vmbus_connect(struct vmbus_softc *, uint32_t); static int vmbus_req_channels(struct vmbus_softc *sc); static void vmbus_disconnect(struct vmbus_softc *); static int vmbus_scan(struct vmbus_softc *); static void vmbus_scan_teardown(struct vmbus_softc *); static void vmbus_scan_done(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chanmsg_handle(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_msg_task(void *, int); static void vmbus_synic_setup(void *); static void vmbus_synic_teardown(void *); static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS); static int vmbus_dma_alloc(struct vmbus_softc *); static void vmbus_dma_free(struct vmbus_softc *); static int vmbus_intr_setup(struct vmbus_softc *); static void vmbus_intr_teardown(struct vmbus_softc *); static int vmbus_doattach(struct vmbus_softc *); static void vmbus_event_proc_dummy(struct vmbus_softc *, int); static struct vmbus_softc *vmbus_sc; extern inthand_t IDTVEC(vmbus_isr); static const uint32_t vmbus_version[] = { VMBUS_VERSION_WIN8_1, VMBUS_VERSION_WIN8, VMBUS_VERSION_WIN7, VMBUS_VERSION_WS2008 }; static const vmbus_chanmsg_proc_t vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done), VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP) }; static device_method_t vmbus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), DEVMETHOD(bus_alloc_resource, vmbus_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), 
DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), #if __FreeBSD_version >= 1100000 DEVMETHOD(bus_get_cpus, bus_generic_get_cpus), #endif /* pcib interface */ DEVMETHOD(pcib_alloc_msi, vmbus_alloc_msi), DEVMETHOD(pcib_release_msi, vmbus_release_msi), DEVMETHOD(pcib_alloc_msix, vmbus_alloc_msix), DEVMETHOD(pcib_release_msix, vmbus_release_msix), DEVMETHOD(pcib_map_msi, vmbus_map_msi), /* Vmbus interface */ DEVMETHOD(vmbus_get_version, vmbus_get_version_method), DEVMETHOD(vmbus_probe_guid, vmbus_probe_guid_method), DEVMETHOD(vmbus_get_vcpu_id, vmbus_get_vcpu_id_method), DEVMETHOD_END }; static driver_t vmbus_driver = { "vmbus", vmbus_methods, sizeof(struct vmbus_softc) }; static devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL); MODULE_DEPEND(vmbus, acpi, 1, 1, 1); MODULE_DEPEND(vmbus, pci, 1, 1, 1); MODULE_VERSION(vmbus, 1); static __inline struct vmbus_softc * vmbus_get_softc(void) { return vmbus_sc; } void vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) { struct hypercall_postmsg_in *inprm; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); inprm = vmbus_xact_req_data(mh->mh_xact); memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE); inprm->hc_connid = VMBUS_CONNID_MESSAGE; inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL; inprm->hc_dsize = dsize; } struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize) { struct vmbus_msghc *mh; struct vmbus_xact *xact; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); xact = vmbus_xact_get(sc->vmbus_xc, dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0])); if (xact == NULL) return (NULL); mh = vmbus_xact_priv(xact, sizeof(*mh)); mh->mh_xact = xact; vmbus_msghc_reset(mh, dsize); return (mh); } void vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { vmbus_xact_put(mh->mh_xact); } void * vmbus_msghc_dataptr(struct vmbus_msghc *mh) { struct hypercall_postmsg_in *inprm; inprm = vmbus_xact_req_data(mh->mh_xact); return (inprm->hc_data); } int vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) { sbintime_t time = SBT_1MS; struct hypercall_postmsg_in *inprm; bus_addr_t inprm_paddr; int i; inprm = vmbus_xact_req_data(mh->mh_xact); inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact); /* * Save the input parameter so that we could restore the input * parameter if the Hypercall failed. * * XXX * Is this really necessary?! i.e. Will the Hypercall ever * overwrite the input parameter? */ memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE); /* * In order to cope with transient failures, e.g. insufficient * resources on host side, we retry the post message Hypercall * several times. 20 retries seem sufficient. 
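 *
 * The inter-retry pause (see below) starts at 1ms and doubles up
 * to a 2s ceiling, so the 20 retries pause for roughly
 * 1 + 2 + 4 + ... + 1024 ms plus nine 2s waits -- about 20
 * seconds in total before giving up with EIO.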
*/ #define HC_RETRY_MAX 20 for (i = 0; i < HC_RETRY_MAX; ++i) { uint64_t status; status = hypercall_post_message(inprm_paddr); if (status == HYPERCALL_STATUS_SUCCESS) return 0; pause_sbt("hcpmsg", time, 0, C_HARDCLOCK); if (time < SBT_1S * 2) time *= 2; /* Restore input parameter and try again */ memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE); } #undef HC_RETRY_MAX return EIO; } int vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { int error; vmbus_xact_activate(mh->mh_xact); error = vmbus_msghc_exec_noresult(mh); if (error) vmbus_xact_deactivate(mh->mh_xact); return error; } +void +vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) +{ + + vmbus_xact_deactivate(mh->mh_xact); +} + const struct vmbus_message * vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { size_t resp_len; return (vmbus_xact_wait(mh->mh_xact, &resp_len)); } +const struct vmbus_message * +vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) +{ + size_t resp_len; + + return (vmbus_xact_poll(mh->mh_xact, &resp_len)); +} + void vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg)); } uint32_t vmbus_gpadl_alloc(struct vmbus_softc *sc) { - return atomic_fetchadd_int(&sc->vmbus_gpadl, 1); + uint32_t gpadl; + +again: + gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1); + if (gpadl == 0) + goto again; + return (gpadl); } static int vmbus_connect(struct vmbus_softc *sc, uint32_t version) { struct vmbus_chanmsg_connect *req; const struct vmbus_message *msg; struct vmbus_msghc *mh; int error, done = 0; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT; req->chm_ver = version; req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr; req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr; req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_msghc_put(sc, mh); return error; } msg = vmbus_msghc_wait_result(sc, mh); done = ((const struct vmbus_chanmsg_connect_resp *) msg->msg_data)->chm_done; vmbus_msghc_put(sc, mh); return (done ? 
0 : EOPNOTSUPP); } static int vmbus_init(struct vmbus_softc *sc) { int i; for (i = 0; i < nitems(vmbus_version); ++i) { int error; error = vmbus_connect(sc, vmbus_version[i]); if (!error) { sc->vmbus_version = vmbus_version[i]; device_printf(sc->vmbus_dev, "version %u.%u\n", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return 0; } } return ENXIO; } static void vmbus_disconnect(struct vmbus_softc *sc) { struct vmbus_chanmsg_disconnect *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for disconnect\n"); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "disconnect msg hypercall failed\n"); } } static int vmbus_req_channels(struct vmbus_softc *sc) { struct vmbus_chanmsg_chrequest *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); return error; } static void vmbus_scan_done_task(void *xsc, int pending __unused) { struct vmbus_softc *sc = xsc; mtx_lock(&Giant); sc->vmbus_scandone = true; mtx_unlock(&Giant); wakeup(&sc->vmbus_scandone); } static void vmbus_scan_done(struct vmbus_softc *sc, const struct vmbus_message *msg __unused) { taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task); } static int vmbus_scan(struct vmbus_softc *sc) { int error; /* * Identify, probe and attach for non-channel devices. */ bus_generic_probe(sc->vmbus_dev); bus_generic_attach(sc->vmbus_dev); /* * This taskqueue serializes vmbus devices' attach and detach * for channel offer and rescind messages. */ sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK, taskqueue_thread_enqueue, &sc->vmbus_devtq); taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev"); TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc); /* * This taskqueue handles sub-channel detach, so that vmbus * device's detach running in vmbus_devtq can drain its sub- * channels. */ sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK, taskqueue_thread_enqueue, &sc->vmbus_subchtq); taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch"); /* * Start vmbus scanning. */ error = vmbus_req_channels(sc); if (error) { device_printf(sc->vmbus_dev, "channel request failed: %d\n", error); return (error); } /* * Wait for all vmbus devices from the initial channel offers to be * attached. 
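 *
 * This is the classic flag-plus-sleep handshake: below we
 * mtx_sleep() on &sc->vmbus_scandone under Giant, and
 * vmbus_scan_done_task() above performs the matching side:
 *
 *	mtx_lock(&Giant);
 *	sc->vmbus_scandone = true;
 *	mtx_unlock(&Giant);
 *	wakeup(&sc->vmbus_scandone);
 *
 * Re-testing the flag in a loop guards against spurious wakeups.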
*/ GIANT_REQUIRED; while (!sc->vmbus_scandone) mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0); if (bootverbose) { device_printf(sc->vmbus_dev, "device scan, probe and attach " "done\n"); } return (0); } static void vmbus_scan_teardown(struct vmbus_softc *sc) { GIANT_REQUIRED; if (sc->vmbus_devtq != NULL) { mtx_unlock(&Giant); taskqueue_free(sc->vmbus_devtq); mtx_lock(&Giant); sc->vmbus_devtq = NULL; } if (sc->vmbus_subchtq != NULL) { mtx_unlock(&Giant); taskqueue_free(sc->vmbus_subchtq); mtx_lock(&Giant); sc->vmbus_subchtq = NULL; } } static void vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) { device_printf(sc->vmbus_dev, "unknown message type 0x%x\n", msg_type); return; } msg_proc = vmbus_chanmsg_handlers[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); /* Channel specific processing */ vmbus_chan_msgproc(sc, msg); } static void vmbus_msg_task(void *xsc, int pending __unused) { struct vmbus_softc *sc = xsc; volatile struct vmbus_message *msg; msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE; for (;;) { if (msg->msg_type == HYPERV_MSGTYPE_NONE) { /* No message */ break; } else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) { /* Channel message */ vmbus_chanmsg_handle(sc, __DEVOLATILE(const struct vmbus_message *, msg)); } msg->msg_type = HYPERV_MSGTYPE_NONE; /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } } static __inline int vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu) { volatile struct vmbus_message *msg; struct vmbus_message *msg_base; msg_base = VMBUS_PCPU_GET(sc, message, cpu); /* * Check event timer. * * TODO: move this to independent IDT vector. */ msg = msg_base + VMBUS_SINT_TIMER; if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) { msg->msg_type = HYPERV_MSGTYPE_NONE; vmbus_et_intr(frame); /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } /* * Check events. Hot path for network and storage I/O data; high rate. * * NOTE: * As recommended by the Windows guest fellows, we check events before * checking messages. */ sc->vmbus_event_proc(sc, cpu); /* * Check messages. Mainly management stuffs; ultra low rate. 
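 *
 * (The SynIC message-slot discipline used by vmbus_msg_task()
 * above, in sketch form -- free the slot, fence, then EOM:
 *
 *	msg->msg_type = HYPERV_MSGTYPE_NONE;
 *	mb();
 *	if (msg->msg_flags & VMBUS_MSGFLAG_PENDING)
 *		wrmsr(MSR_HV_EOM, 0);
 *
 * Writing MSR_HV_EOM before the slot is visibly empty could make
 * the redelivery find no free slot and stall the queue.)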
*/ msg = msg_base + VMBUS_SINT_MESSAGE; if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) { taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); } return (FILTER_HANDLED); } void vmbus_handle_intr(struct trapframe *trap_frame) { struct vmbus_softc *sc = vmbus_get_softc(); int cpu = curcpu; /* * Disable preemption. */ critical_enter(); /* * Do a little interrupt counting. */ (*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++; vmbus_handle_intr1(sc, trap_frame, cpu); /* * Enable preemption. */ critical_exit(); } static void vmbus_synic_setup(void *xsc) { struct vmbus_softc *sc = xsc; int cpu = curcpu; uint64_t val, orig; uint32_t sint; if (hyperv_features & CPUID_HV_MSR_VP_INDEX) { /* Save virtual processor id. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX); } else { /* Set virtual processor id to 0 for compatibility. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0; } /* * Setup the SynIC message. */ orig = rdmsr(MSR_HV_SIMP); val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT); wrmsr(MSR_HV_SIMP, val); /* * Setup the SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT); wrmsr(MSR_HV_SIEFP, val); /* * Configure and unmask SINT for message and event flags. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * Configure and unmask SINT for timer. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * All done; enable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK); wrmsr(MSR_HV_SCONTROL, val); } static void vmbus_synic_teardown(void *arg) { uint64_t orig; uint32_t sint; /* * Disable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK)); /* * Mask message and event flags SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Mask timer SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Teardown SynIC message. */ orig = rdmsr(MSR_HV_SIMP); wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK)); /* * Teardown SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK)); } static int vmbus_dma_alloc(struct vmbus_softc *sc) { bus_dma_tag_t parent_dtag; uint8_t *evtflags; int cpu; parent_dtag = bus_get_dma_tag(sc->vmbus_dev); CPU_FOREACH(cpu) { void *ptr; /* * Per-cpu messages and event flags. 
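 *
 * These are the pages programmed into MSR_HV_SIMP/MSR_HV_SIEFP by
 * vmbus_synic_setup() above, which uses a read-modify-write so the
 * reserved MSR bits survive, e.g. for the message page:
 *
 *	orig = rdmsr(MSR_HV_SIMP);
 *	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
 *	    ((paddr >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT);
 *	wrmsr(MSR_HV_SIMP, val);
 *
 * Hence the page-sized, page-aligned allocations below.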
*/ ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, message, cpu) = ptr; ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr; } evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (evtflags == NULL) return ENOMEM; sc->vmbus_rx_evtflags = (u_long *)evtflags; sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2)); sc->vmbus_evtflags = evtflags; sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf1 == NULL) return ENOMEM; sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf2 == NULL) return ENOMEM; return 0; } static void vmbus_dma_free(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_evtflags != NULL) { hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags); sc->vmbus_evtflags = NULL; sc->vmbus_rx_evtflags = NULL; sc->vmbus_tx_evtflags = NULL; } if (sc->vmbus_mnf1 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1); sc->vmbus_mnf1 = NULL; } if (sc->vmbus_mnf2 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2); sc->vmbus_mnf2 = NULL; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, message_dma, cpu), VMBUS_PCPU_GET(sc, message, cpu)); VMBUS_PCPU_GET(sc, message, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), VMBUS_PCPU_GET(sc, event_flags, cpu)); VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL; } } } static int vmbus_intr_setup(struct vmbus_softc *sc) { int cpu; CPU_FOREACH(cpu) { char buf[MAXCOMLEN + 1]; cpuset_t cpu_mask; /* Allocate an interrupt counter for Hyper-V interrupt */ snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu); intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu)); /* * Setup taskqueue to handle events. Task will be per- * channel. */ VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast( "hyperv event", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, event_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask, "hvevent%d", cpu); /* * Setup tasks and taskqueues to handle messages. */ VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast( "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, message_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask, "hvmsg%d", cpu); TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0, vmbus_msg_task, sc); } /* * All Hyper-V ISR required resources are setup, now let's find a * free IDT vector for Hyper-V ISR and set it up. 
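 *
 * (Note how each per-cpu taskqueue above is pinned with the cpuset
 * variant, so its single worker thread only runs on the CPU whose
 * interrupts feed it:
 *
 *	CPU_SETOF(cpu, &cpu_mask);
 *	taskqueue_start_threads_cpuset(
 *	    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET,
 *	    &cpu_mask, "hvmsg%d", cpu);
 *
 * keeping message and event handling CPU-local.)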
*/ sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr)); if (sc->vmbus_idtvec < 0) { device_printf(sc->vmbus_dev, "cannot find free IDT vector\n"); return ENXIO; } if (bootverbose) { device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n", sc->vmbus_idtvec); } return 0; } static void vmbus_intr_teardown(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_idtvec >= 0) { lapic_ipi_free(sc->vmbus_idtvec); sc->vmbus_idtvec = -1; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) { taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu)); VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) { taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu)); VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL; } } } static int vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { return (ENOENT); } static int vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) { const struct vmbus_channel *chan; char guidbuf[HYPERV_GUID_STRLEN]; chan = vmbus_get_channel(child); if (chan == NULL) { /* Event timer device, which does not belong to a channel */ return (0); } strlcat(buf, "classid=", buflen); hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); strlcat(buf, " deviceid=", buflen); hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); return (0); } int vmbus_add_child(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; device_t parent = sc->vmbus_dev; mtx_lock(&Giant); chan->ch_dev = device_add_child(parent, NULL, -1); if (chan->ch_dev == NULL) { mtx_unlock(&Giant); device_printf(parent, "device_add_child for chan%u failed\n", chan->ch_id); return (ENXIO); } device_set_ivars(chan->ch_dev, chan); device_probe_and_attach(chan->ch_dev); mtx_unlock(&Giant); return (0); } int vmbus_delete_child(struct vmbus_channel *chan) { int error = 0; mtx_lock(&Giant); if (chan->ch_dev != NULL) { error = device_delete_child(chan->ch_vmbus->vmbus_dev, chan->ch_dev); chan->ch_dev = NULL; } mtx_unlock(&Giant); return (error); } static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS) { struct vmbus_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } /* * We need the function to make sure the MMIO resource is allocated from the * ranges found in _CRS. * * For the release function, we can use bus_generic_release_resource(). 
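 *
 * A hypothetical child asking for MMIO would then be satisfied
 * only from ranges that _CRS declared decodable, e.g.:
 *
 *	res = bus_alloc_resource(child, SYS_RES_MEMORY, &rid,
 *	    0, ~0, size, RF_ACTIVE);
 *
 * while all other resource types fall through to the parent bus
 * via BUS_ALLOC_RESOURCE().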
*/ static struct resource * vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { device_t parent = device_get_parent(dev); struct resource *res; #ifdef NEW_PCIB if (type == SYS_RES_MEMORY) { struct vmbus_softc *sc = device_get_softc(dev); res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type, rid, start, end, count, flags); } else #endif { res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start, end, count, flags); } return (res); } static device_t get_nexus(device_t vmbus) { device_t acpi = device_get_parent(vmbus); device_t nexus = device_get_parent(acpi); return (nexus); } static int vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs) { return (PCIB_ALLOC_MSI(get_nexus(bus), dev, count, maxcount, irqs)); } static int vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs) { return (PCIB_RELEASE_MSI(get_nexus(bus), dev, count, irqs)); } static int vmbus_alloc_msix(device_t bus, device_t dev, int *irq) { return (PCIB_ALLOC_MSIX(get_nexus(bus), dev, irq)); } static int vmbus_release_msix(device_t bus, device_t dev, int irq) { return (PCIB_RELEASE_MSIX(get_nexus(bus), dev, irq)); } static int vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr, uint32_t *data) { return (PCIB_MAP_MSI(get_nexus(bus), dev, irq, addr, data)); } static uint32_t vmbus_get_version_method(device_t bus, device_t dev) { struct vmbus_softc *sc = device_get_softc(bus); return sc->vmbus_version; } static int vmbus_probe_guid_method(device_t bus, device_t dev, const struct hyperv_guid *guid) { const struct vmbus_channel *chan = vmbus_get_channel(dev); if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0) return 0; return ENXIO; } static uint32_t vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu) { const struct vmbus_softc *sc = device_get_softc(bus); return (VMBUS_PCPU_GET(sc, vcpuid, cpu)); } #ifdef NEW_PCIB #define VTPM_BASE_ADDR 0xfed40000 #define FOUR_GB (1ULL << 32) enum parse_pass { parse_64, parse_32 }; struct parse_context { device_t vmbus_dev; enum parse_pass pass; }; static ACPI_STATUS parse_crs(ACPI_RESOURCE *res, void *ctx) { const struct parse_context *pc = ctx; device_t vmbus_dev = pc->vmbus_dev; struct vmbus_softc *sc = device_get_softc(vmbus_dev); UINT64 start, end; switch (res->Type) { case ACPI_RESOURCE_TYPE_ADDRESS32: start = res->Data.Address32.Address.Minimum; end = res->Data.Address32.Address.Maximum; break; case ACPI_RESOURCE_TYPE_ADDRESS64: start = res->Data.Address64.Address.Minimum; end = res->Data.Address64.Address.Maximum; break; default: /* Unused types. */ return (AE_OK); } /* * We don't use <1MB addresses. */ if (end < 0x100000) return (AE_OK); /* Don't conflict with vTPM. 
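 *
 * If a decoded range straddles the fixed vTPM window, the check
 * below clips it to end just under VTPM_BASE_ADDR (0xfed40000):
 *
 *	[start ............ VTPM_BASE_ADDR ............ end]
 *	becomes
 *	[start ... VTPM_BASE_ADDR - 1]
 *
 * so MMIO later handed to children can never overlap the TPM.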
*/ if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR) end = VTPM_BASE_ADDR - 1; if ((pc->pass == parse_32 && start < FOUR_GB) || (pc->pass == parse_64 && start >= FOUR_GB)) pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY, start, end, 0); return (AE_OK); } static void vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass) { struct parse_context pc; ACPI_STATUS status; if (bootverbose) device_printf(dev, "walking _CRS, pass=%d\n", pass); pc.vmbus_dev = vmbus_dev; pc.pass = pass; status = AcpiWalkResources(acpi_get_handle(dev), "_CRS", parse_crs, &pc); if (bootverbose && ACPI_FAILURE(status)) device_printf(dev, "_CRS: not found, pass=%d\n", pass); } static void vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass) { device_t acpi0, pcib0 = NULL; device_t *children; int i, count; /* Try to find _CRS on VMBus device */ vmbus_get_crs(dev, dev, pass); /* Try to find _CRS on VMBus device's parent */ acpi0 = device_get_parent(dev); vmbus_get_crs(acpi0, dev, pass); /* Try to locate pcib0 and find _CRS on it */ if (device_get_children(acpi0, &children, &count) != 0) return; for (i = 0; i < count; i++) { if (!device_is_attached(children[i])) continue; if (strcmp("pcib0", device_get_nameunit(children[i]))) continue; pcib0 = children[i]; break; } if (pcib0) vmbus_get_crs(pcib0, dev, pass); free(children, M_TEMP); } static void vmbus_get_mmio_res(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); /* * We walk the resources twice to make sure that: in the resource * list, the 32-bit resources appear behind the 64-bit resources. * NB: resource_list_add() uses INSERT_TAIL. This way, when we * iterate through the list to find a range for a 64-bit BAR in * vmbus_alloc_resource(), we can make sure we try to use >4GB * ranges first. */ pcib_host_res_init(dev, &sc->vmbus_mmio_res); vmbus_get_mmio_res_pass(dev, parse_64); vmbus_get_mmio_res_pass(dev, parse_32); } static void vmbus_free_mmio_res(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); pcib_host_res_free(dev, &sc->vmbus_mmio_res); } #endif /* NEW_PCIB */ static int vmbus_probe(device_t dev) { char *id[] = { "VMBUS", NULL }; if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL || device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV || (hyperv_features & CPUID_HV_MSR_SYNIC) == 0) return (ENXIO); device_set_desc(dev, "Hyper-V Vmbus"); return (BUS_PROBE_DEFAULT); } /** * @brief Main vmbus driver initialization routine. 
* * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_doattach(struct vmbus_softc *sc) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int ret; if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED) return (0); #ifdef NEW_PCIB vmbus_get_mmio_res(sc->vmbus_dev); #endif sc->vmbus_flags |= VMBUS_FLAG_ATTACHED; sc->vmbus_gpadl = VMBUS_GPADL_START; mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF); TAILQ_INIT(&sc->vmbus_prichans); mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF); TAILQ_INIT(&sc->vmbus_chans); sc->vmbus_chmap = malloc( sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF, M_WAITOK | M_ZERO); /* * Create context for "post message" Hypercalls */ sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE, sizeof(struct vmbus_msghc)); if (sc->vmbus_xc == NULL) { ret = ENXIO; goto cleanup; } /* * Allocate DMA stuffs. */ ret = vmbus_dma_alloc(sc); if (ret != 0) goto cleanup; /* * Setup interrupt. */ ret = vmbus_intr_setup(sc); if (ret != 0) goto cleanup; /* * Setup SynIC. */ if (bootverbose) device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started); smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc); sc->vmbus_flags |= VMBUS_FLAG_SYNIC; /* * Initialize vmbus, e.g. connect to Hypervisor. */ ret = vmbus_init(sc); if (ret != 0) goto cleanup; if (sc->vmbus_version == VMBUS_VERSION_WS2008 || sc->vmbus_version == VMBUS_VERSION_WIN7) sc->vmbus_event_proc = vmbus_event_proc_compat; else sc->vmbus_event_proc = vmbus_event_proc; ret = vmbus_scan(sc); if (ret != 0) goto cleanup; ctx = device_get_sysctl_ctx(sc->vmbus_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev)); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, vmbus_sysctl_version, "A", "vmbus version"); return (ret); cleanup: vmbus_scan_teardown(sc); vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_xc != NULL) { vmbus_xact_ctx_destroy(sc->vmbus_xc); sc->vmbus_xc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_prichan_lock); mtx_destroy(&sc->vmbus_chan_lock); return (ret); } static void vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused) { } static int vmbus_attach(device_t dev) { vmbus_sc = device_get_softc(dev); vmbus_sc->vmbus_dev = dev; vmbus_sc->vmbus_idtvec = -1; /* * Event processing logic will be configured: * - After the vmbus protocol version negotiation. * - Before we request channel offers. */ vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy; #ifndef EARLY_AP_STARTUP /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * initialization directly. 
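 *
 * (When cold, the attach is instead picked up by the
 * SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, ...) hook
 * below: vmbus_doattach() smp_rendezvous()es every CPU for SynIC
 * setup, which presumably is why it must wait until the APs are
 * up and thread scheduling works.)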
*/ if (!cold) #endif vmbus_doattach(vmbus_sc); return (0); } static int vmbus_detach(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); bus_generic_detach(dev); vmbus_chan_destroy_all(sc); vmbus_scan_teardown(sc); vmbus_disconnect(sc); if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) { sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC; smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL); } vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_xc != NULL) { vmbus_xact_ctx_destroy(sc->vmbus_xc); sc->vmbus_xc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_prichan_lock); mtx_destroy(&sc->vmbus_chan_lock); #ifdef NEW_PCIB vmbus_free_mmio_res(dev); #endif return (0); } #ifndef EARLY_AP_STARTUP static void vmbus_sysinit(void *arg __unused) { struct vmbus_softc *sc = vmbus_get_softc(); if (vm_guest != VM_GUEST_HV || sc == NULL) return; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_doattach(sc); } /* * NOTE: * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is * initialized. */ SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL); #endif /* !EARLY_AP_STARTUP */ Index: projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_chan.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 309263) @@ -1,1923 +1,2001 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void vmbus_chan_update_evtflagcnt( struct vmbus_softc *, const struct vmbus_channel *); static int vmbus_chan_close_internal( struct vmbus_channel *); static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS); static void vmbus_chan_sysctl_create( struct vmbus_channel *); static struct vmbus_channel *vmbus_chan_alloc(struct vmbus_softc *); static void vmbus_chan_free(struct vmbus_channel *); static int vmbus_chan_add(struct vmbus_channel *); static void vmbus_chan_cpu_default(struct vmbus_channel *); static int vmbus_chan_release(struct vmbus_channel *); static void vmbus_chan_set_chmap(struct vmbus_channel *); static void vmbus_chan_clear_chmap(struct vmbus_channel *); static void vmbus_chan_detach(struct vmbus_channel *); static bool vmbus_chan_wait_revoke( const struct vmbus_channel *); static void vmbus_chan_ins_prilist(struct vmbus_softc *, struct vmbus_channel *); static void vmbus_chan_rem_prilist(struct vmbus_softc *, struct vmbus_channel *); static void vmbus_chan_ins_list(struct vmbus_softc *, struct vmbus_channel *); static void vmbus_chan_rem_list(struct vmbus_softc *, struct vmbus_channel *); static void vmbus_chan_ins_sublist(struct vmbus_channel *, struct vmbus_channel *); static void vmbus_chan_rem_sublist(struct vmbus_channel *, struct vmbus_channel *); static void vmbus_chan_task(void *, int); static void vmbus_chan_task_nobatch(void *, int); static void vmbus_chan_clrchmap_task(void *, int); static void vmbus_prichan_attach_task(void *, int); static void vmbus_subchan_attach_task(void *, int); static void vmbus_prichan_detach_task(void *, int); static void vmbus_subchan_detach_task(void *, int); static void vmbus_chan_msgproc_choffer(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chan_msgproc_chrescind( struct vmbus_softc *, const struct vmbus_message *); static int vmbus_chan_printf(const struct vmbus_channel *, const char *, ...) __printflike(2, 3); /* * Vmbus channel message processing. */ static const vmbus_chanmsg_proc_t vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER, vmbus_chan_msgproc_choffer), VMBUS_CHANMSG_PROC(CHRESCIND, vmbus_chan_msgproc_chrescind), VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP) }; /* * Notify host that there are data pending on our TX bufring. 
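 *
 * Two signalling paths exist, chosen per channel in
 * vmbus_chan_signal_tx() below after the event flag itself is set:
 *
 *	if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF)
 *		atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask);
 *	else
 *		hypercall_signal_event(chan->ch_monprm_dma.hv_paddr);
 *
 * i.e. channels with the monitor notification facility just flip a
 * bit in a shared page the host polls, while the rest pay for an
 * immediate hypercall.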
*/ static __inline void vmbus_chan_signal_tx(const struct vmbus_channel *chan) { atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask); if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask); else hypercall_signal_event(chan->ch_monprm_dma.hv_paddr); } static void vmbus_chan_ins_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan) { mtx_assert(&sc->vmbus_prichan_lock, MA_OWNED); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONPRIL_SHIFT)) panic("channel is already on the prilist"); TAILQ_INSERT_TAIL(&sc->vmbus_prichans, chan, ch_prilink); } static void vmbus_chan_rem_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan) { mtx_assert(&sc->vmbus_prichan_lock, MA_OWNED); if (atomic_testandclear_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONPRIL_SHIFT) == 0) panic("channel is not on the prilist"); TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink); } static void vmbus_chan_ins_sublist(struct vmbus_channel *prichan, struct vmbus_channel *chan) { mtx_assert(&prichan->ch_subchan_lock, MA_OWNED); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONSUBL_SHIFT)) panic("channel is already on the sublist"); TAILQ_INSERT_TAIL(&prichan->ch_subchans, chan, ch_sublink); /* Bump sub-channel count. */ prichan->ch_subchan_cnt++; } static void vmbus_chan_rem_sublist(struct vmbus_channel *prichan, struct vmbus_channel *chan) { mtx_assert(&prichan->ch_subchan_lock, MA_OWNED); KASSERT(prichan->ch_subchan_cnt > 0, ("invalid subchan_cnt %d", prichan->ch_subchan_cnt)); prichan->ch_subchan_cnt--; if (atomic_testandclear_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONSUBL_SHIFT) == 0) panic("channel is not on the sublist"); TAILQ_REMOVE(&prichan->ch_subchans, chan, ch_sublink); } static void vmbus_chan_ins_list(struct vmbus_softc *sc, struct vmbus_channel *chan) { mtx_assert(&sc->vmbus_chan_lock, MA_OWNED); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONLIST_SHIFT)) panic("channel is already on the list"); TAILQ_INSERT_TAIL(&sc->vmbus_chans, chan, ch_link); } static void vmbus_chan_rem_list(struct vmbus_softc *sc, struct vmbus_channel *chan) { mtx_assert(&sc->vmbus_chan_lock, MA_OWNED); if (atomic_testandclear_int(&chan->ch_stflags, VMBUS_CHAN_ST_ONLIST_SHIFT) == 0) panic("channel is not on the list"); TAILQ_REMOVE(&sc->vmbus_chans, chan, ch_link); } static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS) { struct vmbus_channel *chan = arg1; int mnf = 0; if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) mnf = 1; return sysctl_handle_int(oidp, &mnf, 0, req); } static void vmbus_chan_sysctl_create(struct vmbus_channel *chan) { struct sysctl_oid *ch_tree, *chid_tree, *br_tree; struct sysctl_ctx_list *ctx; uint32_t ch_id; char name[16]; /* * Add sysctl nodes related to this channel to this * channel's sysctl ctx, so that they can be destroyed * independently upon close of this channel, which can * happen even if the device is not detached. */ ctx = &chan->ch_sysctl_ctx; sysctl_ctx_init(ctx); /* * Create dev.NAME.UNIT.channel tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)), OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID tree. 
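 *
 * The resulting OIDs look like, taking hn0 as an example:
 *
 *	dev.hn.0.channel.0.cpu
 *	dev.hn.0.channel.0.mnf
 *	dev.hn.0.channel.0.br.rx...
 *	dev.hn.0.channel.0.sub.1.chanid
 *
 * with sub-channels hanging off their primary channel's CHANID
 * node under "sub", keyed by sub-channel index.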
*/ if (VMBUS_CHAN_ISPRIMARY(chan)) ch_id = chan->ch_id; else ch_id = chan->ch_prichan->ch_id; snprintf(name, sizeof(name), "%d", ch_id); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Create dev.NAME.UNIT.channel.CHANID.sub tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree. * * NOTE: * chid_tree is changed to this new sysctl tree. */ snprintf(name, sizeof(name), "%d", chan->ch_subidx); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id"); } SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, chan, 0, vmbus_chan_sysctl_mnf, "I", "has monitor notification facilities"); br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "br", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (br_tree != NULL) { /* * Create sysctl tree for RX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_rxbr.rxbr, "rx"); /* * Create sysctl tree for TX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_txbr.txbr, "tx"); } } int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_chan_br cbr; int error; /* * Allocate the TX+RX bufrings. */ KASSERT(chan->ch_bufring == NULL, ("bufrings are allocated")); chan->ch_bufring = hyperv_dmamem_alloc(bus_get_dma_tag(chan->ch_dev), PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma, BUS_DMA_WAITOK); if (chan->ch_bufring == NULL) { vmbus_chan_printf(chan, "bufring allocation failed\n"); return (ENOMEM); } cbr.cbr = chan->ch_bufring; cbr.cbr_paddr = chan->ch_bufring_dma.hv_paddr; cbr.cbr_txsz = txbr_size; cbr.cbr_rxsz = rxbr_size; error = vmbus_chan_open_br(chan, &cbr, udata, udlen, cb, cbarg); if (error) { if (error == EISCONN) { /* * XXX * The bufring GPADL is still connected; abandon * this bufring, instead of having mysterious * crash or trashed data later on. 
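 *
 * (While the GPADL is connected the host may still DMA into these
 * pages; freeing them would let the allocator hand host-writable
 * memory to an unrelated consumer. Leaking txbr_size + rxbr_size
 * bytes is the lesser evil.)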
*/ vmbus_chan_printf(chan, "chan%u bufring GPADL " "is still connected upon channel open error; " "leak %d bytes memory\n", chan->ch_id, txbr_size + rxbr_size); } else { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); } chan->ch_bufring = NULL; } return (error); } int vmbus_chan_open_br(struct vmbus_channel *chan, const struct vmbus_chan_br *cbr, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_softc *sc = chan->ch_vmbus; - const struct vmbus_chanmsg_chopen_resp *resp; const struct vmbus_message *msg; struct vmbus_chanmsg_chopen *req; struct vmbus_msghc *mh; uint32_t status; int error, txbr_size, rxbr_size; task_fn_t *task_fn; uint8_t *br; if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) { vmbus_chan_printf(chan, "invalid udata len %d for chan%u\n", udlen, chan->ch_id); return (EINVAL); } br = cbr->cbr; txbr_size = cbr->cbr_txsz; rxbr_size = cbr->cbr_rxsz; KASSERT((txbr_size & PAGE_MASK) == 0, ("send bufring size is not multiple page")); KASSERT((rxbr_size & PAGE_MASK) == 0, ("recv bufring size is not multiple page")); KASSERT((cbr->cbr_paddr & PAGE_MASK) == 0, ("bufring is not page aligned")); /* * Zero out the TX/RX bufrings, in case that they were used before. */ memset(br, 0, txbr_size + rxbr_size); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED_SHIFT)) panic("double-open chan%u", chan->ch_id); chan->ch_cb = cb; chan->ch_cbarg = cbarg; vmbus_chan_update_evtflagcnt(sc, chan); chan->ch_tq = VMBUS_PCPU_GET(chan->ch_vmbus, event_tq, chan->ch_cpuid); if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) task_fn = vmbus_chan_task; else task_fn = vmbus_chan_task_nobatch; TASK_INIT(&chan->ch_task, 0, task_fn, chan); /* TX bufring comes first */ vmbus_txbr_setup(&chan->ch_txbr, br, txbr_size); /* RX bufring immediately follows TX bufring */ vmbus_rxbr_setup(&chan->ch_rxbr, br + txbr_size, rxbr_size); /* Create sysctl tree for this channel */ vmbus_chan_sysctl_create(chan); /* * Connect the bufrings, both RX and TX, to this channel. */ KASSERT(chan->ch_bufring_gpadl == 0, ("bufring GPADL is still connected")); error = vmbus_chan_gpadl_connect(chan, cbr->cbr_paddr, txbr_size + rxbr_size, &chan->ch_bufring_gpadl); if (error) { vmbus_chan_printf(chan, "failed to connect bufring GPADL to chan%u\n", chan->ch_id); goto failed; } /* * Install this channel, before it is opened, but after everything * else has been setup. */ vmbus_chan_set_chmap(chan); /* * Open channel w/ the bufring GPADL on the target CPU. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for chopen(chan%u)\n", chan->ch_id); error = ENXIO; goto failed; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN; req->chm_chanid = chan->ch_id; req->chm_openid = chan->ch_id; req->chm_gpadl = chan->ch_bufring_gpadl; req->chm_vcpuid = chan->ch_vcpuid; req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT; if (udlen > 0) memcpy(req->chm_udata, udata, udlen); error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_chan_printf(chan, "chopen(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); goto failed; } - msg = vmbus_msghc_wait_result(sc, mh); - resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data; - status = resp->chm_status; + for (;;) { + msg = vmbus_msghc_poll_result(sc, mh); + if (msg != NULL) + break; + if (vmbus_chan_is_revoked(chan)) { + int i; + /* + * NOTE: + * Hypervisor does _not_ send response CHOPEN to + * a revoked channel. 
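+ *
+ * (This is why the code polls with vmbus_msghc_poll_result()
+ * instead of blocking in vmbus_msghc_wait_result(), which would
+ * never return here. The linger below re-polls for
+ * REVOKE_LINGER x 1ms = 100ms before cancelling the hypercall
+ * transaction.)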
+ */ + vmbus_chan_printf(chan, + "chan%u is revoked, when it is being opened\n", + chan->ch_id); + + /* + * XXX + * Add extra delay before cancel the hypercall + * execution; mainly to close any possible + * CHRESCIND and CHOPEN_RESP races on the + * hypervisor side. + */ +#define REVOKE_LINGER 100 + for (i = 0; i < REVOKE_LINGER; ++i) { + msg = vmbus_msghc_poll_result(sc, mh); + if (msg != NULL) + break; + DELAY(1000); + } +#undef REVOKE_LINGER + if (msg == NULL) + vmbus_msghc_exec_cancel(sc, mh); + break; + } + DELAY(1000); + } + if (msg != NULL) { + status = ((const struct vmbus_chanmsg_chopen_resp *) + msg->msg_data)->chm_status; + } else { + /* XXX any non-0 value is ok here. */ + status = 0xff; + } + vmbus_msghc_put(sc, mh); if (status == 0) { if (bootverbose) vmbus_chan_printf(chan, "chan%u opened\n", chan->ch_id); return (0); } vmbus_chan_printf(chan, "failed to open chan%u\n", chan->ch_id); error = ENXIO; failed: sysctl_ctx_free(&chan->ch_sysctl_ctx); vmbus_chan_clear_chmap(chan); if (chan->ch_bufring_gpadl != 0) { int error1; error1 = vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); if (error1) { /* * Give caller a hint that the bufring GPADL is still * connected. */ error = EISCONN; } chan->ch_bufring_gpadl = 0; } atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); return (error); } int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl0) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_conn *req; const struct vmbus_message *msg; size_t reqsz; uint32_t gpadl, status; int page_count, range_len, i, cnt, error; uint64_t page_id; + KASSERT(*gpadl0 == 0, ("GPADL is not zero")); + /* * Preliminary checks. */ KASSERT((size & PAGE_MASK) == 0, ("invalid GPA size %d, not multiple page size", size)); page_count = size >> PAGE_SHIFT; KASSERT((paddr & PAGE_MASK) == 0, ("GPA is not page aligned %jx", (uintmax_t)paddr)); page_id = paddr >> PAGE_SHIFT; range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]); /* * We don't support multiple GPA ranges. */ if (range_len > UINT16_MAX) { vmbus_chan_printf(chan, "GPA too large, %d pages\n", page_count); return EOPNOTSUPP; } /* * Allocate GPADL id. */ gpadl = vmbus_gpadl_alloc(sc); /* * Connect this GPADL to the target channel. * * NOTE: * Since each message can only hold small set of page * addresses, several messages may be required to * complete the connection. 
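 *
 * In sketch form:
 *
 *	cnt = min(page_count, VMBUS_CHANMSG_GPADL_CONN_PGMAX);
 *	page_count -= cnt;		// GPADL_CONN carries cnt PFNs
 *	while (page_count > 0) {
 *		cnt = min(page_count,
 *		    VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX);
 *		page_count -= cnt;	// GPADL_SUBCONN, no result
 *	}
 *	wait for the single GPADL_CONNRESP at the end.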
*/ if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn, chm_range.gpa_page[cnt]); mh = vmbus_msghc_get(sc, reqsz); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for gpadl_conn(chan%u)\n", chan->ch_id); return EIO; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; req->chm_range_len = range_len; req->chm_range_cnt = 1; req->chm_range.gpa_len = size; req->chm_range.gpa_ofs = 0; for (i = 0; i < cnt; ++i) req->chm_range.gpa_page[i] = page_id++; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_chan_printf(chan, "gpadl_conn(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } while (page_count > 0) { struct vmbus_chanmsg_gpadl_subconn *subreq; if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn, chm_gpa_page[cnt]); vmbus_msghc_reset(mh, reqsz); subreq = vmbus_msghc_dataptr(mh); subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN; subreq->chm_gpadl = gpadl; for (i = 0; i < cnt; ++i) subreq->chm_gpa_page[i] = page_id++; vmbus_msghc_exec_noresult(mh); } KASSERT(page_count == 0, ("invalid page count %d", page_count)); msg = vmbus_msghc_wait_result(sc, mh); status = ((const struct vmbus_chanmsg_gpadl_connresp *) msg->msg_data)->chm_status; vmbus_msghc_put(sc, mh); if (status != 0) { vmbus_chan_printf(chan, "gpadl_conn(chan%u) failed: %u\n", chan->ch_id, status); return EIO; } /* Done; commit the GPADL id. */ *gpadl0 = gpadl; if (bootverbose) { vmbus_chan_printf(chan, "gpadl_conn(chan%u) succeeded\n", chan->ch_id); } return 0; } static bool vmbus_chan_wait_revoke(const struct vmbus_channel *chan) { #define WAIT_COUNT 200 /* 200ms */ int i; for (i = 0; i < WAIT_COUNT; ++i) { if (vmbus_chan_is_revoked(chan)) return (true); /* Not sure about the context; use busy-wait. */ DELAY(1000); } return (false); #undef WAIT_COUNT } /* * Disconnect the GPA from the target channel */ int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_disconn *req; int error; + KASSERT(gpadl != 0, ("GPADL is zero")); + mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for gpadl_disconn(chan%u)\n", chan->ch_id); return (EBUSY); } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_msghc_put(sc, mh); if (vmbus_chan_wait_revoke(chan)) { /* * Error is benign; this channel is revoked, * so this GPADL will not be touched anymore. 
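 *
 * (vmbus_chan_wait_revoke() above polls for up to 200 x 1ms =
 * 200ms; once the CHRESCIND has arrived the host side has
 * presumably dropped the mapping already, so the failed DISCONN
 * can be ignored.)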
*/ vmbus_chan_printf(chan, "gpadl_disconn(revoked chan%u) msg hypercall " "exec failed: %d\n", chan->ch_id, error); return (0); } vmbus_chan_printf(chan, "gpadl_disconn(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); return (error); } vmbus_msghc_wait_result(sc, mh); /* Discard result; no useful information */ vmbus_msghc_put(sc, mh); return (0); } static void vmbus_chan_detach(struct vmbus_channel *chan) { int refs; KASSERT(chan->ch_refs > 0, ("chan%u: invalid refcnt %d", chan->ch_id, chan->ch_refs)); refs = atomic_fetchadd_int(&chan->ch_refs, -1); #ifdef INVARIANTS if (VMBUS_CHAN_ISPRIMARY(chan)) { KASSERT(refs == 1, ("chan%u: invalid refcnt %d for prichan", chan->ch_id, refs + 1)); } #endif if (refs == 1) { /* * Detach the target channel. */ if (bootverbose) { vmbus_chan_printf(chan, "chan%u detached\n", chan->ch_id); } taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_detach_task); } } static void vmbus_chan_clrchmap_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; critical_enter(); chan->ch_vmbus->vmbus_chmap[chan->ch_id] = NULL; critical_exit(); } static void vmbus_chan_clear_chmap(struct vmbus_channel *chan) { struct task chmap_task; TASK_INIT(&chmap_task, 0, vmbus_chan_clrchmap_task, chan); taskqueue_enqueue(chan->ch_tq, &chmap_task); taskqueue_drain(chan->ch_tq, &chmap_task); } static void vmbus_chan_set_chmap(struct vmbus_channel *chan) { __compiler_membar(); chan->ch_vmbus->vmbus_chmap[chan->ch_id] = chan; } static int vmbus_chan_close_internal(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_chclose *req; uint32_t old_stflags; int error; /* * NOTE: * Sub-channels are closed upon their primary channel closing, * so they can be closed even before they are opened. */ for (;;) { old_stflags = chan->ch_stflags; if (atomic_cmpset_int(&chan->ch_stflags, old_stflags, old_stflags & ~VMBUS_CHAN_ST_OPENED)) break; } if ((old_stflags & VMBUS_CHAN_ST_OPENED) == 0) { /* Not opened yet; done */ if (bootverbose) { vmbus_chan_printf(chan, "chan%u not opened\n", chan->ch_id); } return (0); } /* * Free this channel's sysctl tree attached to its device's * sysctl tree. */ sysctl_ctx_free(&chan->ch_sysctl_ctx); /* * NOTE: * Order is critical. This channel _must_ be uninstalled first, * else the channel task may be enqueued by the IDT after it has * been drained. */ vmbus_chan_clear_chmap(chan); taskqueue_drain(chan->ch_tq, &chan->ch_task); chan->ch_tq = NULL; /* * Close this channel. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for chclose(chan%u)\n", chan->ch_id); error = ENXIO; goto disconnect; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { vmbus_chan_printf(chan, "chclose(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); goto disconnect; } if (bootverbose) vmbus_chan_printf(chan, "chan%u closed\n", chan->ch_id); disconnect: /* * Disconnect the TX+RX bufrings from this channel. */ if (chan->ch_bufring_gpadl != 0) { int error1; error1 = vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); if (error1) { /* * XXX * The bufring GPADL is still connected; abandon * this bufring, instead of having mysterious * crash or trashed data later on. 
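vmbus_chan_close_internal() above clears VMBUS_CHAN_ST_OPENED with an atomic_cmpset_int() loop, so close and open cannot both believe they own the channel. A rough userland analogue using C11 atomics (the flag value is made up for the sketch):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define CHAN_ST_OPENED	0x0002		/* illustrative flag bit */

/* Clear OPENED and report whether it was set, like the cmpset loop above. */
static bool
chan_clear_opened(atomic_uint *stflags)
{
	unsigned int old;

	do {
		old = atomic_load(stflags);
	} while (!atomic_compare_exchange_weak(stflags, &old,
	    old & ~CHAN_ST_OPENED));
	return ((old & CHAN_ST_OPENED) != 0);
}

int
main(void)
{
	atomic_uint st = CHAN_ST_OPENED;

	printf("was opened: %d\n", chan_clear_opened(&st));	/* 1 */
	printf("was opened: %d\n", chan_clear_opened(&st));	/* 0: close is idempotent */
	return (0);
}

The captured old value is what makes the "not opened yet; done" early return race-free.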
*/ vmbus_chan_printf(chan, "chan%u bufring GPADL " "is still connected after close\n", chan->ch_id); chan->ch_bufring = NULL; /* * Give caller a hint that the bufring GPADL is * still connected. */ error = EISCONN; } chan->ch_bufring_gpadl = 0; } /* * Destroy the TX+RX bufrings. */ if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } return (error); } int vmbus_chan_close_direct(struct vmbus_channel *chan) { int error; #ifdef INVARIANTS if (VMBUS_CHAN_ISPRIMARY(chan)) { struct vmbus_channel *subchan; /* * All sub-channels _must_ have been closed, or are _not_ * opened at all. */ mtx_lock(&chan->ch_subchan_lock); TAILQ_FOREACH(subchan, &chan->ch_subchans, ch_sublink) { KASSERT( (subchan->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0, ("chan%u: subchan%u is still opened", chan->ch_id, subchan->ch_subidx)); } mtx_unlock(&chan->ch_subchan_lock); } #endif error = vmbus_chan_close_internal(chan); if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * This sub-channel is referenced, when it is linked to * the primary channel; drop that reference now. */ vmbus_chan_detach(chan); } return (error); } /* * Caller should make sure that all sub-channels have * been added to 'chan' and all to-be-closed channels * are not being opened. */ void vmbus_chan_close(struct vmbus_channel *chan) { int subchan_cnt; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Sub-channel is closed when its primary channel * is closed; done. */ return; } /* * Close all sub-channels, if any. */ subchan_cnt = chan->ch_subchan_cnt; if (subchan_cnt > 0) { struct vmbus_channel **subchan; int i; subchan = vmbus_subchan_get(chan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) { vmbus_chan_close_internal(subchan[i]); /* * This sub-channel is referenced, when it is * linked to the primary channel; drop that * reference now. */ vmbus_chan_detach(subchan[i]); } vmbus_subchan_rel(subchan, subchan_cnt); } /* Then close the primary channel. 
*/ vmbus_chan_close_internal(chan); } void vmbus_chan_intr_drain(struct vmbus_channel *chan) { taskqueue_drain(chan->ch_tq, &chan->ch_task); } int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt pkt; int pktlen, pad_pktlen, hlen, error; uint64_t pad = 0; struct iovec iov[3]; boolean_t send_evt; hlen = sizeof(pkt); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = type; pkt.cp_hdr.cph_flags = flags; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 3, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_sglist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_gpa_cnt = sglen; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = sg; iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen; iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_prplist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; hlen = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prp_cnt]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_range_cnt = 1; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = prp; iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]); iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen0, uint64_t *xactid) { struct vmbus_chanpkt_hdr pkt; int error, dlen, hlen; error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) 
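vmbus_chan_send() and its sglist/prplist variants all follow the same shape: compute the padded total length, then hand the bufring a small iovec of header, payload, and zero padding so nothing is copied twice. A standalone sketch of that layout, assuming the 8-byte rounding that VMBUS_CHANPKT_TOTLEN() is taken to perform here; the struct below only mimics vmbus_chanpkt_hdr, and the real driver stores the length fields through VMBUS_CHANPKT_SETLEN():

#include <stdint.h>
#include <stdio.h>
#include <sys/uio.h>

#define CHANPKT_ALIGN		8	/* assumed alignment */
#define CHANPKT_TOTLEN(len)	(((len) + CHANPKT_ALIGN - 1) & ~(CHANPKT_ALIGN - 1))

struct chanpkt_hdr {			/* mimics struct vmbus_chanpkt_hdr */
	uint16_t	cph_type;
	uint16_t	cph_flags;
	uint16_t	cph_hlen;	/* stored in encoded units on real h/w */
	uint16_t	cph_tlen;	/* stored in encoded units on real h/w */
	uint64_t	cph_xactid;
};

int
main(void)
{
	struct chanpkt_hdr hdr = { .cph_type = 1, .cph_xactid = 42 };
	char data[13] = "hello, vmbus";
	uint64_t pad = 0;
	int hlen = sizeof(hdr), dlen = sizeof(data);
	int pktlen = hlen + dlen;
	int pad_pktlen = CHANPKT_TOTLEN(pktlen);

	hdr.cph_hlen = (uint16_t)hlen;		/* simplified encoding */
	hdr.cph_tlen = (uint16_t)pad_pktlen;

	/* Header, payload, and zero padding, like the 3-element iov above. */
	struct iovec iov[3] = {
		{ .iov_base = &hdr,  .iov_len = hlen },
		{ .iov_base = data,  .iov_len = dlen },
		{ .iov_base = &pad,  .iov_len = pad_pktlen - pktlen },
	};

	printf("pktlen %d -> padded %d (pad %zu bytes)\n",
	    pktlen, pad_pktlen, iov[2].iov_len);
	return (0);
}

Since the pad is always less than CHANPKT_ALIGN bytes, a single uint64_t of zeros is enough to back the third segment.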
return (error); if (__predict_false(pkt.cph_hlen < VMBUS_CHANPKT_HLEN_MIN)) { vmbus_chan_printf(chan, "invalid hlen %u\n", pkt.cph_hlen); /* XXX this channel is dead actually. */ return (EIO); } if (__predict_false(pkt.cph_hlen > pkt.cph_tlen)) { vmbus_chan_printf(chan, "invalid hlen %u and tlen %u\n", pkt.cph_hlen, pkt.cph_tlen); /* XXX this channel is dead actually. */ return (EIO); } hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen); dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen; if (*dlen0 < dlen) { /* Return the size of this packet's data. */ *dlen0 = dlen; return (ENOBUFS); } *xactid = pkt.cph_xactid; *dlen0 = dlen; /* Skip packet header */ error = vmbus_rxbr_read(&chan->ch_rxbr, data, dlen, hlen); KASSERT(!error, ("vmbus_rxbr_read failed")); return (0); } int vmbus_chan_recv_pkt(struct vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt, int *pktlen0) { int error, pktlen, pkt_hlen; pkt_hlen = sizeof(*pkt); error = vmbus_rxbr_peek(&chan->ch_rxbr, pkt, pkt_hlen); if (error) return (error); if (__predict_false(pkt->cph_hlen < VMBUS_CHANPKT_HLEN_MIN)) { vmbus_chan_printf(chan, "invalid hlen %u\n", pkt->cph_hlen); /* XXX this channel is dead actually. */ return (EIO); } if (__predict_false(pkt->cph_hlen > pkt->cph_tlen)) { vmbus_chan_printf(chan, "invalid hlen %u and tlen %u\n", pkt->cph_hlen, pkt->cph_tlen); /* XXX this channel is dead actually. */ return (EIO); } pktlen = VMBUS_CHANPKT_GETLEN(pkt->cph_tlen); if (*pktlen0 < pktlen) { /* Return the size of this packet. */ *pktlen0 = pktlen; return (ENOBUFS); } *pktlen0 = pktlen; /* * Skip the fixed-size packet header, which has been filled * by the above vmbus_rxbr_peek(). */ error = vmbus_rxbr_read(&chan->ch_rxbr, pkt + 1, pktlen - pkt_hlen, pkt_hlen); KASSERT(!error, ("vmbus_rxbr_read failed")); return (0); } static void vmbus_chan_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; vmbus_chan_callback_t cb = chan->ch_cb; void *cbarg = chan->ch_cbarg; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. * * NOTE: Interrupt has been disabled in the ISR. */ for (;;) { uint32_t left; cb(chan, cbarg); left = vmbus_rxbr_intr_unmask(&chan->ch_rxbr); if (left == 0) { /* No more data in RX bufring; done */ break; } vmbus_rxbr_intr_mask(&chan->ch_rxbr); } } static void vmbus_chan_task_nobatch(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; chan->ch_cb(chan, chan->ch_cbarg); } static __inline void vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags, int flag_cnt) { int f; for (f = 0; f < flag_cnt; ++f) { uint32_t chid_base; u_long flags; int chid_ofs; if (event_flags[f] == 0) continue; flags = atomic_swap_long(&event_flags[f], 0); chid_base = f << VMBUS_EVTFLAG_SHIFT; while ((chid_ofs = ffsl(flags)) != 0) { struct vmbus_channel *chan; --chid_ofs; /* NOTE: ffsl is 1-based */ flags &= ~(1UL << chid_ofs); chan = sc->vmbus_chmap[chid_base + chid_ofs]; if (__predict_false(chan == NULL)) { /* Channel is closed. 
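The batched-read loop in vmbus_chan_task() closes the classic missed-interrupt window: data that arrives between the callback finishing and the interrupt being re-enabled is caught by re-checking after the unmask. A toy model of that control flow, with trivial stand-ins for the ring and for vmbus_rxbr_intr_{mask,unmask}():

#include <stdio.h>

static unsigned int ring_left = 300;	/* pretend bytes pending in the RX ring */

static void
chan_callback(void)
{
	/* Consume whatever is currently visible. */
	ring_left = (ring_left >= 100) ? ring_left - 100 : 0;
}

static unsigned int
rxbr_intr_unmask(void)
{
	/* Re-enable host signaling, then report bytes that raced in. */
	return (ring_left);
}

static void
rxbr_intr_mask(void)
{
	/* Disable host signaling while we read. */
}

int
main(void)
{
	/* Same shape as vmbus_chan_task(): read, unmask, re-check, repeat. */
	for (;;) {
		chan_callback();
		if (rxbr_intr_unmask() == 0)
			break;		/* RX bufring drained; stay unmasked */
		rxbr_intr_mask();	/* more arrived while unmasking; go again */
	}
	printf("done, ring_left=%u\n", ring_left);
	return (0);
}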
*/ continue; } __compiler_membar(); if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) vmbus_rxbr_intr_mask(&chan->ch_rxbr); taskqueue_enqueue(chan->ch_tq, &chan->ch_task); } } } void vmbus_event_proc(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; /* * On Host with Win8 or above, the event page can be checked directly * to get the id of the channel that has the pending interrupt. */ eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; vmbus_event_flags_proc(sc, eventf->evt_flags, VMBUS_PCPU_GET(sc, event_flags_cnt, cpu)); } void vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags, VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); } } static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc, const struct vmbus_channel *chan) { volatile int *flag_cnt_ptr; int flag_cnt; flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1; flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid); for (;;) { int old_flag_cnt; old_flag_cnt = *flag_cnt_ptr; if (old_flag_cnt >= flag_cnt) break; if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { if (bootverbose) { vmbus_chan_printf(chan, "chan%u update cpu%d flag_cnt to %d\n", chan->ch_id, chan->ch_cpuid, flag_cnt); } break; } } } static struct vmbus_channel * vmbus_chan_alloc(struct vmbus_softc *sc) { struct vmbus_channel *chan; chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO); chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param), &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (chan->ch_monprm == NULL) { device_printf(sc->vmbus_dev, "monprm alloc failed\n"); free(chan, M_DEVBUF); return NULL; } chan->ch_refs = 1; chan->ch_vmbus = sc; mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF); sx_init(&chan->ch_orphan_lock, "vmbus chorphan"); TAILQ_INIT(&chan->ch_subchans); vmbus_rxbr_init(&chan->ch_rxbr); vmbus_txbr_init(&chan->ch_txbr); return chan; } static void vmbus_chan_free(struct vmbus_channel *chan) { KASSERT(TAILQ_EMPTY(&chan->ch_subchans) && chan->ch_subchan_cnt == 0, ("still owns sub-channels")); KASSERT((chan->ch_stflags & (VMBUS_CHAN_ST_OPENED | VMBUS_CHAN_ST_ONPRIL | VMBUS_CHAN_ST_ONSUBL | VMBUS_CHAN_ST_ONLIST)) == 0, ("free busy channel")); KASSERT(chan->ch_orphan_xact == NULL, ("still has orphan xact installed")); KASSERT(chan->ch_refs == 0, ("chan%u: invalid refcnt %d", chan->ch_id, chan->ch_refs)); hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm); mtx_destroy(&chan->ch_subchan_lock); sx_destroy(&chan->ch_orphan_lock); vmbus_rxbr_deinit(&chan->ch_rxbr); vmbus_txbr_deinit(&chan->ch_txbr); free(chan, M_DEVBUF); } static int vmbus_chan_add(struct vmbus_channel *newchan) { struct vmbus_softc *sc = newchan->ch_vmbus; struct vmbus_channel *prichan; if (newchan->ch_id == 0) { /* * XXX * Chan0 will neither be processed nor should be offered; * skip it. */ device_printf(sc->vmbus_dev, "got chan0 offer, discard\n"); return EINVAL; } else if (newchan->ch_id >= VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid chan%u offer\n", newchan->ch_id); return EINVAL; } mtx_lock(&sc->vmbus_prichan_lock); TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) { /* * Sub-channel will have the same type GUID and instance * GUID as its primary channel. 
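vmbus_event_flags_proc() above turns each nonzero u_long of event flags into channel IDs with ffsl(). The same bit-walk in a standalone program; ffsl() is the one-based bit scan declared in <strings.h> on FreeBSD, and 64 channels per word is an LP64 assumption:

#include <stdio.h>
#include <strings.h>		/* ffsl() */

#define EVTFLAG_SHIFT	6	/* 64 channel bits per u_long, assumed LP64 */

int
main(void)
{
	unsigned long event_flags[2] = { 0x5UL, 0x8000000000000000UL };
	int f;

	for (f = 0; f < 2; f++) {
		/* The driver atomically swaps the live word to 0 first. */
		unsigned long flags = event_flags[f];
		unsigned int chid_base = f << EVTFLAG_SHIFT;
		int chid_ofs;

		while ((chid_ofs = ffsl((long)flags)) != 0) {
			--chid_ofs;			/* ffsl() is 1-based */
			flags &= ~(1UL << chid_ofs);
			printf("pending chan%u\n", chid_base + chid_ofs);
		}
	}
	return (0);
}

This prints chan0, chan2, and chan127, matching how a set bit maps to chid_base + chid_ofs in the interrupt path.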
*/ if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type, sizeof(struct hyperv_guid)) == 0 && memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst, sizeof(struct hyperv_guid)) == 0) break; } if (VMBUS_CHAN_ISPRIMARY(newchan)) { if (prichan == NULL) { /* Install the new primary channel */ vmbus_chan_ins_prilist(sc, newchan); mtx_unlock(&sc->vmbus_prichan_lock); goto done; } else { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "duplicated primary chan%u\n", newchan->ch_id); return EINVAL; } } else { /* Sub-channel */ if (prichan == NULL) { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "no primary chan for chan%u\n", newchan->ch_id); return EINVAL; } /* * Found the primary channel for this sub-channel and * move on. * * XXX refcnt prichan */ } mtx_unlock(&sc->vmbus_prichan_lock); /* * This is a sub-channel; link it with the primary channel. */ KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan), ("new channel is not sub-channel")); KASSERT(prichan != NULL, ("no primary channel")); /* * Reference count this sub-channel; it will be dereferenced * when this sub-channel is closed. */ KASSERT(newchan->ch_refs == 1, ("chan%u: invalid refcnt %d", newchan->ch_id, newchan->ch_refs)); atomic_add_int(&newchan->ch_refs, 1); newchan->ch_prichan = prichan; newchan->ch_dev = prichan->ch_dev; mtx_lock(&prichan->ch_subchan_lock); vmbus_chan_ins_sublist(prichan, newchan); mtx_unlock(&prichan->ch_subchan_lock); /* * Notify anyone that is interested in this sub-channel, * after this sub-channel is setup. */ wakeup(prichan); done: /* * Hook this channel up for later revocation. */ mtx_lock(&sc->vmbus_chan_lock); vmbus_chan_ins_list(sc, newchan); mtx_unlock(&sc->vmbus_chan_lock); if (bootverbose) { vmbus_chan_printf(newchan, "chan%u subidx%u offer\n", newchan->ch_id, newchan->ch_subidx); } /* Select default cpu for this channel. */ vmbus_chan_cpu_default(newchan); return 0; } void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); if (chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WS2008 || chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WIN7) { /* Only cpu0 is supported */ cpu = 0; } chan->ch_cpuid = cpu; chan->ch_vcpuid = VMBUS_PCPU_GET(chan->ch_vmbus, vcpuid, cpu); if (bootverbose) { vmbus_chan_printf(chan, "chan%u assigned to cpu%u [vcpu%u]\n", chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid); } } void vmbus_chan_cpu_rr(struct vmbus_channel *chan) { static uint32_t vmbus_chan_nextcpu; int cpu; cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus; vmbus_chan_cpu_set(chan, cpu); } static void vmbus_chan_cpu_default(struct vmbus_channel *chan) { /* * By default, pin the channel to cpu0. Devices having * special channel-cpu mapping requirement should call * vmbus_chan_cpu_{set,rr}(). 
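vmbus_chan_cpu_rr() above distributes channels over CPUs with a single atomic fetch-add on a static counter, which stays correct even when several offers are processed concurrently. A C11 sketch of the same idea:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint nextcpu;	/* plays the role of vmbus_chan_nextcpu */

static int
chan_cpu_rr(int ncpus)
{
	/* fetch-and-add lets concurrent channel offers pick distinct CPUs */
	return (atomic_fetch_add(&nextcpu, 1) % ncpus);
}

int
main(void)
{
	int i, ncpus = 4;

	for (i = 0; i < 6; i++)
		printf("chan%d -> cpu%d\n", i, chan_cpu_rr(ncpus));
	/* Pre-Win8 hosts (WS2008/WIN7) force cpu0 instead, as above. */
	return (0);
}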
*/ vmbus_chan_cpu_set(chan, 0); } static void vmbus_chan_msgproc_choffer(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_choffer *offer; struct vmbus_channel *chan; task_fn_t *detach_fn, *attach_fn; int error; offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data; chan = vmbus_chan_alloc(sc); if (chan == NULL) { device_printf(sc->vmbus_dev, "allocate chan%u failed\n", offer->chm_chanid); return; } chan->ch_id = offer->chm_chanid; chan->ch_subidx = offer->chm_subidx; chan->ch_guid_type = offer->chm_chtype; chan->ch_guid_inst = offer->chm_chinst; /* Batch reading is on by default */ chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT; if (sc->vmbus_version != VMBUS_VERSION_WS2008) chan->ch_monprm->mp_connid = offer->chm_connid; if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) { int trig_idx; /* * Setup MNF stuffs. */ chan->ch_txflags |= VMBUS_CHAN_TXF_HASMNF; trig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN; if (trig_idx >= VMBUS_MONTRIGS_MAX) panic("invalid monitor trigger %u", offer->chm_montrig); chan->ch_montrig = &sc->vmbus_mnf2->mnf_trigs[trig_idx].mt_pending; chan->ch_montrig_mask = 1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN); } /* * Setup event flag. */ chan->ch_evtflag = &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT]; chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK); /* * Setup attach and detach tasks. */ if (VMBUS_CHAN_ISPRIMARY(chan)) { chan->ch_mgmt_tq = sc->vmbus_devtq; attach_fn = vmbus_prichan_attach_task; detach_fn = vmbus_prichan_detach_task; } else { chan->ch_mgmt_tq = sc->vmbus_subchtq; attach_fn = vmbus_subchan_attach_task; detach_fn = vmbus_subchan_detach_task; } TASK_INIT(&chan->ch_attach_task, 0, attach_fn, chan); TASK_INIT(&chan->ch_detach_task, 0, detach_fn, chan); error = vmbus_chan_add(chan); if (error) { device_printf(sc->vmbus_dev, "add chan%u failed: %d\n", chan->ch_id, error); atomic_subtract_int(&chan->ch_refs, 1); vmbus_chan_free(chan); return; } taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_attach_task); } static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_chrescind *note; struct vmbus_channel *chan; note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data; if (note->chm_chanid > VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid revoked chan%u\n", note->chm_chanid); return; } /* * Find and remove the target channel from the channel list. */ mtx_lock(&sc->vmbus_chan_lock); TAILQ_FOREACH(chan, &sc->vmbus_chans, ch_link) { if (chan->ch_id == note->chm_chanid) break; } if (chan == NULL) { mtx_unlock(&sc->vmbus_chan_lock); device_printf(sc->vmbus_dev, "chan%u is not offered\n", note->chm_chanid); return; } vmbus_chan_rem_list(sc, chan); mtx_unlock(&sc->vmbus_chan_lock); if (VMBUS_CHAN_ISPRIMARY(chan)) { /* * The target channel is a primary channel; remove the * target channel from the primary channel list now, * instead of later, so that it will not be found by * other sub-channel offers, which are processed in * this thread. */ mtx_lock(&sc->vmbus_prichan_lock); vmbus_chan_rem_prilist(sc, chan); mtx_unlock(&sc->vmbus_prichan_lock); } /* * NOTE: * The following processing order is critical: * Set the REVOKED state flag before orphaning the installed xact. 
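The offer handler above derives each channel's event-flag slot from its ID alone: the word index is ch_id >> VMBUS_EVTFLAG_SHIFT and the bit is ch_id & VMBUS_EVTFLAG_MASK. The shift value below assumes a 64-bit u_long; the real constants live in the vmbus headers:

#include <stdio.h>

#define EVTFLAG_SHIFT	6			/* log2(bits per u_long), assumed */
#define EVTFLAG_MASK	((1 << EVTFLAG_SHIFT) - 1)

int
main(void)
{
	unsigned int chid;

	for (chid = 0; chid < 200; chid += 70) {
		unsigned int word = chid >> EVTFLAG_SHIFT;	   /* which u_long */
		unsigned long mask = 1UL << (chid & EVTFLAG_MASK); /* which bit */
		printf("chan%u -> tx_evtflags[%u], mask %#lx\n", chid, word, mask);
	}
	return (0);
}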
*/ if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_REVOKED_SHIFT)) panic("channel has already been revoked"); sx_xlock(&chan->ch_orphan_lock); if (chan->ch_orphan_xact != NULL) vmbus_xact_ctx_orphan(chan->ch_orphan_xact); sx_xunlock(&chan->ch_orphan_lock); if (bootverbose) vmbus_chan_printf(chan, "chan%u revoked\n", note->chm_chanid); vmbus_chan_detach(chan); } static int vmbus_chan_release(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_chanmsg_chfree *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { vmbus_chan_printf(chan, "can not get msg hypercall for chfree(chan%u)\n", chan->ch_id); return (ENXIO); } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { vmbus_chan_printf(chan, "chfree(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); } else { if (bootverbose) vmbus_chan_printf(chan, "chan%u freed\n", chan->ch_id); } return (error); } static void vmbus_prichan_detach_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("chan%u is not primary channel", chan->ch_id)); /* Delete and detach the device associated with this channel. */ vmbus_delete_child(chan); /* Release this channel (back to vmbus). */ vmbus_chan_release(chan); /* Free this channel's resource. */ vmbus_chan_free(chan); } static void vmbus_subchan_detach_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; struct vmbus_channel *pri_chan = chan->ch_prichan; KASSERT(!VMBUS_CHAN_ISPRIMARY(chan), ("chan%u is primary channel", chan->ch_id)); /* Release this channel (back to vmbus). */ vmbus_chan_release(chan); /* Unlink from its primary channel's sub-channel list. */ mtx_lock(&pri_chan->ch_subchan_lock); vmbus_chan_rem_sublist(pri_chan, chan); mtx_unlock(&pri_chan->ch_subchan_lock); /* Notify anyone that is waiting for this sub-channel to vanish. */ wakeup(pri_chan); /* Free this channel's resource. */ vmbus_chan_free(chan); } static void vmbus_prichan_attach_task(void *xchan, int pending __unused) { /* * Add device for this primary channel. */ vmbus_add_child(xchan); } static void vmbus_subchan_attach_task(void *xchan __unused, int pending __unused) { /* Nothing */ } void vmbus_chan_destroy_all(struct vmbus_softc *sc) { /* * Detach all devices and destroy the corresponding primary * channels. */ for (;;) { struct vmbus_channel *chan; mtx_lock(&sc->vmbus_chan_lock); TAILQ_FOREACH(chan, &sc->vmbus_chans, ch_link) { if (VMBUS_CHAN_ISPRIMARY(chan)) break; } if (chan == NULL) { /* No more primary channels; done. 
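The rescind path above depends on ordering: the REVOKED bit must be set atomically before the pending xact is orphaned, and atomic_testandset_int() doubles as a double-revoke assertion. A userland analogue built on C11 atomic_fetch_or (the bit value is illustrative):

#include <stdatomic.h>
#include <stdio.h>

#define CHAN_ST_REVOKED	0x0010		/* illustrative bit */

/* Returns nonzero if the bit was already set, like atomic_testandset_int(). */
static int
chan_mark_revoked(atomic_uint *stflags)
{
	return ((atomic_fetch_or(stflags, CHAN_ST_REVOKED) &
	    CHAN_ST_REVOKED) != 0);
}

int
main(void)
{
	atomic_uint st = 0;

	if (chan_mark_revoked(&st))
		printf("panic: channel has already been revoked\n");
	/* Only after the flag is visible may the pending xact be orphaned. */
	if (chan_mark_revoked(&st))
		printf("second revoke detected, as expected\n");
	return (0);
}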
*/ mtx_unlock(&sc->vmbus_chan_lock); break; } vmbus_chan_rem_list(sc, chan); mtx_unlock(&sc->vmbus_chan_lock); mtx_lock(&sc->vmbus_prichan_lock); vmbus_chan_rem_prilist(sc, chan); mtx_unlock(&sc->vmbus_prichan_lock); taskqueue_enqueue(chan->ch_mgmt_tq, &chan->ch_detach_task); } } struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt) { struct vmbus_channel **ret, *chan; int i; KASSERT(subchan_cnt > 0, ("invalid sub-channel count %d", subchan_cnt)); ret = malloc(subchan_cnt * sizeof(struct vmbus_channel *), M_TEMP, M_WAITOK); mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt < subchan_cnt) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0); i = 0; TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) { /* TODO: refcnt chan */ ret[i] = chan; ++i; if (i == subchan_cnt) break; } KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d", pri_chan->ch_subchan_cnt, subchan_cnt)); mtx_unlock(&pri_chan->ch_subchan_lock); return ret; } void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt __unused) { free(subchan, M_TEMP); } void vmbus_subchan_drain(struct vmbus_channel *pri_chan) { mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt > 0) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0); mtx_unlock(&pri_chan->ch_subchan_lock); } void vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX, ("invalid message type %u", msg_type)); msg_proc = vmbus_chan_msgprocs[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); } void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on) { if (!on) chan->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD; else chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; } uint32_t vmbus_chan_id(const struct vmbus_channel *chan) { return chan->ch_id; } uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan) { return chan->ch_subidx; } bool vmbus_chan_is_primary(const struct vmbus_channel *chan) { if (VMBUS_CHAN_ISPRIMARY(chan)) return true; else return false; } const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan) { return &chan->ch_guid_inst; } int vmbus_chan_prplist_nelem(int br_size, int prpcnt_max, int dlen_max) { int elem_size; elem_size = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prpcnt_max]); elem_size += dlen_max; elem_size = VMBUS_CHANPKT_TOTLEN(elem_size); return (vmbus_br_nelem(br_size, elem_size)); } bool vmbus_chan_tx_empty(const struct vmbus_channel *chan) { return (vmbus_txbr_empty(&chan->ch_txbr)); } bool vmbus_chan_rx_empty(const struct vmbus_channel *chan) { return (vmbus_rxbr_empty(&chan->ch_rxbr)); } static int vmbus_chan_printf(const struct vmbus_channel *chan, const char *fmt, ...) 
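vmbus_subchan_get() and vmbus_subchan_drain() above are both sleep-until-count loops on the primary channel's sub-channel lock, woken by wakeup(prichan) from the offer and detach paths. A pthread rendition of the same wait/wakeup pairing:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int subchan_cnt;

/* Mirrors vmbus_subchan_get(): sleep until enough sub-channels are offered. */
static void
subchan_wait(int want)
{
	pthread_mutex_lock(&lock);
	while (subchan_cnt < want)
		pthread_cond_wait(&cv, &lock);	/* mtx_sleep(pri_chan, ...) analogue */
	pthread_mutex_unlock(&lock);
}

static void *
offer_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	subchan_cnt++;
	pthread_mutex_unlock(&lock);
	pthread_cond_broadcast(&cv);		/* wakeup(prichan) analogue */
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, offer_thread, NULL);
	subchan_wait(1);
	printf("got %d sub-channel(s)\n", subchan_cnt);
	pthread_join(t, NULL);
	return (0);
}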
{
	va_list ap;
	device_t dev;
	int retval;

	if (chan->ch_dev == NULL || !device_is_alive(chan->ch_dev))
		dev = chan->ch_vmbus->vmbus_dev;
	else
		dev = chan->ch_dev;

	retval = device_print_prettyname(dev);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);

	return (retval);
}

void
vmbus_chan_run_task(struct vmbus_channel *chan, struct task *task)
{
	taskqueue_enqueue(chan->ch_tq, task);
	taskqueue_drain(chan->ch_tq, task);
}

struct taskqueue *
vmbus_chan_mgmt_tq(const struct vmbus_channel *chan)
{
	return (chan->ch_mgmt_tq);
}

bool
vmbus_chan_is_revoked(const struct vmbus_channel *chan)
{
	if (chan->ch_stflags & VMBUS_CHAN_ST_REVOKED)
		return (true);
	return (false);
}

void
vmbus_chan_set_orphan(struct vmbus_channel *chan, struct vmbus_xact_ctx *xact)
{
	sx_xlock(&chan->ch_orphan_lock);
	chan->ch_orphan_xact = xact;
	sx_xunlock(&chan->ch_orphan_lock);
}

void
vmbus_chan_unset_orphan(struct vmbus_channel *chan)
{
	sx_xlock(&chan->ch_orphan_lock);
	chan->ch_orphan_xact = NULL;
	sx_xunlock(&chan->ch_orphan_lock);
+}
+
+const void *
+vmbus_chan_xact_wait(const struct vmbus_channel *chan,
+    struct vmbus_xact *xact, size_t *resp_len, bool can_sleep)
+{
+	const void *ret;
+
+	if (can_sleep)
+		ret = vmbus_xact_wait(xact, resp_len);
+	else
+		ret = vmbus_xact_busywait(xact, resp_len);
+	if (vmbus_chan_is_revoked(chan)) {
+		/*
+		 * This xact has probably been interrupted, and the
+		 * interruption can race the reply reception, so we
+		 * have to make sure that nothing is left on the RX
+		 * bufring, i.e. this xact will not be touched once
+		 * this function returns.
+		 *
+		 * Since the hypervisor will not put more data onto
+		 * the RX bufring once the channel is revoked, the
+		 * following loop will terminate once all data have
+		 * been drained by the driver's channel callback.
+		 */
+		while (!vmbus_chan_rx_empty(chan)) {
+			if (can_sleep)
+				pause("chxact", 1);
+			else
+				DELAY(1000);
+		}
+	}
+	return (ret);
}
Index: projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_var.h
===================================================================
--- projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_var.h	(revision 309262)
+++ projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_var.h	(revision 309263)
@@ -1,167 +1,172 @@
/*-
 * Copyright (c) 2016 Microsoft Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
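The new vmbus_chan_xact_wait() above finishes with a drain loop: once the channel is revoked the hypervisor writes nothing more, so waiting for the RX bufring to empty guarantees the xact buffer is not referenced after return. A toy model of that tail, where the decrement stands in for the channel callback draining packets concurrently:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static int rx_pending = 3;	/* toy stand-in for RX bufring contents */

static bool
chan_rx_empty(void)
{
	/* Each poll "observes" the callback having drained one packet. */
	return (rx_pending-- <= 0);
}

int
main(void)
{
	bool can_sleep = true;

	/*
	 * Same shape as the tail of vmbus_chan_xact_wait(): after a
	 * revoke, sleep (or busy-wait) until the RX bufring is empty.
	 */
	while (!chan_rx_empty()) {
		if (can_sleep)
			usleep(1000);	/* pause("chxact", 1) analogue */
		else
			;		/* DELAY(1000) busy-wait analogue */
	}
	printf("xact reply is now safe to reuse\n");
	return (0);
}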
* * $FreeBSD$ */ #ifndef _VMBUS_VAR_H_ #define _VMBUS_VAR_H_ #include #include #include #include #include #include /* * NOTE: DO NOT CHANGE THIS. */ #define VMBUS_SINT_MESSAGE 2 /* * NOTE: * - DO NOT set it to the same value as VMBUS_SINT_MESSAGE. * - DO NOT set it to 0. */ #define VMBUS_SINT_TIMER 4 /* * NOTE: DO NOT CHANGE THESE */ #define VMBUS_CONNID_MESSAGE 1 #define VMBUS_CONNID_EVENT 2 struct vmbus_message; struct vmbus_softc; typedef void (*vmbus_chanmsg_proc_t)(struct vmbus_softc *, const struct vmbus_message *); #define VMBUS_CHANMSG_PROC(name, func) \ [VMBUS_CHANMSG_TYPE_##name] = func #define VMBUS_CHANMSG_PROC_WAKEUP(name) \ VMBUS_CHANMSG_PROC(name, vmbus_msghc_wakeup) struct vmbus_pcpu_data { u_long *intr_cnt; /* Hyper-V interrupt counter */ struct vmbus_message *message; /* shared messages */ uint32_t vcpuid; /* virtual cpuid */ int event_flags_cnt;/* # of event flags */ struct vmbus_evtflags *event_flags; /* event flags from host */ /* Rarely used fields */ struct hyperv_dma message_dma; /* busdma glue */ struct hyperv_dma event_flags_dma;/* busdma glue */ struct taskqueue *event_tq; /* event taskq */ struct taskqueue *message_tq; /* message taskq */ struct task message_task; /* message task */ } __aligned(CACHE_LINE_SIZE); #if __FreeBSD_version < 1100000 typedef u_long rman_res_t; #endif struct vmbus_softc { void (*vmbus_event_proc)(struct vmbus_softc *, int); u_long *vmbus_tx_evtflags; /* event flags to host */ struct vmbus_mnf *vmbus_mnf2; /* monitored by host */ u_long *vmbus_rx_evtflags; /* compat evtflgs from host */ struct vmbus_channel **vmbus_chmap; struct vmbus_xact_ctx *vmbus_xc; struct vmbus_pcpu_data vmbus_pcpu[MAXCPU]; /* * Rarely used fields */ device_t vmbus_dev; int vmbus_idtvec; uint32_t vmbus_flags; /* see VMBUS_FLAG_ */ uint32_t vmbus_version; uint32_t vmbus_gpadl; /* Shared memory for vmbus_{rx,tx}_evtflags */ void *vmbus_evtflags; struct hyperv_dma vmbus_evtflags_dma; void *vmbus_mnf1; /* monitored by VM, unused */ struct hyperv_dma vmbus_mnf1_dma; struct hyperv_dma vmbus_mnf2_dma; bool vmbus_scandone; struct task vmbus_scandone_task; struct taskqueue *vmbus_devtq; /* for dev attach/detach */ struct taskqueue *vmbus_subchtq; /* for sub-chan attach/detach */ /* Primary channels */ struct mtx vmbus_prichan_lock; TAILQ_HEAD(, vmbus_channel) vmbus_prichans; /* Complete channel list */ struct mtx vmbus_chan_lock; TAILQ_HEAD(, vmbus_channel) vmbus_chans; #ifdef NEW_PCIB /* The list of usable MMIO ranges for PCIe pass-through */ struct pcib_host_resources vmbus_mmio_res; #endif }; #define VMBUS_FLAG_ATTACHED 0x0001 /* vmbus was attached */ #define VMBUS_FLAG_SYNIC 0x0002 /* SynIC was setup */ #define VMBUS_PCPU_GET(sc, field, cpu) (sc)->vmbus_pcpu[(cpu)].field #define VMBUS_PCPU_PTR(sc, field, cpu) &(sc)->vmbus_pcpu[(cpu)].field struct vmbus_channel; struct trapframe; struct vmbus_message; struct vmbus_msghc; void vmbus_handle_intr(struct trapframe *); int vmbus_add_child(struct vmbus_channel *); int vmbus_delete_child(struct vmbus_channel *); void vmbus_et_intr(struct trapframe *); uint32_t vmbus_gpadl_alloc(struct vmbus_softc *); struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *, size_t); void vmbus_msghc_put(struct vmbus_softc *, struct vmbus_msghc *); void *vmbus_msghc_dataptr(struct vmbus_msghc *); int vmbus_msghc_exec_noresult(struct vmbus_msghc *); int vmbus_msghc_exec(struct vmbus_softc *, struct vmbus_msghc *); +void vmbus_msghc_exec_cancel(struct vmbus_softc *, + struct vmbus_msghc *); const struct vmbus_message * 
vmbus_msghc_wait_result(struct vmbus_softc *, + struct vmbus_msghc *); +const struct vmbus_message * + vmbus_msghc_poll_result(struct vmbus_softc *, struct vmbus_msghc *); void vmbus_msghc_wakeup(struct vmbus_softc *, const struct vmbus_message *); void vmbus_msghc_reset(struct vmbus_msghc *, size_t); #endif /* !_VMBUS_VAR_H_ */ Index: projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_xact.c =================================================================== --- projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_xact.c (revision 309262) +++ projects/clang391-import/sys/dev/hyperv/vmbus/vmbus_xact.c (revision 309263) @@ -1,406 +1,442 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include struct vmbus_xact { struct vmbus_xact_ctx *x_ctx; void *x_priv; void *x_req; struct hyperv_dma x_req_dma; const void *x_resp; size_t x_resp_len; void *x_resp0; }; struct vmbus_xact_ctx { size_t xc_req_size; size_t xc_resp_size; size_t xc_priv_size; struct mtx xc_lock; /* * Protected by xc_lock. 
*/ uint32_t xc_flags; /* VMBUS_XACT_CTXF_ */ struct vmbus_xact *xc_free; struct vmbus_xact *xc_active; struct vmbus_xact *xc_orphan; }; #define VMBUS_XACT_CTXF_DESTROY 0x0001 static struct vmbus_xact *vmbus_xact_alloc(struct vmbus_xact_ctx *, bus_dma_tag_t); static void vmbus_xact_free(struct vmbus_xact *); static struct vmbus_xact *vmbus_xact_get1(struct vmbus_xact_ctx *, uint32_t); -const void *vmbus_xact_wait1(struct vmbus_xact *, size_t *, +static const void *vmbus_xact_wait1(struct vmbus_xact *, size_t *, bool); +static const void *vmbus_xact_return(struct vmbus_xact *, + size_t *); static void vmbus_xact_save_resp(struct vmbus_xact *, const void *, size_t); static void vmbus_xact_ctx_free(struct vmbus_xact_ctx *); static struct vmbus_xact * vmbus_xact_alloc(struct vmbus_xact_ctx *ctx, bus_dma_tag_t parent_dtag) { struct vmbus_xact *xact; xact = malloc(sizeof(*xact), M_DEVBUF, M_WAITOK | M_ZERO); xact->x_ctx = ctx; /* XXX assume that page aligned is enough */ xact->x_req = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, ctx->xc_req_size, &xact->x_req_dma, BUS_DMA_WAITOK); if (xact->x_req == NULL) { free(xact, M_DEVBUF); return (NULL); } if (ctx->xc_priv_size != 0) xact->x_priv = malloc(ctx->xc_priv_size, M_DEVBUF, M_WAITOK); xact->x_resp0 = malloc(ctx->xc_resp_size, M_DEVBUF, M_WAITOK); return (xact); } static void vmbus_xact_free(struct vmbus_xact *xact) { hyperv_dmamem_free(&xact->x_req_dma, xact->x_req); free(xact->x_resp0, M_DEVBUF); if (xact->x_priv != NULL) free(xact->x_priv, M_DEVBUF); free(xact, M_DEVBUF); } static struct vmbus_xact * vmbus_xact_get1(struct vmbus_xact_ctx *ctx, uint32_t dtor_flag) { struct vmbus_xact *xact; mtx_lock(&ctx->xc_lock); while ((ctx->xc_flags & dtor_flag) == 0 && ctx->xc_free == NULL) mtx_sleep(&ctx->xc_free, &ctx->xc_lock, 0, "gxact", 0); if (ctx->xc_flags & dtor_flag) { /* Being destroyed */ xact = NULL; } else { xact = ctx->xc_free; KASSERT(xact != NULL, ("no free xact")); KASSERT(xact->x_resp == NULL, ("xact has pending response")); ctx->xc_free = NULL; } mtx_unlock(&ctx->xc_lock); return (xact); } struct vmbus_xact_ctx * vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size, size_t priv_size) { struct vmbus_xact_ctx *ctx; KASSERT(req_size > 0, ("request size is 0")); KASSERT(resp_size > 0, ("response size is 0")); ctx = malloc(sizeof(*ctx), M_DEVBUF, M_WAITOK | M_ZERO); ctx->xc_req_size = req_size; ctx->xc_resp_size = resp_size; ctx->xc_priv_size = priv_size; ctx->xc_free = vmbus_xact_alloc(ctx, dtag); if (ctx->xc_free == NULL) { free(ctx, M_DEVBUF); return (NULL); } mtx_init(&ctx->xc_lock, "vmbus xact", NULL, MTX_DEF); return (ctx); } bool vmbus_xact_ctx_orphan(struct vmbus_xact_ctx *ctx) { mtx_lock(&ctx->xc_lock); if (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) { mtx_unlock(&ctx->xc_lock); return (false); } ctx->xc_flags |= VMBUS_XACT_CTXF_DESTROY; mtx_unlock(&ctx->xc_lock); wakeup(&ctx->xc_free); wakeup(&ctx->xc_active); ctx->xc_orphan = vmbus_xact_get1(ctx, 0); if (ctx->xc_orphan == NULL) panic("can't get xact"); return (true); } static void vmbus_xact_ctx_free(struct vmbus_xact_ctx *ctx) { KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY, ("xact ctx was not orphaned")); KASSERT(ctx->xc_orphan != NULL, ("no orphaned xact")); vmbus_xact_free(ctx->xc_orphan); mtx_destroy(&ctx->xc_lock); free(ctx, M_DEVBUF); } void vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx) { vmbus_xact_ctx_orphan(ctx); vmbus_xact_ctx_free(ctx); } struct vmbus_xact * vmbus_xact_get(struct vmbus_xact_ctx *ctx, size_t req_len) { struct vmbus_xact *xact; 
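vmbus_xact_get1() above hands out the context's single pre-allocated transaction, sleeping on xc_free until it is returned or the context is being destroyed. A pthread sketch of that one-slot handoff; the token pointer is a stand-in for the real struct vmbus_xact:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t xc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t xc_free_cv = PTHREAD_COND_INITIALIZER;
static void *xc_free = (void *)1;	/* toy token for the single xact */
static int xc_destroying;

static void *
xact_get(void)
{
	void *xact;

	pthread_mutex_lock(&xc_lock);
	while (!xc_destroying && xc_free == NULL)
		pthread_cond_wait(&xc_free_cv, &xc_lock);  /* "gxact" sleep */
	if (xc_destroying) {
		xact = NULL;		/* being destroyed */
	} else {
		xact = xc_free;
		xc_free = NULL;
	}
	pthread_mutex_unlock(&xc_lock);
	return (xact);
}

static void
xact_put(void *xact)
{
	pthread_mutex_lock(&xc_lock);
	xc_free = xact;
	pthread_mutex_unlock(&xc_lock);
	pthread_cond_signal(&xc_free_cv);	/* wakeup(&ctx->xc_free) */
}

int
main(void)
{
	void *x = xact_get();

	printf("got xact %p\n", x);
	xact_put(x);
	return (0);
}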
if (req_len > ctx->xc_req_size) panic("invalid request size %zu", req_len); xact = vmbus_xact_get1(ctx, VMBUS_XACT_CTXF_DESTROY); if (xact == NULL) return (NULL); memset(xact->x_req, 0, req_len); return (xact); } void vmbus_xact_put(struct vmbus_xact *xact) { struct vmbus_xact_ctx *ctx = xact->x_ctx; KASSERT(ctx->xc_active == NULL, ("pending active xact")); xact->x_resp = NULL; mtx_lock(&ctx->xc_lock); KASSERT(ctx->xc_free == NULL, ("has free xact")); ctx->xc_free = xact; mtx_unlock(&ctx->xc_lock); wakeup(&ctx->xc_free); } void * vmbus_xact_req_data(const struct vmbus_xact *xact) { return (xact->x_req); } bus_addr_t vmbus_xact_req_paddr(const struct vmbus_xact *xact) { return (xact->x_req_dma.hv_paddr); } void * vmbus_xact_priv(const struct vmbus_xact *xact, size_t priv_len) { if (priv_len > xact->x_ctx->xc_priv_size) panic("invalid priv size %zu", priv_len); return (xact->x_priv); } void vmbus_xact_activate(struct vmbus_xact *xact) { struct vmbus_xact_ctx *ctx = xact->x_ctx; KASSERT(xact->x_resp == NULL, ("xact has pending response")); mtx_lock(&ctx->xc_lock); KASSERT(ctx->xc_active == NULL, ("pending active xact")); ctx->xc_active = xact; mtx_unlock(&ctx->xc_lock); } void vmbus_xact_deactivate(struct vmbus_xact *xact) { struct vmbus_xact_ctx *ctx = xact->x_ctx; mtx_lock(&ctx->xc_lock); KASSERT(ctx->xc_active == xact, ("xact mismatch")); ctx->xc_active = NULL; mtx_unlock(&ctx->xc_lock); } -const void * -vmbus_xact_wait1(struct vmbus_xact *xact, size_t *resp_len, - bool can_sleep) +static const void * +vmbus_xact_return(struct vmbus_xact *xact, size_t *resp_len) { struct vmbus_xact_ctx *ctx = xact->x_ctx; const void *resp; - mtx_lock(&ctx->xc_lock); - - KASSERT(ctx->xc_active == xact, ("xact mismatch")); - while (xact->x_resp == NULL && - (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) == 0) { - if (can_sleep) { - mtx_sleep(&ctx->xc_active, &ctx->xc_lock, 0, - "wxact", 0); - } else { - mtx_unlock(&ctx->xc_lock); - DELAY(1000); - mtx_lock(&ctx->xc_lock); - } - } + mtx_assert(&ctx->xc_lock, MA_OWNED); KASSERT(ctx->xc_active == xact, ("xact trashed")); if ((ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) && xact->x_resp == NULL) { uint8_t b = 0; /* * Orphaned and no response was received yet; fake up * an one byte response. 
*/ printf("vmbus: xact ctx was orphaned w/ pending xact\n"); vmbus_xact_save_resp(ctx->xc_active, &b, sizeof(b)); } KASSERT(xact->x_resp != NULL, ("no response")); ctx->xc_active = NULL; resp = xact->x_resp; *resp_len = xact->x_resp_len; + return (resp); +} + +static const void * +vmbus_xact_wait1(struct vmbus_xact *xact, size_t *resp_len, + bool can_sleep) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + const void *resp; + + mtx_lock(&ctx->xc_lock); + + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + while (xact->x_resp == NULL && + (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) == 0) { + if (can_sleep) { + mtx_sleep(&ctx->xc_active, &ctx->xc_lock, 0, + "wxact", 0); + } else { + mtx_unlock(&ctx->xc_lock); + DELAY(1000); + mtx_lock(&ctx->xc_lock); + } + } + resp = vmbus_xact_return(xact, resp_len); + mtx_unlock(&ctx->xc_lock); return (resp); } const void * vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len) { return (vmbus_xact_wait1(xact, resp_len, true /* can sleep */)); } const void * vmbus_xact_busywait(struct vmbus_xact *xact, size_t *resp_len) { return (vmbus_xact_wait1(xact, resp_len, false /* can't sleep */)); +} + +const void * +vmbus_xact_poll(struct vmbus_xact *xact, size_t *resp_len) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + const void *resp; + + mtx_lock(&ctx->xc_lock); + + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + if (xact->x_resp == NULL && + (ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY) == 0) { + mtx_unlock(&ctx->xc_lock); + *resp_len = 0; + return (NULL); + } + resp = vmbus_xact_return(xact, resp_len); + + mtx_unlock(&ctx->xc_lock); + + return (resp); } static void vmbus_xact_save_resp(struct vmbus_xact *xact, const void *data, size_t dlen) { struct vmbus_xact_ctx *ctx = xact->x_ctx; size_t cplen = dlen; mtx_assert(&ctx->xc_lock, MA_OWNED); if (cplen > ctx->xc_resp_size) { printf("vmbus: xact response truncated %zu -> %zu\n", cplen, ctx->xc_resp_size); cplen = ctx->xc_resp_size; } KASSERT(ctx->xc_active == xact, ("xact mismatch")); memcpy(xact->x_resp0, data, cplen); xact->x_resp_len = cplen; xact->x_resp = xact->x_resp0; } void vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen) { struct vmbus_xact_ctx *ctx = xact->x_ctx; int do_wakeup = 0; mtx_lock(&ctx->xc_lock); /* * NOTE: * xc_active could be NULL, if the ctx has been orphaned. */ if (ctx->xc_active != NULL) { vmbus_xact_save_resp(xact, data, dlen); do_wakeup = 1; } else { KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY, ("no active xact pending")); printf("vmbus: drop xact response\n"); } mtx_unlock(&ctx->xc_lock); if (do_wakeup) wakeup(&ctx->xc_active); } void vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, const void *data, size_t dlen) { int do_wakeup = 0; mtx_lock(&ctx->xc_lock); /* * NOTE: * xc_active could be NULL, if the ctx has been orphaned. 
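vmbus_xact_poll() added above is the non-blocking sibling of vmbus_xact_wait(): it returns NULL (with *resp_len zeroed) when no reply has arrived yet, which is exactly what the REVOKE_LINGER loop in the channel-open path needs for bounded polling before canceling the hypercall. A toy of that bounded poll-then-cancel pattern:

#include <stdio.h>
#include <unistd.h>

static int replies;

/* Toy poll: non-blocking, NULL until a reply shows up. */
static const char *
xact_poll(void)
{
	return (++replies >= 50 ? "response" : NULL);
}

int
main(void)
{
	const char *msg = NULL;
	int i;

	/* Bounded poll, then cancel, like REVOKE_LINGER in the open path. */
	for (i = 0; i < 100; i++) {
		msg = xact_poll();
		if (msg != NULL)
			break;
		usleep(1000);		/* DELAY(1000) analogue */
	}
	if (msg == NULL)
		printf("no reply; would cancel the pending hypercall\n");
	else
		printf("got %s after %d poll(s)\n", msg, i + 1);
	return (0);
}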
*/ if (ctx->xc_active != NULL) { vmbus_xact_save_resp(ctx->xc_active, data, dlen); do_wakeup = 1; } else { KASSERT(ctx->xc_flags & VMBUS_XACT_CTXF_DESTROY, ("no active xact pending")); printf("vmbus: drop xact response\n"); } mtx_unlock(&ctx->xc_lock); if (do_wakeup) wakeup(&ctx->xc_active); } Index: projects/clang391-import/sys/dev/usb/wlan/if_rsu.c =================================================================== --- projects/clang391-import/sys/dev/usb/wlan/if_rsu.c (revision 309262) +++ projects/clang391-import/sys/dev/usb/wlan/if_rsu.c (revision 309263) @@ -1,2980 +1,3053 @@ /* $OpenBSD: if_rsu.c,v 1.17 2013/04/15 09:23:01 mglocker Exp $ */ /*- * Copyright (c) 2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for Realtek RTL8188SU/RTL8191SU/RTL8192SU. * * TODO: * o h/w crypto * o hostap / ibss / mesh * o sensible RSSI levels * o power-save operation */ #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "usbdevs.h" #define USB_DEBUG_VAR rsu_debug #include #include #ifdef USB_DEBUG static int rsu_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, rsu, CTLFLAG_RW, 0, "USB rsu"); SYSCTL_INT(_hw_usb_rsu, OID_AUTO, debug, CTLFLAG_RWTUN, &rsu_debug, 0, "Debug level"); #define RSU_DPRINTF(_sc, _flg, ...) \ do \ if (((_flg) == (RSU_DEBUG_ANY)) || (rsu_debug & (_flg))) \ device_printf((_sc)->sc_dev, __VA_ARGS__); \ while (0) #else #define RSU_DPRINTF(_sc, _flg, ...) 
#endif static int rsu_enable_11n = 1; TUNABLE_INT("hw.usb.rsu.enable_11n", &rsu_enable_11n); #define RSU_DEBUG_ANY 0xffffffff #define RSU_DEBUG_TX 0x00000001 #define RSU_DEBUG_RX 0x00000002 #define RSU_DEBUG_RESET 0x00000004 #define RSU_DEBUG_CALIB 0x00000008 #define RSU_DEBUG_STATE 0x00000010 #define RSU_DEBUG_SCAN 0x00000020 #define RSU_DEBUG_FWCMD 0x00000040 #define RSU_DEBUG_TXDONE 0x00000080 #define RSU_DEBUG_FW 0x00000100 #define RSU_DEBUG_FWDBG 0x00000200 #define RSU_DEBUG_AMPDU 0x00000400 static const STRUCT_USB_HOST_ID rsu_devs[] = { #define RSU_HT_NOT_SUPPORTED 0 #define RSU_HT_SUPPORTED 1 #define RSU_DEV_HT(v,p) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, \ RSU_HT_SUPPORTED) } #define RSU_DEV(v,p) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, \ RSU_HT_NOT_SUPPORTED) } RSU_DEV(ASUS, RTL8192SU), RSU_DEV(AZUREWAVE, RTL8192SU_4), RSU_DEV_HT(ACCTON, RTL8192SU), RSU_DEV_HT(ASUS, USBN10), RSU_DEV_HT(AZUREWAVE, RTL8192SU_1), RSU_DEV_HT(AZUREWAVE, RTL8192SU_2), RSU_DEV_HT(AZUREWAVE, RTL8192SU_3), RSU_DEV_HT(AZUREWAVE, RTL8192SU_5), RSU_DEV_HT(BELKIN, RTL8192SU_1), RSU_DEV_HT(BELKIN, RTL8192SU_2), RSU_DEV_HT(BELKIN, RTL8192SU_3), RSU_DEV_HT(CONCEPTRONIC2, RTL8192SU_1), RSU_DEV_HT(CONCEPTRONIC2, RTL8192SU_2), RSU_DEV_HT(CONCEPTRONIC2, RTL8192SU_3), RSU_DEV_HT(COREGA, RTL8192SU), RSU_DEV_HT(DLINK2, DWA131A1), RSU_DEV_HT(DLINK2, RTL8192SU_1), RSU_DEV_HT(DLINK2, RTL8192SU_2), RSU_DEV_HT(EDIMAX, RTL8192SU_1), RSU_DEV_HT(EDIMAX, RTL8192SU_2), RSU_DEV_HT(EDIMAX, EW7622UMN), RSU_DEV_HT(GUILLEMOT, HWGUN54), RSU_DEV_HT(GUILLEMOT, HWNUM300), RSU_DEV_HT(HAWKING, RTL8192SU_1), RSU_DEV_HT(HAWKING, RTL8192SU_2), RSU_DEV_HT(PLANEX2, GWUSNANO), RSU_DEV_HT(REALTEK, RTL8171), RSU_DEV_HT(REALTEK, RTL8172), RSU_DEV_HT(REALTEK, RTL8173), RSU_DEV_HT(REALTEK, RTL8174), RSU_DEV_HT(REALTEK, RTL8192SU), RSU_DEV_HT(REALTEK, RTL8712), RSU_DEV_HT(REALTEK, RTL8713), RSU_DEV_HT(SENAO, RTL8192SU_1), RSU_DEV_HT(SENAO, RTL8192SU_2), RSU_DEV_HT(SITECOMEU, WL349V1), RSU_DEV_HT(SITECOMEU, WL353), RSU_DEV_HT(SWEEX2, LW154), RSU_DEV_HT(TRENDNET, TEW646UBH), #undef RSU_DEV_HT #undef RSU_DEV }; static device_probe_t rsu_match; static device_attach_t rsu_attach; static device_detach_t rsu_detach; static usb_callback_t rsu_bulk_tx_callback_be_bk; static usb_callback_t rsu_bulk_tx_callback_vi_vo; static usb_callback_t rsu_bulk_tx_callback_h2c; static usb_callback_t rsu_bulk_rx_callback; static usb_error_t rsu_do_request(struct rsu_softc *, struct usb_device_request *, void *); static struct ieee80211vap * rsu_vap_create(struct ieee80211com *, const char name[], int, enum ieee80211_opmode, int, const uint8_t bssid[], const uint8_t mac[]); static void rsu_vap_delete(struct ieee80211vap *); static void rsu_scan_start(struct ieee80211com *); static void rsu_scan_end(struct ieee80211com *); static void rsu_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel[]); static void rsu_set_channel(struct ieee80211com *); static void rsu_scan_curchan(struct ieee80211_scan_state *, unsigned long); static void rsu_scan_mindwell(struct ieee80211_scan_state *); +static uint8_t rsu_get_multi_pos(const uint8_t[]); +static void rsu_set_multi(struct rsu_softc *); static void rsu_update_mcast(struct ieee80211com *); static int rsu_alloc_rx_list(struct rsu_softc *); static void rsu_free_rx_list(struct rsu_softc *); static int rsu_alloc_tx_list(struct rsu_softc *); static void rsu_free_tx_list(struct rsu_softc *); static void rsu_free_list(struct rsu_softc *, struct rsu_data [], int); static struct rsu_data *_rsu_getbuf(struct 
rsu_softc *); static struct rsu_data *rsu_getbuf(struct rsu_softc *); static void rsu_freebuf(struct rsu_softc *, struct rsu_data *); static int rsu_write_region_1(struct rsu_softc *, uint16_t, uint8_t *, int); static void rsu_write_1(struct rsu_softc *, uint16_t, uint8_t); static void rsu_write_2(struct rsu_softc *, uint16_t, uint16_t); static void rsu_write_4(struct rsu_softc *, uint16_t, uint32_t); static int rsu_read_region_1(struct rsu_softc *, uint16_t, uint8_t *, int); static uint8_t rsu_read_1(struct rsu_softc *, uint16_t); static uint16_t rsu_read_2(struct rsu_softc *, uint16_t); static uint32_t rsu_read_4(struct rsu_softc *, uint16_t); static int rsu_fw_iocmd(struct rsu_softc *, uint32_t); static uint8_t rsu_efuse_read_1(struct rsu_softc *, uint16_t); static int rsu_read_rom(struct rsu_softc *); static int rsu_fw_cmd(struct rsu_softc *, uint8_t, void *, int); static void rsu_calib_task(void *, int); static void rsu_tx_task(void *, int); static int rsu_newstate(struct ieee80211vap *, enum ieee80211_state, int); #ifdef notyet static void rsu_set_key(struct rsu_softc *, const struct ieee80211_key *); static void rsu_delete_key(struct rsu_softc *, const struct ieee80211_key *); #endif static int rsu_site_survey(struct rsu_softc *, struct ieee80211_scan_ssid *); static int rsu_join_bss(struct rsu_softc *, struct ieee80211_node *); static int rsu_disconnect(struct rsu_softc *); static int rsu_hwrssi_to_rssi(struct rsu_softc *, int hw_rssi); static void rsu_event_survey(struct rsu_softc *, uint8_t *, int); static void rsu_event_join_bss(struct rsu_softc *, uint8_t *, int); static void rsu_rx_event(struct rsu_softc *, uint8_t, uint8_t *, int); static void rsu_rx_multi_event(struct rsu_softc *, uint8_t *, int); #if 0 static int8_t rsu_get_rssi(struct rsu_softc *, int, void *); #endif static struct mbuf * rsu_rx_frame(struct rsu_softc *, uint8_t *, int); static struct mbuf * rsu_rx_multi_frame(struct rsu_softc *, uint8_t *, int); static struct mbuf * rsu_rxeof(struct usb_xfer *, struct rsu_data *); static void rsu_txeof(struct usb_xfer *, struct rsu_data *); static int rsu_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void rsu_init(struct rsu_softc *); static int rsu_tx_start(struct rsu_softc *, struct ieee80211_node *, struct mbuf *, struct rsu_data *); static int rsu_transmit(struct ieee80211com *, struct mbuf *); static void rsu_start(struct rsu_softc *); static void _rsu_start(struct rsu_softc *); static void rsu_parent(struct ieee80211com *); static void rsu_stop(struct rsu_softc *); static void rsu_ms_delay(struct rsu_softc *, int); static device_method_t rsu_methods[] = { DEVMETHOD(device_probe, rsu_match), DEVMETHOD(device_attach, rsu_attach), DEVMETHOD(device_detach, rsu_detach), DEVMETHOD_END }; static driver_t rsu_driver = { .name = "rsu", .methods = rsu_methods, .size = sizeof(struct rsu_softc) }; static devclass_t rsu_devclass; DRIVER_MODULE(rsu, uhub, rsu_driver, rsu_devclass, NULL, 0); MODULE_DEPEND(rsu, wlan, 1, 1, 1); MODULE_DEPEND(rsu, usb, 1, 1, 1); MODULE_DEPEND(rsu, firmware, 1, 1, 1); MODULE_VERSION(rsu, 1); USB_PNP_HOST_INFO(rsu_devs); static const uint8_t rsu_chan_2ghz[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; static uint8_t rsu_wme_ac_xfer_map[4] = { [WME_AC_BE] = RSU_BULK_TX_BE_BK, [WME_AC_BK] = RSU_BULK_TX_BE_BK, [WME_AC_VI] = RSU_BULK_TX_VI_VO, [WME_AC_VO] = RSU_BULK_TX_VI_VO, }; /* XXX hard-coded */ #define RSU_H2C_ENDPOINT 3 static const struct usb_config rsu_config[RSU_N_TRANSFER] = { [RSU_BULK_RX] 
= { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_IN, .bufsize = RSU_RXBUFSZ, .flags = { .pipe_bof = 1, .short_xfer_ok = 1 }, .callback = rsu_bulk_rx_callback }, [RSU_BULK_TX_BE_BK] = { .type = UE_BULK, .endpoint = 0x06, .direction = UE_DIR_OUT, .bufsize = RSU_TXBUFSZ, .flags = { .ext_buffer = 1, .pipe_bof = 1, .force_short_xfer = 1 }, .callback = rsu_bulk_tx_callback_be_bk, .timeout = RSU_TX_TIMEOUT }, [RSU_BULK_TX_VI_VO] = { .type = UE_BULK, .endpoint = 0x04, .direction = UE_DIR_OUT, .bufsize = RSU_TXBUFSZ, .flags = { .ext_buffer = 1, .pipe_bof = 1, .force_short_xfer = 1 }, .callback = rsu_bulk_tx_callback_vi_vo, .timeout = RSU_TX_TIMEOUT }, [RSU_BULK_TX_H2C] = { .type = UE_BULK, .endpoint = 0x0d, .direction = UE_DIR_OUT, .bufsize = RSU_TXBUFSZ, .flags = { .ext_buffer = 1, .pipe_bof = 1, .short_xfer_ok = 1 }, .callback = rsu_bulk_tx_callback_h2c, .timeout = RSU_TX_TIMEOUT }, }; static int rsu_match(device_t self) { struct usb_attach_arg *uaa = device_get_ivars(self); if (uaa->usb_mode != USB_MODE_HOST || uaa->info.bIfaceIndex != 0 || uaa->info.bConfigIndex != 0) return (ENXIO); return (usbd_lookup_id_by_uaa(rsu_devs, sizeof(rsu_devs), uaa)); } static int rsu_send_mgmt(struct ieee80211_node *ni, int type, int arg) { return (ENOTSUP); } static void rsu_update_chw(struct ieee80211com *ic) { } /* * notification from net80211 that it'd like to do A-MPDU on the given TID. * * Note: this actually hangs traffic at the present moment, so don't use it. * The firmware debug does indiciate it's sending and establishing a TX AMPDU * session, but then no traffic flows. */ static int rsu_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { #if 0 struct rsu_softc *sc = ni->ni_ic->ic_softc; struct r92s_add_ba_req req; /* Don't enable if it's requested or running */ if (IEEE80211_AMPDU_REQUESTED(tap)) return (0); if (IEEE80211_AMPDU_RUNNING(tap)) return (0); /* We've decided to send addba; so send it */ req.tid = htole32(tap->txa_tid); /* Attempt net80211 state */ if (ieee80211_ampdu_tx_request_ext(ni, tap->txa_tid) != 1) return (0); /* Send the firmware command */ RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: establishing AMPDU TX for TID %d\n", __func__, tap->txa_tid); RSU_LOCK(sc); if (rsu_fw_cmd(sc, R92S_CMD_ADDBA_REQ, &req, sizeof(req)) != 1) { RSU_UNLOCK(sc); /* Mark failure */ (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 0); return (0); } RSU_UNLOCK(sc); /* Mark success; we don't get any further notifications */ (void) ieee80211_ampdu_tx_request_active_ext(ni, tap->txa_tid, 1); #endif /* Return 0, we're driving this ourselves */ return (0); } static int rsu_wme_update(struct ieee80211com *ic) { /* Firmware handles this; not our problem */ return (0); } static int rsu_attach(device_t self) { struct usb_attach_arg *uaa = device_get_ivars(self); struct rsu_softc *sc = device_get_softc(self); struct ieee80211com *ic = &sc->sc_ic; int error; uint8_t iface_index; struct usb_interface *iface; const char *rft; device_set_usb_desc(self); sc->sc_udev = uaa->device; sc->sc_dev = self; if (rsu_enable_11n) sc->sc_ht = !! 
(USB_GET_DRIVER_INFO(uaa) & RSU_HT_SUPPORTED); /* Get number of endpoints */ iface = usbd_get_iface(sc->sc_udev, 0); sc->sc_nendpoints = iface->idesc->bNumEndpoints; /* Endpoints are hard-coded for now, so enforce 4-endpoint only */ if (sc->sc_nendpoints != 4) { device_printf(sc->sc_dev, "the driver currently only supports 4-endpoint devices\n"); return (ENXIO); } mtx_init(&sc->sc_mtx, device_get_nameunit(self), MTX_NETWORK_LOCK, MTX_DEF); TIMEOUT_TASK_INIT(taskqueue_thread, &sc->calib_task, 0, rsu_calib_task, sc); TASK_INIT(&sc->tx_task, 0, rsu_tx_task, sc); mbufq_init(&sc->sc_snd, ifqmaxlen); /* Allocate Tx/Rx buffers. */ error = rsu_alloc_rx_list(sc); if (error != 0) { device_printf(sc->sc_dev, "could not allocate Rx buffers\n"); goto fail_usb; } error = rsu_alloc_tx_list(sc); if (error != 0) { device_printf(sc->sc_dev, "could not allocate Tx buffers\n"); rsu_free_rx_list(sc); goto fail_usb; } iface_index = 0; error = usbd_transfer_setup(uaa->device, &iface_index, sc->sc_xfer, rsu_config, RSU_N_TRANSFER, sc, &sc->sc_mtx); if (error) { device_printf(sc->sc_dev, "could not allocate USB transfers, err=%s\n", usbd_errstr(error)); goto fail_usb; } RSU_LOCK(sc); /* Read chip revision. */ sc->cut = MS(rsu_read_4(sc, R92S_PMC_FSM), R92S_PMC_FSM_CUT); if (sc->cut != 3) sc->cut = (sc->cut >> 1) + 1; error = rsu_read_rom(sc); RSU_UNLOCK(sc); if (error != 0) { device_printf(self, "could not read ROM\n"); goto fail_rom; } /* Figure out TX/RX streams */ switch (sc->rom[84]) { case 0x0: sc->sc_rftype = RTL8712_RFCONFIG_1T1R; sc->sc_nrxstream = 1; sc->sc_ntxstream = 1; rft = "1T1R"; break; case 0x1: sc->sc_rftype = RTL8712_RFCONFIG_1T2R; sc->sc_nrxstream = 2; sc->sc_ntxstream = 1; rft = "1T2R"; break; case 0x2: sc->sc_rftype = RTL8712_RFCONFIG_2T2R; sc->sc_nrxstream = 2; sc->sc_ntxstream = 2; rft = "2T2R"; break; default: device_printf(sc->sc_dev, "%s: unknown board type (rfconfig=0x%02x)\n", __func__, sc->rom[84]); goto fail_rom; } IEEE80211_ADDR_COPY(ic->ic_macaddr, &sc->rom[0x12]); device_printf(self, "MAC/BB RTL8712 cut %d %s\n", sc->cut, rft); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(self); ic->ic_phytype = IEEE80211_T_OFDM; /* Not only, but not used. */ ic->ic_opmode = IEEE80211_M_STA; /* Default to BSS mode. */ /* Set device capabilities. */ ic->ic_caps = IEEE80211_C_STA | /* station mode */ #if 0 IEEE80211_C_BGSCAN | /* Background scan. */ #endif IEEE80211_C_SHPREAMBLE | /* Short preamble supported. */ IEEE80211_C_WME | /* WME/QoS */ IEEE80211_C_SHSLOT | /* Short slot time supported. */ IEEE80211_C_WPA; /* WPA/RSN. */ /* Check if HT support is present. 
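* (sc_ht was latched just above from the USB ID table's RSU_HT_SUPPORTED
* flag, and only when rsu_enable_11n is set.)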
*/ if (sc->sc_ht) { device_printf(sc->sc_dev, "%s: enabling 11n\n", __func__); /* Enable basic HT */ ic->ic_htcaps = IEEE80211_HTC_HT | #if 0 IEEE80211_HTC_AMPDU | #endif IEEE80211_HTC_AMSDU | IEEE80211_HTCAP_MAXAMSDU_3839 | IEEE80211_HTCAP_SMPS_OFF; ic->ic_htcaps |= IEEE80211_HTCAP_CHWIDTH40; /* set number of spatial streams */ ic->ic_txstream = sc->sc_ntxstream; ic->ic_rxstream = sc->sc_nrxstream; } ic->ic_flags_ext |= IEEE80211_FEXT_SCAN_OFFLOAD; rsu_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); ieee80211_ifattach(ic); ic->ic_raw_xmit = rsu_raw_xmit; ic->ic_scan_start = rsu_scan_start; ic->ic_scan_end = rsu_scan_end; ic->ic_getradiocaps = rsu_getradiocaps; ic->ic_set_channel = rsu_set_channel; ic->ic_scan_curchan = rsu_scan_curchan; ic->ic_scan_mindwell = rsu_scan_mindwell; ic->ic_vap_create = rsu_vap_create; ic->ic_vap_delete = rsu_vap_delete; ic->ic_update_mcast = rsu_update_mcast; ic->ic_parent = rsu_parent; ic->ic_transmit = rsu_transmit; ic->ic_send_mgmt = rsu_send_mgmt; ic->ic_update_chw = rsu_update_chw; ic->ic_ampdu_enable = rsu_ampdu_enable; ic->ic_wme.wme_update = rsu_wme_update; ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), RSU_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), RSU_RX_RADIOTAP_PRESENT); if (bootverbose) ieee80211_announce(ic); return (0); fail_rom: usbd_transfer_unsetup(sc->sc_xfer, RSU_N_TRANSFER); fail_usb: mtx_destroy(&sc->sc_mtx); return (ENXIO); } static int rsu_detach(device_t self) { struct rsu_softc *sc = device_get_softc(self); struct ieee80211com *ic = &sc->sc_ic; RSU_LOCK(sc); rsu_stop(sc); RSU_UNLOCK(sc); usbd_transfer_unsetup(sc->sc_xfer, RSU_N_TRANSFER); /* * Free buffers /before/ we detach from net80211, else node * references to destroyed vaps will lead to a panic. */ /* Free Tx/Rx buffers. 
*/ RSU_LOCK(sc); rsu_free_tx_list(sc); rsu_free_rx_list(sc); RSU_UNLOCK(sc); /* Frames are freed; detach from net80211 */ ieee80211_ifdetach(ic); taskqueue_drain_timeout(taskqueue_thread, &sc->calib_task); taskqueue_drain(taskqueue_thread, &sc->tx_task); mtx_destroy(&sc->sc_mtx); return (0); } static usb_error_t rsu_do_request(struct rsu_softc *sc, struct usb_device_request *req, void *data) { usb_error_t err; int ntries = 10; RSU_ASSERT_LOCKED(sc); while (ntries--) { err = usbd_do_request_flags(sc->sc_udev, &sc->sc_mtx, req, data, 0, NULL, 250 /* ms */); if (err == 0 || err == USB_ERR_NOT_CONFIGURED) break; DPRINTFN(1, "Control request failed, %s (retrying)\n", usbd_errstr(err)); rsu_ms_delay(sc, 10); } return (err); } static struct ieee80211vap * rsu_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct rsu_vap *uvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return (NULL); uvp = malloc(sizeof(struct rsu_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &uvp->vap; if (ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid) != 0) { /* out of memory */ free(uvp, M_80211_VAP); return (NULL); } /* override state transition machine */ uvp->newstate = vap->iv_newstate; vap->iv_newstate = rsu_newstate; /* Limits from the r92su driver */ vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_16; vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_32K; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return (vap); } static void rsu_vap_delete(struct ieee80211vap *vap) { struct rsu_vap *uvp = RSU_VAP(vap); ieee80211_vap_detach(vap); free(uvp, M_80211_VAP); } static void rsu_scan_start(struct ieee80211com *ic) { struct rsu_softc *sc = ic->ic_softc; struct ieee80211_scan_state *ss = ic->ic_scan; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); int error; /* Scanning is done by the firmware. */ RSU_LOCK(sc); sc->sc_active_scan = !!(ss->ss_flags & IEEE80211_SCAN_ACTIVE); /* XXX TODO: force awake if in network-sleep? */ error = rsu_site_survey(sc, ss->ss_nssid > 0 ? &ss->ss_ssid[0] : NULL); RSU_UNLOCK(sc); if (error != 0) { device_printf(sc->sc_dev, "could not send site survey command\n"); ieee80211_cancel_scan(vap); } } static void rsu_scan_end(struct ieee80211com *ic) { /* Nothing to do here. */ } static void rsu_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct rsu_softc *sc = ic->ic_softc; uint8_t bands[IEEE80211_MODE_BYTES]; /* Set supported .11b and .11g rates. */ memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); if (sc->sc_ht) setbit(bands, IEEE80211_MODE_11NG); ieee80211_add_channel_list_2ghz(chans, maxchans, nchans, rsu_chan_2ghz, nitems(rsu_chan_2ghz), bands, 0); } static void rsu_set_channel(struct ieee80211com *ic __unused) { /* We are unable to switch channels yet. */ } static void rsu_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { /* Scan is done in rsu_scan_start(). */ } /** * Called by the net80211 framework to indicate that * the minimum dwell time has been met and the scan should be terminated. * We don't actually terminate the scan as the firmware will notify * us when it's finished and we have no way to interrupt it.
*/ static void rsu_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } +/* + * The same as rtwn_get_multi_pos() / rtwn_set_multi(). + */ +static uint8_t +rsu_get_multi_pos(const uint8_t maddr[]) +{ + uint64_t mask = 0x00004d101df481b4; + uint8_t pos = 0x27; /* initial value */ + int i, j; + + for (i = 0; i < IEEE80211_ADDR_LEN; i++) + for (j = (i == 0) ? 1 : 0; j < 8; j++) + if ((maddr[i] >> j) & 1) + pos ^= (mask >> (i * 8 + j - 1)); + + pos &= 0x3f; + + return (pos); +} + static void +rsu_set_multi(struct rsu_softc *sc) +{ + struct ieee80211com *ic = &sc->sc_ic; + uint32_t mfilt[2]; + + RSU_ASSERT_LOCKED(sc); + + /* general structure was copied from ath(4). */ + if (ic->ic_allmulti == 0) { + struct ieee80211vap *vap; + struct ifnet *ifp; + struct ifmultiaddr *ifma; + + /* + * Merge multicast addresses to form the hardware filter. + */ + mfilt[0] = mfilt[1] = 0; + TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { + ifp = vap->iv_ifp; + if_maddr_rlock(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + caddr_t dl; + uint8_t pos; + + dl = LLADDR((struct sockaddr_dl *) + ifma->ifma_addr); + pos = rsu_get_multi_pos(dl); + + mfilt[pos / 32] |= (1 << (pos % 32)); + } + if_maddr_runlock(ifp); + } + } else + mfilt[0] = mfilt[1] = ~0; + + rsu_write_4(sc, R92S_MAR + 0, mfilt[0]); + rsu_write_4(sc, R92S_MAR + 4, mfilt[1]); + + RSU_DPRINTF(sc, RSU_DEBUG_STATE, "%s: MC filter %08x:%08x\n", + __func__, mfilt[0], mfilt[1]); +} + +static void rsu_update_mcast(struct ieee80211com *ic) { - /* XXX do nothing? */ + struct rsu_softc *sc = ic->ic_softc; + + RSU_LOCK(sc); + if (sc->sc_running) + rsu_set_multi(sc); + RSU_UNLOCK(sc); } static int rsu_alloc_list(struct rsu_softc *sc, struct rsu_data data[], int ndata, int maxsz) { int i, error; for (i = 0; i < ndata; i++) { struct rsu_data *dp = &data[i]; dp->sc = sc; dp->m = NULL; dp->buf = malloc(maxsz, M_USBDEV, M_NOWAIT); if (dp->buf == NULL) { device_printf(sc->sc_dev, "could not allocate buffer\n"); error = ENOMEM; goto fail; } dp->ni = NULL; } return (0); fail: rsu_free_list(sc, data, ndata); return (error); } static int rsu_alloc_rx_list(struct rsu_softc *sc) { int error, i; error = rsu_alloc_list(sc, sc->sc_rx, RSU_RX_LIST_COUNT, RSU_RXBUFSZ); if (error != 0) return (error); STAILQ_INIT(&sc->sc_rx_active); STAILQ_INIT(&sc->sc_rx_inactive); for (i = 0; i < RSU_RX_LIST_COUNT; i++) STAILQ_INSERT_HEAD(&sc->sc_rx_inactive, &sc->sc_rx[i], next); return (0); } static int rsu_alloc_tx_list(struct rsu_softc *sc) { int error, i; error = rsu_alloc_list(sc, sc->sc_tx, RSU_TX_LIST_COUNT, RSU_TXBUFSZ); if (error != 0) return (error); STAILQ_INIT(&sc->sc_tx_inactive); for (i = 0; i != RSU_N_TRANSFER; i++) { STAILQ_INIT(&sc->sc_tx_active[i]); STAILQ_INIT(&sc->sc_tx_pending[i]); } for (i = 0; i < RSU_TX_LIST_COUNT; i++) { STAILQ_INSERT_HEAD(&sc->sc_tx_inactive, &sc->sc_tx[i], next); } return (0); } static void rsu_free_tx_list(struct rsu_softc *sc) { int i; /* prevent further allocations from TX list(s) */ STAILQ_INIT(&sc->sc_tx_inactive); for (i = 0; i != RSU_N_TRANSFER; i++) { STAILQ_INIT(&sc->sc_tx_active[i]); STAILQ_INIT(&sc->sc_tx_pending[i]); } rsu_free_list(sc, sc->sc_tx, RSU_TX_LIST_COUNT); } static void rsu_free_rx_list(struct rsu_softc *sc) { /* prevent further allocations from RX list(s) */ STAILQ_INIT(&sc->sc_rx_inactive); STAILQ_INIT(&sc->sc_rx_active); rsu_free_list(sc, sc->sc_rx, RSU_RX_LIST_COUNT); } static void rsu_free_list(struct rsu_softc *sc, struct rsu_data data[], int ndata) { int 
i; for (i = 0; i < ndata; i++) { struct rsu_data *dp = &data[i]; if (dp->buf != NULL) { free(dp->buf, M_USBDEV); dp->buf = NULL; } if (dp->ni != NULL) { ieee80211_free_node(dp->ni); dp->ni = NULL; } } } static struct rsu_data * _rsu_getbuf(struct rsu_softc *sc) { struct rsu_data *bf; bf = STAILQ_FIRST(&sc->sc_tx_inactive); if (bf != NULL) STAILQ_REMOVE_HEAD(&sc->sc_tx_inactive, next); else bf = NULL; return (bf); } static struct rsu_data * rsu_getbuf(struct rsu_softc *sc) { struct rsu_data *bf; RSU_ASSERT_LOCKED(sc); bf = _rsu_getbuf(sc); if (bf == NULL) { RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: no buffers\n", __func__); } return (bf); } static void rsu_freebuf(struct rsu_softc *sc, struct rsu_data *bf) { RSU_ASSERT_LOCKED(sc); STAILQ_INSERT_TAIL(&sc->sc_tx_inactive, bf, next); } static int rsu_write_region_1(struct rsu_softc *sc, uint16_t addr, uint8_t *buf, int len) { usb_device_request_t req; req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = R92S_REQ_REGS; USETW(req.wValue, addr); USETW(req.wIndex, 0); USETW(req.wLength, len); return (rsu_do_request(sc, &req, buf)); } static void rsu_write_1(struct rsu_softc *sc, uint16_t addr, uint8_t val) { rsu_write_region_1(sc, addr, &val, 1); } static void rsu_write_2(struct rsu_softc *sc, uint16_t addr, uint16_t val) { val = htole16(val); rsu_write_region_1(sc, addr, (uint8_t *)&val, 2); } static void rsu_write_4(struct rsu_softc *sc, uint16_t addr, uint32_t val) { val = htole32(val); rsu_write_region_1(sc, addr, (uint8_t *)&val, 4); } static int rsu_read_region_1(struct rsu_softc *sc, uint16_t addr, uint8_t *buf, int len) { usb_device_request_t req; req.bmRequestType = UT_READ_VENDOR_DEVICE; req.bRequest = R92S_REQ_REGS; USETW(req.wValue, addr); USETW(req.wIndex, 0); USETW(req.wLength, len); return (rsu_do_request(sc, &req, buf)); } static uint8_t rsu_read_1(struct rsu_softc *sc, uint16_t addr) { uint8_t val; if (rsu_read_region_1(sc, addr, &val, 1) != 0) return (0xff); return (val); } static uint16_t rsu_read_2(struct rsu_softc *sc, uint16_t addr) { uint16_t val; if (rsu_read_region_1(sc, addr, (uint8_t *)&val, 2) != 0) return (0xffff); return (le16toh(val)); } static uint32_t rsu_read_4(struct rsu_softc *sc, uint16_t addr) { uint32_t val; if (rsu_read_region_1(sc, addr, (uint8_t *)&val, 4) != 0) return (0xffffffff); return (le32toh(val)); } static int rsu_fw_iocmd(struct rsu_softc *sc, uint32_t iocmd) { int ntries; rsu_write_4(sc, R92S_IOCMD_CTRL, iocmd); rsu_ms_delay(sc, 1); for (ntries = 0; ntries < 50; ntries++) { if (rsu_read_4(sc, R92S_IOCMD_CTRL) == 0) return (0); rsu_ms_delay(sc, 1); } return (ETIMEDOUT); } static uint8_t rsu_efuse_read_1(struct rsu_softc *sc, uint16_t addr) { uint32_t reg; int ntries; reg = rsu_read_4(sc, R92S_EFUSE_CTRL); reg = RW(reg, R92S_EFUSE_CTRL_ADDR, addr); reg &= ~R92S_EFUSE_CTRL_VALID; rsu_write_4(sc, R92S_EFUSE_CTRL, reg); /* Wait for read operation to complete. */ for (ntries = 0; ntries < 100; ntries++) { reg = rsu_read_4(sc, R92S_EFUSE_CTRL); if (reg & R92S_EFUSE_CTRL_VALID) return (MS(reg, R92S_EFUSE_CTRL_DATA)); rsu_ms_delay(sc, 1); } device_printf(sc->sc_dev, "could not read efuse byte at address 0x%x\n", addr); return (0xff); } static int rsu_read_rom(struct rsu_softc *sc) { uint8_t *rom = sc->rom; uint16_t addr = 0; uint32_t reg; uint8_t off, msk; int i; /* Make sure that ROM type is eFuse and that autoload succeeded. */ reg = rsu_read_1(sc, R92S_EE_9346CR); if ((reg & (R92S_9356SEL | R92S_EEPROM_EN)) != R92S_EEPROM_EN) return (EIO); /* Turn on 2.5V to prevent eFuse leakage. 
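* Bit 0x80 of R92S_EFUSE_TEST + 3 is pulsed high for 1ms and dropped
* again before the ROM is read out below.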
*/ reg = rsu_read_1(sc, R92S_EFUSE_TEST + 3); rsu_write_1(sc, R92S_EFUSE_TEST + 3, reg | 0x80); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_EFUSE_TEST + 3, reg & ~0x80); /* Read full ROM image. */ memset(&sc->rom, 0xff, sizeof(sc->rom)); while (addr < 512) { reg = rsu_efuse_read_1(sc, addr); if (reg == 0xff) break; addr++; off = reg >> 4; msk = reg & 0xf; for (i = 0; i < 4; i++) { if (msk & (1 << i)) continue; rom[off * 8 + i * 2 + 0] = rsu_efuse_read_1(sc, addr); addr++; rom[off * 8 + i * 2 + 1] = rsu_efuse_read_1(sc, addr); addr++; } } #ifdef USB_DEBUG if (rsu_debug >= 5) { /* Dump ROM content. */ printf("\n"); for (i = 0; i < sizeof(sc->rom); i++) printf("%02x:", rom[i]); printf("\n"); } #endif return (0); } static int rsu_fw_cmd(struct rsu_softc *sc, uint8_t code, void *buf, int len) { const uint8_t which = RSU_H2C_ENDPOINT; struct rsu_data *data; struct r92s_tx_desc *txd; struct r92s_fw_cmd_hdr *cmd; int cmdsz; int xferlen; RSU_ASSERT_LOCKED(sc); data = rsu_getbuf(sc); if (data == NULL) return (ENOMEM); /* Blank the entire payload, just to be safe */ memset(data->buf, '\0', RSU_TXBUFSZ); /* Round-up command length to a multiple of 8 bytes. */ /* XXX TODO: is this required? */ cmdsz = (len + 7) & ~7; xferlen = sizeof(*txd) + sizeof(*cmd) + cmdsz; KASSERT(xferlen <= RSU_TXBUFSZ, ("%s: invalid length", __func__)); memset(data->buf, 0, xferlen); /* Setup Tx descriptor. */ txd = (struct r92s_tx_desc *)data->buf; txd->txdw0 = htole32( SM(R92S_TXDW0_OFFSET, sizeof(*txd)) | SM(R92S_TXDW0_PKTLEN, sizeof(*cmd) + cmdsz) | R92S_TXDW0_OWN | R92S_TXDW0_FSG | R92S_TXDW0_LSG); txd->txdw1 = htole32(SM(R92S_TXDW1_QSEL, R92S_TXDW1_QSEL_H2C)); /* Setup command header. */ cmd = (struct r92s_fw_cmd_hdr *)&txd[1]; cmd->len = htole16(cmdsz); cmd->code = code; cmd->seq = sc->cmd_seq; sc->cmd_seq = (sc->cmd_seq + 1) & 0x7f; /* Copy command payload. */ memcpy(&cmd[1], buf, len); RSU_DPRINTF(sc, RSU_DEBUG_TX | RSU_DEBUG_FWCMD, "%s: Tx cmd code=0x%x len=0x%x\n", __func__, code, cmdsz); data->buflen = xferlen; STAILQ_INSERT_TAIL(&sc->sc_tx_pending[which], data, next); usbd_transfer_start(sc->sc_xfer[which]); return (0); } /* ARGSUSED */ static void rsu_calib_task(void *arg, int pending __unused) { struct rsu_softc *sc = arg; #ifdef notyet uint32_t reg; #endif RSU_DPRINTF(sc, RSU_DEBUG_CALIB, "%s: running calibration task\n", __func__); RSU_LOCK(sc); #ifdef notyet /* Read WPS PBC status. */ rsu_write_1(sc, R92S_MAC_PINMUX_CTRL, R92S_GPIOMUX_EN | SM(R92S_GPIOSEL_GPIO, R92S_GPIOSEL_GPIO_JTAG)); rsu_write_1(sc, R92S_GPIO_IO_SEL, rsu_read_1(sc, R92S_GPIO_IO_SEL) & ~R92S_GPIO_WPS); reg = rsu_read_1(sc, R92S_GPIO_CTRL); if (reg != 0xff && (reg & R92S_GPIO_WPS)) DPRINTF(("WPS PBC is pushed\n")); #endif /* Read current signal level. */ if (rsu_fw_iocmd(sc, 0xf4000001) == 0) { sc->sc_currssi = rsu_read_4(sc, R92S_IOCMD_DATA); RSU_DPRINTF(sc, RSU_DEBUG_CALIB, "%s: RSSI=%d (%d)\n", __func__, sc->sc_currssi, rsu_hwrssi_to_rssi(sc, sc->sc_currssi)); } if (sc->sc_calibrating) taskqueue_enqueue_timeout(taskqueue_thread, &sc->calib_task, hz); RSU_UNLOCK(sc); } static void rsu_tx_task(void *arg, int pending __unused) { struct rsu_softc *sc = arg; RSU_LOCK(sc); _rsu_start(sc); RSU_UNLOCK(sc); } #define RSU_PWR_UNKNOWN 0x0 #define RSU_PWR_ACTIVE 0x1 #define RSU_PWR_OFF 0x2 #define RSU_PWR_SLEEP 0x3 /* * Set the current power state. * * The rtlwifi code doesn't do this so aggressively; it * waits for an idle period after association with * no traffic before doing this. 
* * For now - it's on in all states except RUN, and * in RUN it'll transition to allow sleep. */ struct r92s_pwr_cmd { uint8_t mode; uint8_t smart_ps; uint8_t bcn_pass_time; }; static int rsu_set_fw_power_state(struct rsu_softc *sc, int state) { struct r92s_set_pwr_mode cmd; //struct r92s_pwr_cmd cmd; int error; RSU_ASSERT_LOCKED(sc); /* only change state if required */ if (sc->sc_curpwrstate == state) return (0); memset(&cmd, 0, sizeof(cmd)); switch (state) { case RSU_PWR_ACTIVE: /* Force the hardware awake */ rsu_write_1(sc, R92S_USB_HRPWM, R92S_USB_HRPWM_PS_ST_ACTIVE | R92S_USB_HRPWM_PS_ALL_ON); cmd.mode = R92S_PS_MODE_ACTIVE; break; case RSU_PWR_SLEEP: cmd.mode = R92S_PS_MODE_DTIM; /* XXX configurable? */ cmd.smart_ps = 1; /* XXX 2 if doing p2p */ cmd.bcn_pass_time = 5; /* in 100mS usb.c, linux/rtlwifi */ break; case RSU_PWR_OFF: cmd.mode = R92S_PS_MODE_RADIOOFF; break; default: device_printf(sc->sc_dev, "%s: unknown ps mode (%d)\n", __func__, state); return (ENXIO); } RSU_DPRINTF(sc, RSU_DEBUG_RESET, "%s: setting ps mode to %d (mode %d)\n", __func__, state, cmd.mode); error = rsu_fw_cmd(sc, R92S_CMD_SET_PWR_MODE, &cmd, sizeof(cmd)); if (error == 0) sc->sc_curpwrstate = state; return (error); } static int rsu_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct rsu_vap *uvp = RSU_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct rsu_softc *sc = ic->ic_softc; struct ieee80211_node *ni; struct ieee80211_rateset *rs; enum ieee80211_state ostate; int error, startcal = 0; ostate = vap->iv_state; RSU_DPRINTF(sc, RSU_DEBUG_STATE, "%s: %s -> %s\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); IEEE80211_UNLOCK(ic); if (ostate == IEEE80211_S_RUN) { RSU_LOCK(sc); /* Stop calibration. */ sc->sc_calibrating = 0; RSU_UNLOCK(sc); taskqueue_drain_timeout(taskqueue_thread, &sc->calib_task); taskqueue_drain(taskqueue_thread, &sc->tx_task); /* Disassociate from our current BSS. */ RSU_LOCK(sc); rsu_disconnect(sc); } else RSU_LOCK(sc); switch (nstate) { case IEEE80211_S_INIT: (void) rsu_set_fw_power_state(sc, RSU_PWR_ACTIVE); break; case IEEE80211_S_AUTH: ni = ieee80211_ref_node(vap->iv_bss); (void) rsu_set_fw_power_state(sc, RSU_PWR_ACTIVE); error = rsu_join_bss(sc, ni); ieee80211_free_node(ni); if (error != 0) { device_printf(sc->sc_dev, "could not send join command\n"); } break; case IEEE80211_S_RUN: ni = ieee80211_ref_node(vap->iv_bss); rs = &ni->ni_rates; /* Indicate highest supported rate. */ ni->ni_txrate = rs->rs_rates[rs->rs_nrates - 1]; (void) rsu_set_fw_power_state(sc, RSU_PWR_SLEEP); ieee80211_free_node(ni); startcal = 1; break; default: break; } if (startcal != 0) { sc->sc_calibrating = 1; /* Start periodic calibration. */ taskqueue_enqueue_timeout(taskqueue_thread, &sc->calib_task, hz); } RSU_UNLOCK(sc); IEEE80211_LOCK(ic); return (uvp->newstate(vap, nstate, arg)); } #ifdef notyet static void rsu_set_key(struct rsu_softc *sc, const struct ieee80211_key *k) { struct r92s_fw_cmd_set_key key; memset(&key, 0, sizeof(key)); /* Map net80211 cipher to HW crypto algorithm. 
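* WEP40 vs. WEP104 is chosen by key length; TKIP and AES-CCM map one to
* one. Unknown ciphers simply return, leaving no firmware key installed.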
*/ switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_WEP: if (k->wk_keylen < 8) key.algo = R92S_KEY_ALGO_WEP40; else key.algo = R92S_KEY_ALGO_WEP104; break; case IEEE80211_CIPHER_TKIP: key.algo = R92S_KEY_ALGO_TKIP; break; case IEEE80211_CIPHER_AES_CCM: key.algo = R92S_KEY_ALGO_AES; break; default: return; } key.id = k->wk_keyix; key.grpkey = (k->wk_flags & IEEE80211_KEY_GROUP) != 0; memcpy(key.key, k->wk_key, MIN(k->wk_keylen, sizeof(key.key))); (void)rsu_fw_cmd(sc, R92S_CMD_SET_KEY, &key, sizeof(key)); } static void rsu_delete_key(struct rsu_softc *sc, const struct ieee80211_key *k) { struct r92s_fw_cmd_set_key key; memset(&key, 0, sizeof(key)); key.id = k->wk_keyix; (void)rsu_fw_cmd(sc, R92S_CMD_SET_KEY, &key, sizeof(key)); } #endif static int rsu_site_survey(struct rsu_softc *sc, struct ieee80211_scan_ssid *ssid) { struct r92s_fw_cmd_sitesurvey cmd; RSU_ASSERT_LOCKED(sc); memset(&cmd, 0, sizeof(cmd)); /* TODO: passive channels? */ if (sc->sc_active_scan) cmd.active = htole32(1); cmd.limit = htole32(48); if (ssid != NULL) { sc->sc_extra_scan = 1; cmd.ssidlen = htole32(ssid->len); memcpy(cmd.ssid, ssid->ssid, ssid->len); } #ifdef USB_DEBUG if (rsu_debug & (RSU_DEBUG_SCAN | RSU_DEBUG_FWCMD)) { device_printf(sc->sc_dev, "sending site survey command, active %d", le32toh(cmd.active)); if (ssid != NULL) { printf(", ssid: "); ieee80211_print_essid(cmd.ssid, le32toh(cmd.ssidlen)); } printf("\n"); } #endif return (rsu_fw_cmd(sc, R92S_CMD_SITE_SURVEY, &cmd, sizeof(cmd))); } static int rsu_join_bss(struct rsu_softc *sc, struct ieee80211_node *ni) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; struct ndis_wlan_bssid_ex *bss; struct ndis_802_11_fixed_ies *fixed; struct r92s_fw_cmd_auth auth; uint8_t buf[sizeof(*bss) + 128] __aligned(4); uint8_t *frm; uint8_t opmode; int error; RSU_ASSERT_LOCKED(sc); /* Let the FW decide the opmode based on the capinfo field. */ opmode = NDIS802_11AUTOUNKNOWN; RSU_DPRINTF(sc, RSU_DEBUG_RESET, "%s: setting operating mode to %d\n", __func__, opmode); error = rsu_fw_cmd(sc, R92S_CMD_SET_OPMODE, &opmode, sizeof(opmode)); if (error != 0) return (error); memset(&auth, 0, sizeof(auth)); if (vap->iv_flags & IEEE80211_F_WPA) { auth.mode = R92S_AUTHMODE_WPA; auth.dot1x = (ni->ni_authmode == IEEE80211_AUTH_8021X); } else auth.mode = R92S_AUTHMODE_OPEN; RSU_DPRINTF(sc, RSU_DEBUG_RESET, "%s: setting auth mode to %d\n", __func__, auth.mode); error = rsu_fw_cmd(sc, R92S_CMD_SET_AUTH, &auth, sizeof(auth)); if (error != 0) return (error); memset(buf, 0, sizeof(buf)); bss = (struct ndis_wlan_bssid_ex *)buf; IEEE80211_ADDR_COPY(bss->macaddr, ni->ni_bssid); bss->ssid.ssidlen = htole32(ni->ni_esslen); memcpy(bss->ssid.ssid, ni->ni_essid, ni->ni_esslen); if (vap->iv_flags & (IEEE80211_F_PRIVACY | IEEE80211_F_WPA)) bss->privacy = htole32(1); bss->rssi = htole32(ni->ni_avgrssi); if (ic->ic_curmode == IEEE80211_MODE_11B) bss->networktype = htole32(NDIS802_11DS); else bss->networktype = htole32(NDIS802_11OFDM24); bss->config.len = htole32(sizeof(bss->config)); bss->config.bintval = htole32(ni->ni_intval); bss->config.dsconfig = htole32(ieee80211_chan2ieee(ic, ni->ni_chan)); bss->inframode = htole32(NDIS802_11INFRASTRUCTURE); /* XXX verify how this is supposed to look! */ memcpy(bss->supprates, ni->ni_rates.rs_rates, ni->ni_rates.rs_nrates); /* Write the fixed fields of the beacon frame. 
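* (timestamp, beacon interval and capability info), followed below by
* the IEs the firmware should include in its association request.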
*/ fixed = (struct ndis_802_11_fixed_ies *)&bss[1]; memcpy(&fixed->tstamp, ni->ni_tstamp.data, 8); fixed->bintval = htole16(ni->ni_intval); fixed->capabilities = htole16(ni->ni_capinfo); /* Write IEs to be included in the association request. */ frm = (uint8_t *)&fixed[1]; frm = ieee80211_add_rsn(frm, vap); frm = ieee80211_add_wpa(frm, vap); frm = ieee80211_add_qos(frm, ni); if ((ic->ic_flags & IEEE80211_F_WME) && (ni->ni_ies.wme_ie != NULL)) frm = ieee80211_add_wme_info(frm, &ic->ic_wme); if (ni->ni_flags & IEEE80211_NODE_HT) { frm = ieee80211_add_htcap(frm, ni); frm = ieee80211_add_htinfo(frm, ni); } bss->ieslen = htole32(frm - (uint8_t *)fixed); bss->len = htole32(((frm - buf) + 3) & ~3); RSU_DPRINTF(sc, RSU_DEBUG_RESET | RSU_DEBUG_FWCMD, "%s: sending join bss command to %s chan %d\n", __func__, ether_sprintf(bss->macaddr), le32toh(bss->config.dsconfig)); return (rsu_fw_cmd(sc, R92S_CMD_JOIN_BSS, buf, sizeof(buf))); } static int rsu_disconnect(struct rsu_softc *sc) { uint32_t zero = 0; /* :-) */ /* Disassociate from our current BSS. */ RSU_DPRINTF(sc, RSU_DEBUG_STATE | RSU_DEBUG_FWCMD, "%s: sending disconnect command\n", __func__); return (rsu_fw_cmd(sc, R92S_CMD_DISCONNECT, &zero, sizeof(zero))); } /* * Map the hardware provided RSSI value to a signal level. * For the most part it's just something we divide by and cap * so it doesn't overflow the representation by net80211. */ static int rsu_hwrssi_to_rssi(struct rsu_softc *sc, int hw_rssi) { int v; if (hw_rssi == 0) return (0); v = hw_rssi >> 4; if (v > 80) v = 80; return (v); } static void rsu_event_survey(struct rsu_softc *sc, uint8_t *buf, int len) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_frame *wh; struct ndis_wlan_bssid_ex *bss; struct ieee80211_rx_stats rxs; struct mbuf *m; int pktlen; if (__predict_false(len < sizeof(*bss))) return; bss = (struct ndis_wlan_bssid_ex *)buf; if (__predict_false(len < sizeof(*bss) + le32toh(bss->ieslen))) return; RSU_DPRINTF(sc, RSU_DEBUG_SCAN, "%s: found BSS %s: len=%d chan=%d inframode=%d " "networktype=%d privacy=%d, RSSI=%d\n", __func__, ether_sprintf(bss->macaddr), le32toh(bss->len), le32toh(bss->config.dsconfig), le32toh(bss->inframode), le32toh(bss->networktype), le32toh(bss->privacy), le32toh(bss->rssi)); /* Build a fake beacon frame to let net80211 do all the parsing. */ /* XXX TODO: just call the new scan API methods! */ pktlen = sizeof(*wh) + le32toh(bss->ieslen); if (__predict_false(pktlen > MCLBYTES)) return; m = m_get2(pktlen, M_NOWAIT, MT_DATA, M_PKTHDR); if (__predict_false(m == NULL)) return; wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_BEACON; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; USETW(wh->i_dur, 0); IEEE80211_ADDR_COPY(wh->i_addr1, ieee80211broadcastaddr); IEEE80211_ADDR_COPY(wh->i_addr2, bss->macaddr); IEEE80211_ADDR_COPY(wh->i_addr3, bss->macaddr); *(uint16_t *)wh->i_seq = 0; memcpy(&wh[1], (uint8_t *)&bss[1], le32toh(bss->ieslen)); /* Finalize mbuf. 
*/ m->m_pkthdr.len = m->m_len = pktlen; /* Set channel flags for input path */ bzero(&rxs, sizeof(rxs)); rxs.r_flags |= IEEE80211_R_IEEE | IEEE80211_R_FREQ; rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI; rxs.c_ieee = le32toh(bss->config.dsconfig); rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_2GHZ); /* This is a number from 0..100; so let's just divide it down a bit */ rxs.c_rssi = le32toh(bss->rssi) / 2; rxs.c_nf = -96; if (ieee80211_add_rx_params(m, &rxs) == 0) return; /* XXX avoid a LOR */ RSU_UNLOCK(sc); ieee80211_input_mimo_all(ic, m); RSU_LOCK(sc); } static void rsu_event_join_bss(struct rsu_softc *sc, uint8_t *buf, int len) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; struct r92s_event_join_bss *rsp; uint32_t tmp; int res; if (__predict_false(len < sizeof(*rsp))) return; rsp = (struct r92s_event_join_bss *)buf; res = (int)le32toh(rsp->join_res); RSU_DPRINTF(sc, RSU_DEBUG_STATE | RSU_DEBUG_FWCMD, "%s: Rx join BSS event len=%d res=%d\n", __func__, len, res); /* * XXX Don't do this; there's likely a better way to tell * the caller we failed. */ if (res <= 0) { RSU_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, -1); RSU_LOCK(sc); return; } tmp = le32toh(rsp->associd); if (tmp >= vap->iv_max_aid) { DPRINTF("Assoc ID overflow\n"); tmp = 1; } RSU_DPRINTF(sc, RSU_DEBUG_STATE | RSU_DEBUG_FWCMD, "%s: associated with %s associd=%d\n", __func__, ether_sprintf(rsp->bss.macaddr), tmp); /* XXX is this required? What's the top two bits for again? */ ni->ni_associd = tmp | 0xc000; RSU_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_RUN, IEEE80211_FC0_SUBTYPE_ASSOC_RESP); RSU_LOCK(sc); } static void rsu_event_addba_req_report(struct rsu_softc *sc, uint8_t *buf, int len) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct r92s_add_ba_event *ba = (void *) buf; struct ieee80211_node *ni; if (len < sizeof(*ba)) { device_printf(sc->sc_dev, "%s: short read (%d)\n", __func__, len); return; } if (vap == NULL) return; RSU_DPRINTF(sc, RSU_DEBUG_AMPDU, "%s: mac=%s, tid=%d, ssn=%d\n", __func__, ether_sprintf(ba->mac_addr), (int) ba->tid, (int) le16toh(ba->ssn)); /* XXX do node lookup; this is STA specific */ ni = ieee80211_ref_node(vap->iv_bss); ieee80211_ampdu_rx_start_ext(ni, ba->tid, le16toh(ba->ssn) >> 4, 32); ieee80211_free_node(ni); } static void rsu_rx_event(struct rsu_softc *sc, uint8_t code, uint8_t *buf, int len) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); RSU_DPRINTF(sc, RSU_DEBUG_RX | RSU_DEBUG_FWCMD, "%s: Rx event code=%d len=%d\n", __func__, code, len); switch (code) { case R92S_EVT_SURVEY: rsu_event_survey(sc, buf, len); break; case R92S_EVT_SURVEY_DONE: RSU_DPRINTF(sc, RSU_DEBUG_SCAN, "%s: %s scan done, found %d BSS\n", __func__, sc->sc_extra_scan ? "direct" : "broadcast", le32toh(*(uint32_t *)buf)); if (sc->sc_extra_scan == 1) { /* Send broadcast probe request. 
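* The directed (single-SSID) survey has just finished; chase it with one
* broadcast survey so the remaining BSSes are reported before the scan
* is declared done.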
*/ sc->sc_extra_scan = 0; if (vap != NULL && rsu_site_survey(sc, NULL) != 0) { RSU_UNLOCK(sc); ieee80211_cancel_scan(vap); RSU_LOCK(sc); } break; } if (vap != NULL) { RSU_UNLOCK(sc); ieee80211_scan_done(vap); RSU_LOCK(sc); } break; case R92S_EVT_JOIN_BSS: if (vap->iv_state == IEEE80211_S_AUTH) rsu_event_join_bss(sc, buf, len); break; case R92S_EVT_DEL_STA: RSU_DPRINTF(sc, RSU_DEBUG_FWCMD | RSU_DEBUG_STATE, "%s: disassociated from %s\n", __func__, ether_sprintf(buf)); if (vap->iv_state == IEEE80211_S_RUN && IEEE80211_ADDR_EQ(vap->iv_bss->ni_bssid, buf)) { RSU_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, -1); RSU_LOCK(sc); } break; case R92S_EVT_WPS_PBC: RSU_DPRINTF(sc, RSU_DEBUG_RX | RSU_DEBUG_FWCMD, "%s: WPS PBC pushed.\n", __func__); break; case R92S_EVT_FWDBG: buf[60] = '\0'; RSU_DPRINTF(sc, RSU_DEBUG_FWDBG, "FWDBG: %s\n", (char *)buf); break; case R92S_EVT_ADDBA_REQ_REPORT: rsu_event_addba_req_report(sc, buf, len); break; default: device_printf(sc->sc_dev, "%s: unhandled code (%d)\n", __func__, code); break; } } static void rsu_rx_multi_event(struct rsu_softc *sc, uint8_t *buf, int len) { struct r92s_fw_cmd_hdr *cmd; int cmdsz; RSU_DPRINTF(sc, RSU_DEBUG_RX, "%s: Rx events len=%d\n", __func__, len); /* Skip Rx status. */ buf += sizeof(struct r92s_rx_stat); len -= sizeof(struct r92s_rx_stat); /* Process all events. */ for (;;) { /* Check that command header fits. */ if (__predict_false(len < sizeof(*cmd))) break; cmd = (struct r92s_fw_cmd_hdr *)buf; /* Check that command payload fits. */ cmdsz = le16toh(cmd->len); if (__predict_false(len < sizeof(*cmd) + cmdsz)) break; /* Process firmware event. */ rsu_rx_event(sc, cmd->code, (uint8_t *)&cmd[1], cmdsz); if (!(cmd->seq & R92S_FW_CMD_MORE)) break; buf += sizeof(*cmd) + cmdsz; len -= sizeof(*cmd) + cmdsz; } } #if 0 static int8_t rsu_get_rssi(struct rsu_softc *sc, int rate, void *physt) { static const int8_t cckoff[] = { 14, -2, -20, -40 }; struct r92s_rx_phystat *phy; struct r92s_rx_cck *cck; uint8_t rpt; int8_t rssi; if (rate <= 3) { cck = (struct r92s_rx_cck *)physt; rpt = (cck->agc_rpt >> 6) & 0x3; rssi = cck->agc_rpt & 0x3e; rssi = cckoff[rpt] - rssi; } else { /* OFDM/HT. */ phy = (struct r92s_rx_phystat *)physt; rssi = ((le32toh(phy->phydw1) >> 1) & 0x7f) - 106; } return (rssi); } #endif static struct mbuf * rsu_rx_frame(struct rsu_softc *sc, uint8_t *buf, int pktlen) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_frame *wh; struct r92s_rx_stat *stat; uint32_t rxdw0, rxdw3; struct mbuf *m; uint8_t rate; int infosz; stat = (struct r92s_rx_stat *)buf; rxdw0 = le32toh(stat->rxdw0); rxdw3 = le32toh(stat->rxdw3); if (__predict_false(rxdw0 & R92S_RXDW0_CRCERR)) { counter_u64_add(ic->ic_ierrors, 1); return NULL; } if (__predict_false(pktlen < sizeof(*wh) || pktlen > MCLBYTES)) { counter_u64_add(ic->ic_ierrors, 1); return NULL; } rate = MS(rxdw3, R92S_RXDW3_RATE); infosz = MS(rxdw0, R92S_RXDW0_INFOSZ) * 8; #if 0 /* Get RSSI from PHY status descriptor if present. */ if (infosz != 0) *rssi = rsu_get_rssi(sc, rate, &stat[1]); else *rssi = 0; #endif RSU_DPRINTF(sc, RSU_DEBUG_RX, "%s: Rx frame len=%d rate=%d infosz=%d\n", __func__, pktlen, rate, infosz); m = m_get2(pktlen, M_NOWAIT, MT_DATA, M_PKTHDR); if (__predict_false(m == NULL)) { counter_u64_add(ic->ic_ierrors, 1); return NULL; } /* Hardware does Rx TCP checksum offload. 
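* TCPCHKVALID means the report is usable; TCPCHKRPT set on top of that
* means the checksum passed, so the mbuf is flagged CSUM_DATA_VALID.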
*/ if (rxdw3 & R92S_RXDW3_TCPCHKVALID) { if (__predict_true(rxdw3 & R92S_RXDW3_TCPCHKRPT)) m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; } wh = (struct ieee80211_frame *)((uint8_t *)&stat[1] + infosz); memcpy(mtod(m, uint8_t *), wh, pktlen); m->m_pkthdr.len = m->m_len = pktlen; if (ieee80211_radiotap_active(ic)) { struct rsu_rx_radiotap_header *tap = &sc->sc_rxtap; /* Map HW rate index to 802.11 rate. */ tap->wr_flags = 2; if (!(rxdw3 & R92S_RXDW3_HTC)) { switch (rate) { /* CCK. */ case 0: tap->wr_rate = 2; break; case 1: tap->wr_rate = 4; break; case 2: tap->wr_rate = 11; break; case 3: tap->wr_rate = 22; break; /* OFDM. */ case 4: tap->wr_rate = 12; break; case 5: tap->wr_rate = 18; break; case 6: tap->wr_rate = 24; break; case 7: tap->wr_rate = 36; break; case 8: tap->wr_rate = 48; break; case 9: tap->wr_rate = 72; break; case 10: tap->wr_rate = 96; break; case 11: tap->wr_rate = 108; break; } } else if (rate >= 12) { /* MCS0~15. */ /* Bit 7 set means HT MCS instead of rate. */ tap->wr_rate = 0x80 | (rate - 12); } #if 0 tap->wr_dbm_antsignal = *rssi; #endif /* XXX not nice */ tap->wr_dbm_antsignal = rsu_hwrssi_to_rssi(sc, sc->sc_currssi); tap->wr_chan_freq = htole16(ic->ic_curchan->ic_freq); tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags); } return (m); } static struct mbuf * rsu_rx_multi_frame(struct rsu_softc *sc, uint8_t *buf, int len) { struct r92s_rx_stat *stat; uint32_t rxdw0; int totlen, pktlen, infosz, npkts; struct mbuf *m, *m0 = NULL, *prevm = NULL; /* Get the number of encapsulated frames. */ stat = (struct r92s_rx_stat *)buf; npkts = MS(le32toh(stat->rxdw2), R92S_RXDW2_PKTCNT); RSU_DPRINTF(sc, RSU_DEBUG_RX, "%s: Rx %d frames in one chunk\n", __func__, npkts); /* Process all of them. */ while (npkts-- > 0) { if (__predict_false(len < sizeof(*stat))) break; stat = (struct r92s_rx_stat *)buf; rxdw0 = le32toh(stat->rxdw0); pktlen = MS(rxdw0, R92S_RXDW0_PKTLEN); if (__predict_false(pktlen == 0)) break; infosz = MS(rxdw0, R92S_RXDW0_INFOSZ) * 8; /* Make sure everything fits in xfer. */ totlen = sizeof(*stat) + infosz + pktlen; if (__predict_false(totlen > len)) break; /* Process 802.11 frame. */ m = rsu_rx_frame(sc, buf, pktlen); if (m0 == NULL) m0 = m; if (prevm == NULL) prevm = m; else { prevm->m_next = m; prevm = m; } /* Next chunk is 128-byte aligned. */ totlen = (totlen + 127) & ~127; buf += totlen; len -= totlen; } return (m0); } static struct mbuf * rsu_rxeof(struct usb_xfer *xfer, struct rsu_data *data) { struct rsu_softc *sc = data->sc; struct ieee80211com *ic = &sc->sc_ic; struct r92s_rx_stat *stat; int len; usbd_xfer_status(xfer, &len, NULL, NULL, NULL); if (__predict_false(len < sizeof(*stat))) { DPRINTF("xfer too short %d\n", len); counter_u64_add(ic->ic_ierrors, 1); return (NULL); } /* Determine if it is a firmware C2H event or an 802.11 frame. */ stat = (struct r92s_rx_stat *)data->buf; if ((le32toh(stat->rxdw1) & 0x1ff) == 0x1ff) { rsu_rx_multi_event(sc, data->buf, len); /* No packets to process. 
*/ return (NULL); } else return (rsu_rx_multi_frame(sc, data->buf, len)); } static void rsu_bulk_rx_callback(struct usb_xfer *xfer, usb_error_t error) { struct rsu_softc *sc = usbd_xfer_softc(xfer); struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_frame *wh; struct ieee80211_node *ni; struct mbuf *m = NULL, *next; struct rsu_data *data; RSU_ASSERT_LOCKED(sc); switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: data = STAILQ_FIRST(&sc->sc_rx_active); if (data == NULL) goto tr_setup; STAILQ_REMOVE_HEAD(&sc->sc_rx_active, next); m = rsu_rxeof(xfer, data); STAILQ_INSERT_TAIL(&sc->sc_rx_inactive, data, next); /* FALLTHROUGH */ case USB_ST_SETUP: tr_setup: /* * XXX TODO: if we have an mbuf list, but then * we hit data == NULL, what now? */ data = STAILQ_FIRST(&sc->sc_rx_inactive); if (data == NULL) { KASSERT(m == NULL, ("mbuf isn't NULL")); return; } STAILQ_REMOVE_HEAD(&sc->sc_rx_inactive, next); STAILQ_INSERT_TAIL(&sc->sc_rx_active, data, next); usbd_xfer_set_frame_data(xfer, 0, data->buf, usbd_xfer_max_len(xfer)); usbd_transfer_submit(xfer); /* * To avoid a LOR, unlock our private mutex here before calling * ieee80211_input(); we are at the end of the USB callback, where * it is safe to unlock. */ RSU_UNLOCK(sc); while (m != NULL) { int rssi; /* Cheat and get the last calibrated RSSI */ rssi = rsu_hwrssi_to_rssi(sc, sc->sc_currssi); next = m->m_next; m->m_next = NULL; wh = mtod(m, struct ieee80211_frame *); ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); if (ni != NULL) { if (ni->ni_flags & IEEE80211_NODE_HT) m->m_flags |= M_AMPDU; (void)ieee80211_input(ni, m, rssi, -96); ieee80211_free_node(ni); } else (void)ieee80211_input_all(ic, m, rssi, -96); m = next; } RSU_LOCK(sc); break; default: /* Return the buffer to the inactive queue due to an error. */ data = STAILQ_FIRST(&sc->sc_rx_active); if (data != NULL) { STAILQ_REMOVE_HEAD(&sc->sc_rx_active, next); STAILQ_INSERT_TAIL(&sc->sc_rx_inactive, data, next); } if (error != USB_ERR_CANCELLED) { usbd_xfer_set_stall(xfer); counter_u64_add(ic->ic_ierrors, 1); goto tr_setup; } break; } } static void rsu_txeof(struct usb_xfer *xfer, struct rsu_data *data) { #ifdef USB_DEBUG struct rsu_softc *sc = usbd_xfer_softc(xfer); #endif RSU_DPRINTF(sc, RSU_DEBUG_TXDONE, "%s: called; data=%p\n", __func__, data); if (data->m) { /* XXX status?
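* ieee80211_tx_complete() takes a success/failure status as its last
* argument; we always pass 0 (success) here since the bulk TX callback
* gives us no per-frame completion status.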
*/ ieee80211_tx_complete(data->ni, data->m, 0); data->m = NULL; data->ni = NULL; } } static void rsu_bulk_tx_callback_sub(struct usb_xfer *xfer, usb_error_t error, uint8_t which) { struct rsu_softc *sc = usbd_xfer_softc(xfer); struct ieee80211com *ic = &sc->sc_ic; struct rsu_data *data; RSU_ASSERT_LOCKED(sc); switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: data = STAILQ_FIRST(&sc->sc_tx_active[which]); if (data == NULL) goto tr_setup; RSU_DPRINTF(sc, RSU_DEBUG_TXDONE, "%s: transfer done %p\n", __func__, data); STAILQ_REMOVE_HEAD(&sc->sc_tx_active[which], next); rsu_txeof(xfer, data); rsu_freebuf(sc, data); /* FALLTHROUGH */ case USB_ST_SETUP: tr_setup: data = STAILQ_FIRST(&sc->sc_tx_pending[which]); if (data == NULL) { RSU_DPRINTF(sc, RSU_DEBUG_TXDONE, "%s: empty pending queue sc %p\n", __func__, sc); return; } STAILQ_REMOVE_HEAD(&sc->sc_tx_pending[which], next); STAILQ_INSERT_TAIL(&sc->sc_tx_active[which], data, next); usbd_xfer_set_frame_data(xfer, 0, data->buf, data->buflen); RSU_DPRINTF(sc, RSU_DEBUG_TXDONE, "%s: submitting transfer %p\n", __func__, data); usbd_transfer_submit(xfer); break; default: data = STAILQ_FIRST(&sc->sc_tx_active[which]); if (data != NULL) { STAILQ_REMOVE_HEAD(&sc->sc_tx_active[which], next); rsu_txeof(xfer, data); rsu_freebuf(sc, data); } counter_u64_add(ic->ic_oerrors, 1); if (error != USB_ERR_CANCELLED) { usbd_xfer_set_stall(xfer); goto tr_setup; } break; } /* * XXX TODO: if the queue is low, flush out FF TX frames. * Remember to unlock the driver for now; net80211 doesn't * defer it for us. */ } static void rsu_bulk_tx_callback_be_bk(struct usb_xfer *xfer, usb_error_t error) { struct rsu_softc *sc = usbd_xfer_softc(xfer); rsu_bulk_tx_callback_sub(xfer, error, RSU_BULK_TX_BE_BK); /* This kicks the TX taskqueue */ rsu_start(sc); } static void rsu_bulk_tx_callback_vi_vo(struct usb_xfer *xfer, usb_error_t error) { struct rsu_softc *sc = usbd_xfer_softc(xfer); rsu_bulk_tx_callback_sub(xfer, error, RSU_BULK_TX_VI_VO); /* This kicks the TX taskqueue */ rsu_start(sc); } static void rsu_bulk_tx_callback_h2c(struct usb_xfer *xfer, usb_error_t error) { struct rsu_softc *sc = usbd_xfer_softc(xfer); rsu_bulk_tx_callback_sub(xfer, error, RSU_BULK_TX_H2C); /* This kicks the TX taskqueue */ rsu_start(sc); } /* * Transmit the given frame. * * This doesn't free the node or mbuf upon failure. */ static int rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, struct mbuf *m0, struct rsu_data *data) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct r92s_tx_desc *txd; uint8_t type; int prio = 0; uint8_t which; int hasqos; int xferlen; int qid; RSU_ASSERT_LOCKED(sc); wh = mtod(m0, struct ieee80211_frame *); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: data=%p, m=%p\n", __func__, data, m0); if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_encap(ni, m0); if (k == NULL) { device_printf(sc->sc_dev, "ieee80211_crypto_encap returns NULL.\n"); /* XXX we don't expect the fragmented frames */ return (ENOBUFS); } wh = mtod(m0, struct ieee80211_frame *); } /* If we have QoS then use it */ /* XXX TODO: mbuf WME/PRI versus TID? */ if (IEEE80211_QOS_HAS_SEQ(wh)) { /* Has QoS */ prio = M_WME_GETAC(m0); which = rsu_wme_ac_xfer_map[prio]; hasqos = 1; } else { /* Non-QoS TID */ /* XXX TODO: tid=0 for non-qos TID? 
*/ which = rsu_wme_ac_xfer_map[WME_AC_BE]; hasqos = 0; prio = 0; } qid = rsu_ac2qid[prio]; #if 0 switch (type) { case IEEE80211_FC0_TYPE_CTL: case IEEE80211_FC0_TYPE_MGT: which = rsu_wme_ac_xfer_map[WME_AC_VO]; break; default: which = rsu_wme_ac_xfer_map[M_WME_GETAC(m0)]; break; } hasqos = 0; #endif RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: pri=%d, which=%d, hasqos=%d\n", __func__, prio, which, hasqos); /* Fill Tx descriptor. */ txd = (struct r92s_tx_desc *)data->buf; memset(txd, 0, sizeof(*txd)); txd->txdw0 |= htole32( SM(R92S_TXDW0_PKTLEN, m0->m_pkthdr.len) | SM(R92S_TXDW0_OFFSET, sizeof(*txd)) | R92S_TXDW0_OWN | R92S_TXDW0_FSG | R92S_TXDW0_LSG); txd->txdw1 |= htole32( SM(R92S_TXDW1_MACID, R92S_MACID_BSS) | SM(R92S_TXDW1_QSEL, qid)); if (!hasqos) txd->txdw1 |= htole32(R92S_TXDW1_NONQOS); #ifdef notyet if (k != NULL) { switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_WEP: cipher = R92S_TXDW1_CIPHER_WEP; break; case IEEE80211_CIPHER_TKIP: cipher = R92S_TXDW1_CIPHER_TKIP; break; case IEEE80211_CIPHER_AES_CCM: cipher = R92S_TXDW1_CIPHER_AES; break; default: cipher = R92S_TXDW1_CIPHER_NONE; } txd->txdw1 |= htole32( SM(R92S_TXDW1_CIPHER, cipher) | SM(R92S_TXDW1_KEYIDX, k->k_id)); } #endif /* XXX todo: set AGGEN bit if appropriate? */ txd->txdw2 |= htole32(R92S_TXDW2_BK); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) txd->txdw2 |= htole32(R92S_TXDW2_BMCAST); /* * Firmware will use and increment the sequence number for the * specified priority. */ txd->txdw3 |= htole32(SM(R92S_TXDW3_SEQ, prio)); if (ieee80211_radiotap_active_vap(vap)) { struct rsu_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_chan_freq = htole16(ic->ic_curchan->ic_freq); tap->wt_chan_flags = htole16(ic->ic_curchan->ic_flags); ieee80211_radiotap_tx(vap, m0); } xferlen = sizeof(*txd) + m0->m_pkthdr.len; m_copydata(m0, 0, m0->m_pkthdr.len, (caddr_t)&txd[1]); data->buflen = xferlen; data->ni = ni; data->m = m0; STAILQ_INSERT_TAIL(&sc->sc_tx_pending[which], data, next); /* start transfer, if any */ usbd_transfer_start(sc->sc_xfer[which]); return (0); } static int rsu_transmit(struct ieee80211com *ic, struct mbuf *m) { struct rsu_softc *sc = ic->ic_softc; int error; RSU_LOCK(sc); if (!sc->sc_running) { RSU_UNLOCK(sc); return (ENXIO); } /* * XXX TODO: ensure that we treat 'm' as a list of frames * to transmit! 
*/ error = mbufq_enqueue(&sc->sc_snd, m); if (error) { RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: mbufq_enqueue failed (%d)\n", __func__, error); RSU_UNLOCK(sc); return (error); } RSU_UNLOCK(sc); /* This kicks the TX taskqueue */ rsu_start(sc); return (0); } static void rsu_drain_mbufq(struct rsu_softc *sc) { struct mbuf *m; struct ieee80211_node *ni; RSU_ASSERT_LOCKED(sc); while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; m->m_pkthdr.rcvif = NULL; ieee80211_free_node(ni); m_freem(m); } } static void _rsu_start(struct rsu_softc *sc) { struct ieee80211_node *ni; struct rsu_data *bf; struct mbuf *m; RSU_ASSERT_LOCKED(sc); while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) { bf = rsu_getbuf(sc); if (bf == NULL) { RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: failed to get buffer\n", __func__); mbufq_prepend(&sc->sc_snd, m); break; } ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; m->m_pkthdr.rcvif = NULL; if (rsu_tx_start(sc, ni, m, bf) != 0) { RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: failed to transmit\n", __func__); if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); rsu_freebuf(sc, bf); ieee80211_free_node(ni); m_freem(m); break; } } } static void rsu_start(struct rsu_softc *sc) { taskqueue_enqueue(taskqueue_thread, &sc->tx_task); } static void rsu_parent(struct ieee80211com *ic) { struct rsu_softc *sc = ic->ic_softc; int startall = 0; RSU_LOCK(sc); if (ic->ic_nrunning > 0) { if (!sc->sc_running) { rsu_init(sc); startall = 1; } } else if (sc->sc_running) rsu_stop(sc); RSU_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } /* * Power on sequence for A-cut adapters. */ static void rsu_power_on_acut(struct rsu_softc *sc) { uint32_t reg; rsu_write_1(sc, R92S_SPS0_CTRL + 1, 0x53); rsu_write_1(sc, R92S_SPS0_CTRL + 0, 0x57); /* Enable AFE macro block's bandgap and Mbias. */ rsu_write_1(sc, R92S_AFE_MISC, rsu_read_1(sc, R92S_AFE_MISC) | R92S_AFE_MISC_BGEN | R92S_AFE_MISC_MBEN); /* Enable LDOA15 block. */ rsu_write_1(sc, R92S_LDOA15_CTRL, rsu_read_1(sc, R92S_LDOA15_CTRL) | R92S_LDA15_EN); rsu_write_1(sc, R92S_SPS1_CTRL, rsu_read_1(sc, R92S_SPS1_CTRL) | R92S_SPS1_LDEN); rsu_ms_delay(sc, 2000); /* Enable switch regulator block. */ rsu_write_1(sc, R92S_SPS1_CTRL, rsu_read_1(sc, R92S_SPS1_CTRL) | R92S_SPS1_SWEN); rsu_write_4(sc, R92S_SPS1_CTRL, 0x00a7b267); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, rsu_read_1(sc, R92S_SYS_ISO_CTRL + 1) | 0x08); rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x20); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, rsu_read_1(sc, R92S_SYS_ISO_CTRL + 1) & ~0x90); /* Enable AFE clock. */ rsu_write_1(sc, R92S_AFE_XTAL_CTRL + 1, rsu_read_1(sc, R92S_AFE_XTAL_CTRL + 1) & ~0x04); /* Enable AFE PLL macro block. */ rsu_write_1(sc, R92S_AFE_PLL_CTRL, rsu_read_1(sc, R92S_AFE_PLL_CTRL) | 0x11); /* Attach AFE PLL to MACTOP/BB. */ rsu_write_1(sc, R92S_SYS_ISO_CTRL, rsu_read_1(sc, R92S_SYS_ISO_CTRL) & ~0x11); /* Switch to 40MHz clock instead of 80MHz. */ rsu_write_2(sc, R92S_SYS_CLKR, rsu_read_2(sc, R92S_SYS_CLKR) & ~R92S_SYS_CLKSEL); /* Enable MAC clock. */ rsu_write_2(sc, R92S_SYS_CLKR, rsu_read_2(sc, R92S_SYS_CLKR) | R92S_MAC_CLK_EN | R92S_SYS_CLK_EN); rsu_write_1(sc, R92S_PMC_FSM, 0x02); /* Enable digital core and IOREG R/W. */ rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x08); rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x80); /* Switch the control path to firmware.
*/ reg = rsu_read_2(sc, R92S_SYS_CLKR); reg = (reg & ~R92S_SWHW_SEL) | R92S_FWHW_SEL; rsu_write_2(sc, R92S_SYS_CLKR, reg); rsu_write_2(sc, R92S_CR, 0x37fc); /* Fix USB RX FIFO issue. */ rsu_write_1(sc, 0xfe5c, rsu_read_1(sc, 0xfe5c) | 0x80); rsu_write_1(sc, 0x00ab, rsu_read_1(sc, 0x00ab) | 0xc0); rsu_write_1(sc, R92S_SYS_CLKR, rsu_read_1(sc, R92S_SYS_CLKR) & ~R92S_SYS_CPU_CLKSEL); } /* * Power on sequence for B-cut and C-cut adapters. */ static void rsu_power_on_bcut(struct rsu_softc *sc) { uint32_t reg; int ntries; /* Prevent eFuse leakage. */ rsu_write_1(sc, 0x37, 0xb0); rsu_ms_delay(sc, 10); rsu_write_1(sc, 0x37, 0x30); /* Switch the control path to hardware. */ reg = rsu_read_2(sc, R92S_SYS_CLKR); if (reg & R92S_FWHW_SEL) { rsu_write_2(sc, R92S_SYS_CLKR, reg & ~(R92S_SWHW_SEL | R92S_FWHW_SEL)); } rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) & ~0x8c); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_SPS0_CTRL + 1, 0x53); rsu_write_1(sc, R92S_SPS0_CTRL + 0, 0x57); reg = rsu_read_1(sc, R92S_AFE_MISC); rsu_write_1(sc, R92S_AFE_MISC, reg | R92S_AFE_MISC_BGEN); rsu_write_1(sc, R92S_AFE_MISC, reg | R92S_AFE_MISC_BGEN | R92S_AFE_MISC_MBEN | R92S_AFE_MISC_I32_EN); /* Enable PLL. */ rsu_write_1(sc, R92S_LDOA15_CTRL, rsu_read_1(sc, R92S_LDOA15_CTRL) | R92S_LDA15_EN); rsu_write_1(sc, R92S_LDOV12D_CTRL, rsu_read_1(sc, R92S_LDOV12D_CTRL) | R92S_LDV12_EN); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, rsu_read_1(sc, R92S_SYS_ISO_CTRL + 1) | 0x08); rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x20); /* Support 64KB IMEM. */ rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, rsu_read_1(sc, R92S_SYS_ISO_CTRL + 1) & ~0x97); /* Enable AFE clock. */ rsu_write_1(sc, R92S_AFE_XTAL_CTRL + 1, rsu_read_1(sc, R92S_AFE_XTAL_CTRL + 1) & ~0x04); /* Enable AFE PLL macro block. */ reg = rsu_read_1(sc, R92S_AFE_PLL_CTRL); rsu_write_1(sc, R92S_AFE_PLL_CTRL, reg | 0x11); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_AFE_PLL_CTRL, reg | 0x51); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_AFE_PLL_CTRL, reg | 0x11); rsu_ms_delay(sc, 1); /* Attach AFE PLL to MACTOP/BB. */ rsu_write_1(sc, R92S_SYS_ISO_CTRL, rsu_read_1(sc, R92S_SYS_ISO_CTRL) & ~0x11); /* Switch to 40MHz clock. */ rsu_write_1(sc, R92S_SYS_CLKR, 0x00); /* Disable CPU clock and 80MHz SSC. */ rsu_write_1(sc, R92S_SYS_CLKR, rsu_read_1(sc, R92S_SYS_CLKR) | 0xa0); /* Enable MAC clock. */ rsu_write_2(sc, R92S_SYS_CLKR, rsu_read_2(sc, R92S_SYS_CLKR) | R92S_MAC_CLK_EN | R92S_SYS_CLK_EN); rsu_write_1(sc, R92S_PMC_FSM, 0x02); /* Enable digital core and IOREG R/W. */ rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x08); rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, rsu_read_1(sc, R92S_SYS_FUNC_EN + 1) | 0x80); /* Switch the control path to firmware. */ reg = rsu_read_2(sc, R92S_SYS_CLKR); reg = (reg & ~R92S_SWHW_SEL) | R92S_FWHW_SEL; rsu_write_2(sc, R92S_SYS_CLKR, reg); rsu_write_2(sc, R92S_CR, 0x37fc); /* Fix USB RX FIFO issue. */ rsu_write_1(sc, 0xfe5c, rsu_read_1(sc, 0xfe5c) | 0x80); rsu_write_1(sc, R92S_SYS_CLKR, rsu_read_1(sc, R92S_SYS_CLKR) & ~R92S_SYS_CPU_CLKSEL); rsu_write_1(sc, 0xfe1c, 0x80); /* Make sure TxDMA is ready to download firmware. */ for (ntries = 0; ntries < 20; ntries++) { reg = rsu_read_1(sc, R92S_TCR); if ((reg & (R92S_TCR_IMEM_CHK_RPT | R92S_TCR_EMEM_CHK_RPT)) == (R92S_TCR_IMEM_CHK_RPT | R92S_TCR_EMEM_CHK_RPT)) break; rsu_ms_delay(sc, 1); } if (ntries == 20) { RSU_DPRINTF(sc, RSU_DEBUG_RESET | RSU_DEBUG_TX, "%s: TxDMA is not ready\n", __func__); /* Reset TxDMA. 
*/ reg = rsu_read_1(sc, R92S_CR); rsu_write_1(sc, R92S_CR, reg & ~R92S_CR_TXDMA_EN); rsu_ms_delay(sc, 1); rsu_write_1(sc, R92S_CR, reg | R92S_CR_TXDMA_EN); } } static void rsu_power_off(struct rsu_softc *sc) { /* Turn RF off. */ rsu_write_1(sc, R92S_RF_CTRL, 0x00); rsu_ms_delay(sc, 5); /* Turn MAC off. */ /* Switch control path. */ rsu_write_1(sc, R92S_SYS_CLKR + 1, 0x38); /* Reset MACTOP. */ rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, 0x70); rsu_write_1(sc, R92S_PMC_FSM, 0x06); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 0, 0xf9); rsu_write_1(sc, R92S_SYS_ISO_CTRL + 1, 0xe8); /* Disable AFE PLL. */ rsu_write_1(sc, R92S_AFE_PLL_CTRL, 0x00); /* Disable A15V. */ rsu_write_1(sc, R92S_LDOA15_CTRL, 0x54); /* Disable eFuse 1.2V. */ rsu_write_1(sc, R92S_SYS_FUNC_EN + 1, 0x50); rsu_write_1(sc, R92S_LDOV12D_CTRL, 0x24); /* Enable AFE macro block's bandgap and Mbias. */ rsu_write_1(sc, R92S_AFE_MISC, 0x30); /* Disable 1.6V LDO. */ rsu_write_1(sc, R92S_SPS0_CTRL + 0, 0x56); rsu_write_1(sc, R92S_SPS0_CTRL + 1, 0x43); /* Firmware - tell it to switch things off */ (void) rsu_set_fw_power_state(sc, RSU_PWR_OFF); } static int rsu_fw_loadsection(struct rsu_softc *sc, const uint8_t *buf, int len) { const uint8_t which = rsu_wme_ac_xfer_map[WME_AC_VO]; struct rsu_data *data; struct r92s_tx_desc *txd; int mlen; while (len > 0) { data = rsu_getbuf(sc); if (data == NULL) return (ENOMEM); txd = (struct r92s_tx_desc *)data->buf; memset(txd, 0, sizeof(*txd)); if (len <= RSU_TXBUFSZ - sizeof(*txd)) { /* Last chunk. */ txd->txdw0 |= htole32(R92S_TXDW0_LINIP); mlen = len; } else mlen = RSU_TXBUFSZ - sizeof(*txd); txd->txdw0 |= htole32(SM(R92S_TXDW0_PKTLEN, mlen)); memcpy(&txd[1], buf, mlen); data->buflen = sizeof(*txd) + mlen; RSU_DPRINTF(sc, RSU_DEBUG_TX | RSU_DEBUG_FW | RSU_DEBUG_RESET, "%s: starting transfer %p\n", __func__, data); STAILQ_INSERT_TAIL(&sc->sc_tx_pending[which], data, next); buf += mlen; len -= mlen; } usbd_transfer_start(sc->sc_xfer[which]); return (0); } static int rsu_load_firmware(struct rsu_softc *sc) { const struct r92s_fw_hdr *hdr; struct r92s_fw_priv *dmem; struct ieee80211com *ic = &sc->sc_ic; const uint8_t *imem, *emem; int imemsz, ememsz; const struct firmware *fw; size_t size; uint32_t reg; int ntries, error; if (rsu_read_1(sc, R92S_TCR) & R92S_TCR_FWRDY) { RSU_DPRINTF(sc, RSU_DEBUG_ANY, "%s: Firmware already loaded\n", __func__); return (0); } RSU_UNLOCK(sc); /* Read firmware image from the filesystem. */ if ((fw = firmware_get("rsu-rtl8712fw")) == NULL) { device_printf(sc->sc_dev, "%s: failed to load firmware file rsu-rtl8712fw\n", __func__); RSU_LOCK(sc); return (ENXIO); } RSU_LOCK(sc); size = fw->datasize; if (size < sizeof(*hdr)) { device_printf(sc->sc_dev, "firmware too short\n"); error = EINVAL; goto fail; } hdr = (const struct r92s_fw_hdr *)fw->data; if (hdr->signature != htole16(0x8712) && hdr->signature != htole16(0x8192)) { device_printf(sc->sc_dev, "invalid firmware signature 0x%x\n", le16toh(hdr->signature)); error = EINVAL; goto fail; } DPRINTF("FW V%d %02x-%02x %02x:%02x\n", le16toh(hdr->version), hdr->month, hdr->day, hdr->hour, hdr->minute); /* Make sure that driver and firmware are in sync. */ if (hdr->privsz != htole32(sizeof(*dmem))) { device_printf(sc->sc_dev, "unsupported firmware image\n"); error = EINVAL; goto fail; } /* Get FW section sizes. */ imemsz = le32toh(hdr->imemsz); ememsz = le32toh(hdr->sramsz); /* Check that all FW sections fit in image.
*/ if (size < sizeof(*hdr) + imemsz + ememsz) { device_printf(sc->sc_dev, "firmware too short\n"); error = EINVAL; goto fail; } imem = (const uint8_t *)&hdr[1]; emem = imem + imemsz; /* Load IMEM section. */ error = rsu_fw_loadsection(sc, imem, imemsz); if (error != 0) { device_printf(sc->sc_dev, "could not load firmware section %s\n", "IMEM"); goto fail; } /* Wait for load to complete. */ for (ntries = 0; ntries != 50; ntries++) { rsu_ms_delay(sc, 10); reg = rsu_read_1(sc, R92S_TCR); if (reg & R92S_TCR_IMEM_CODE_DONE) break; } if (ntries == 50) { device_printf(sc->sc_dev, "timeout waiting for IMEM transfer\n"); error = ETIMEDOUT; goto fail; } /* Load EMEM section. */ error = rsu_fw_loadsection(sc, emem, ememsz); if (error != 0) { device_printf(sc->sc_dev, "could not load firmware section %s\n", "EMEM"); goto fail; } /* Wait for load to complete. */ for (ntries = 0; ntries != 50; ntries++) { rsu_ms_delay(sc, 10); reg = rsu_read_2(sc, R92S_TCR); if (reg & R92S_TCR_EMEM_CODE_DONE) break; } if (ntries == 50) { device_printf(sc->sc_dev, "timeout waiting for EMEM transfer\n"); error = ETIMEDOUT; goto fail; } /* Enable CPU. */ rsu_write_1(sc, R92S_SYS_CLKR, rsu_read_1(sc, R92S_SYS_CLKR) | R92S_SYS_CPU_CLKSEL); if (!(rsu_read_1(sc, R92S_SYS_CLKR) & R92S_SYS_CPU_CLKSEL)) { device_printf(sc->sc_dev, "could not enable system clock\n"); error = EIO; goto fail; } rsu_write_2(sc, R92S_SYS_FUNC_EN, rsu_read_2(sc, R92S_SYS_FUNC_EN) | R92S_FEN_CPUEN); if (!(rsu_read_2(sc, R92S_SYS_FUNC_EN) & R92S_FEN_CPUEN)) { device_printf(sc->sc_dev, "could not enable microcontroller\n"); error = EIO; goto fail; } /* Wait for CPU to initialize. */ for (ntries = 0; ntries < 100; ntries++) { if (rsu_read_1(sc, R92S_TCR) & R92S_TCR_IMEM_RDY) break; rsu_ms_delay(sc, 1); } if (ntries == 100) { device_printf(sc->sc_dev, "timeout waiting for microcontroller\n"); error = ETIMEDOUT; goto fail; } /* Update DMEM section before loading. */ dmem = __DECONST(struct r92s_fw_priv *, &hdr->priv); memset(dmem, 0, sizeof(*dmem)); dmem->hci_sel = R92S_HCI_SEL_USB | R92S_HCI_SEL_8172; dmem->nendpoints = sc->sc_nendpoints; dmem->chip_version = sc->cut; dmem->rf_config = sc->sc_rftype; dmem->vcs_type = R92S_VCS_TYPE_AUTO; dmem->vcs_mode = R92S_VCS_MODE_RTS_CTS; dmem->turbo_mode = 0; dmem->bw40_en = !! (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40); dmem->amsdu2ampdu_en = !! (sc->sc_ht); dmem->ampdu_en = !! (sc->sc_ht); dmem->agg_offload = !! (sc->sc_ht); dmem->qos_en = 1; dmem->ps_offload = 1; dmem->lowpower_mode = 1; /* XXX TODO: configurable? */ /* Load DMEM section. */ error = rsu_fw_loadsection(sc, (uint8_t *)dmem, sizeof(*dmem)); if (error != 0) { device_printf(sc->sc_dev, "could not load firmware section %s\n", "DMEM"); goto fail; } /* Wait for load to complete. */ for (ntries = 0; ntries < 100; ntries++) { if (rsu_read_1(sc, R92S_TCR) & R92S_TCR_DMEM_CODE_DONE) break; rsu_ms_delay(sc, 1); } if (ntries == 100) { device_printf(sc->sc_dev, "timeout waiting for %s transfer\n", "DMEM"); error = ETIMEDOUT; goto fail; } /* Wait for firmware readiness. 
*/ for (ntries = 0; ntries < 60; ntries++) { if (!(rsu_read_1(sc, R92S_TCR) & R92S_TCR_FWRDY)) break; rsu_ms_delay(sc, 1); } if (ntries == 60) { device_printf(sc->sc_dev, "timeout waiting for firmware readiness\n"); error = ETIMEDOUT; goto fail; } fail: firmware_put(fw, FIRMWARE_UNLOAD); return (error); } static int rsu_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct rsu_softc *sc = ic->ic_softc; struct rsu_data *bf; /* prevent management frames from being sent if we're not ready */ if (!sc->sc_running) { m_freem(m); return (ENETDOWN); } RSU_LOCK(sc); bf = rsu_getbuf(sc); if (bf == NULL) { m_freem(m); RSU_UNLOCK(sc); return (ENOBUFS); } if (rsu_tx_start(sc, ni, m, bf) != 0) { m_freem(m); rsu_freebuf(sc, bf); RSU_UNLOCK(sc); return (EIO); } RSU_UNLOCK(sc); return (0); } static void rsu_init(struct rsu_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint8_t macaddr[IEEE80211_ADDR_LEN]; int error; int i; RSU_ASSERT_LOCKED(sc); /* Ensure the mbuf queue is drained */ rsu_drain_mbufq(sc); /* Init host async commands ring. */ sc->cmdq.cur = sc->cmdq.next = sc->cmdq.queued = 0; /* Reset power management state. */ rsu_write_1(sc, R92S_USB_HRPWM, 0); /* Power on adapter. */ if (sc->cut == 1) rsu_power_on_acut(sc); else rsu_power_on_bcut(sc); /* Load firmware. */ error = rsu_load_firmware(sc); if (error != 0) goto fail; /* Enable Rx TCP checksum offload. */ rsu_write_4(sc, R92S_RCR, rsu_read_4(sc, R92S_RCR) | 0x04000000); /* Append PHY status. */ rsu_write_4(sc, R92S_RCR, rsu_read_4(sc, R92S_RCR) | 0x02000000); rsu_write_4(sc, R92S_CR, rsu_read_4(sc, R92S_CR) & ~0xff000000); /* Use 128 bytes pages. */ rsu_write_1(sc, 0x00b5, rsu_read_1(sc, 0x00b5) | 0x01); /* Enable USB Rx aggregation. */ rsu_write_1(sc, 0x00bd, rsu_read_1(sc, 0x00bd) | 0x80); /* Set USB Rx aggregation threshold. */ rsu_write_1(sc, 0x00d9, 0x01); /* Set USB Rx aggregation timeout (1.7ms/4). */ rsu_write_1(sc, 0xfe5b, 0x04); /* Fix USB Rx FIFO issue. */ rsu_write_1(sc, 0xfe5c, rsu_read_1(sc, 0xfe5c) | 0x80); /* Set MAC address. */ IEEE80211_ADDR_COPY(macaddr, vap ? vap->iv_myaddr : ic->ic_macaddr); rsu_write_region_1(sc, R92S_MACID, macaddr, IEEE80211_ADDR_LEN); /* It really takes 1.5 seconds for the firmware to boot: */ rsu_ms_delay(sc, 2000); RSU_DPRINTF(sc, RSU_DEBUG_RESET, "%s: setting MAC address to %s\n", __func__, ether_sprintf(macaddr)); error = rsu_fw_cmd(sc, R92S_CMD_SET_MAC_ADDRESS, macaddr, IEEE80211_ADDR_LEN); if (error != 0) { device_printf(sc->sc_dev, "could not set MAC address\n"); goto fail; } + + /* Setup multicast filter (must be done after firmware loading). */ + rsu_set_multi(sc); /* Set PS mode fully active */ error = rsu_set_fw_power_state(sc, RSU_PWR_ACTIVE); if (error != 0) { device_printf(sc->sc_dev, "could not set PS mode\n"); goto fail; } sc->sc_extra_scan = 0; usbd_transfer_start(sc->sc_xfer[RSU_BULK_RX]); /* We're ready to go. */ sc->sc_running = 1; return; fail: /* Need to stop all failed transfers, if any */ for (i = 0; i != RSU_N_TRANSFER; i++) usbd_transfer_stop(sc->sc_xfer[i]); } static void rsu_stop(struct rsu_softc *sc) { int i; RSU_ASSERT_LOCKED(sc); sc->sc_running = 0; sc->sc_calibrating = 0; taskqueue_cancel_timeout(taskqueue_thread, &sc->calib_task, NULL); taskqueue_cancel(taskqueue_thread, &sc->tx_task, NULL); /* Power off adapter. 
*/ rsu_power_off(sc); for (i = 0; i < RSU_N_TRANSFER; i++) usbd_transfer_stop(sc->sc_xfer[i]); /* Ensure the mbuf queue is drained */ rsu_drain_mbufq(sc); } /* * Note: usb_pause_mtx() actually releases the mutex before calling pause(), * which breaks any kind of driver serialisation. */ static void rsu_ms_delay(struct rsu_softc *sc, int ms) { //usb_pause_mtx(&sc->sc_mtx, hz / 1000); DELAY(ms * 1000); } Index: projects/clang391-import/sys/dev/usb/wlan/if_rsureg.h =================================================================== --- projects/clang391-import/sys/dev/usb/wlan/if_rsureg.h (revision 309262) +++ projects/clang391-import/sys/dev/usb/wlan/if_rsureg.h (revision 309263) @@ -1,802 +1,803 @@ /*- * Copyright (c) 2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $OpenBSD: if_rsureg.h,v 1.3 2013/04/15 09:23:01 mglocker Exp $ * $FreeBSD$ */ /* USB Requests. */ #define R92S_REQ_REGS 0x05 /* * MAC registers. */ #define R92S_SYSCFG 0x0000 #define R92S_SYS_ISO_CTRL (R92S_SYSCFG + 0x000) #define R92S_SYS_FUNC_EN (R92S_SYSCFG + 0x002) #define R92S_PMC_FSM (R92S_SYSCFG + 0x004) #define R92S_SYS_CLKR (R92S_SYSCFG + 0x008) #define R92S_EE_9346CR (R92S_SYSCFG + 0x00a) #define R92S_AFE_MISC (R92S_SYSCFG + 0x010) #define R92S_SPS0_CTRL (R92S_SYSCFG + 0x011) #define R92S_SPS1_CTRL (R92S_SYSCFG + 0x018) #define R92S_RF_CTRL (R92S_SYSCFG + 0x01f) #define R92S_LDOA15_CTRL (R92S_SYSCFG + 0x020) #define R92S_LDOV12D_CTRL (R92S_SYSCFG + 0x021) #define R92S_AFE_XTAL_CTRL (R92S_SYSCFG + 0x026) #define R92S_AFE_PLL_CTRL (R92S_SYSCFG + 0x028) #define R92S_EFUSE_CTRL (R92S_SYSCFG + 0x030) #define R92S_EFUSE_TEST (R92S_SYSCFG + 0x034) #define R92S_EFUSE_CLK_CTRL (R92S_SYSCFG + 0x2f8) #define R92S_CMDCTRL 0x0040 #define R92S_CR (R92S_CMDCTRL + 0x000) #define R92S_TCR (R92S_CMDCTRL + 0x004) #define R92S_RCR (R92S_CMDCTRL + 0x008) #define R92S_MACIDSETTING 0x0050 #define R92S_MACID (R92S_MACIDSETTING + 0x000) +#define R92S_MAR (R92S_MACIDSETTING + 0x010) #define R92S_GP 0x01e0 #define R92S_GPIO_CTRL (R92S_GP + 0x00c) #define R92S_GPIO_IO_SEL (R92S_GP + 0x00e) #define R92S_MAC_PINMUX_CTRL (R92S_GP + 0x011) #define R92S_IOCMD_CTRL 0x0370 #define R92S_IOCMD_DATA 0x0374 #define R92S_USB_HRPWM 0xfe58 /* Bits for R92S_SYS_FUNC_EN. */ #define R92S_FEN_CPUEN 0x0400 /* Bits for R92S_PMC_FSM. */ #define R92S_PMC_FSM_CUT_M 0x000f8000 #define R92S_PMC_FSM_CUT_S 15 /* Bits for R92S_SYS_CLKR. */ #define R92S_SYS_CLKSEL 0x0001 #define R92S_SYS_PS_CLKSEL 0x0002 #define R92S_SYS_CPU_CLKSEL 0x0004 #define R92S_MAC_CLK_EN 0x0800 #define R92S_SYS_CLK_EN 0x1000 #define R92S_SWHW_SEL 0x4000 #define R92S_FWHW_SEL 0x8000 /* Bits for R92S_EE_9346CR. */ #define R92S_9356SEL 0x10 #define R92S_EEPROM_EN 0x20 /* Bits for R92S_AFE_MISC. */ #define R92S_AFE_MISC_BGEN 0x01 #define R92S_AFE_MISC_MBEN 0x02 #define R92S_AFE_MISC_I32_EN 0x08 /* Bits for R92S_SPS1_CTRL. 
*/ #define R92S_SPS1_LDEN 0x01 #define R92S_SPS1_SWEN 0x02 /* Bits for R92S_LDOA15_CTRL. */ #define R92S_LDA15_EN 0x01 /* Bits for R92S_LDOV12D_CTRL. */ #define R92S_LDV12_EN 0x01 /* Bits for R92C_EFUSE_CTRL. */ #define R92S_EFUSE_CTRL_DATA_M 0x000000ff #define R92S_EFUSE_CTRL_DATA_S 0 #define R92S_EFUSE_CTRL_ADDR_M 0x0003ff00 #define R92S_EFUSE_CTRL_ADDR_S 8 #define R92S_EFUSE_CTRL_VALID 0x80000000 /* Bits for R92S_CR. */ #define R92S_CR_TXDMA_EN 0x10 /* Bits for R92S_TCR. */ #define R92S_TCR_IMEM_CODE_DONE 0x01 #define R92S_TCR_IMEM_CHK_RPT 0x02 #define R92S_TCR_EMEM_CODE_DONE 0x04 #define R92S_TCR_EMEM_CHK_RPT 0x08 #define R92S_TCR_DMEM_CODE_DONE 0x10 #define R92S_TCR_IMEM_RDY 0x20 #define R92S_TCR_FWRDY 0x80 /* Bits for R92S_GPIO_IO_SEL. */ #define R92S_GPIO_WPS 0x10 /* Bits for R92S_MAC_PINMUX_CTRL. */ #define R92S_GPIOSEL_GPIO_M 0x03 #define R92S_GPIOSEL_GPIO_S 0 #define R92S_GPIOSEL_GPIO_JTAG 0 #define R92S_GPIOSEL_GPIO_PHYDBG 1 #define R92S_GPIOSEL_GPIO_BT 2 #define R92S_GPIOSEL_GPIO_WLANDBG 3 #define R92S_GPIOMUX_EN 0x08 /* Bits for R92S_IOCMD_CTRL. */ #define R92S_IOCMD_CLASS_M 0xff000000 #define R92S_IOCMD_CLASS_S 24 #define R92S_IOCMD_CLASS_BB_RF 0xf0 #define R92S_IOCMD_VALUE_M 0x00ffff00 #define R92S_IOCMD_VALUE_S 8 #define R92S_IOCMD_INDEX_M 0x000000ff #define R92S_IOCMD_INDEX_S 0 #define R92S_IOCMD_INDEX_BB_READ 0 #define R92S_IOCMD_INDEX_BB_WRITE 1 #define R92S_IOCMD_INDEX_RF_READ 2 #define R92S_IOCMD_INDEX_RF_WRITE 3 /* Bits for R92S_USB_HRPWM. */ #define R92S_USB_HRPWM_PS_ALL_ON 0x04 #define R92S_USB_HRPWM_PS_ST_ACTIVE 0x08 /* * Macros to access subfields in registers. */ /* Mask and Shift (getter). */ #define MS(val, field) \ (((val) & field##_M) >> field##_S) /* Shift and Mask (setter). */ #define SM(field, val) \ (((val) << field##_S) & field##_M) /* Rewrite. */ #define RW(var, field, val) \ (((var) & ~field##_M) | SM(field, val)) /* * ROM field with RF config. */ enum { RTL8712_RFCONFIG_1T = 0x10, RTL8712_RFCONFIG_2T = 0x20, RTL8712_RFCONFIG_1R = 0x01, RTL8712_RFCONFIG_2R = 0x02, RTL8712_RFCONFIG_1T1R = 0x11, RTL8712_RFCONFIG_1T2R = 0x12, RTL8712_RFCONFIG_TURBO = 0x92, RTL8712_RFCONFIG_2T2R = 0x22 }; /* * Firmware image header. 
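* The r92s_fw_hdr defined below embeds this r92s_fw_priv block; rsu_load_firmware() rewrites it in place (hci_sel, nendpoints, vcs settings, HT flags, etc.) and uploads it to the chip as the DMEM section.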
*/ struct r92s_fw_priv { /* QWORD0 */ uint16_t signature; uint8_t hci_sel; #define R92S_HCI_SEL_PCIE 0x01 #define R92S_HCI_SEL_USB 0x02 #define R92S_HCI_SEL_SDIO 0x04 #define R92S_HCI_SEL_8172 0x10 #define R92S_HCI_SEL_AP 0x80 uint8_t chip_version; uint16_t custid; uint8_t rf_config; //0x11: 1T1R, 0x12: 1T2R, 0x92: 1T2R turbo, 0x22: 2T2R uint8_t nendpoints; /* QWORD1 */ uint32_t regulatory; uint8_t rfintfs; uint8_t def_nettype; uint8_t turbo_mode; uint8_t lowpower_mode; /* QWORD2 */ uint8_t lbk_mode; uint8_t mp_mode; uint8_t vcs_type; #define R92S_VCS_TYPE_DISABLE 0 #define R92S_VCS_TYPE_ENABLE 1 #define R92S_VCS_TYPE_AUTO 2 uint8_t vcs_mode; #define R92S_VCS_MODE_NONE 0 #define R92S_VCS_MODE_RTS_CTS 1 #define R92S_VCS_MODE_CTS2SELF 2 uint32_t reserved1; /* QWORD3 */ uint8_t qos_en; uint8_t bw40_en; uint8_t amsdu2ampdu_en; uint8_t ampdu_en; uint8_t rc_offload; uint8_t agg_offload; uint16_t reserved2; /* QWORD4 */ uint8_t beacon_offload; uint8_t mlme_offload; uint8_t hwpc_offload; uint8_t tcpcsum_offload; uint8_t tcp_offload; uint8_t ps_offload; uint8_t wwlan_offload; uint8_t reserved3; /* QWORD5 */ uint16_t tcp_tx_len; uint16_t tcp_rx_len; uint32_t reserved4; } __packed; struct r92s_fw_hdr { uint16_t signature; uint16_t version; uint32_t dmemsz; uint32_t imemsz; uint32_t sramsz; uint32_t privsz; uint16_t efuse_addr; uint16_t h2c_resp_addr; uint32_t svnrev; uint8_t month; uint8_t day; uint8_t hour; uint8_t minute; struct r92s_fw_priv priv; } __packed; /* Structure for FW commands and FW events notifications. */ struct r92s_fw_cmd_hdr { uint16_t len; uint8_t code; uint8_t seq; #define R92S_FW_CMD_MORE 0x80 uint32_t reserved; } __packed; /* FW commands codes. */ #define R92S_CMD_READ_MACREG 0 #define R92S_CMD_WRITE_MACREG 1 #define R92S_CMD_READ_BBREG 2 #define R92S_CMD_WRITE_BBREG 3 #define R92S_CMD_READ_RFREG 4 #define R92S_CMD_WRITE_RFREG 5 #define R92S_CMD_READ_EEPROM 6 #define R92S_CMD_WRITE_EEPROM 7 #define R92S_CMD_READ_EFUSE 8 #define R92S_CMD_WRITE_EFUSE 9 #define R92S_CMD_READ_CAM 10 #define R92S_CMD_WRITE_CAM 11 #define R92S_CMD_SET_BCNITV 12 #define R92S_CMD_SET_MBIDCFG 13 #define R92S_CMD_JOIN_BSS 14 #define R92S_CMD_DISCONNECT 15 #define R92S_CMD_CREATE_BSS 16 #define R92S_CMD_SET_OPMODE 17 #define R92S_CMD_SITE_SURVEY 18 #define R92S_CMD_SET_AUTH 19 #define R92S_CMD_SET_KEY 20 #define R92S_CMD_SET_STA_KEY 21 #define R92S_CMD_SET_ASSOC_STA 22 #define R92S_CMD_DEL_ASSOC_STA 23 #define R92S_CMD_SET_STAPWRSTATE 24 #define R92S_CMD_SET_BASIC_RATE 25 #define R92S_CMD_GET_BASIC_RATE 26 #define R92S_CMD_SET_DATA_RATE 27 #define R92S_CMD_GET_DATA_RATE 28 #define R92S_CMD_SET_PHY_INFO 29 #define R92S_CMD_GET_PHY_INFO 30 #define R92S_CMD_SET_PHY 31 #define R92S_CMD_GET_PHY 32 #define R92S_CMD_READ_RSSI 33 #define R92S_CMD_READ_GAIN 34 #define R92S_CMD_SET_ATIM 35 #define R92S_CMD_SET_PWR_MODE 36 #define R92S_CMD_JOIN_BSS_RPT 37 #define R92S_CMD_SET_RA_TABLE 38 #define R92S_CMD_GET_RA_TABLE 39 #define R92S_CMD_GET_CCX_REPORT 40 #define R92S_CMD_GET_DTM_REPORT 41 #define R92S_CMD_GET_TXRATE_STATS 42 #define R92S_CMD_SET_USB_SUSPEND 43 #define R92S_CMD_SET_H2C_LBK 44 #define R92S_CMD_ADDBA_REQ 45 #define R92S_CMD_SET_CHANNEL 46 #define R92S_CMD_SET_TXPOWER 47 #define R92S_CMD_SWITCH_ANTENNA 48 #define R92S_CMD_SET_CRYSTAL_CAL 49 #define R92S_CMD_SET_SINGLE_CARRIER_TX 50 #define R92S_CMD_SET_SINGLE_TONE_TX 51 #define R92S_CMD_SET_CARRIER_SUPPR_TX 52 #define R92S_CMD_SET_CONTINUOUS_TX 53 #define R92S_CMD_SWITCH_BANDWIDTH 54 #define R92S_CMD_TX_BEACON 55 #define R92S_CMD_SET_POWER_TRACKING 56 
#define R92S_CMD_AMSDU_TO_AMPDU 57 #define R92S_CMD_SET_MAC_ADDRESS 58 #define R92S_CMD_GET_H2C_LBK 59 #define R92S_CMD_SET_PBREQ_IE 60 #define R92S_CMD_SET_ASSOCREQ_IE 61 #define R92S_CMD_SET_PBRESP_IE 62 #define R92S_CMD_SET_ASSOCRESP_IE 63 #define R92S_CMD_GET_CURDATARATE 64 #define R92S_CMD_GET_TXRETRY_CNT 65 #define R92S_CMD_GET_RXRETRY_CNT 66 #define R92S_CMD_GET_BCNOK_CNT 67 #define R92S_CMD_GET_BCNERR_CNT 68 #define R92S_CMD_GET_CURTXPWR_LEVEL 69 #define R92S_CMD_SET_DIG 70 #define R92S_CMD_SET_RA 71 #define R92S_CMD_SET_PT 72 #define R92S_CMD_READ_TSSI 73 /* FW events notifications codes. */ #define R92S_EVT_READ_MACREG 0 #define R92S_EVT_READ_BBREG 1 #define R92S_EVT_READ_RFREG 2 #define R92S_EVT_READ_EEPROM 3 #define R92S_EVT_READ_EFUSE 4 #define R92S_EVT_READ_CAM 5 #define R92S_EVT_GET_BASICRATE 6 #define R92S_EVT_GET_DATARATE 7 #define R92S_EVT_SURVEY 8 #define R92S_EVT_SURVEY_DONE 9 #define R92S_EVT_JOIN_BSS 10 #define R92S_EVT_ADD_STA 11 #define R92S_EVT_DEL_STA 12 #define R92S_EVT_ATIM_DONE 13 #define R92S_EVT_TX_REPORT 14 #define R92S_EVT_CCX_REPORT 15 #define R92S_EVT_DTM_REPORT 16 #define R92S_EVT_TXRATE_STATS 17 #define R92S_EVT_C2H_LBK 18 #define R92S_EVT_FWDBG 19 #define R92S_EVT_C2H_FEEDBACK 20 #define R92S_EVT_ADDBA 21 #define R92S_EVT_C2H_BCN 22 #define R92S_EVT_PWR_STATE 23 #define R92S_EVT_WPS_PBC 24 #define R92S_EVT_ADDBA_REQ_REPORT 25 /* Structure for R92S_CMD_SITE_SURVEY. */ struct r92s_fw_cmd_sitesurvey { uint32_t active; uint32_t limit; uint32_t ssidlen; uint8_t ssid[32 + 1]; } __packed; /* Structure for R92S_CMD_SET_AUTH. */ struct r92s_fw_cmd_auth { uint8_t mode; #define R92S_AUTHMODE_OPEN 0 #define R92S_AUTHMODE_SHARED 1 #define R92S_AUTHMODE_WPA 2 uint8_t dot1x; } __packed; /* Structure for R92S_CMD_SET_KEY. */ struct r92s_fw_cmd_set_key { uint8_t algo; #define R92S_KEY_ALGO_NONE 0 #define R92S_KEY_ALGO_WEP40 1 #define R92S_KEY_ALGO_TKIP 2 #define R92S_KEY_ALGO_TKIP_MMIC 3 #define R92S_KEY_ALGO_AES 4 #define R92S_KEY_ALGO_WEP104 5 uint8_t id; uint8_t grpkey; uint8_t key[16]; } __packed; /* Structures for R92S_EVENT_SURVEY/R92S_CMD_JOIN_BSS. */ /* NDIS_802_11_SSID. */ struct ndis_802_11_ssid { uint32_t ssidlen; uint8_t ssid[32]; } __packed; /* NDIS_802_11_CONFIGURATION_FH. */ struct ndis_802_11_configuration_fh { uint32_t len; uint32_t hoppattern; uint32_t hopset; uint32_t dwelltime; } __packed; /* NDIS_802_11_CONFIGURATION. */ struct ndis_802_11_configuration { uint32_t len; uint32_t bintval; uint32_t atim; uint32_t dsconfig; struct ndis_802_11_configuration_fh fhconfig; } __packed; /* NDIS_WLAN_BSSID_EX. */ struct ndis_wlan_bssid_ex { uint32_t len; uint8_t macaddr[IEEE80211_ADDR_LEN]; uint8_t reserved[2]; struct ndis_802_11_ssid ssid; uint32_t privacy; int32_t rssi; uint32_t networktype; #define NDIS802_11FH 0 #define NDIS802_11DS 1 #define NDIS802_11OFDM5 2 #define NDIS802_11OFDM24 3 #define NDIS802_11AUTOMODE 4 struct ndis_802_11_configuration config; uint32_t inframode; #define NDIS802_11IBSS 0 #define NDIS802_11INFRASTRUCTURE 1 #define NDIS802_11AUTOUNKNOWN 2 #define NDIS802_11MONITOR 3 #define NDIS802_11APMODE 4 uint8_t supprates[16]; uint32_t ieslen; /* Followed by ``ieslen'' bytes. */ } __packed; /* NDIS_802_11_FIXED_IEs. */ struct ndis_802_11_fixed_ies { uint8_t tstamp[8]; uint16_t bintval; uint16_t capabilities; } __packed; /* Structure for R92S_CMD_SET_PWR_MODE. 
*/ struct r92s_set_pwr_mode { uint8_t mode; #define R92S_PS_MODE_ACTIVE 0 #define R92S_PS_MODE_MIN 1 #define R92S_PS_MODE_MAX 2 #define R92S_PS_MODE_DTIM 3 #define R92S_PS_MODE_VOIP 4 #define R92S_PS_MODE_UAPSD_WMM 5 #define R92S_PS_MODE_UAPSD 6 #define R92S_PS_MODE_IBSS 7 #define R92S_PS_MODE_WWLAN 8 #define R92S_PS_MODE_RADIOOFF 9 #define R92S_PS_MODE_DISABLE 10 uint8_t low_traffic_en; uint8_t lpnav_en; uint8_t rf_low_snr_en; uint8_t dps_en; uint8_t bcn_rx_en; uint8_t bcn_pass_cnt; uint8_t bcn_to; uint16_t bcn_itv; uint8_t app_itv; uint8_t awake_bcn_itv; uint8_t smart_ps; uint8_t bcn_pass_time; } __packed; /* Structure for event R92S_EVENT_JOIN_BSS. */ struct r92s_event_join_bss { uint32_t next; uint32_t prev; uint32_t networktype; uint32_t fixed; uint32_t lastscanned; uint32_t associd; uint32_t join_res; struct ndis_wlan_bssid_ex bss; } __packed; #define R92S_MACID_BSS 5 /* Rx MAC descriptor. */ struct r92s_rx_stat { uint32_t rxdw0; #define R92S_RXDW0_PKTLEN_M 0x00003fff #define R92S_RXDW0_PKTLEN_S 0 #define R92S_RXDW0_CRCERR 0x00004000 #define R92S_RXDW0_INFOSZ_M 0x000f0000 #define R92S_RXDW0_INFOSZ_S 16 #define R92S_RXDW0_QOS 0x00800000 #define R92S_RXDW0_SHIFT_M 0x03000000 #define R92S_RXDW0_SHIFT_S 24 #define R92S_RXDW0_DECRYPTED 0x08000000 uint32_t rxdw1; #define R92S_RXDW1_MOREFRAG 0x08000000 uint32_t rxdw2; #define R92S_RXDW2_FRAG_M 0x0000f000 #define R92S_RXDW2_FRAG_S 12 #define R92S_RXDW2_PKTCNT_M 0x00ff0000 #define R92S_RXDW2_PKTCNT_S 16 uint32_t rxdw3; #define R92S_RXDW3_RATE_M 0x0000003f #define R92S_RXDW3_RATE_S 0 #define R92S_RXDW3_TCPCHKRPT 0x00000800 #define R92S_RXDW3_IPCHKRPT 0x00001000 #define R92S_RXDW3_TCPCHKVALID 0x00002000 #define R92S_RXDW3_HTC 0x00004000 uint32_t rxdw4; uint32_t rxdw5; } __packed __aligned(4); /* Rx PHY descriptor. */ struct r92s_rx_phystat { uint32_t phydw0; uint32_t phydw1; uint32_t phydw2; uint32_t phydw3; uint32_t phydw4; uint32_t phydw5; uint32_t phydw6; uint32_t phydw7; } __packed __aligned(4); /* Rx PHY CCK descriptor. */ struct r92s_rx_cck { uint8_t adc_pwdb[4]; uint8_t sq_rpt; uint8_t agc_rpt; } __packed; /* Tx MAC descriptor. 
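* (rsu_fw_loadsection() prepends one of these descriptors to every firmware chunk it queues, setting R92S_TXDW0_PKTLEN and, on the last chunk, R92S_TXDW0_LINIP.)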
*/ struct r92s_tx_desc { uint32_t txdw0; #define R92S_TXDW0_PKTLEN_M 0x0000ffff #define R92S_TXDW0_PKTLEN_S 0 #define R92S_TXDW0_OFFSET_M 0x00ff0000 #define R92S_TXDW0_OFFSET_S 16 #define R92S_TXDW0_TYPE_M 0x03000000 #define R92S_TXDW0_TYPE_S 24 #define R92S_TXDW0_LSG 0x04000000 #define R92S_TXDW0_FSG 0x08000000 #define R92S_TXDW0_LINIP 0x10000000 #define R92S_TXDW0_OWN 0x80000000 uint32_t txdw1; #define R92S_TXDW1_MACID_M 0x0000001f #define R92S_TXDW1_MACID_S 0 #define R92S_TXDW1_MOREDATA 0x00000020 #define R92S_TXDW1_MOREFRAG 0x00000040 #define R92S_TXDW1_QSEL_M 0x00001f00 #define R92S_TXDW1_QSEL_S 8 #define R92S_TXDW1_QSEL_BE 0x03 #define R92S_TXDW1_QSEL_H2C 0x13 #define R92S_TXDW1_NONQOS 0x00010000 #define R92S_TXDW1_KEYIDX_M 0x00060000 #define R92S_TXDW1_KEYIDX_S 17 #define R92S_TXDW1_CIPHER_M 0x00c00000 #define R92S_TXDW1_CIPHER_S 22 #define R92S_TXDW1_CIPHER_WEP 1 #define R92S_TXDW1_CIPHER_TKIP 2 #define R92S_TXDW1_CIPHER_AES 3 #define R92S_TXDW1_HWPC 0x80000000 uint32_t txdw2; #define R92S_TXDW2_BMCAST 0x00000080 #define R92S_TXDW2_AGGEN 0x20000000 #define R92S_TXDW2_BK 0x40000000 uint32_t txdw3; #define R92S_TXDW3_SEQ_M 0x0fff0000 #define R92S_TXDW3_SEQ_S 16 #define R92S_TXDW3_FRAG_M 0xf0000000 #define R92S_TXDW3_FRAG_S 28 uint32_t txdw4; #define R92S_TXDW4_TXBW 0x00040000 uint32_t txdw5; #define R92S_TXDW5_DISFB 0x00008000 uint16_t ipchksum; uint16_t tcpchksum; uint16_t txbufsize; uint16_t reserved1; } __packed __aligned(4); struct r92s_add_ba_event { uint8_t mac_addr[IEEE80211_ADDR_LEN]; uint16_t ssn; uint8_t tid; }; struct r92s_add_ba_req { uint32_t tid; }; /* * Driver definitions. */ #define RSU_RX_LIST_COUNT 100 #define RSU_TX_LIST_COUNT 32 #define RSU_HOST_CMD_RING_COUNT 32 #define RSU_RXBUFSZ (8 * 1024) #define RSU_TXBUFSZ \ ((sizeof(struct r92s_tx_desc) + IEEE80211_MAX_LEN + 3) & ~3) #define RSU_TX_TIMEOUT 5000 /* ms */ #define RSU_CMD_TIMEOUT 2000 /* ms */ /* Queue ids (used by soft only). */ #define RSU_QID_BCN 0 #define RSU_QID_MGT 1 #define RSU_QID_BMC 2 #define RSU_QID_VO 3 #define RSU_QID_VI 4 #define RSU_QID_BE 5 #define RSU_QID_BK 6 #define RSU_QID_RXOFF 7 #define RSU_QID_H2C 8 #define RSU_QID_C2H 9 /* Map AC to queue id. */ static const uint8_t rsu_ac2qid[WME_NUM_AC] = { RSU_QID_BE, RSU_QID_BK, RSU_QID_VI, RSU_QID_VO }; /* Pipe index to endpoint address mapping. */ static const uint8_t r92s_epaddr[] = { 0x83, 0x04, 0x06, 0x0d, 0x05, 0x07, 0x89, 0x0a, 0x0b, 0x0c }; /* Queue id to pipe index mapping for 4 endpoints configurations. */ static const uint8_t rsu_qid2idx_4ep[] = { 3, 3, 3, 1, 1, 2, 2, 0, 3, 0 }; /* Queue id to pipe index mapping for 6 endpoints configurations. */ static const uint8_t rsu_qid2idx_6ep[] = { 3, 3, 3, 1, 4, 2, 5, 0, 3, 0 }; /* Queue id to pipe index mapping for 11 endpoints configurations. 
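* A worked example using the tables above (illustrative, not upstream text): in the 4-endpoint configuration, RSU_QID_VO (3) maps through rsu_qid2idx_4ep[3] == 1 to endpoint address r92s_epaddr[1] == 0x04.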
*/ static const uint8_t rsu_qid2idx_11ep[] = { 7, 9, 8, 1, 4, 2, 5, 0, 3, 6 }; struct rsu_rx_radiotap_header { struct ieee80211_radiotap_header wr_ihdr; uint8_t wr_flags; uint8_t wr_rate; uint16_t wr_chan_freq; uint16_t wr_chan_flags; uint8_t wr_dbm_antsignal; } __packed __aligned(8); #define RSU_RX_RADIOTAP_PRESENT \ (1 << IEEE80211_RADIOTAP_FLAGS | \ 1 << IEEE80211_RADIOTAP_RATE | \ 1 << IEEE80211_RADIOTAP_CHANNEL | \ 1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) struct rsu_tx_radiotap_header { struct ieee80211_radiotap_header wt_ihdr; uint8_t wt_flags; uint16_t wt_chan_freq; uint16_t wt_chan_flags; } __packed __aligned(8); #define RSU_TX_RADIOTAP_PRESENT \ (1 << IEEE80211_RADIOTAP_FLAGS | \ 1 << IEEE80211_RADIOTAP_CHANNEL) struct rsu_softc; struct rsu_host_cmd { void (*cb)(struct rsu_softc *, void *); uint8_t data[256]; }; struct rsu_cmd_newstate { enum ieee80211_state state; int arg; }; struct rsu_cmd_key { struct ieee80211_key key; }; struct rsu_host_cmd_ring { struct rsu_host_cmd cmd[RSU_HOST_CMD_RING_COUNT]; int cur; int next; int queued; }; enum { RSU_BULK_RX, RSU_BULK_TX_BE_BK, /* = WME_AC_BE/BK */ RSU_BULK_TX_VI_VO, /* = WME_AC_VI/VO */ RSU_BULK_TX_H2C, /* H2C */ RSU_N_TRANSFER, }; struct rsu_data { struct rsu_softc *sc; uint8_t *buf; uint16_t buflen; struct mbuf *m; struct ieee80211_node *ni; STAILQ_ENTRY(rsu_data) next; }; struct rsu_vap { struct ieee80211vap vap; int (*newstate)(struct ieee80211vap *, enum ieee80211_state, int); }; #define RSU_VAP(vap) ((struct rsu_vap *)(vap)) #define RSU_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define RSU_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define RSU_ASSERT_LOCKED(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) struct rsu_softc { struct ieee80211com sc_ic; struct mbufq sc_snd; device_t sc_dev; struct usb_device *sc_udev; int (*sc_newstate)(struct ieee80211com *, enum ieee80211_state, int); struct usbd_interface *sc_iface; struct timeout_task calib_task; struct task tx_task; const uint8_t *qid2idx; struct mtx sc_mtx; int sc_ht; int sc_nendpoints; int sc_curpwrstate; int sc_currssi; u_int sc_running:1, sc_calibrating:1, sc_active_scan:1, sc_extra_scan:1; u_int cut; uint8_t sc_rftype; int8_t sc_nrxstream; int8_t sc_ntxstream; struct rsu_host_cmd_ring cmdq; struct rsu_data sc_rx[RSU_RX_LIST_COUNT]; struct rsu_data sc_tx[RSU_TX_LIST_COUNT]; struct rsu_data *fwcmd_data; uint8_t cmd_seq; uint8_t rom[128]; struct usb_xfer *sc_xfer[RSU_N_TRANSFER]; STAILQ_HEAD(, rsu_data) sc_rx_active; STAILQ_HEAD(, rsu_data) sc_rx_inactive; STAILQ_HEAD(, rsu_data) sc_tx_active[RSU_N_TRANSFER]; STAILQ_HEAD(, rsu_data) sc_tx_inactive; STAILQ_HEAD(, rsu_data) sc_tx_pending[RSU_N_TRANSFER]; union { struct rsu_rx_radiotap_header th; uint8_t pad[64]; } sc_rxtapu; #define sc_rxtap sc_rxtapu.th union { struct rsu_tx_radiotap_header th; uint8_t pad[64]; } sc_txtapu; #define sc_txtap sc_txtapu.th }; Index: projects/clang391-import/sys/netinet/ip_fastfwd.c =================================================================== --- projects/clang391-import/sys/netinet/ip_fastfwd.c (revision 309262) +++ projects/clang391-import/sys/netinet/ip_fastfwd.c (revision 309263) @@ -1,479 +1,434 @@ /*- * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * ip_fastforward gets its speed from processing the forwarded packet to * completion (if_output on the other side) without any queues or netisr's. * The receiving interface DMAs the packet into memory, the upper half of the * driver calls ip_fastforward, we do our routing table lookup and directly * send it off to the outgoing interface, which DMAs the packet to the * network card. The only part of the packet we touch with the CPU is the * IP header (unless there are complex firewall rules touching other parts * of the packet, but that is up to you). We are essentially limited by bus * bandwidth and how fast the network card/driver can set up receives and * transmits. * * We handle basic errors, IP header errors, checksum errors, * destination unreachable, fragmentation and fragmentation needed and * report them via ICMP to the sender. * * Otherwise, if something is not pure IPv4 unicast forwarding, we fall back to * the normal ip_input processing path. We should only be called from * interfaces connected to the outside world. * * Firewalling is fully supported, including divert, ipfw fwd, and ipfilter * ipnat address rewrite. * * IPSEC is not supported if this host is a tunnel broker. IPSEC is * supported for connections to/from local host. * * We try to do the least expensive (in CPU ops) checks and operations * first to catch junk with as little overhead as possible. * * We take full advantage of hardware support for IP checksum and * fragmentation offloading. * * We don't do ICMP redirects in the fast forwarding path. I have had my own * cases where two core routers with the Zebra routing suite would send millions * of ICMP redirects to connected hosts if the destination router was not the * default gateway. In one case it was filling the routing table of a host * with approximately 300,000 cloned redirect entries until it ran out of * kernel memory. However, the networking code proved very robust and it didn't * crash or fail in other ways. */ /* * Many thanks to Matt Thomas of NetBSD for the basic structure of ip_flow.c, * which is being followed here.
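* * For orientation (a summary of the code below, not upstream text): ip_tryforward() validates the header, runs the inbound pfil hooks, decrements the TTL, resolves the next hop via fib4_lookup_nh_basic(), runs the outbound pfil hooks, and hands the packet to the interface's if_output; whenever a check decides the packet is not plain unicast forwarding, the mbuf is returned so ip_input() can process it in full.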
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ipstealth.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include -static struct sockaddr_in * -ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) +static int +ip_findroute(struct nhop4_basic *pnh, struct in_addr dest, struct mbuf *m) { - struct sockaddr_in *dst; - struct rtentry *rt; + bzero(pnh, sizeof(*pnh)); + if (fib4_lookup_nh_basic(M_GETFIB(m), dest, 0, 0, pnh) != 0) { + IPSTAT_INC(ips_noroute); + IPSTAT_INC(ips_cantforward); + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); + return (EHOSTUNREACH); + } /* - * Find route to destination. + * Drop blackholed traffic and directed broadcasts. */ - bzero(ro, sizeof(*ro)); - dst = (struct sockaddr_in *)&ro->ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr.s_addr = dest.s_addr; - in_rtalloc_ign(ro, 0, M_GETFIB(m)); + if ((pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST)) != 0) { + IPSTAT_INC(ips_cantforward); + m_freem(m); + return (EHOSTUNREACH); + } - /* - * Route there and interface still up? - */ - rt = ro->ro_rt; - if (rt && (rt->rt_flags & RTF_UP) && - (rt->rt_ifp->if_flags & IFF_UP) && - (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if (rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in *)rt->rt_gateway; - } else { - IPSTAT_INC(ips_noroute); + if (pnh->nh_flags & NHF_REJECT) { IPSTAT_INC(ips_cantforward); - if (rt) - RTFREE(rt); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - return NULL; + return (EHOSTUNREACH); } - return dst; + + return (0); } /* * Try to forward a packet based on the destination address. * This is a fast path optimized for the plain forwarding case. * If the packet is handled (and consumed) here then we return NULL; * otherwise mbuf is returned and the packet should be delivered * to ip_input for full processing. */ struct mbuf * ip_tryforward(struct mbuf *m) { struct ip *ip; struct mbuf *m0 = NULL; - struct route ro; - struct sockaddr_in *dst = NULL; - struct ifnet *ifp; + struct nhop4_basic nh; + struct sockaddr_in dst; struct in_addr odest, dest; uint16_t ip_len, ip_off; int error = 0; - int mtu; struct m_tag *fwd_tag = NULL; /* * Are we active and forwarding packets? */ M_ASSERTVALID(m); M_ASSERTPKTHDR(m); - bzero(&ro, sizeof(ro)); - - #ifdef ALTQ /* * Is packet dropped by traffic conditioner? */ if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) goto drop; #endif /* * Only IP packets without options */ ip = mtod(m, struct ip *); if (ip->ip_hl != (sizeof(struct ip) >> 2)) { if (V_ip_doopts == 1) return m; else if (V_ip_doopts == 2) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB, 0, 0); return NULL; /* mbuf already free'd */ } /* else ignore IP options and continue */ } /* * Only unicast IP, not from loopback, no L2 or IP broadcast, * no multicast, no INADDR_ANY * * XXX: Probably some of these checks could be direct drop * conditions. However it is not clear whether there are some * hacks or obscure behaviours which make it necessary to * let ip_input handle it. We play safe here and let ip_input * deal with it until it is proven that we can directly drop it. 
*/ if ((m->m_flags & (M_BCAST|M_MCAST)) || (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST || ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || ip->ip_src.s_addr == INADDR_ANY || ip->ip_dst.s_addr == INADDR_ANY ) return m; /* * Is it for a local address on this host? */ if (in_localip(ip->ip_dst)) return m; IPSTAT_INC(ips_total); /* * Step 3: incoming packet firewall processing */ odest.s_addr = dest.s_addr = ip->ip_dst.s_addr; /* * Run through list of ipfilter hooks for input packets */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passin; if (pfil_run_hooks( &V_inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) || m == NULL) goto drop; M_ASSERTVALID(m); M_ASSERTPKTHDR(m); ip = mtod(m, struct ip *); /* m may have changed by pfil hook */ dest.s_addr = ip->ip_dst.s_addr; /* * Destination address changed? */ if (odest.s_addr != dest.s_addr) { /* * Is it now for a local address on this host? */ if (in_localip(dest)) goto forwardlocal; /* * Go on with new destination address */ } if (m->m_flags & M_FASTFWD_OURS) { /* * ipfw changed it for a local address on this host. */ goto forwardlocal; } passin: /* * Step 4: decrement TTL and look up route */ /* * Check TTL */ #ifdef IPSTEALTH if (!V_ipstealth) { #endif if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); return NULL; /* mbuf already free'd */ } /* * Decrement the TTL and incrementally change the IP header checksum. * Don't bother doing this with hw checksum offloading, it's faster * doing it right here. */ ip->ip_ttl -= IPTTLDEC; if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8)) ip->ip_sum -= ~htons(IPTTLDEC << 8); else ip->ip_sum += htons(IPTTLDEC << 8); #ifdef IPSTEALTH } #endif /* * Find route to destination. */ - if ((dst = ip_findroute(&ro, dest, m)) == NULL) - return NULL; /* icmp unreach already sent */ - ifp = ro.ro_rt->rt_ifp; + if (ip_findroute(&nh, dest, m) != 0) + return (NULL); /* icmp unreach already sent */ /* - * Immediately drop blackholed traffic, and directed broadcasts - * for either the all-ones or all-zero subnet addresses on - * locally attached networks. - */ - if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0) - goto drop; - - /* * Step 5: outgoing firewall packet processing */ - - /* - * Run through list of hooks for output packets. - */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passout; - if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, NULL) || m == NULL) { + if (pfil_run_hooks(&V_inet_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, NULL) || + m == NULL) { goto drop; } M_ASSERTVALID(m); M_ASSERTPKTHDR(m); ip = mtod(m, struct ip *); dest.s_addr = ip->ip_dst.s_addr; /* * Destination address changed? */ if (m->m_flags & M_IP_NEXTHOP) fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); if (odest.s_addr != dest.s_addr || fwd_tag != NULL) { /* * Is it now for a local address on this host? */ if (m->m_flags & M_FASTFWD_OURS || in_localip(dest)) { forwardlocal: /* * Return packet for processing by ip_input(). 
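* (Setting M_FASTFWD_OURS tells ip_input() that the destination has already been checked and is now a local address, so the packet is delivered locally instead of re-entering the forwarding path.)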
*/ m->m_flags |= M_FASTFWD_OURS; - if (ro.ro_rt) - RTFREE(ro.ro_rt); - return m; + return (m); } /* * Redo route lookup with new destination address */ if (fwd_tag) { dest.s_addr = ((struct sockaddr_in *) (fwd_tag + 1))->sin_addr.s_addr; m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP_NEXTHOP; } - RTFREE(ro.ro_rt); - if ((dst = ip_findroute(&ro, dest, m)) == NULL) - return NULL; /* icmp unreach already sent */ - ifp = ro.ro_rt->rt_ifp; + if (ip_findroute(&nh, dest, m) != 0) + return (NULL); /* icmp unreach already sent */ } passout: /* * Step 6: send off the packet */ ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); - /* - * Check if route is dampned (when ARP is unable to resolve) - */ - if ((ro.ro_rt->rt_flags & RTF_REJECT) && - (ro.ro_rt->rt_expire == 0 || time_uptime < ro.ro_rt->rt_expire)) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - goto consumed; - } + bzero(&dst, sizeof(dst)); + dst.sin_family = AF_INET; + dst.sin_len = sizeof(dst); + dst.sin_addr = nh.nh_addr; /* - * Check if media link state of interface is not down - */ - if (ifp->if_link_state == LINK_STATE_DOWN) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - goto consumed; - } - - /* * Check if packet fits MTU or if hardware will fragment for us */ - if (ro.ro_rt->rt_mtu) - mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); - else - mtu = ifp->if_mtu; - - if (ip_len <= mtu) { + if (ip_len <= nh.nh_mtu) { /* * Avoid confusing lower layers. */ m_clrprotoflags(m); /* * Send off the packet via outgoing interface */ - IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, &ro); + IP_PROBE(send, NULL, NULL, ip, nh.nh_ifp, ip, NULL); + error = (*nh.nh_ifp->if_output)(nh.nh_ifp, m, + (struct sockaddr *)&dst, NULL); } else { /* * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery */ if (ip_off & IP_DF) { IPSTAT_INC(ips_cantfrag); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, - 0, mtu); + 0, nh.nh_mtu); goto consumed; } else { /* * We have to fragment the packet */ m->m_pkthdr.csum_flags |= CSUM_IP; - if (ip_fragment(ip, &m, mtu, ifp->if_hwassist)) + if (ip_fragment(ip, &m, nh.nh_mtu, + nh.nh_ifp->if_hwassist) != 0) goto drop; KASSERT(m != NULL, ("null mbuf and no error")); /* * Send off the fragments via outgoing interface */ error = 0; do { m0 = m->m_nextpkt; m->m_nextpkt = NULL; /* * Avoid confusing lower layers. 
*/ m_clrprotoflags(m); - IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, &ro); + IP_PROBE(send, NULL, NULL, ip, nh.nh_ifp, + ip, NULL); + /* XXX: we can use cached route here */ + error = (*nh.nh_ifp->if_output)(nh.nh_ifp, m, + (struct sockaddr *)&dst, NULL); if (error) break; } while ((m = m0) != NULL); if (error) { /* Reclaim remaining fragments */ for (m = m0; m; m = m0) { m0 = m->m_nextpkt; m_freem(m); } } else IPSTAT_INC(ips_fragmented); } } if (error != 0) IPSTAT_INC(ips_odropped); else { - counter_u64_add(ro.ro_rt->rt_pksent, 1); IPSTAT_INC(ips_forward); IPSTAT_INC(ips_fastforward); } consumed: - RTFREE(ro.ro_rt); return NULL; drop: if (m) m_freem(m); - if (ro.ro_rt) - RTFREE(ro.ro_rt); return NULL; } Index: projects/clang391-import/usr.bin/clang/clang.prog.mk =================================================================== --- projects/clang391-import/usr.bin/clang/clang.prog.mk (revision 309262) +++ projects/clang391-import/usr.bin/clang/clang.prog.mk (revision 309263) @@ -1,21 +1,23 @@ # $FreeBSD$ .include "${SRCTOP}/lib/clang/clang.pre.mk" CFLAGS+= -I${OBJTOP}/lib/clang/libclang CFLAGS+= -I${OBJTOP}/lib/clang/libllvm .include "${SRCTOP}/lib/clang/clang.build.mk" LIBDEPS+= clang LIBDEPS+= llvm .for lib in ${LIBDEPS} DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a .endfor +PACKAGE= clang + LIBADD+= ncursesw LIBADD+= pthread .include Index: projects/clang391-import/usr.bin/clang/lld/Makefile =================================================================== --- projects/clang391-import/usr.bin/clang/lld/Makefile (revision 309262) +++ projects/clang391-import/usr.bin/clang/lld/Makefile (revision 309263) @@ -1,74 +1,75 @@ # $FreeBSD$ .include LLVM_SRCS= ${SRCTOP}/contrib/llvm LLD_SRCS= ${LLVM_SRCS}/tools/lld +PACKAGE= lld PROG_CXX= ld.lld MAN= .if ${MK_LLD_AS_LD} != "no" SYMLINKS= ${PROG_CXX} ${BINDIR}/ld .endif CFLAGS+= -I${LLD_SRCS}/include CFLAGS+= -I${.OBJDIR} CFLAGS+= -I${.OBJDIR}/../../../lib/clang/libllvm SRCDIR= tools/lld SRCS+= ELF/Driver.cpp SRCS+= ELF/DriverUtils.cpp SRCS+= ELF/EhFrame.cpp SRCS+= ELF/Error.cpp SRCS+= ELF/ICF.cpp SRCS+= ELF/InputFiles.cpp SRCS+= ELF/InputSection.cpp SRCS+= ELF/LinkerScript.cpp SRCS+= ELF/LTO.cpp SRCS+= ELF/MarkLive.cpp SRCS+= ELF/OutputSections.cpp SRCS+= ELF/Relocations.cpp SRCS+= ELF/ScriptParser.cpp SRCS+= ELF/Strings.cpp SRCS+= ELF/SymbolListFile.cpp SRCS+= ELF/SymbolTable.cpp SRCS+= ELF/Symbols.cpp SRCS+= ELF/Target.cpp SRCS+= ELF/Thunks.cpp SRCS+= ELF/Writer.cpp SRCS+= lib/Config/Version.cpp SRCS+= lib/Core/DefinedAtom.cpp SRCS+= lib/Core/Error.cpp SRCS+= lib/Core/File.cpp SRCS+= lib/Core/LinkingContext.cpp SRCS+= lib/Core/Reader.cpp SRCS+= lib/Core/Resolver.cpp SRCS+= lib/Core/SymbolTable.cpp SRCS+= lib/Core/Writer.cpp SRCS+= tools/lld/lld.cpp .include "${SRCTOP}/lib/clang/llvm.build.mk" LIBDEPS+= llvm .for lib in ${LIBDEPS} DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a .endfor LLVM_TBLGEN?= llvm-tblgen ELF/Options.inc: ${LLD_SRCS}/ELF/Options.td ${LLVM_TBLGEN} -gen-opt-parser-defs \ -I ${LLVM_SRCS}/include \ -d ${.TARGET:C/$/.d/} -o ${.TARGET} \ ${LLVM_SRCS}/tools/lld/ELF/Options.td TGHDRS+= ELF/Options.inc DPSRCS+= ${TGHDRS} CLEANFILES+= ${TGHDRS} ${TGHDRS:C/$/.d/} LIBADD+= ncursesw LIBADD+= pthread LIBADD+= z .include Index: projects/clang391-import/usr.bin/clang/lldb/Makefile =================================================================== --- 
projects/clang391-import/usr.bin/clang/lldb/Makefile (revision 309262) +++ projects/clang391-import/usr.bin/clang/lldb/Makefile (revision 309263) @@ -1,31 +1,32 @@ # $FreeBSD$ .include "${SRCTOP}/lib/clang/lldb.pre.mk" +PACKAGE= lldb PROG_CXX= lldb # Man page directory .PATH: ${LLDB_SRCS}/docs CFLAGS+= -I${LLDB_SRCS}/include SRCDIR= tools/lldb/tools/driver SRCS+= Driver.cpp .include "${SRCTOP}/lib/clang/clang.build.mk" LIBDEPS+= lldb LIBDEPS+= clang LIBDEPS+= llvm .for lib in ${LIBDEPS} DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a .endfor LIBADD+= edit LIBADD+= panel LIBADD+= ncursesw LIBADD+= pthread LIBADD+= z .include Index: projects/clang391-import/usr.bin/clang/llvm.prog.mk =================================================================== --- projects/clang391-import/usr.bin/clang/llvm.prog.mk (revision 309262) +++ projects/clang391-import/usr.bin/clang/llvm.prog.mk (revision 309263) @@ -1,25 +1,27 @@ # $FreeBSD$ .include "${SRCTOP}/lib/clang/llvm.pre.mk" CFLAGS+= -I${OBJTOP}/lib/clang/libllvm .include "${SRCTOP}/lib/clang/llvm.build.mk" # Special case for the bootstrap-tools phase. .if (defined(TOOLS_PREFIX) || ${MACHINE} == "host") && \ (${PROG_CXX} == "clang-tblgen" || ${PROG_CXX} == "llvm-tblgen") LIBDEPS+= llvmminimal .else LIBDEPS+= llvm .endif .for lib in ${LIBDEPS} DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${lib}.a .endfor +PACKAGE= clang + LIBADD+= ncursesw LIBADD+= pthread .include Index: projects/clang391-import/usr.bin/indent/indent.c =================================================================== --- projects/clang391-import/usr.bin/indent/indent.c (revision 309262) +++ projects/clang391-import/usr.bin/indent/indent.c (revision 309263) @@ -1,1265 +1,1275 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1976 Board of Trustees of the University of Illinois. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\ @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\ @(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void bakcopy(void); static void indent_declaration(int, int); const char *in_name = "Standard Input"; /* will always point to name of input * file */ const char *out_name = "Standard Output"; /* will always point to name * of output file */ char bakfile[MAXPATHLEN] = ""; int main(int argc, char **argv) { cap_rights_t rights; int dec_ind; /* current indentation for declarations */ int di_stack[20]; /* a stack of structure indentation levels */ int flushed_nl; /* used when buffering up comments to remember * that a newline was passed over */ int force_nl; /* when true, code must be broken */ int hd_type = 0; /* used to store type of stmt for if (...), * for (...), etc */ int i; /* local loop counter */ int scase; /* set to true when we see a case, so we will * know what to do with the following colon */ int sp_sw; /* when true, we are in the expression of * if(...), while(...), etc. */ int squest; /* when this is positive, we have seen a ? 
* without the matching : in a ?: * construct */ const char *t_ptr; /* used for copying tokens */ int tabs_to_var; /* true if using tabs to indent to var name */ int type_code; /* the type of token, returned by lexi */ int last_else = 0; /* true iff last keyword was an else */ /*-----------------------------------------------*\ | INITIALIZATION | \*-----------------------------------------------*/ found_err = 0; ps.p_stack[0] = stmt; /* this is the parser's stack */ ps.last_nl = true; /* this is true if the last thing scanned was * a newline */ ps.last_token = semicolon; combuf = (char *) malloc(bufsize); if (combuf == NULL) err(1, NULL); labbuf = (char *) malloc(bufsize); if (labbuf == NULL) err(1, NULL); codebuf = (char *) malloc(bufsize); if (codebuf == NULL) err(1, NULL); tokenbuf = (char *) malloc(bufsize); if (tokenbuf == NULL) err(1, NULL); alloc_typenames(); l_com = combuf + bufsize - 5; l_lab = labbuf + bufsize - 5; l_code = codebuf + bufsize - 5; l_token = tokenbuf + bufsize - 5; combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and * comment buffers */ combuf[1] = codebuf[1] = labbuf[1] = '\0'; ps.else_if = 1; /* Default else-if special processing to on */ s_lab = e_lab = labbuf + 1; s_code = e_code = codebuf + 1; s_com = e_com = combuf + 1; s_token = e_token = tokenbuf + 1; in_buffer = (char *) malloc(10); if (in_buffer == NULL) err(1, NULL); in_buffer_limit = in_buffer + 8; buf_ptr = buf_end = in_buffer; line_no = 1; had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; sp_sw = force_nl = false; ps.in_or_st = false; ps.bl_line = true; dec_ind = 0; di_stack[ps.dec_nest = 0] = 0; ps.want_blank = ps.in_stmt = ps.ind_stmt = false; scase = ps.pcase = false; squest = 0; sc_end = NULL; bp_save = NULL; be_save = NULL; output = NULL; tabs_to_var = 0; /*--------------------------------------------------*\ | COMMAND LINE SCAN | \*--------------------------------------------------*/ #ifdef undef max_col = 78; /* -l78 */ lineup_to_parens = 1; /* -lp */ ps.ljust_decl = 0; /* -ndj */ ps.com_ind = 33; /* -c33 */ star_comment_cont = 1; /* -sc */ ps.ind_size = 8; /* -i8 */ verbose = 0; ps.decl_indent = 16; /* -di16 */ ps.local_decl_indent = -1; /* if this is not set to some nonnegative value * by an arg, we will set this equal to * ps.decl_ind */ ps.indent_parameters = 1; /* -ip */ ps.decl_com_ind = 0; /* if this is not set to some positive value * by an arg, we will set this equal to * ps.com_ind */ btype_2 = 1; /* -br */ cuddle_else = 1; /* -ce */ ps.unindent_displace = 0; /* -d0 */ ps.case_indent = 0; /* -cli0 */ format_block_comments = 1; /* -fcb */ format_col1_comments = 1; /* -fc1 */ procnames_start_line = 1; /* -psl */ proc_calls_space = 0; /* -npcs */ comment_delimiter_on_blankline = 1; /* -cdb */ ps.leave_comma = 1; /* -nbc */ #endif for (i = 1; i < argc; ++i) if (strcmp(argv[i], "-npro") == 0) break; set_defaults(); if (i >= argc) set_profile(); for (i = 1; i < argc; ++i) { /* * look thru args (if any) for changes to defaults */ if (argv[i][0] != '-') {/* no flag on parameter */ if (input == NULL) { /* we must have the input file */ in_name = argv[i]; /* remember name of input file */ input = fopen(in_name, "r"); if (input == NULL) /* check for open error */ err(1, "%s", in_name); continue; } else if (output == NULL) { /* we have the output file */ out_name = argv[i]; /* remember name of output file */ if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite * the file */ errx(1, "input and output files must be different"); } output = fopen(out_name, "w"); 
if (output == NULL) /* check for create error */ err(1, "%s", out_name); continue; } errx(1, "unknown parameter: %s", argv[i]); } else set_option(argv[i]); } /* end of for */ if (input == NULL) input = stdin; if (output == NULL) { if (troff || input == stdin) output = stdout; else { out_name = in_name; bakcopy(); } } /* Restrict input/output descriptors and enter Capsicum sandbox. */ cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); if (cap_rights_limit(fileno(output), &rights) < 0 && errno != ENOSYS) err(EXIT_FAILURE, "unable to limit rights for %s", out_name); cap_rights_init(&rights, CAP_FSTAT, CAP_READ); if (cap_rights_limit(fileno(input), &rights) < 0 && errno != ENOSYS) err(EXIT_FAILURE, "unable to limit rights for %s", in_name); if (cap_enter() < 0 && errno != ENOSYS) err(EXIT_FAILURE, "unable to enter capability mode"); if (ps.com_ind <= 1) ps.com_ind = 2; /* dont put normal comments before column 2 */ if (troff) { if (bodyf.font[0] == 0) parsefont(&bodyf, "R"); if (scomf.font[0] == 0) parsefont(&scomf, "I"); if (blkcomf.font[0] == 0) blkcomf = scomf, blkcomf.size += 2; if (boxcomf.font[0] == 0) boxcomf = blkcomf; if (stringf.font[0] == 0) parsefont(&stringf, "L"); if (keywordf.font[0] == 0) parsefont(&keywordf, "B"); writefdef(&bodyf, 'B'); writefdef(&scomf, 'C'); writefdef(&blkcomf, 'L'); writefdef(&boxcomf, 'X'); writefdef(&stringf, 'S'); writefdef(&keywordf, 'K'); } if (block_comment_max_col <= 0) block_comment_max_col = max_col; if (ps.local_decl_indent < 0) /* if not specified by user, set this */ ps.local_decl_indent = ps.decl_indent; if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; if (continuation_indent == 0) continuation_indent = ps.ind_size; fill_buffer(); /* get first batch of stuff into input buffer */ parse(semicolon); { char *p = buf_ptr; int col = 1; while (1) { if (*p == ' ') col++; else if (*p == '\t') col = ((col - 1) & ~7) + 9; else break; p++; } if (col > ps.ind_size) ps.ind_level = ps.i_l_follow = col / ps.ind_size; } if (troff) { const char *p = in_name, *beg = in_name; while (*p) if (*p++ == '/') beg = p; fprintf(output, ".Fn \"%s\"\n", beg); } /* * START OF MAIN LOOP */ while (1) { /* this is the main loop. it will go until we * reach eof */ int is_procname; type_code = lexi(); /* lexi reads one token. The actual * characters read are stored in "token". lexi * returns a code indicating the type of token */ is_procname = ps.procname[0]; /* * The following code moves everything following an if (), while (), * else, etc. up to the start of the following stmt to a buffer. This * allows proper handling of both kinds of brace placement. 
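* (That is, both the -br style, where the brace is cuddled onto the if/while line, and the -bl style, where it starts a line of its own; the btype_2 flag below selects the cuddled style.)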
*/ flushed_nl = false; while (ps.search_brace) { /* if we scanned an if(), while(), * etc., we might need to copy stuff * into a buffer we must loop, copying * stuff into save_com, until we find * the start of the stmt which follows * the if, or whatever */ switch (type_code) { case newline: ++line_no; if (sc_end != NULL) goto sw_buffer; /* dump comment, if any */ flushed_nl = true; case form_feed: break; /* form feeds and newlines found here will be * ignored */ case lbrace: /* this is a brace that starts the compound * stmt */ if (sc_end == NULL) { /* ignore buffering if a comment wasn't * stored up */ ps.search_brace = false; goto check_type; } if (btype_2) { save_com[0] = '{'; /* we either want to put the brace * right after the if */ goto sw_buffer; /* go to common code to get out of * this loop */ } case comment: /* we have a comment, so we must copy it into * the buffer */ if (!flushed_nl || sc_end != NULL) { if (sc_end == NULL) { /* if this is the first comment, we * must set up the buffer */ save_com[0] = save_com[1] = ' '; sc_end = &(save_com[2]); } else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } *sc_end++ = '/'; /* copy in start of comment */ *sc_end++ = '*'; for (;;) { /* loop until we get to the end of the comment */ *sc_end = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*sc_end++ == '*' && *buf_ptr == '/') break; /* we are at end of comment */ if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer * overflow */ diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); fflush(output); exit(1); } } *sc_end++ = '/'; /* add ending slash */ if (++buf_ptr >= buf_end) /* get past / in buffer */ fill_buffer(); break; } default: /* it is the start of a normal statement */ if (flushed_nl) /* if we flushed a newline, make sure it is * put back */ force_nl = true; if ((type_code == sp_paren && *token == 'i' && last_else && ps.else_if) || (type_code == sp_nparen && *token == 'e' && e_code != s_code && e_code[-1] == '}')) force_nl = false; if (sc_end == NULL) { /* ignore buffering if comment wasn't * saved up */ ps.search_brace = false; goto check_type; } if (force_nl) { /* if we should insert a nl here, put it into * the buffer */ force_nl = false; --line_no; /* this will be re-increased when the nl is * read from the buffer */ *sc_end++ = '\n'; *sc_end++ = ' '; if (verbose && !flushed_nl) /* print error msg if the line * was not already broken */ diag2(0, "Line broken"); flushed_nl = false; } for (t_ptr = token; *t_ptr; ++t_ptr) *sc_end++ = *t_ptr; /* copy token into temp buffer */ ps.procname[0] = 0; sw_buffer: ps.search_brace = false; /* stop looking for start of * stmt */ bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' ';/* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; break; } /* end of switch */ if (type_code != 0) /* we must make this check, just in case there * was an unexpected EOF */ type_code = lexi(); /* read another token */ /* if (ps.search_brace) ps.procname[0] = 0; */ if ((is_procname = ps.procname[0]) && flushed_nl && !procnames_start_line && ps.in_decl && type_code == ident) flushed_nl = 0; } /* end of while (search_brace) */ last_else = 0; check_type: if (type_code == 0) { /* we got eof */ if (s_lab != e_lab || s_code != e_code || s_com != e_com) /* must dump end of line */ dump_line(); if (ps.tos > 1) /* check for 
balanced braces */ diag2(1, "Stuff missing from end of file"); if (verbose) { printf("There were %d output lines and %d comments\n", ps.out_lines, ps.out_coms); printf("(Lines with comments)/(Lines with code): %6.3f\n", (1.0 * ps.com_lines) / code_lines); } fflush(output); exit(found_err); } if ( (type_code != comment) && (type_code != newline) && (type_code != preesc) && (type_code != form_feed)) { if (force_nl && (type_code != semicolon) && (type_code != lbrace || !btype_2)) { /* we should force a broken line here */ if (verbose && !flushed_nl) diag2(0, "Line broken"); flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } ps.in_stmt = true; /* turn on flag which causes an extra level of * indentation. this is turned off by a ; or * '}' */ if (s_com != e_com) { /* the turkey has embedded a comment * in a line. fix it */ *e_code++ = ' '; for (t_ptr = s_com; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } *e_code++ = ' '; *e_code = '\0'; /* null terminate code sect */ ps.want_blank = false; e_com = s_com; } } else if (type_code != comment) /* preserve force_nl thru a comment */ force_nl = false; /* cancel forced newline after newline, form * feed, etc */ /*-----------------------------------------------------*\ | do switch on type of token scanned | \*-----------------------------------------------------*/ CHECK_SIZE_CODE; switch (type_code) { /* now, decide what to do with the token */ case form_feed: /* found a form feed in line */ ps.use_ff = true; /* a form feed is treated much like a newline */ dump_line(); ps.want_blank = false; break; case newline: if (ps.last_token != comma || ps.p_l_follow > 0 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { dump_line(); ps.want_blank = false; } ++line_no; /* keep track of input line number */ break; case lparen: /* got a '(' or '[' */ ++ps.p_l_follow; /* count parens to make Healy happy */ if (ps.want_blank && *token != '[' && (ps.last_token != ident || proc_calls_space || /* offsetof (1) is never allowed a space; sizeof (2) gets * one iff -bs; all other keywords (>2) always get a space * before lparen */ (ps.keyword + Bill_Shannon > 2))) *e_code++ = ' '; ps.want_blank = false; if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && !is_procname) { /* function pointer declarations */ if (troff) { sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { indent_declaration(dec_ind, tabs_to_var); } ps.dumped_decl_indent = true; } if (!troff) *e_code++ = token[0]; ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent && ps.paren_indents[0] < 2 * ps.ind_size) ps.paren_indents[0] = 2 * ps.ind_size; if (ps.in_or_st && *token == '(' && ps.tos <= 2) { /* * this is a kluge to make sure that declarations will be * aligned right if proc decl has an explicit type on it, i.e. * "int a(x) {..." */ parse(semicolon); /* I said this was a kluge... 
*/ ps.in_or_st = false; /* turn off flag for structure decl or * initialization */ } /* parenthesized type following sizeof or offsetof is not a cast */ if (ps.keyword == 1 || ps.keyword == 2) ps.not_cast_mask |= 1 << ps.p_l_follow; break; case rparen: /* got a ')' or ']' */ rparen_count--; if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { ps.last_u_d = true; ps.cast_mask &= (1 << ps.p_l_follow) - 1; ps.want_blank = space_after_cast; } else ps.want_blank = true; ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; if (--ps.p_l_follow < 0) { ps.p_l_follow = 0; diag3(0, "Extra %c", *token); } if (e_code == s_code) /* if the paren starts the line */ ps.paren_level = ps.p_l_follow; /* then indent it */ *e_code++ = token[0]; if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if * (...), or some such */ sp_sw = false; force_nl = true;/* must force newline after if */ ps.last_u_d = true; /* inform lexi that a following * operator is unary */ ps.in_stmt = false; /* dont use stmt continuation * indentation */ parse(hd_type); /* let parser worry about if, or whatever */ } ps.search_brace = btype_2; /* this should insure that constructs * such as main(){...} and int[]{...} * have their braces put in the right * place */ break; case unary_op: /* this could be any unary operation */ if (!ps.dumped_decl_indent && ps.in_decl && !is_procname && !ps.block_init) { /* pointer declarations */ if (troff) { if (ps.want_blank) *e_code++ = ' '; sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { /* if this is a unary op in a declaration, we should * indent this token */ for (i = 0; token[i]; ++i) /* find length of token */; indent_declaration(dec_ind - i, tabs_to_var); } ps.dumped_decl_indent = true; } else if (ps.want_blank) *e_code++ = ' '; { const char *res = token; if (troff && token[0] == '-' && token[1] == '>') res = "\\(->"; for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } } ps.want_blank = false; break; case binary_op: /* any binary operation */ if (ps.want_blank) *e_code++ = ' '; { const char *res = token; if (troff) switch (token[0]) { case '<': if (token[1] == '=') res = "\\(<="; break; case '>': if (token[1] == '=') res = "\\(>="; break; case '!': if (token[1] == '=') res = "\\(!="; break; case '|': if (token[1] == '|') res = "\\(br\\(br"; else if (token[1] == 0) res = "\\(br"; break; } for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; /* move the operator */ } } ps.want_blank = true; break; case postop: /* got a trailing ++ or -- */ *e_code++ = token[0]; *e_code++ = token[1]; ps.want_blank = true; break; case question: /* got a ? 
*/ squest++; /* this will be used when a later colon * appears so we can distinguish the * ?: construct */ if (ps.want_blank) *e_code++ = ' '; *e_code++ = '?'; ps.want_blank = true; break; case casestmt: /* got word 'case' or 'default' */ scase = true; /* so we can process the later colon properly */ goto copy_id; case colon: /* got a ':' */ if (squest > 0) { /* it is part of the ?: construct */ --squest; if (ps.want_blank) *e_code++ = ' '; *e_code++ = ':'; ps.want_blank = true; break; } if (ps.in_or_st) { *e_code++ = ':'; ps.want_blank = false; break; } ps.in_stmt = false; /* seeing a label does not imply we are in a * stmt */ for (t_ptr = s_code; *t_ptr; ++t_ptr) *e_lab++ = *t_ptr; /* turn everything so far into a label */ e_code = s_code; *e_lab++ = ':'; *e_lab++ = ' '; *e_lab = '\0'; force_nl = ps.pcase = scase; /* ps.pcase will be used by * dump_line to decide how to * indent the label. force_nl * will force a case n: to be * on a line by itself */ scase = false; ps.want_blank = false; break; case semicolon: /* got a ';' */ if (ps.dec_nest == 0) ps.in_or_st = false;/* we are not in an initialization or * structure declaration */ scase = false; /* these will only need resetting in an error */ squest = 0; if (ps.last_token == rparen && rparen_count == 0) ps.in_parameter_declaration = 0; ps.cast_mask = 0; ps.not_cast_mask = 0; ps.block_init = 0; ps.block_init_level = 0; ps.just_saw_decl--; if (ps.in_decl && s_code == e_code && !ps.block_init && !ps.dumped_decl_indent) { /* indent stray semicolons in declarations */ indent_declaration(dec_ind - 1, tabs_to_var); ps.dumped_decl_indent = true; } ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level * structure declaration, we * arent any more */ if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { /* * This should be true iff there were unbalanced parens in the * stmt. It is a bit complicated, because the semicolon might * be in a for stmt */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* this is a check for an if, while, etc. with * unbalanced parens */ sp_sw = false; parse(hd_type); /* dont lose the if, or whatever */ } } *e_code++ = ';'; ps.want_blank = true; ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the * middle of a stmt */ if (!sp_sw) { /* if not if for (;;) */ parse(semicolon); /* let parser know about end of stmt */ force_nl = true;/* force newline after an end of stmt */ } break; case lbrace: /* got a '{' */ ps.in_stmt = false; /* dont indent the {} */ if (!ps.block_init) force_nl = true;/* force other stuff on same line as '{' onto * new line */ else if (ps.block_init_level <= 0) ps.block_init_level = 1; else ps.block_init_level++; if (s_code != e_code && !ps.block_init) { if (!btype_2) { dump_line(); ps.want_blank = false; } else if (ps.in_parameter_declaration && !ps.in_or_st) { ps.i_l_follow = 0; if (function_brace_split) { /* dump the line prior to the * brace ... */ dump_line(); ps.want_blank = false; } else /* add a space between the decl and brace */ ps.want_blank = true; } } if (ps.in_parameter_declaration) prefix_blankline_requested = 0; if (ps.p_l_follow > 0) { /* check for preceding unbalanced * parens */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* check for unclosed if, for, etc. 
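
The cast detection above keeps one bit per paren depth: a bit in ps.cast_mask is set when a '(' is followed by a type name, the sizeof/offsetof handling vetoes that depth through ps.not_cast_mask, and the rparen case tests "cast bit AND NOT veto bit". A reduced sketch of the scheme, with simplified names and hand-driven events in place of a real token stream:

#include <stdio.h>

/*
 * One bit per paren depth: cast_mask marks "this '(' was followed by a
 * type name, so it may be a cast"; not_cast_mask vetoes depths opened
 * by sizeof or offsetof.
 */
int
main(void)
{
        unsigned int cast_mask = 0, not_cast_mask = 0;
        int depth;

        depth = 1;                      /* saw '(' of "sizeof(int)" */
        not_cast_mask |= 1u << depth;   /* sizeof: veto this depth */
        cast_mask |= 1u << depth;       /* "int" looked cast-like */

        if (cast_mask & (1u << depth) & ~not_cast_mask)
                printf("depth %d closes a cast\n", depth);
        else
                printf("depth %d is not a cast\n", depth);

        /* on ')', drop all bits at or above this depth */
        cast_mask &= (1u << depth) - 1;
        not_cast_mask &= (1u << depth) - 1;
        return (0);
}
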
*/ sp_sw = false; parse(hd_type); ps.ind_level = ps.i_l_follow; } } if (s_code == e_code) ps.ind_stmt = false; /* dont put extra indentation on line * with '{' */ if (ps.in_decl && ps.in_or_st) { /* this is either a structure * declaration or an init */ di_stack[ps.dec_nest++] = dec_ind; /* ? dec_ind = 0; */ } else { ps.decl_on_line = false; /* we can't be in the middle of * a declaration, so don't do * special indentation of * comments */ if (blanklines_after_declarations_at_proctop && ps.in_parameter_declaration) postfix_blankline_requested = 1; ps.in_parameter_declaration = 0; } dec_ind = 0; parse(lbrace); /* let parser know about this */ if (ps.want_blank) /* put a blank before '{' if '{' is not at * start of line */ *e_code++ = ' '; ps.want_blank = false; *e_code++ = '{'; ps.just_saw_decl = 0; break; case rbrace: /* got a '}' */ if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be * omitted in * declarations */ parse(semicolon); if (ps.p_l_follow) {/* check for unclosed if, for, else. */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; sp_sw = false; } ps.just_saw_decl = 0; ps.block_init_level--; if (s_code != e_code && !ps.block_init) { /* '}' must be first on * line */ if (verbose) diag2(0, "Line broken"); dump_line(); } *e_code++ = '}'; ps.want_blank = true; ps.in_stmt = ps.ind_stmt = false; if (ps.dec_nest > 0) { /* we are in multi-level structure * declaration */ dec_ind = di_stack[--ps.dec_nest]; if (ps.dec_nest == 0 && !ps.in_parameter_declaration) ps.just_saw_decl = 2; ps.in_decl = true; } prefix_blankline_requested = 0; parse(rbrace); /* let parser know about this */ ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead && ps.il[ps.tos] >= ps.ind_level; if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) postfix_blankline_requested = 1; break; case swstmt: /* got keyword "switch" */ sp_sw = true; hd_type = swstmt; /* keep this for when we have seen the * expression */ goto copy_id; /* go move the token into buffer */ case sp_paren: /* token is if, while, for */ sp_sw = true; /* the interesting stuff is done after the * expression is scanned */ hd_type = (*token == 'i' ? ifstmt : (*token == 'w' ? whilestmt : forstmt)); /* * remember the type of header for later use by parser */ goto copy_id; /* copy the token into line */ case sp_nparen: /* got else, do */ ps.in_stmt = false; if (*token == 'e') { if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { if (verbose) diag2(0, "Line broken"); dump_line();/* make sure this starts a line */ ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 1; parse(elselit); } else { if (e_code != s_code) { /* make sure this starts a line */ if (verbose) diag2(0, "Line broken"); dump_line(); ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 0; parse(dolit); } goto copy_id; /* move the token into line */ case decl: /* we have a declaration type (int, register, * etc.) 
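
Just below, the decl case chooses a declaration column dec_ind, and indent_declaration() (later in this file) pads the output to that column, rounding with (pos & ~7) + 8 because a hardware tab stop falls every 8 columns: tabs are emitted while a full stop still fits before the target, then spaces finish the job. A self-contained sketch of that padding rule; pad_to_column() is a hypothetical helper, with 0-based columns and 8-column tabs assumed.

#include <stdio.h>

/*
 * Pad from column 'pos' up to 'target' using 8-column tabs first,
 * then spaces; mirrors the (pos & ~7) + 8 rounding in
 * indent_declaration().
 */
static void
pad_to_column(int pos, int target, int use_tabs)
{
        if (use_tabs)
                while ((pos & ~7) + 8 <= target) {
                        putchar('\t');
                        pos = (pos & ~7) + 8;
                }
        while (pos < target) {
                putchar(' ');
                pos++;
        }
}

int
main(void)
{
        printf("int");
        pad_to_column(3, 16, 1);        /* land the name at column 16 */
        printf("x;\n");
        return (0);
}
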
*/ parse(decl); /* let parser worry about indentation */ if (ps.last_token == rparen && ps.tos <= 1) { ps.in_parameter_declaration = 1; if (s_code != e_code) { dump_line(); ps.want_blank = 0; } } if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { ps.ind_level = ps.i_l_follow = 1; ps.ind_stmt = 0; } ps.in_or_st = true; /* this might be a structure or initialization * declaration */ ps.in_decl = ps.decl_on_line = true; if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) ps.just_saw_decl = 2; prefix_blankline_requested = 0; for (i = 0; token[i++];); /* get length of token */ if (ps.ind_level == 0 || ps.dec_nest > 0) { /* global variable or struct member in local variable */ dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); } else { /* local variable */ dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); } goto copy_id; case ident: /* got an identifier or constant */ if (ps.in_decl) { /* if we are in a declaration, we must indent * identifier */ if (is_procname == 0 || !procnames_start_line) { if (!ps.block_init && !ps.dumped_decl_indent) { if (troff) { if (ps.want_blank) *e_code++ = ' '; sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); e_code += strlen(e_code); } else indent_declaration(dec_ind, tabs_to_var); ps.dumped_decl_indent = true; ps.want_blank = false; } } else { if (ps.want_blank) *e_code++ = ' '; ps.want_blank = false; if (dec_ind && s_code != e_code) { *e_code = '\0'; dump_line(); } dec_ind = 0; } } else if (sp_sw && ps.p_l_follow == 0) { sp_sw = false; force_nl = true; ps.last_u_d = true; ps.in_stmt = false; parse(hd_type); } copy_id: if (ps.want_blank) *e_code++ = ' '; if (troff && ps.keyword) { e_code = chfont(&bodyf, &keywordf, e_code); for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = keywordf.allcaps && islower(*t_ptr) ? 
toupper(*t_ptr) : *t_ptr; } e_code = chfont(&keywordf, &bodyf, e_code); } else for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } ps.want_blank = true; break; + case strpfx: + if (ps.want_blank) + *e_code++ = ' '; + for (t_ptr = token; *t_ptr; ++t_ptr) { + CHECK_SIZE_CODE; + *e_code++ = *t_ptr; + } + ps.want_blank = false; + break; + case period: /* treat a period kind of like a binary * operation */ *e_code++ = '.'; /* move the period into line */ ps.want_blank = false; /* dont put a blank after a period */ break; case comma: ps.want_blank = (s_code != e_code); /* only put blank after comma * if comma does not start the * line */ if (ps.in_decl && is_procname == 0 && !ps.block_init && !ps.dumped_decl_indent) { /* indent leading commas and not the actual identifiers */ indent_declaration(dec_ind - 1, tabs_to_var); ps.dumped_decl_indent = true; } *e_code++ = ','; if (ps.p_l_follow == 0) { if (ps.block_init_level <= 0) ps.block_init = 0; if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) force_nl = true; } break; case preesc: /* got the character '#' */ if ((s_com != e_com) || (s_lab != e_lab) || (s_code != e_code)) dump_line(); *e_lab++ = '#'; /* move whole line to 'label' buffer */ { int in_comment = 0; int com_start = 0; char quote = 0; int com_end = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') { buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } while (*buf_ptr != '\n' || (in_comment && !had_eof)) { CHECK_SIZE_LAB; *e_lab = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); switch (*e_lab++) { case BACKSLASH: if (troff) *e_lab++ = BACKSLASH; if (!in_comment) { *e_lab++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } break; case '/': if (*buf_ptr == '*' && !in_comment && !quote) { in_comment = 1; *e_lab++ = *buf_ptr++; com_start = e_lab - s_lab - 2; } break; case '"': if (quote == '"') quote = 0; break; case '\'': if (quote == '\'') quote = 0; break; case '*': if (*buf_ptr == '/' && in_comment) { in_comment = 0; *e_lab++ = *buf_ptr++; com_end = e_lab - s_lab; } break; } } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; if (e_lab - s_lab == com_end && bp_save == NULL) { /* comment on preprocessor line */ if (sc_end == NULL) /* if this is the first comment, we * must set up the buffer */ sc_end = &(save_com[0]); else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } bcopy(s_lab + com_start, sc_end, com_end - com_start); sc_end += com_end - com_start; if (sc_end >= &save_com[sc_size]) abort(); e_lab = s_lab + com_start; while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' '; /* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; } *e_lab = '\0'; /* null terminate line */ ps.pcase = false; } if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ if ((size_t)ifdef_level < nitems(state_stack)) { match_state[ifdef_level].tos = -1; state_stack[ifdef_level++] = ps; } else diag2(1, "#if stack overflow"); } else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ if (ifdef_level <= 0) diag2(1, s_lab[3] == 'i' ? 
"Unmatched #elif" : "Unmatched #else"); else { match_state[ifdef_level - 1] = ps; ps = state_stack[ifdef_level - 1]; } } else if (strncmp(s_lab, "#endif", 6) == 0) { if (ifdef_level <= 0) diag2(1, "Unmatched #endif"); else ifdef_level--; } else { struct directives { int size; const char *string; } recognized[] = { {7, "include"}, {6, "define"}, {5, "undef"}, {4, "line"}, {5, "error"}, {6, "pragma"} }; int d = nitems(recognized); while (--d >= 0) if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) break; if (d < 0) { diag2(1, "Unrecognized cpp directive"); break; } } if (blanklines_around_conditional_compilation) { postfix_blankline_requested++; n_real_blanklines = 0; } else { postfix_blankline_requested = 0; prefix_blankline_requested = 0; } break; /* subsequent processing of the newline * character will cause the line to be printed */ case comment: /* we have gotten a / followed by * this is a biggie */ if (flushed_nl) { /* we should force a broken line here */ dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } pr_comment(); break; } /* end of big switch stmt */ *e_code = '\0'; /* make sure code section is null terminated */ if (type_code != comment && type_code != newline && type_code != preesc) ps.last_token = type_code; } /* end of main while (1) loop */ } /* * copy input file to backup file if in_name is /blah/blah/blah/file, then * backup file will be ".Bfile" then make the backup file the input and * original input file the output */ static void bakcopy(void) { int n, bakchn; char buff[8 * 1024]; const char *p; /* construct file name .Bfile */ for (p = in_name; *p; p++); /* skip to end of string */ while (p > in_name && *p != '/') /* find last '/' */ p--; if (*p == '/') p++; sprintf(bakfile, "%s.BAK", p); /* copy in_name to backup file */ bakchn = creat(bakfile, 0600); if (bakchn < 0) err(1, "%s", bakfile); while ((n = read(fileno(input), buff, sizeof(buff))) > 0) if (write(bakchn, buff, n) != n) err(1, "%s", bakfile); if (n < 0) err(1, "%s", in_name); close(bakchn); fclose(input); /* re-open backup file as the input file */ input = fopen(bakfile, "r"); if (input == NULL) err(1, "%s", bakfile); /* now the original input file will be the output */ output = fopen(in_name, "w"); if (output == NULL) { unlink(bakfile); err(1, "%s", in_name); } } static void indent_declaration(int cur_dec_ind, int tabs_to_var) { int pos = e_code - s_code; char *startpos = e_code; /* * get the tab math right for indentations that are not multiples of 8 */ if ((ps.ind_level * ps.ind_size) % 8 != 0) { pos += (ps.ind_level * ps.ind_size) % 8; cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; } if (tabs_to_var) while ((pos & ~7) + 8 <= cur_dec_ind) { CHECK_SIZE_CODE; *e_code++ = '\t'; pos = (pos & ~7) + 8; } while (pos < cur_dec_ind) { CHECK_SIZE_CODE; *e_code++ = ' '; pos++; } if (e_code == startpos && ps.want_blank) { *e_code++ = ' '; ps.want_blank = false; } } Index: projects/clang391-import/usr.bin/indent/indent_codes.h =================================================================== --- projects/clang391-import/usr.bin/indent/indent_codes.h (revision 309262) +++ projects/clang391-import/usr.bin/indent/indent_codes.h (revision 309263) @@ -1,70 +1,71 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. 
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)indent_codes.h	8.1 (Berkeley) 6/6/93
 * $FreeBSD$
 */

#define newline		1
#define lparen		2
#define rparen		3
#define unary_op	4
#define binary_op	5
#define postop		6
#define question	7
#define casestmt	8
#define colon		9
#define semicolon	10
#define lbrace		11
#define rbrace		12
#define ident		13
#define comma		14
#define comment		15
#define swstmt		16
#define preesc		17
#define form_feed	18
#define decl		19
#define sp_paren	20
#define sp_nparen	21
#define ifstmt		22
#define whilestmt	23
#define forstmt		24
#define stmt		25
#define stmtl		26
#define elselit		27
#define dolit		28
#define dohead		29
#define ifhead		30
#define elsehead	31
#define period		32
+#define strpfx		33
Index: projects/clang391-import/usr.bin/indent/lexi.c
===================================================================
--- projects/clang391-import/usr.bin/indent/lexi.c	(revision 309262)
+++ projects/clang391-import/usr.bin/indent/lexi.c	(revision 309263)
@@ -1,627 +1,632 @@
/*-
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Here we have the token scanner for indent. It scans off one token and puts * it in the global variable "token". It returns a code, indicating the type * of token scanned. */ #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" #define alphanum 1 #define opchar 3 struct templ { const char *rwd; int rwcode; }; /* * This table has to be sorted alphabetically, because it'll be used in binary * search. For the same reason, string must be the first thing in struct templ. */ struct templ specials[] = { {"break", 9}, {"case", 8}, {"char", 4}, {"const", 4}, {"default", 8}, {"do", 6}, {"double", 4}, {"else", 6}, {"enum", 3}, {"extern", 4}, {"float", 4}, {"for", 5}, {"global", 4}, {"goto", 9}, {"if", 5}, {"int", 4}, {"long", 4}, {"offsetof", 1}, {"register", 4}, {"return", 9}, {"short", 4}, {"sizeof", 2}, {"static", 4}, {"struct", 3}, {"switch", 7}, {"typedef", 4}, {"union", 3}, {"unsigned", 4}, {"void", 4}, {"volatile", 4}, {"while", 5} }; const char **typenames; int typename_count; int typename_top = -1; char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; static int strcmp_type(const void *e1, const void *e2) { return (strcmp(e1, *(const char * const *)e2)); } int lexi(void) { int unary_delim; /* this is set to 1 if the current token * forces a following operator to be unary */ static int last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ e_token = s_token; /* point to start of place to save token */ unary_delim = false; ps.col_1 = ps.last_nl; /* tell world that this token started in * column 1 iff the last thing scanned was nl */ ps.last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ps.col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } /* Scan an 
alphanumeric token */ if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { /* * we have a character or number */ struct templ *p; if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { int seendot = 0, seenexp = 0, seensfx = 0; if (*buf_ptr == '0' && (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; while (isxdigit(*buf_ptr)) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; } } else while (1) { if (*buf_ptr == '.') { if (seendot) break; else seendot++; } CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (!isdigit(*buf_ptr) && *buf_ptr != '.') { if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) break; else { seenexp++; seendot++; CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (*buf_ptr == '+' || *buf_ptr == '-') *e_token++ = *buf_ptr++; } } } while (1) { if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; seensfx |= 1; continue; } if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { CHECK_SIZE_TOKEN; if (buf_ptr[1] == buf_ptr[0]) *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; seensfx |= 2; continue; } break; } } else while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { buf_ptr += 2; if (buf_ptr >= buf_end) fill_buffer(); } else break; } CHECK_SIZE_TOKEN; /* copy it over */ *e_token++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } *e_token++ = '\0'; + + if (s_token[0] == 'L' && s_token[1] == '\0' && + (*buf_ptr == '"' || *buf_ptr == '\'')) + return (strpfx); + while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } ps.keyword = 0; if (l_struct && !ps.p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token * should be treated as a declaration */ l_struct = false; last_code = ident; ps.last_u_d = true; return (decl); } ps.last_u_d = l_struct; /* Operator after identifier is binary * unless last token was 'struct' */ l_struct = false; last_code = ident; /* Remember that this is the code we will * return */ p = bsearch(s_token, specials, sizeof(specials) / sizeof(specials[0]), sizeof(specials[0]), strcmp_type); if (p == NULL) { /* not a special keyword... */ char *u; /* ... 
so maybe a type_t or a typedef */ if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && strcmp(u, "_t") == 0) || (typename_top >= 0 && bsearch(s_token, typenames, typename_top + 1, sizeof(typenames[0]), strcmp_type))) { ps.keyword = 4; /* a type name */ ps.last_u_d = true; goto found_typename; } } else { /* we have a keyword */ ps.keyword = p->rwcode; ps.last_u_d = true; switch (p->rwcode) { case 7: /* it is a switch */ return (swstmt); case 8: /* a case or default */ return (casestmt); case 3: /* a "struct" */ /* * Next time around, we will want to know that we have had a * 'struct' */ l_struct = true; /* FALLTHROUGH */ case 4: /* one of the declaration keywords */ found_typename: if (ps.p_l_follow) { /* inside parens: cast, param list, offsetof or sizeof */ ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask; break; } last_code = decl; return (decl); case 5: /* if, while, for */ return (sp_paren); case 6: /* do, else */ return (sp_nparen); default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { char *tp = buf_ptr; while (tp < buf_end) if (*tp++ == ')' && (*tp == ';' || *tp == ',')) goto not_proc; strncpy(ps.procname, token, sizeof ps.procname - 1); ps.in_parameter_declaration = 1; rparen_count = 1; not_proc:; } /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been * typedefd */ if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') && !ps.p_l_follow && !ps.block_init && (ps.last_token == rparen || ps.last_token == semicolon || ps.last_token == decl || ps.last_token == lbrace || ps.last_token == rbrace)) { ps.keyword = 4; /* a type name */ ps.last_u_d = true; last_code = decl; return decl; } if (last_code == decl) /* if this is a declared variable, then * following sign is unary */ ps.last_u_d = true; /* will make "int a -1" work */ last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /* Scan a non-alphanumeric token */ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is * moved here */ *e_token = '\0'; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { case '\n': unary_delim = ps.last_u_d; ps.last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 
0 : newline); /* * if data has been exhausted, the newline is a dummy, and we should * return code to stop */ break; case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; if (troff) { e_token[-1] = '`'; if (qchar == '"') *e_token++ = '`'; e_token = chfont(&bodyf, &stringf, e_token); } do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { diag2(1, "Unterminated literal"); goto stop_lit; } CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, * since CHECK_SIZE guarantees that there * are at least 5 entries left */ *e_token = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_token == BACKSLASH) { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; if (troff) { *++e_token = BACKSLASH; if (*buf_ptr == BACKSLASH) *++e_token = BACKSLASH; } *++e_token = *buf_ptr++; ++e_token; /* we must increment this again because we * copied two chars */ if (buf_ptr >= buf_end) fill_buffer(); } else break; /* we copied one character */ } /* end of while (1) */ } while (*e_token++ != qchar); if (troff) { e_token = chfont(&stringf, &bodyf, e_token - 1); if (qchar == '"') *e_token++ = '\''; } stop_lit: code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = ps.last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; /* * if (ps.in_or_st) ps.block_init = 1; */ /* ? code = ps.block_init ? lparen : lbrace; */ code = lbrace; break; case ('}'): unary_delim = true; /* ? code = ps.block_init ? rparen : rbrace; */ code = rbrace; break; case 014: /* a form feed */ unary_delim = ps.last_u_d; ps.last_nl = true; /* remember this so we can set 'ps.col_1' * right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *e_token++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ if (last_code == ident || last_code == rparen) { code = (ps.last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '=') /* check for operator += */ *e_token++ = *buf_ptr++; else if (*buf_ptr == '>') { /* check for operator -> */ *e_token++ = *buf_ptr++; if (!pointer_as_binop) { unary_delim = false; code = unary_op; ps.want_blank = false; } } break; /* buffer overflow will be checked at end of * switch */ case '=': if (ps.in_or_st) ps.block_init = 1; #ifdef undef if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ e_token[-1] = *buf_ptr++; if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) *e_token++ = *buf_ptr++; *e_token++ = '='; /* Flip =+ to += */ *e_token = 0; } #else if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } #endif code = binary_op; unary_delim = true; break; /* can drop thru!!! 
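
The new strpfx token is the heart of this revision: the hunk added to lexi() above makes the scanner return a distinct code for a lone L that is immediately followed by a quote, and the matching case added to indent.c emits it with no trailing blank, so wide literals such as L"..." and L'x' stay glued together. The test itself is tiny; a standalone sketch, with is_string_prefix() as a hypothetical stand-in for the inline check:

#include <stdio.h>
#include <string.h>

/*
 * Return 1 when the scanned token is the wide-literal prefix: a lone
 * 'L' whose next input character opens a string or char constant
 * (cf. the strpfx check added to lexi()).
 */
static int
is_string_prefix(const char *token, char next)
{
        return (strcmp(token, "L") == 0 && (next == '"' || next == '\''));
}

int
main(void)
{
        printf("%d\n", is_string_prefix("L", '"'));     /* 1: L"..." */
        printf("%d\n", is_string_prefix("L", 'x'));     /* 0: ident then ident */
        return (0);
}
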
*/ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } if (*buf_ptr == '=') *e_token++ = *buf_ptr++; code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ *e_token++ = '*'; if (++buf_ptr >= buf_end) fill_buffer(); code = comment; unary_delim = ps.last_u_d; break; } while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (code != newline) { l_struct = false; last_code = code; } if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); ps.last_u_d = unary_delim; *e_token = '\0'; /* null terminate the token */ return (code); } void alloc_typenames(void) { typenames = (const char **)malloc(sizeof(typenames[0]) * (typename_count = 16)); if (typenames == NULL) err(1, NULL); } void add_typename(const char *key) { int comparison; const char *copy; if (typename_top + 1 >= typename_count) { typenames = realloc((void *)typenames, sizeof(typenames[0]) * (typename_count *= 2)); if (typenames == NULL) err(1, NULL); } if (typename_top == -1) typenames[++typename_top] = copy = strdup(key); else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { /* take advantage of sorted input */ if (comparison == 0) /* remove duplicates */ return; typenames[++typename_top] = copy = strdup(key); } else { int p; for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) /* find place for the new key */; if (comparison == 0) /* remove duplicates */ return; memmove(&typenames[p + 1], &typenames[p], sizeof(typenames[0]) * (++typename_top - p)); typenames[p] = copy = strdup(key); } if (copy == NULL) err(1, NULL); } Index: projects/clang391-import/usr.bin/indent/parse.c =================================================================== --- projects/clang391-import/usr.bin/indent/parse.c (revision 309262) +++ projects/clang391-import/usr.bin/indent/parse.c (revision 309263) @@ -1,337 +1,337 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void reduce(void); void parse(int tk) /* tk: the code for the construct scanned */ { int i; #ifdef debug printf("%2d - %s\n", tk, token); #endif while (ps.p_stack[ps.tos] == ifhead && tk != elselit) { /* true if we have an if without an else */ ps.p_stack[ps.tos] = stmt; /* apply the if(..) stmt ::= stmt * reduction */ reduce(); /* see if this allows any reduction */ } switch (tk) { /* go on and figure out what to do with the * input */ case decl: /* scanned a declaration word */ ps.search_brace = btype_2; /* indicate that following brace should be on same line */ if (ps.p_stack[ps.tos] != decl) { /* only put one declaration * onto stack */ break_comma = true; /* while in declaration, newline should be * forced after comma */ ps.p_stack[++ps.tos] = decl; ps.il[ps.tos] = ps.i_l_follow; if (ps.ljust_decl) {/* only do if we want left justified * declarations */ ps.ind_level = 0; for (i = ps.tos - 1; i > 0; --i) if (ps.p_stack[i] == decl) ++ps.ind_level; /* indentation is number of * declaration levels deep we are */ ps.i_l_follow = ps.ind_level; } } break; case ifstmt: /* scanned if (...) */ if (ps.p_stack[ps.tos] == elsehead && ps.else_if) /* "else if ..." */ ps.i_l_follow = ps.il[ps.tos]; /* the rest is the same as for dolit and forstmt */ case dolit: /* 'do' */ case forstmt: /* for (...) */ ps.p_stack[++ps.tos] = tk; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; ++ps.i_l_follow; /* subsequent statements should be indented 1 */ ps.search_brace = btype_2; break; case lbrace: /* scanned { */ break_comma = false; /* don't break comma in an initial list */ if (ps.p_stack[ps.tos] == stmt || ps.p_stack[ps.tos] == decl || ps.p_stack[ps.tos] == stmtl) ++ps.i_l_follow; /* it is a random, isolated stmt group or a * declaration */ else { if (s_code == e_code) { /* * only do this if there is nothing on the line */ --ps.ind_level; /* * it is a group as part of a while, for, etc. */ if (ps.p_stack[ps.tos] == swstmt && ps.case_indent >= 1) --ps.ind_level; /* * for a switch, brace should be two levels out from the code */ } } ps.p_stack[++ps.tos] = lbrace; ps.il[ps.tos] = ps.ind_level; ps.p_stack[++ps.tos] = stmt; /* allow null stmt between braces */ ps.il[ps.tos] = ps.i_l_follow; break; case whilestmt: /* scanned while (...) 
*/ if (ps.p_stack[ps.tos] == dohead) { /* it is matched with do stmt */ ps.ind_level = ps.i_l_follow = ps.il[ps.tos]; ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; } else { /* it is a while loop */ ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.i_l_follow; ++ps.i_l_follow; ps.search_brace = btype_2; } break; case elselit: /* scanned an else */ if (ps.p_stack[ps.tos] != ifhead) diag2(1, "Unmatched 'else'"); else { ps.ind_level = ps.il[ps.tos]; /* indentation for else should * be same as for if */ ps.i_l_follow = ps.ind_level + 1; /* everything following should * be in 1 level */ ps.p_stack[ps.tos] = elsehead; /* remember if with else */ ps.search_brace = btype_2 | ps.else_if; } break; case rbrace: /* scanned a } */ /* stack should have or */ if (ps.p_stack[ps.tos - 1] == lbrace) { ps.ind_level = ps.i_l_follow = ps.il[--ps.tos]; ps.p_stack[ps.tos] = stmt; } else diag2(1, "Statement nesting error"); break; case swstmt: /* had switch (...) */ ps.p_stack[++ps.tos] = swstmt; ps.cstk[ps.tos] = case_ind; /* save current case indent level */ ps.il[ps.tos] = ps.i_l_follow; case_ind = ps.i_l_follow + ps.case_indent; /* cases should be one * level down from * switch */ ps.i_l_follow += ps.case_indent + 1; /* statements should be two * levels in */ ps.search_brace = btype_2; break; case semicolon: /* this indicates a simple stmt */ break_comma = false; /* turn off flag to break after commas in a * declaration */ ps.p_stack[++ps.tos] = stmt; ps.il[ps.tos] = ps.ind_level; break; default: /* this is an error */ diag2(1, "Unknown code to parser"); return; } /* end of switch */ - if (ps.tos >= STACKSIZE) + if (ps.tos >= STACKSIZE - 1) errx(1, "Parser stack overflow"); reduce(); /* see if any reduction can be done */ #ifdef debug for (i = 1; i <= ps.tos; ++i) printf("(%d %d)", ps.p_stack[i], ps.il[i]); printf("\n"); #endif return; } /* * NAME: reduce * * FUNCTION: Implements the reduce part of the parsing algorithm * * ALGORITHM: The following reductions are done. Reductions are repeated * until no more are possible. * * Old TOS New TOS * * * do "dostmt" * if "ifstmt" * switch * decl * "ifelse" * for * while * "dostmt" while * * On each reduction, ps.i_l_follow (the indentation for the following line) * is set to the indentation level associated with the old TOS. * * PARAMETERS: None * * RETURNS: Nothing * * GLOBALS: ps.cstk ps.i_l_follow = ps.il ps.p_stack = ps.tos = * * CALLS: None * * CALLED BY: parse * * HISTORY: initial coding November 1976 D A Willcox of CAC * */ /*----------------------------------------------*\ | REDUCTION PHASE | \*----------------------------------------------*/ static void reduce(void) { int i; for (;;) { /* keep looping until there is nothing left to * reduce */ switch (ps.p_stack[ps.tos]) { case stmt: switch (ps.p_stack[ps.tos - 1]) { case stmt: case stmtl: /* stmtl stmt or stmt stmt */ ps.p_stack[--ps.tos] = stmtl; break; case dolit: /* */ ps.p_stack[--ps.tos] = dohead; ps.i_l_follow = ps.il[ps.tos]; break; case ifstmt: /* */ ps.p_stack[--ps.tos] = ifhead; for (i = ps.tos - 1; ( ps.p_stack[i] != stmt && ps.p_stack[i] != stmtl && ps.p_stack[i] != lbrace ); --i); ps.i_l_follow = ps.il[i]; /* * for the time being, we will assume that there is no else on * this if, and set the indentation level accordingly. 
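
The parse.c hunk above changes the overflow test from ps.tos >= STACKSIZE to ps.tos >= STACKSIZE - 1. The reason is push-before-check: every case writes through ps.p_stack[++ps.tos] before the bounds test runs, so testing against STACKSIZE fires only after an out-of-bounds write has already happened, while testing against STACKSIZE - 1 keeps the last write in range. A reduced sketch of the pattern, with hypothetical names and a deliberately tiny stack:

#include <stdio.h>
#include <stdlib.h>

#define STACKSIZE 8             /* deliberately tiny */

static int p_stack[STACKSIZE];
static int tos;                 /* slot 0 stays a sentinel */

/*
 * The write lands before the bounds test, so the test must keep one
 * slot of headroom -- hence ">= STACKSIZE - 1", not ">= STACKSIZE".
 */
static void
push(int v)
{
        p_stack[++tos] = v;
        if (tos >= STACKSIZE - 1) {
                fprintf(stderr, "Parser stack overflow\n");
                exit(1);
        }
}

int
main(void)
{
        for (;;)
                push(42);       /* exits through the overflow check */
}
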
If an * else is scanned, it will be fixed up later */ break; case swstmt: /* */ case_ind = ps.cstk[ps.tos - 1]; /* FALLTHROUGH */ case decl: /* finish of a declaration */ case elsehead: /* < else> */ case forstmt: /* */ case whilestmt: /* */ ps.p_stack[--ps.tos] = stmt; ps.i_l_follow = ps.il[ps.tos]; break; default: /* */ return; } /* end of section for on top of stack */ break; case whilestmt: /* while (...) on top */ if (ps.p_stack[ps.tos - 1] == dohead) { /* it is termination of a do while */ ps.tos -= 2; break; } else return; default: /* anything else on top */ return; } } } Index: projects/clang391-import/usr.bin/indent/pr_comment.c =================================================================== --- projects/clang391-import/usr.bin/indent/pr_comment.c (revision 309262) +++ projects/clang391-import/usr.bin/indent/pr_comment.c (revision 309263) @@ -1,335 +1,339 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)pr_comment.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "indent_globs.h" #include "indent.h" /* * NAME: * pr_comment * * FUNCTION: * This routine takes care of scanning and printing comments. * * ALGORITHM: * 1) Decide where the comment should be aligned, and if lines should * be broken. * 2) If lines should not be broken and filled, just copy up to end of * comment. * 3) If lines should be filled, then scan thru input_buffer copying * characters to com_buf. Remember where the last blank, tab, or * newline was. When line is filled, print up to last blank and * continue copying. 
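
The pr_comment.c change below tightens the one-liner test: the break delimiter is now suppressed only when the whole /* ... */ comment, measured from ps.com_col with count_spaces_until(), still ends at or before adj_max_col, instead of for every one-line comment regardless of width. A standalone sketch of such a fits-on-one-line check; end_column() is a hypothetical stand-in for count_spaces_until(), assuming 1-based columns and 8-column tabs:

#include <stdio.h>
#include <string.h>

/* Column reached after printing s[0..len) starting at column 'col'. */
static int
end_column(int col, const char *s, size_t len)
{
        for (size_t i = 0; i < len; i++) {
                if (s[i] == '\t')
                        col = ((col + 7) & ~7) + 1;     /* next tab stop */
                else
                        col++;
        }
        return (col);
}

int
main(void)
{
        const char *body = "a short remark */";
        int com_col = 33, max_col = 78;

        /* keep it a one-liner only if the closing */
        /* delimiter stays inside the margin       */
        if (end_column(com_col, body, strlen(body)) <= max_col)
                printf("fits: no break delimiter needed\n");
        else
                printf("too long: break the delimiter onto its own line\n");
        return (0);
}
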
* * HISTORY: * November 1976 D A Willcox of CAC Initial coding * 12/6/76 D A Willcox of CAC Modification to handle * UNIX-style comments * */ /* * this routine processes comments. It makes an attempt to keep comments from * going over the max line length. If a line is too long, it moves everything * from the last blank to the next comment line. Blanks and tabs from the * beginning of the input line are removed */ void pr_comment(void) { int now_col; /* column we are in now */ int adj_max_col; /* Adjusted max_col for when we decide to * spill comments over the right margin */ char *last_bl; /* points to the last blank in the output * buffer */ char *t_ptr; /* used for moving string */ int break_delim = comment_delimiter_on_blankline; int l_just_saw_decl = ps.just_saw_decl; adj_max_col = max_col; ps.just_saw_decl = 0; last_bl = NULL; /* no blanks found so far */ ps.box_com = false; /* at first, assume that we are not in * a boxed comment or some other * comment that should not be touched */ ++ps.out_coms; /* keep track of number of comments */ /* Figure where to align and how to treat the comment */ if (ps.col_1 && !format_col1_comments) { /* if comment starts in column * 1 it should not be touched */ ps.box_com = true; break_delim = false; ps.com_col = 1; } else { if (*buf_ptr == '-' || *buf_ptr == '*' || (*buf_ptr == '\n' && !format_block_comments)) { ps.box_com = true; /* A comment with a '-' or '*' immediately * after the /+* is assumed to be a boxed * comment. A comment with a newline * immediately after the /+* is assumed to * be a block comment and is treated as a * box comment unless format_block_comments * is nonzero (the default). */ break_delim = false; } if ( /* ps.bl_line && */ (s_lab == e_lab) && (s_code == e_code)) { /* klg: check only if this line is blank */ /* * If this (*and previous lines are*) blank, dont put comment way * out at left */ ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; adj_max_col = block_comment_max_col; if (ps.com_col <= 1) ps.com_col = 1 + !format_col1_comments; } else { int target_col; break_delim = false; if (s_code != e_code) target_col = count_spaces(compute_code_target(), s_code); else { target_col = 1; if (s_lab != e_lab) target_col = count_spaces(compute_label_target(), s_lab); } ps.com_col = ps.decl_on_line || ps.ind_level == 0 ? ps.decl_com_ind : ps.com_ind; if (ps.com_col < target_col) ps.com_col = ((target_col + 7) & ~7) + 1; if (ps.com_col + 24 > adj_max_col) adj_max_col = ps.com_col + 24; } } if (ps.box_com) { buf_ptr[-2] = 0; ps.n_comment_delta = 1 - count_spaces(1, in_buffer); buf_ptr[-2] = '/'; } else { ps.n_comment_delta = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') buf_ptr++; } ps.comment_delta = 0; *e_com++ = '/'; /* put '/' followed by '*' into buffer */ *e_com++ = '*'; if (*buf_ptr != ' ' && !ps.box_com) *e_com++ = ' '; - /* Don't put a break delimiter if this comment is a one-liner */ - for (t_ptr = buf_ptr; *t_ptr != '\0' && *t_ptr != '\n'; t_ptr++) { - if (t_ptr >= buf_end) - fill_buffer(); - if (t_ptr[0] == '*' && t_ptr[1] == '/') { - break_delim = false; - break; + /* + * Don't put a break delimiter if this is a one-liner that won't wrap. 
+ */ + if (break_delim) + for (t_ptr = buf_ptr; *t_ptr != '\0' && *t_ptr != '\n'; t_ptr++) { + if (t_ptr >= buf_end) + fill_buffer(); + if (t_ptr[0] == '*' && t_ptr[1] == '/') { + if (adj_max_col >= count_spaces_until(ps.com_col, buf_ptr, t_ptr + 2)) + break_delim = false; + break; + } } - } if (break_delim) { char *t = e_com; e_com = s_com + 2; *e_com = 0; if (blanklines_before_blockcomments) prefix_blankline_requested = 1; dump_line(); e_com = s_com = t; if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } if (troff) adj_max_col = 80; /* Start to copy the comment */ while (1) { /* this loop will go until the comment is * copied */ CHECK_SIZE_COM; switch (*buf_ptr) { /* this checks for various spcl cases */ case 014: /* check for a form feed */ if (!ps.box_com) { /* in a text comment, break the line here */ ps.use_ff = true; /* fix so dump_line uses a form feed */ dump_line(); last_bl = NULL; if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; while (*++buf_ptr == ' ' || *buf_ptr == '\t') ; } else { if (++buf_ptr >= buf_end) fill_buffer(); *e_com++ = 014; } break; case '\n': if (had_eof) { /* check for unexpected eof */ printf("Unterminated comment\n"); dump_line(); return; } last_bl = NULL; if (ps.box_com || ps.last_nl) { /* if this is a boxed comment, * we dont ignore the newline */ if (s_com == e_com) *e_com++ = ' '; if (!ps.box_com && e_com - s_com > 3) { dump_line(); if (star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } dump_line(); if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } else { ps.last_nl = 1; if (*(e_com - 1) == ' ' || *(e_com - 1) == '\t') last_bl = e_com - 1; /* * if there was a space at the end of the last line, remember * where it was */ else { /* otherwise, insert one */ last_bl = e_com; CHECK_SIZE_COM; *e_com++ = ' '; } } ++line_no; /* keep track of input line number */ if (!ps.box_com) { int nstar = 1; do { /* flush any blanks and/or tabs at start of * next line */ if (++buf_ptr >= buf_end) fill_buffer(); if (*buf_ptr == '*' && --nstar >= 0) { if (++buf_ptr >= buf_end) fill_buffer(); if (*buf_ptr == '/') goto end_of_comment; } } while (*buf_ptr == ' ' || *buf_ptr == '\t'); } else if (++buf_ptr >= buf_end) fill_buffer(); break; /* end of case for newline */ case '*': /* must check for possibility of being at end * of comment */ if (++buf_ptr >= buf_end) /* get to next char after * */ fill_buffer(); if (*buf_ptr == '/') { /* it is the end!!! 
*/ end_of_comment: if (++buf_ptr >= buf_end) fill_buffer(); CHECK_SIZE_COM; if (break_delim) { if (e_com > s_com + 3) { dump_line(); } else s_com = e_com; *e_com++ = ' '; } if (e_com[-1] != ' ' && !ps.box_com) *e_com++ = ' '; /* ensure blank before end */ *e_com++ = '*', *e_com++ = '/', *e_com = '\0'; ps.just_saw_decl = l_just_saw_decl; return; } else /* handle isolated '*' */ *e_com++ = '*'; break; default: /* we have a random char */ now_col = count_spaces_until(ps.com_col, s_com, e_com); do { *e_com = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_com == ' ' || *e_com == '\t') last_bl = e_com; /* remember we saw a blank */ ++e_com; now_col++; } while (!memchr("*\n\r\b\t", *buf_ptr, 6) && (now_col <= adj_max_col || !last_bl)); ps.last_nl = false; if (now_col > adj_max_col && !ps.box_com && e_com[-1] > ' ') { /* * the comment is too long, it must be broken up */ if (last_bl == NULL) { dump_line(); if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; break; } *e_com = '\0'; e_com = last_bl; dump_line(); if (!ps.box_com && star_comment_cont) *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; for (t_ptr = last_bl + 1; *t_ptr == ' ' || *t_ptr == '\t'; t_ptr++) ; last_bl = NULL; while (*t_ptr != '\0') { if (*t_ptr == ' ' || *t_ptr == '\t') last_bl = e_com; *e_com++ = *t_ptr++; } } break; } } } Index: projects/clang391-import/usr.bin/sort/bwstring.c =================================================================== --- projects/clang391-import/usr.bin/sort/bwstring.c (revision 309262) +++ projects/clang391-import/usr.bin/sort/bwstring.c (revision 309263) @@ -1,1149 +1,1143 @@ /*- * Copyright (C) 2009 Gabor Kovesdan * Copyright (C) 2012 Oleg Moskalenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "bwstring.h" #include "sort.h" bool byte_sort; static wchar_t **wmonths; static unsigned char **cmonths; /* initialise months */ void initialise_months(void) { const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, ABMON_11, ABMON_12 }; unsigned char *tmp; size_t len; if (MB_CUR_MAX == 1) { if (cmonths == NULL) { unsigned char *m; cmonths = sort_malloc(sizeof(unsigned char*) * 12); for (int i = 0; i < 12; i++) { cmonths[i] = NULL; tmp = (unsigned char *) nl_langinfo(item[i]); if (debug_sort) printf("month[%d]=%s\n", i, tmp); if (*tmp == '\0') continue; m = sort_strdup(tmp); len = strlen(tmp); for (unsigned int j = 0; j < len; j++) m[j] = toupper(m[j]); cmonths[i] = m; } } } else { if (wmonths == NULL) { wchar_t *m; wmonths = sort_malloc(sizeof(wchar_t *) * 12); for (int i = 0; i < 12; i++) { wmonths[i] = NULL; tmp = (unsigned char *) nl_langinfo(item[i]); if (debug_sort) printf("month[%d]=%s\n", i, tmp); if (*tmp == '\0') continue; len = strlen(tmp); m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); if (mbstowcs(m, (char*)tmp, len) == ((size_t) - 1)) { sort_free(m); continue; } m[len] = L'\0'; for (unsigned int j = 0; j < len; j++) m[j] = towupper(m[j]); wmonths[i] = m; } } } } /* * Compare two wide-character strings */ static int wide_str_coll(const wchar_t *s1, const wchar_t *s2) { int ret = 0; errno = 0; ret = wcscoll(s1, s2); if (errno == EILSEQ) { errno = 0; ret = wcscmp(s1, s2); if (errno != 0) { for (size_t i = 0; ; ++i) { wchar_t c1 = s1[i]; wchar_t c2 = s2[i]; if (c1 == L'\0') return ((c2 == L'\0') ? 0 : -1); if (c2 == L'\0') return (+1); if (c1 == c2) continue; return ((int)(c1 - c2)); } } } return (ret); } /* counterparts of wcs functions */ void bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) { if (MB_CUR_MAX == 1) fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); else fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); } const void* bwsrawdata(const struct bwstring *bws) { return (&(bws->data)); } size_t bwsrawlen(const struct bwstring *bws) { return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len)); } size_t bws_memsize(const struct bwstring *bws) { return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring))); } void bws_setlen(struct bwstring *bws, size_t newlen) { if (bws && newlen != bws->len && newlen <= bws->len) { bws->len = newlen; if (MB_CUR_MAX == 1) bws->data.cstr[newlen] = '\0'; else bws->data.wstr[newlen] = L'\0'; } } /* * Allocate a new binary string of specified size */ struct bwstring * bwsalloc(size_t sz) { struct bwstring *ret; if (MB_CUR_MAX == 1) ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); else ret = sort_malloc(sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1)); ret->len = sz; if (MB_CUR_MAX == 1) ret->data.cstr[ret->len] = '\0'; else ret->data.wstr[ret->len] = L'\0'; return (ret); } /* * Create a copy of binary string. * New string size equals the length of the old string. */ struct bwstring * bwsdup(const struct bwstring *s) { if (s == NULL) return (NULL); else { struct bwstring *ret = bwsalloc(s->len); if (MB_CUR_MAX == 1) memcpy(ret->data.cstr, s->data.cstr, (s->len)); else memcpy(ret->data.wstr, s->data.wstr, SIZEOF_WCHAR_STRING(s->len)); return (ret); } } /* * Create a new binary string from a wide character buffer. 
*/ struct bwstring * bwssbdup(const wchar_t *str, size_t len) { if (str == NULL) return ((len == 0) ? bwsalloc(0) : NULL); else { struct bwstring *ret; ret = bwsalloc(len); if (MB_CUR_MAX == 1) for (size_t i = 0; i < len; ++i) ret->data.cstr[i] = (unsigned char) str[i]; else memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); return (ret); } } /* * Create a new binary string from a raw binary buffer. */ struct bwstring * bwscsbdup(const unsigned char *str, size_t len) { struct bwstring *ret; ret = bwsalloc(len); if (str) { if (MB_CUR_MAX == 1) memcpy(ret->data.cstr, str, len); else { mbstate_t mbs; const char *s; size_t charlen, chars, cptr; charlen = chars = 0; cptr = 0; s = (const char *) str; memset(&mbs, 0, sizeof(mbs)); while (cptr < len) { size_t n = MB_CUR_MAX; if (n > len - cptr) n = len - cptr; charlen = mbrlen(s + cptr, n, &mbs); switch (charlen) { case 0: /* FALLTHROUGH */ case (size_t) -1: /* FALLTHROUGH */ case (size_t) -2: ret->data.wstr[chars++] = (unsigned char) s[cptr]; ++cptr; break; default: n = mbrtowc(ret->data.wstr + (chars++), s + cptr, charlen, &mbs); if ((n == (size_t)-1) || (n == (size_t)-2)) /* NOTREACHED */ err(2, "mbrtowc error"); cptr += charlen; } } ret->len = chars; ret->data.wstr[ret->len] = L'\0'; } } return (ret); } /* * De-allocate object memory */ void bwsfree(const struct bwstring *s) { if (s) sort_free(s); } /* * Copy content of src binary string to dst. * If the capacity of the dst string is not sufficient, * then the data is truncated. */ size_t bwscpy(struct bwstring *dst, const struct bwstring *src) { size_t nums = src->len; if (nums > dst->len) nums = dst->len; dst->len = nums; if (MB_CUR_MAX == 1) { memcpy(dst->data.cstr, src->data.cstr, nums); dst->data.cstr[dst->len] = '\0'; } else { memcpy(dst->data.wstr, src->data.wstr, SIZEOF_WCHAR_STRING(nums + 1)); dst->data.wstr[dst->len] = L'\0'; } return (nums); } /* * Copy content of src binary string to dst, * with specified number of symbols to be copied. * If the capacity of the dst string is not sufficient, * then the data is truncated. */ struct bwstring * bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) { size_t nums = src->len; if (nums > dst->len) nums = dst->len; if (nums > size) nums = size; dst->len = nums; if (MB_CUR_MAX == 1) { memcpy(dst->data.cstr, src->data.cstr, nums); dst->data.cstr[dst->len] = '\0'; } else { memcpy(dst->data.wstr, src->data.wstr, SIZEOF_WCHAR_STRING(nums + 1)); dst->data.wstr[dst->len] = L'\0'; } return (dst); } /* * Copy content of src binary string to dst, * with specified number of symbols to be copied. * An offset value can be specified, from the start of src string. * If the capacity of the dst string is not sufficient, * then the data is truncated. */ struct bwstring * bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, size_t size) { if (offset >= src->len) { dst->data.wstr[0] = 0; dst->len = 0; } else { size_t nums = src->len - offset; if (nums > dst->len) nums = dst->len; if (nums > size) nums = size; dst->len = nums; if (MB_CUR_MAX == 1) { memcpy(dst->data.cstr, src->data.cstr + offset, (nums)); dst->data.cstr[dst->len] = '\0'; } else { memcpy(dst->data.wstr, src->data.wstr + offset, SIZEOF_WCHAR_STRING(nums)); dst->data.wstr[dst->len] = L'\0'; } } return (dst); } /* * Write binary string to the file. * The output is ended either with '\n' (nl == true) * or '\0' (nl == false). 
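 *
 * In terms of the parameter actually used below, zero_ended == false
 * selects the '\n' terminator and zero_ended == true selects '\0';
 * the "nl" flag mentioned above is the same switch.  A short sketch
 * (hypothetical one-character string):
 *
 *	struct bwstring *bws = bwscsbdup((const unsigned char *)"x", 1);
 *	bwsfwrite(bws, stdout, false);	// emits "x\n"
 *	bwsfree(bws);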
*/ size_t bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) { if (MB_CUR_MAX == 1) { size_t len = bws->len; if (!zero_ended) { bws->data.cstr[len] = '\n'; if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) err(2, NULL); bws->data.cstr[len] = '\0'; } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) err(2, NULL); return (len + 1); } else { wchar_t eols; size_t printed = 0; eols = zero_ended ? btowc('\0') : btowc('\n'); while (printed < BWSLEN(bws)) { const wchar_t *s = bws->data.wstr + printed; if (*s == L'\0') { int nums; nums = fwprintf(f, L"%lc", *s); if (nums != 1) err(2, NULL); ++printed; } else { int nums; nums = fwprintf(f, L"%ls", s); if (nums < 1) err(2, NULL); printed += nums; } } fwprintf(f, L"%lc", eols); return (printed + 1); } } /* * Allocate and read a binary string from file. * The strings are nl-ended or zero-ended, depending on the sort setting. */ struct bwstring * bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) { wint_t eols; eols = zero_ended ? btowc('\0') : btowc('\n'); if (!zero_ended && (MB_CUR_MAX > 1)) { wchar_t *ret; ret = fgetwln(f, len); if (ret == NULL) { if (!feof(f)) err(2, NULL); return (NULL); } if (*len > 0) { if (ret[*len - 1] == (wchar_t)eols) --(*len); } return (bwssbdup(ret, *len)); } else if (!zero_ended && (MB_CUR_MAX == 1)) { char *ret; ret = fgetln(f, len); if (ret == NULL) { if (!feof(f)) err(2, NULL); return (NULL); } if (*len > 0) { if (ret[*len - 1] == '\n') --(*len); } return (bwscsbdup((unsigned char*)ret, *len)); } else { *len = 0; if (feof(f)) return (NULL); if (2 >= rb->fgetwln_z_buffer_size) { rb->fgetwln_z_buffer_size += 256; rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, sizeof(wchar_t) * rb->fgetwln_z_buffer_size); } rb->fgetwln_z_buffer[*len] = 0; if (MB_CUR_MAX == 1) while (!feof(f)) { int c; c = fgetc(f); if (c == EOF) { if (*len == 0) return (NULL); goto line_read_done; } if (c == eols) goto line_read_done; if (*len + 1 >= rb->fgetwln_z_buffer_size) { rb->fgetwln_z_buffer_size += 256; rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); } rb->fgetwln_z_buffer[*len] = c; rb->fgetwln_z_buffer[++(*len)] = 0; } else while (!feof(f)) { wint_t c = 0; c = fgetwc(f); if (c == WEOF) { if (*len == 0) return (NULL); goto line_read_done; } if (c == eols) goto line_read_done; if (*len + 1 >= rb->fgetwln_z_buffer_size) { rb->fgetwln_z_buffer_size += 256; rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); } rb->fgetwln_z_buffer[*len] = c; rb->fgetwln_z_buffer[++(*len)] = 0; } line_read_done: /* we do not count the last 0 */ return (bwssbdup(rb->fgetwln_z_buffer, *len)); } } int bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset, size_t len) { size_t cmp_len, len1, len2; int res = 0; cmp_len = 0; len1 = bws1->len; len2 = bws2->len; if (len1 <= offset) { return ((len2 <= offset) ? 
0 : -1); } else { if (len2 <= offset) return (+1); else { len1 -= offset; len2 -= offset; cmp_len = len1; if (len2 < cmp_len) cmp_len = len2; if (len < cmp_len) cmp_len = len; if (MB_CUR_MAX == 1) { const unsigned char *s1, *s2; s1 = bws1->data.cstr + offset; s2 = bws2->data.cstr + offset; res = memcmp(s1, s2, cmp_len); } else { const wchar_t *s1, *s2; s1 = bws1->data.wstr + offset; s2 = bws2->data.wstr + offset; res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); } } } if (res == 0) { if (len1 < cmp_len && len1 < len2) res = -1; else if (len2 < cmp_len && len2 < len1) res = +1; } return (res); } int bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) { size_t len1, len2, cmp_len; int res; len1 = bws1->len; len2 = bws2->len; len1 -= offset; len2 -= offset; cmp_len = len1; if (len2 < cmp_len) cmp_len = len2; res = bwsncmp(bws1, bws2, offset, cmp_len); if (res == 0) { if( len1 < len2) res = -1; else if (len2 < len1) res = +1; } return (res); } int bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) { wchar_t c1, c2; size_t i = 0; for (i = 0; i < len; ++i) { c1 = bws_get_iter_value(iter1); c2 = bws_get_iter_value(iter2); if (c1 != c2) return (c1 - c2); iter1 = bws_iterator_inc(iter1, 1); iter2 = bws_iterator_inc(iter2, 1); } return (0); } int bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) { size_t len1, len2; len1 = bws1->len; len2 = bws2->len; if (len1 <= offset) return ((len2 <= offset) ? 0 : -1); else { if (len2 <= offset) return (+1); else { len1 -= offset; len2 -= offset; if (MB_CUR_MAX == 1) { const unsigned char *s1, *s2; s1 = bws1->data.cstr + offset; s2 = bws2->data.cstr + offset; if (byte_sort) { int res = 0; if (len1 > len2) { res = memcmp(s1, s2, len2); if (!res) res = +1; } else if (len1 < len2) { res = memcmp(s1, s2, len1); if (!res) res = -1; } else res = memcmp(s1, s2, len1); return (res); } else { int res = 0; size_t i, maxlen; i = 0; maxlen = len1; if (maxlen > len2) maxlen = len2; while (i < maxlen) { /* goto next non-zero part: */ while ((i < maxlen) && !s1[i] && !s2[i]) ++i; if (i >= maxlen) break; if (s1[i] == 0) { if (s2[i] == 0) /* NOTREACHED */ err(2, "bwscoll error 01"); else return (-1); } else if (s2[i] == 0) return (+1); res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); if (res) return (res); while ((i < maxlen) && s1[i] && s2[i]) ++i; if (i >= maxlen) break; if (s1[i] == 0) { if (s2[i] == 0) { ++i; continue; } else return (-1); } else if (s2[i] == 0) return (+1); else /* NOTREACHED */ err(2, "bwscoll error 02"); } if (len1 < len2) return (-1); else if (len1 > len2) return (+1); return (0); } } else { const wchar_t *s1, *s2; size_t i, maxlen; int res = 0; s1 = bws1->data.wstr + offset; s2 = bws2->data.wstr + offset; i = 0; maxlen = len1; if (maxlen > len2) maxlen = len2; while (i < maxlen) { /* goto next non-zero part: */ while ((i < maxlen) && !s1[i] && !s2[i]) ++i; if (i >= maxlen) break; if (s1[i] == 0) { if (s2[i] == 0) /* NOTREACHED */ err(2, "bwscoll error 1"); else return (-1); } else if (s2[i] == 0) return (+1); res = wide_str_coll(s1 + i, s2 + i); if (res) return (res); while ((i < maxlen) && s1[i] && s2[i]) ++i; if (i >= maxlen) break; if (s1[i] == 0) { if (s2[i] == 0) { ++i; continue; } else return (-1); } else if (s2[i] == 0) return (+1); else /* NOTREACHED */ err(2, "bwscoll error 2"); } if (len1 < len2) return (-1); else if (len1 > len2) return (+1); return (0); } } } } /* * Correction of the system API */ double bwstod(struct bwstring *s0, bool 
*empty) { double ret = 0; if (MB_CUR_MAX == 1) { unsigned char *end, *s; char *ep; s = s0->data.cstr; end = s + s0->len; ep = NULL; while (isblank(*s) && s < end) ++s; if (!isprint(*s)) { *empty = true; return (0); } ret = strtod((char*)s, &ep); if ((unsigned char*) ep == s) { *empty = true; return (0); } } else { wchar_t *end, *ep, *s; s = s0->data.wstr; end = s + s0->len; ep = NULL; while (iswblank(*s) && s < end) ++s; if (!iswprint(*s)) { *empty = true; return (0); } ret = wcstod(s, &ep); if (ep == s) { *empty = true; return (0); } } *empty = false; return (ret); } /* * A helper function for monthcoll. If a line matches * a month name, it returns (number of the month - 1), * while if there is no match, it just return -1. */ int bws_month_score(const struct bwstring *s0) { if (MB_CUR_MAX == 1) { const unsigned char *end, *s; - size_t len; s = s0->data.cstr; end = s + s0->len; while (isblank(*s) && s < end) ++s; - len = strlen((const char*)s); - for (int i = 11; i >= 0; --i) { if (cmonths[i] && (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i])))) return (i); } } else { const wchar_t *end, *s; - size_t len; s = s0->data.wstr; end = s + s0->len; while (iswblank(*s) && s < end) ++s; - - len = wcslen(s); for (int i = 11; i >= 0; --i) { if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) return (i); } } return (-1); } /* * Rips out leading blanks (-b). */ struct bwstring * ignore_leading_blanks(struct bwstring *str) { if (MB_CUR_MAX == 1) { unsigned char *dst, *end, *src; src = str->data.cstr; dst = src; end = src + str->len; while (src < end && isblank(*src)) ++src; if (src != dst) { size_t newlen; newlen = BWSLEN(str) - (src - dst); while (src < end) { *dst = *src; ++dst; ++src; } bws_setlen(str, newlen); } } else { wchar_t *dst, *end, *src; src = str->data.wstr; dst = src; end = src + str->len; while (src < end && iswblank(*src)) ++src; if (src != dst) { size_t newlen = BWSLEN(str) - (src - dst); while (src < end) { *dst = *src; ++dst; ++src; } bws_setlen(str, newlen); } } return (str); } /* * Rips out nonprinting characters (-i). */ struct bwstring * ignore_nonprinting(struct bwstring *str) { size_t newlen = str->len; if (MB_CUR_MAX == 1) { unsigned char *dst, *end, *src; unsigned char c; src = str->data.cstr; dst = src; end = src + str->len; while (src < end) { c = *src; if (isprint(c)) { *dst = c; ++dst; ++src; } else { ++src; --newlen; } } } else { wchar_t *dst, *end, *src; wchar_t c; src = str->data.wstr; dst = src; end = src + str->len; while (src < end) { c = *src; if (iswprint(c)) { *dst = c; ++dst; ++src; } else { ++src; --newlen; } } } bws_setlen(str, newlen); return (str); } /* * Rips out any characters that are not alphanumeric characters * nor blanks (-d). */ struct bwstring * dictionary_order(struct bwstring *str) { size_t newlen = str->len; if (MB_CUR_MAX == 1) { unsigned char *dst, *end, *src; unsigned char c; src = str->data.cstr; dst = src; end = src + str->len; while (src < end) { c = *src; if (isalnum(c) || isblank(c)) { *dst = c; ++dst; ++src; } else { ++src; --newlen; } } } else { wchar_t *dst, *end, *src; wchar_t c; src = str->data.wstr; dst = src; end = src + str->len; while (src < end) { c = *src; if (iswalnum(c) || iswblank(c)) { *dst = c; ++dst; ++src; } else { ++src; --newlen; } } } bws_setlen(str, newlen); return (str); } /* * Converts string to lower case(-f). 
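 *
 * Note that, despite the summary above, the code below folds
 * characters to UPPER case via toupper(3)/towupper(3); for -f the
 * direction of the fold is irrelevant, as long as both compared
 * strings are folded the same way.  A single-byte sketch of the same
 * fold (hypothetical buffer):
 *
 *	for (unsigned char *p = buf; p < buf + len; p++)
 *		*p = toupper(*p);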
*/ struct bwstring * ignore_case(struct bwstring *str) { if (MB_CUR_MAX == 1) { unsigned char *end, *s; s = str->data.cstr; end = s + str->len; while (s < end) { *s = toupper(*s); ++s; } } else { wchar_t *end, *s; s = str->data.wstr; end = s + str->len; while (s < end) { *s = towupper(*s); ++s; } } return (str); } void bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) { if (MB_CUR_MAX == 1) warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); else warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); } Index: projects/clang391-import/usr.bin/sort/sort.c =================================================================== --- projects/clang391-import/usr.bin/sort/sort.c (revision 309262) +++ projects/clang391-import/usr.bin/sort/sort.c (revision 309263) @@ -1,1321 +1,1315 @@ /*- * Copyright (C) 2009 Gabor Kovesdan * Copyright (C) 2012 Oleg Moskalenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "coll.h" #include "file.h" #include "sort.h" #ifndef WITHOUT_NLS #include nl_catd catalog; #endif #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) static bool need_random; static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; static const void *random_seed; static size_t random_seed_size; MD5_CTX md5_ctx; /* * Default messages to use when NLS is disabled or no catalogue * is found. */ const char *nlsstr[] = { "", /* 1*/"mutually exclusive flags", /* 2*/"extra argument not allowed with -c", /* 3*/"Unknown feature", /* 4*/"Wrong memory buffer specification", /* 5*/"0 field in key specs", /* 6*/"0 column in key specs", /* 7*/"Wrong file mode", /* 8*/"Cannot open file for reading", /* 9*/"Radix sort cannot be used with these sort options", /*10*/"The chosen sort method cannot be used with stable and/or unique sort", /*11*/"Invalid key position", /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... 
] " "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " "[-o outfile] [--batch-size size] [--files0-from file] " "[--heapsort] [--mergesort] [--radixsort] [--qsort] " "[--mmap] " #if defined(SORT_THREADS) "[--parallel thread_no] " #endif "[--human-numeric-sort] " "[--version-sort] [--random-sort [--random-source file]] " "[--compress-program program] [file ...]\n" }; struct sort_opts sort_opts_vals; bool debug_sort; bool need_hint; #if defined(SORT_THREADS) unsigned int ncpu = 1; size_t nthreads = 1; #endif static bool gnusort_numeric_compatibility; static struct sort_mods default_sort_mods_object; struct sort_mods * const default_sort_mods = &default_sort_mods_object; static bool print_symbols_on_debug; /* * Arguments from file (when file0-from option is used: */ static size_t argc_from_file0 = (size_t)-1; static char **argv_from_file0; /* * Placeholder symbols for options which have no single-character equivalent */ enum { SORT_OPT = CHAR_MAX + 1, HELP_OPT, FF_OPT, BS_OPT, VERSION_OPT, DEBUG_OPT, #if defined(SORT_THREADS) PARALLEL_OPT, #endif RANDOMSOURCE_OPT, COMPRESSPROGRAM_OPT, QSORT_OPT, MERGESORT_OPT, HEAPSORT_OPT, RADIXSORT_OPT, MMAP_OPT }; #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; static struct option long_options[] = { { "batch-size", required_argument, NULL, BS_OPT }, { "buffer-size", required_argument, NULL, 'S' }, { "check", optional_argument, NULL, 'c' }, { "check=silent|quiet", optional_argument, NULL, 'C' }, { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, { "debug", no_argument, NULL, DEBUG_OPT }, { "dictionary-order", no_argument, NULL, 'd' }, { "field-separator", required_argument, NULL, 't' }, { "files0-from", required_argument, NULL, FF_OPT }, { "general-numeric-sort", no_argument, NULL, 'g' }, { "heapsort", no_argument, NULL, HEAPSORT_OPT }, { "help",no_argument, NULL, HELP_OPT }, { "human-numeric-sort", no_argument, NULL, 'h' }, { "ignore-leading-blanks", no_argument, NULL, 'b' }, { "ignore-case", no_argument, NULL, 'f' }, { "ignore-nonprinting", no_argument, NULL, 'i' }, { "key", required_argument, NULL, 'k' }, { "merge", no_argument, NULL, 'm' }, { "mergesort", no_argument, NULL, MERGESORT_OPT }, { "mmap", no_argument, NULL, MMAP_OPT }, { "month-sort", no_argument, NULL, 'M' }, { "numeric-sort", no_argument, NULL, 'n' }, { "output", required_argument, NULL, 'o' }, #if defined(SORT_THREADS) { "parallel", required_argument, NULL, PARALLEL_OPT }, #endif { "qsort", no_argument, NULL, QSORT_OPT }, { "radixsort", no_argument, NULL, RADIXSORT_OPT }, { "random-sort", no_argument, NULL, 'R' }, { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, { "reverse", no_argument, NULL, 'r' }, { "sort", required_argument, NULL, SORT_OPT }, { "stable", no_argument, NULL, 's' }, { "temporary-directory",required_argument, NULL, 'T' }, { "unique", no_argument, NULL, 'u' }, { "version", no_argument, NULL, VERSION_OPT }, { "version-sort",no_argument, NULL, 'V' }, { "zero-terminated", no_argument, NULL, 'z' }, { NULL, no_argument, NULL, 0 } }; void fix_obsolete_keys(int *argc, char **argv); /* * Check where sort modifier is present */ static bool sort_modifier_empty(struct sort_mods *sm) { if (sm == NULL) return (true); return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag)); } /* * Print out usage text. 
*/ static void usage(bool opt_err) { - struct option *o; FILE *out; - out = stdout; - o = &(long_options[0]); + out = opt_err ? stderr : stdout; - if (opt_err) - out = stderr; fprintf(out, getstr(12), getprogname()); if (opt_err) exit(2); exit(0); } /* * Read input file names from a file (file0-from option). */ static void read_fns_from_file0(const char *fn) { FILE *f; char *line = NULL; size_t linesize = 0; ssize_t linelen; if (fn == NULL) return; f = fopen(fn, "r"); if (f == NULL) err(2, "%s", fn); while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { if (*line != '\0') { if (argc_from_file0 == (size_t) - 1) argc_from_file0 = 0; ++argc_from_file0; argv_from_file0 = sort_realloc(argv_from_file0, argc_from_file0 * sizeof(char *)); if (argv_from_file0 == NULL) err(2, NULL); argv_from_file0[argc_from_file0 - 1] = line; } else { free(line); } line = NULL; linesize = 0; } if (ferror(f)) err(2, "%s: getdelim", fn); closefile(f, fn); } /* * Check how much RAM is available for the sort. */ static void set_hw_params(void) { long pages, psize; - pages = psize = 0; - #if defined(SORT_THREADS) ncpu = 1; #endif pages = sysconf(_SC_PHYS_PAGES); if (pages < 1) { perror("sysconf pages"); - psize = 1; + pages = 1; } psize = sysconf(_SC_PAGESIZE); if (psize < 1) { perror("sysconf psize"); psize = 4096; } #if defined(SORT_THREADS) ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); if (ncpu < 1) ncpu = 1; else if(ncpu > 32) ncpu = 32; nthreads = ncpu; #endif free_memory = (unsigned long long) pages * (unsigned long long) psize; available_free_memory = free_memory / 2; if (available_free_memory < 1024) available_free_memory = 1024; } /* * Convert "plain" symbol to wide symbol, with default value. */ static void conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) { if (wc && c) { int res; res = mbtowc(wc, c, MB_CUR_MAX); if (res < 1) *wc = def; } } /* * Set current locale symbols. */ static void set_locale(void) { struct lconv *lc; const char *locale; setlocale(LC_ALL, ""); lc = localeconv(); if (lc) { /* obtain LC_NUMERIC info */ /* Convert to wide char form */ conv_mbtowc(&symbol_decimal_point, lc->decimal_point, symbol_decimal_point); conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, symbol_thousands_sep); conv_mbtowc(&symbol_positive_sign, lc->positive_sign, symbol_positive_sign); conv_mbtowc(&symbol_negative_sign, lc->negative_sign, symbol_negative_sign); } if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) gnusort_numeric_compatibility = true; locale = setlocale(LC_COLLATE, NULL); if (locale) { char *tmpl; const char *cclocale; tmpl = sort_strdup(locale); cclocale = setlocale(LC_COLLATE, "C"); if (cclocale && !strcmp(cclocale, tmpl)) byte_sort = true; else { const char *pclocale; pclocale = setlocale(LC_COLLATE, "POSIX"); if (pclocale && !strcmp(pclocale, tmpl)) byte_sort = true; } setlocale(LC_COLLATE, tmpl); sort_free(tmpl); } } /* * Set directory temporary files. */ static void set_tmpdir(void) { char *td; td = getenv("TMPDIR"); if (td != NULL) tmpdir = sort_strdup(td); } /* * Parse -S option. 
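 *
 * The size suffixes below cascade through fall-through cases, each
 * level multiplying by 1024; a bare number (or a "K" suffix) gets one
 * factor of 1024, while "b" means plain bytes.  Worked examples:
 *
 *	"2"	-> 2 * 1024		= 2048
 *	"2M"	-> 2 * 1024 * 1024	= 2097152
 *	"2b"	-> 2
 *	"50%"	-> available_free_memory * 50 / 100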
*/ static unsigned long long parse_memory_buffer_value(const char *value) { if (value == NULL) return (available_free_memory); else { char *endptr; unsigned long long membuf; endptr = NULL; errno = 0; membuf = strtoll(value, &endptr, 10); if (errno != 0) { warn("%s",getstr(4)); membuf = available_free_memory; } else { switch (*endptr){ case 'Y': membuf *= 1024; /* FALLTHROUGH */ case 'Z': membuf *= 1024; /* FALLTHROUGH */ case 'E': membuf *= 1024; /* FALLTHROUGH */ case 'P': membuf *= 1024; /* FALLTHROUGH */ case 'T': membuf *= 1024; /* FALLTHROUGH */ case 'G': membuf *= 1024; /* FALLTHROUGH */ case 'M': membuf *= 1024; /* FALLTHROUGH */ case '\0': case 'K': membuf *= 1024; /* FALLTHROUGH */ case 'b': break; case '%': membuf = (available_free_memory * membuf) / 100; break; default: warnc(EINVAL, "%s", optarg); membuf = available_free_memory; } } return (membuf); } } /* * Signal handler that clears the temporary files. */ static void sig_handler(int sig __unused, siginfo_t *siginfo __unused, void *context __unused) { clear_tmp_files(); exit(-1); } /* * Set signal handler on panic signals. */ static void set_signal_handler(void) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = &sig_handler; sa.sa_flags = SA_SIGINFO; if (sigaction(SIGTERM, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGHUP, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGINT, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGQUIT, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGABRT, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGBUS, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGSEGV, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGUSR1, &sa, NULL) < 0) { perror("sigaction"); return; } if (sigaction(SIGUSR2, &sa, NULL) < 0) { perror("sigaction"); return; } } /* * Print "unknown" message and exit with status 2. */ static void unknown(const char *what) { errx(2, "%s: %s", getstr(3), what); } /* * Check whether contradictory input options are used. */ static void check_mutually_exclusive_flags(char c, bool *mef_flags) { int fo_index, mec; bool found_others, found_this; found_others = found_this = false; fo_index = 0; for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { mec = mutually_exclusive_flags[i]; if (mec != c) { if (mef_flags[i]) { if (found_this) errx(1, "%c:%c: %s", c, mec, getstr(1)); found_others = true; fo_index = i; } } else { if (found_others) errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); mef_flags[i] = true; found_this = true; } } } /* * Initialise sort opts data. */ static void set_sort_opts(void) { memset(&default_sort_mods_object, 0, sizeof(default_sort_mods_object)); memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); default_sort_mods_object.func = get_sort_func(&default_sort_mods_object); } /* * Set a sort modifier on a sort modifiers object. 
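 *
 * Each recognised letter sets one flag (some also set need_hint or
 * need_random), marks the sort as complex, and refreshes sm->func via
 * get_sort_func(); an unrecognised letter returns false so that both
 * the option parser and the -k field parser can reject it.  E.g. for
 * the key spec "2n", parse_pos() ends up calling:
 *
 *	set_sort_modifier(&ks->sm, 'n');	// sets nflag = true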
*/ static bool set_sort_modifier(struct sort_mods *sm, int c) { if (sm) { switch (c){ case 'b': sm->bflag = true; break; case 'd': sm->dflag = true; break; case 'f': sm->fflag = true; break; case 'g': sm->gflag = true; need_hint = true; break; case 'i': sm->iflag = true; break; case 'R': sm->Rflag = true; need_random = true; break; case 'M': initialise_months(); sm->Mflag = true; need_hint = true; break; case 'n': sm->nflag = true; need_hint = true; print_symbols_on_debug = true; break; case 'r': sm->rflag = true; break; case 'V': sm->Vflag = true; break; case 'h': sm->hflag = true; need_hint = true; print_symbols_on_debug = true; break; default: return false; } sort_opts_vals.complex_sort = true; sm->func = get_sort_func(sm); } return (true); } /* * Parse POS in -k option. */ static int parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) { regmatch_t pmatch[4]; regex_t re; char *c, *f; const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; size_t len, nmatch; int ret; ret = -1; nmatch = 4; c = f = NULL; if (regcomp(&re, sregexp, REG_EXTENDED) != 0) return (-1); if (regexec(&re, s, nmatch, pmatch, 0) != 0) goto end; if (pmatch[0].rm_eo <= pmatch[0].rm_so) goto end; if (pmatch[1].rm_eo <= pmatch[1].rm_so) goto end; len = pmatch[1].rm_eo - pmatch[1].rm_so; f = sort_malloc((len + 1) * sizeof(char)); strncpy(f, s + pmatch[1].rm_so, len); f[len] = '\0'; if (second) { errno = 0; ks->f2 = (size_t) strtoul(f, NULL, 10); if (errno != 0) err(2, "-k"); if (ks->f2 == 0) { warn("%s",getstr(5)); goto end; } } else { errno = 0; ks->f1 = (size_t) strtoul(f, NULL, 10); if (errno != 0) err(2, "-k"); if (ks->f1 == 0) { warn("%s",getstr(5)); goto end; } } if (pmatch[2].rm_eo > pmatch[2].rm_so) { len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; c = sort_malloc((len + 1) * sizeof(char)); strncpy(c, s + pmatch[2].rm_so + 1, len); c[len] = '\0'; if (second) { errno = 0; ks->c2 = (size_t) strtoul(c, NULL, 10); if (errno != 0) err(2, "-k"); } else { errno = 0; ks->c1 = (size_t) strtoul(c, NULL, 10); if (errno != 0) err(2, "-k"); if (ks->c1 == 0) { warn("%s",getstr(6)); goto end; } } } else { if (second) ks->c2 = 0; else ks->c1 = 1; } if (pmatch[3].rm_eo > pmatch[3].rm_so) { regoff_t i = 0; for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { check_mutually_exclusive_flags(s[i], mef_flags); if (s[i] == 'b') { if (second) ks->pos2b = true; else ks->pos1b = true; } else if (!set_sort_modifier(&(ks->sm), s[i])) goto end; } } ret = 0; end: if (c) sort_free(c); if (f) sort_free(f); regfree(&re); return (ret); } /* * Parse -k option value. */ static int parse_k(const char *s, struct key_specs *ks) { int ret = -1; bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { false, false, false, false, false, false }; if (s && *s) { char *sptr; sptr = strchr(s, ','); if (sptr) { size_t size1; char *pos1, *pos2; size1 = sptr - s; if (size1 < 1) return (-1); pos1 = sort_malloc((size1 + 1) * sizeof(char)); strncpy(pos1, s, size1); pos1[size1] = '\0'; ret = parse_pos(pos1, ks, mef_flags, false); sort_free(pos1); if (ret < 0) return (ret); pos2 = sort_strdup(sptr + 1); ret = parse_pos(pos2, ks, mef_flags, true); sort_free(pos2); } else ret = parse_pos(s, ks, mef_flags, false); } return (ret); } /* * Parse POS in +POS -POS option. 
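 *
 * POS here is "F[.C][OPTS]" with zero-based field F and column C;
 * e.g. "2.1M" parses to *nf = 2, *nc = 1, sopts = "M".
 * fix_obsolete_keys() below then shifts these to the one-based -k
 * numbering, so the pair "+2.1 -3" is rewritten as "-k3.2,3.0".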
*/ static int parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) { regex_t re; regmatch_t pmatch[4]; char *c, *f; const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; int ret; size_t len, nmatch; ret = -1; nmatch = 4; c = f = NULL; *nc = *nf = 0; if (regcomp(&re, sregexp, REG_EXTENDED) != 0) return (-1); if (regexec(&re, s, nmatch, pmatch, 0) != 0) goto end; if (pmatch[0].rm_eo <= pmatch[0].rm_so) goto end; if (pmatch[1].rm_eo <= pmatch[1].rm_so) goto end; len = pmatch[1].rm_eo - pmatch[1].rm_so; f = sort_malloc((len + 1) * sizeof(char)); strncpy(f, s + pmatch[1].rm_so, len); f[len] = '\0'; errno = 0; *nf = (size_t) strtoul(f, NULL, 10); if (errno != 0) errx(2, "%s", getstr(11)); if (pmatch[2].rm_eo > pmatch[2].rm_so) { len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; c = sort_malloc((len + 1) * sizeof(char)); strncpy(c, s + pmatch[2].rm_so + 1, len); c[len] = '\0'; errno = 0; *nc = (size_t) strtoul(c, NULL, 10); if (errno != 0) errx(2, "%s", getstr(11)); } if (pmatch[3].rm_eo > pmatch[3].rm_so) { len = pmatch[3].rm_eo - pmatch[3].rm_so; strncpy(sopts, s + pmatch[3].rm_so, len); sopts[len] = '\0'; } ret = 0; end: if (c) sort_free(c); if (f) sort_free(f); regfree(&re); return (ret); } /* * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax */ void fix_obsolete_keys(int *argc, char **argv) { char sopt[129]; for (int i = 1; i < *argc; i++) { char *arg1; arg1 = argv[i]; if (strlen(arg1) > 1 && arg1[0] == '+') { int c1, f1; char sopts1[128]; sopts1[0] = 0; c1 = f1 = 0; if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) continue; else { f1 += 1; c1 += 1; if (i + 1 < *argc) { char *arg2 = argv[i + 1]; if (strlen(arg2) > 1 && arg2[0] == '-') { int c2, f2; char sopts2[128]; sopts2[0] = 0; c2 = f2 = 0; if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) { if (c2 > 0) f2 += 1; sprintf(sopt, "-k%d.%d%s,%d.%d%s", f1, c1, sopts1, f2, c2, sopts2); argv[i] = sort_strdup(sopt); for (int j = i + 1; j + 1 < *argc; j++) argv[j] = argv[j + 1]; *argc -= 1; continue; } } } sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); argv[i] = sort_strdup(sopt); } } } } /* * Set random seed */ static void set_random_seed(void) { if (need_random) { if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { FILE* fseed; MD5_CTX ctx; char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; size_t sz = 0; fseed = openfile(random_source, "r"); while (!feof(fseed)) { int cr; cr = fgetc(fseed); if (cr == EOF) break; rsd[sz++] = (char) cr; if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) break; } closefile(fseed, random_source); MD5Init(&ctx); MD5Update(&ctx, rsd, sz); random_seed = MD5End(&ctx, NULL); random_seed_size = strlen(random_seed); } else { MD5_CTX ctx; char *b; MD5Init(&ctx); b = MD5File(random_source, NULL); if (b == NULL) err(2, NULL); random_seed = b; random_seed_size = strlen(b); } MD5Init(&md5_ctx); if(random_seed_size>0) { MD5Update(&md5_ctx, random_seed, random_seed_size); } } } /* * Main function. 
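 *
 * Rough flow of main(): initialise signal handlers, hardware
 * parameters, locale, tmpdir and default sort options; rewrite
 * obsolete +POS/-POS keys; parse options; fill in default key specs;
 * then either sort the inputs (procfile() followed by a merge of any
 * temporary files), check ordering (-c/-C), or merge pre-sorted files
 * (-m).  A typical invocation (hypothetical file names):
 *
 *	$ sort -t: -k2n -o out.txt in.txt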
*/ int main(int argc, char **argv) { char *outfile, *real_outfile; int c, result; bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { false, false, false, false, false, false }; result = 0; outfile = sort_strdup("-"); real_outfile = NULL; struct sort_mods *sm = &default_sort_mods_object; init_tmp_files(); set_signal_handler(); set_hw_params(); set_locale(); set_tmpdir(); set_sort_opts(); fix_obsolete_keys(&argc, argv); while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) != -1)) { check_mutually_exclusive_flags(c, mef_flags); if (!set_sort_modifier(sm, c)) { switch (c) { case 'c': sort_opts_vals.cflag = true; if (optarg) { if (!strcmp(optarg, "diagnose-first")) ; else if (!strcmp(optarg, "silent") || !strcmp(optarg, "quiet")) sort_opts_vals.csilentflag = true; else if (*optarg) unknown(optarg); } break; case 'C': sort_opts_vals.cflag = true; sort_opts_vals.csilentflag = true; break; case 'k': { sort_opts_vals.complex_sort = true; sort_opts_vals.kflag = true; keys_num++; keys = sort_realloc(keys, keys_num * sizeof(struct key_specs)); memset(&(keys[keys_num - 1]), 0, sizeof(struct key_specs)); if (parse_k(optarg, &(keys[keys_num - 1])) < 0) { errc(2, EINVAL, "-k %s", optarg); } break; } case 'm': sort_opts_vals.mflag = true; break; case 'o': outfile = sort_realloc(outfile, (strlen(optarg) + 1)); strcpy(outfile, optarg); break; case 's': sort_opts_vals.sflag = true; break; case 'S': available_free_memory = parse_memory_buffer_value(optarg); break; case 'T': tmpdir = sort_strdup(optarg); break; case 't': while (strlen(optarg) > 1) { if (optarg[0] != '\\') { errc(2, EINVAL, "%s", optarg); } optarg += 1; if (*optarg == '0') { *optarg = 0; break; } } sort_opts_vals.tflag = true; sort_opts_vals.field_sep = btowc(optarg[0]); if (sort_opts_vals.field_sep == WEOF) { errno = EINVAL; err(2, NULL); } if (!gnusort_numeric_compatibility) { if (symbol_decimal_point == sort_opts_vals.field_sep) symbol_decimal_point = WEOF; if (symbol_thousands_sep == sort_opts_vals.field_sep) symbol_thousands_sep = WEOF; if (symbol_negative_sign == sort_opts_vals.field_sep) symbol_negative_sign = WEOF; if (symbol_positive_sign == sort_opts_vals.field_sep) symbol_positive_sign = WEOF; } break; case 'u': sort_opts_vals.uflag = true; /* stable sort for the correct unique val */ sort_opts_vals.sflag = true; break; case 'z': sort_opts_vals.zflag = true; break; case SORT_OPT: if (optarg) { if (!strcmp(optarg, "general-numeric")) set_sort_modifier(sm, 'g'); else if (!strcmp(optarg, "human-numeric")) set_sort_modifier(sm, 'h'); else if (!strcmp(optarg, "numeric")) set_sort_modifier(sm, 'n'); else if (!strcmp(optarg, "month")) set_sort_modifier(sm, 'M'); else if (!strcmp(optarg, "random")) set_sort_modifier(sm, 'R'); else unknown(optarg); } break; #if defined(SORT_THREADS) case PARALLEL_OPT: nthreads = (size_t)(atoi(optarg)); if (nthreads < 1) nthreads = 1; if (nthreads > 1024) nthreads = 1024; break; #endif case QSORT_OPT: sort_opts_vals.sort_method = SORT_QSORT; break; case MERGESORT_OPT: sort_opts_vals.sort_method = SORT_MERGESORT; break; case MMAP_OPT: use_mmap = true; break; case HEAPSORT_OPT: sort_opts_vals.sort_method = SORT_HEAPSORT; break; case RADIXSORT_OPT: sort_opts_vals.sort_method = SORT_RADIXSORT; break; case RANDOMSOURCE_OPT: random_source = strdup(optarg); break; case COMPRESSPROGRAM_OPT: compress_program = strdup(optarg); break; case FF_OPT: read_fns_from_file0(optarg); break; case BS_OPT: { errno = 0; long mof = strtol(optarg, NULL, 10); if (errno != 0) err(2, "--batch-size"); if (mof >= 2) 
max_open_files = (size_t) mof + 1; } break; case VERSION_OPT: printf("%s\n", VERSION); exit(EXIT_SUCCESS); /* NOTREACHED */ break; case DEBUG_OPT: debug_sort = true; break; case HELP_OPT: usage(false); /* NOTREACHED */ break; default: usage(true); /* NOTREACHED */ } } } argc -= optind; argv += optind; if (argv_from_file0) { argc = argc_from_file0; argv = argv_from_file0; } #ifndef WITHOUT_NLS catalog = catopen("sort", NL_CAT_LOCALE); #endif if (sort_opts_vals.cflag && sort_opts_vals.mflag) errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); #ifndef WITHOUT_NLS catclose(catalog); #endif if (keys_num == 0) { keys_num = 1; keys = sort_realloc(keys, sizeof(struct key_specs)); memset(&(keys[0]), 0, sizeof(struct key_specs)); keys[0].c1 = 1; keys[0].pos1b = default_sort_mods->bflag; keys[0].pos2b = default_sort_mods->bflag; memcpy(&(keys[0].sm), default_sort_mods, sizeof(struct sort_mods)); } for (size_t i = 0; i < keys_num; i++) { struct key_specs *ks; ks = &(keys[i]); if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && !(ks->pos2b)) { ks->pos1b = sm->bflag; ks->pos2b = sm->bflag; memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); } ks->sm.func = get_sort_func(&(ks->sm)); } if (debug_sort) { printf("Memory to be used for sorting: %llu\n",available_free_memory); #if defined(SORT_THREADS) printf("Number of CPUs: %d\n",(int)ncpu); nthreads = 1; #endif printf("Using collate rules of %s locale\n", setlocale(LC_COLLATE, NULL)); if (byte_sort) printf("Byte sort is used\n"); if (print_symbols_on_debug) { printf("Decimal Point: <%lc>\n", symbol_decimal_point); if (symbol_thousands_sep) printf("Thousands separator: <%lc>\n", symbol_thousands_sep); printf("Positive sign: <%lc>\n", symbol_positive_sign); printf("Negative sign: <%lc>\n", symbol_negative_sign); } } set_random_seed(); /* Case when the outfile equals one of the input files: */ if (strcmp(outfile, "-")) { for(int i = 0; i < argc; ++i) { if (strcmp(argv[i], outfile) == 0) { real_outfile = sort_strdup(outfile); for(;;) { char* tmp = sort_malloc(strlen(outfile) + strlen(".tmp") + 1); strcpy(tmp, outfile); strcpy(tmp + strlen(tmp), ".tmp"); sort_free(outfile); outfile = tmp; if (access(outfile, F_OK) < 0) break; } tmp_file_atexit(outfile); } } } #if defined(SORT_THREADS) if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) nthreads = 1; #endif if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { struct file_list fl; struct sort_list list; sort_list_init(&list); file_list_init(&fl, true); if (argc < 1) procfile("-", &list, &fl); else { while (argc > 0) { procfile(*argv, &list, &fl); --argc; ++argv; } } if (fl.count < 1) sort_list_to_file(&list, outfile); else { if (list.count > 0) { char *flast = new_tmp_file_name(); sort_list_to_file(&list, flast); file_list_add(&fl, flast, false); } merge_files(&fl, outfile); } file_list_clean(&fl); /* * We are about to exit the program, so we can ignore * the clean-up for speed * * sort_list_clean(&list); */ } else if (sort_opts_vals.cflag) { result = (argc == 0) ? 
(check("-")) : (check(*argv)); } else if (sort_opts_vals.mflag) { struct file_list fl; file_list_init(&fl, false); file_list_populate(&fl, argc, argv, true); merge_files(&fl, outfile); file_list_clean(&fl); } if (real_outfile) { unlink(real_outfile); if (rename(outfile, real_outfile) < 0) err(2, NULL); sort_free(real_outfile); } sort_free(outfile); return (result); } Index: projects/clang391-import/usr.sbin/ctld/ctld.c =================================================================== --- projects/clang391-import/usr.sbin/ctld/ctld.c (revision 309262) +++ projects/clang391-import/usr.sbin/ctld/ctld.c (revision 309263) @@ -1,2726 +1,2729 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ctld.h" #include "isns.h" bool proxy_mode = false; static volatile bool sighup_received = false; static volatile bool sigterm_received = false; static volatile bool sigalrm_received = false; static int nchildren = 0; static uint16_t last_portal_group_tag = 0xff; static void usage(void) { fprintf(stderr, "usage: ctld [-d][-u][-f config-file]\n"); exit(1); } char * checked_strdup(const char *s) { char *c; c = strdup(s); if (c == NULL) log_err(1, "strdup"); return (c); } struct conf * conf_new(void) { struct conf *conf; conf = calloc(1, sizeof(*conf)); if (conf == NULL) log_err(1, "calloc"); TAILQ_INIT(&conf->conf_luns); TAILQ_INIT(&conf->conf_targets); TAILQ_INIT(&conf->conf_auth_groups); TAILQ_INIT(&conf->conf_ports); TAILQ_INIT(&conf->conf_portal_groups); TAILQ_INIT(&conf->conf_pports); TAILQ_INIT(&conf->conf_isns); conf->conf_isns_period = 900; conf->conf_isns_timeout = 5; conf->conf_debug = 0; conf->conf_timeout = 60; conf->conf_maxproc = 30; return (conf); } void conf_delete(struct conf *conf) { struct lun *lun, *ltmp; struct target *targ, *tmp; struct auth_group *ag, *cagtmp; struct portal_group *pg, *cpgtmp; struct pport *pp, *pptmp; struct isns *is, *istmp; assert(conf->conf_pidfh == NULL); TAILQ_FOREACH_SAFE(lun, &conf->conf_luns, l_next, ltmp) lun_delete(lun); TAILQ_FOREACH_SAFE(targ, &conf->conf_targets, t_next, tmp) target_delete(targ); TAILQ_FOREACH_SAFE(ag, &conf->conf_auth_groups, ag_next, cagtmp) auth_group_delete(ag); TAILQ_FOREACH_SAFE(pg, &conf->conf_portal_groups, pg_next, cpgtmp) portal_group_delete(pg); TAILQ_FOREACH_SAFE(pp, &conf->conf_pports, pp_next, pptmp) pport_delete(pp); TAILQ_FOREACH_SAFE(is, &conf->conf_isns, i_next, istmp) isns_delete(is); assert(TAILQ_EMPTY(&conf->conf_ports)); free(conf->conf_pidfile_path); free(conf); } static struct auth * auth_new(struct auth_group *ag) { struct auth *auth; auth = calloc(1, sizeof(*auth)); if (auth == NULL) log_err(1, "calloc"); auth->a_auth_group = ag; TAILQ_INSERT_TAIL(&ag->ag_auths, auth, a_next); return (auth); } static void auth_delete(struct auth *auth) { TAILQ_REMOVE(&auth->a_auth_group->ag_auths, auth, a_next); free(auth->a_user); free(auth->a_secret); free(auth->a_mutual_user); free(auth->a_mutual_secret); free(auth); } const struct auth * auth_find(const struct auth_group *ag, const char *user) { const struct auth *auth; TAILQ_FOREACH(auth, &ag->ag_auths, a_next) { if (strcmp(auth->a_user, user) == 0) return (auth); } return (NULL); } static void auth_check_secret_length(struct auth *auth) { size_t len; len = strlen(auth->a_secret); if (len > 16) { if (auth->a_auth_group->ag_name != NULL) log_warnx("secret for user \"%s\", auth-group \"%s\", " "is too long; it should be at most 16 characters " "long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("secret for user \"%s\", target \"%s\", " "is too long; it should be at most 16 characters " "long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if (len < 12) { if (auth->a_auth_group->ag_name != NULL) log_warnx("secret for user \"%s\", auth-group \"%s\", " "is too short; it should be at least 12 characters " "long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("secret for user \"%s\", target \"%s\", " "is too short; it should be at least 12 characters " "long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if 
(auth->a_mutual_secret != NULL) { len = strlen(auth->a_mutual_secret); if (len > 16) { if (auth->a_auth_group->ag_name != NULL) log_warnx("mutual secret for user \"%s\", " "auth-group \"%s\", is too long; it should " "be at most 16 characters long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("mutual secret for user \"%s\", " "target \"%s\", is too long; it should " "be at most 16 characters long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if (len < 12) { if (auth->a_auth_group->ag_name != NULL) log_warnx("mutual secret for user \"%s\", " "auth-group \"%s\", is too short; it " "should be at least 12 characters long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("mutual secret for user \"%s\", " "target \"%s\", is too short; it should be " "at least 12 characters long", auth->a_user, auth->a_auth_group->ag_target->t_name); } } } const struct auth * auth_new_chap(struct auth_group *ag, const char *user, const char *secret) { struct auth *auth; if (ag->ag_type == AG_TYPE_UNKNOWN) ag->ag_type = AG_TYPE_CHAP; if (ag->ag_type != AG_TYPE_CHAP) { if (ag->ag_name != NULL) log_warnx("cannot mix \"chap\" authentication with " "other types for auth-group \"%s\"", ag->ag_name); else log_warnx("cannot mix \"chap\" authentication with " "other types for target \"%s\"", ag->ag_target->t_name); return (NULL); } auth = auth_new(ag); auth->a_user = checked_strdup(user); auth->a_secret = checked_strdup(secret); auth_check_secret_length(auth); return (auth); } const struct auth * auth_new_chap_mutual(struct auth_group *ag, const char *user, const char *secret, const char *user2, const char *secret2) { struct auth *auth; if (ag->ag_type == AG_TYPE_UNKNOWN) ag->ag_type = AG_TYPE_CHAP_MUTUAL; if (ag->ag_type != AG_TYPE_CHAP_MUTUAL) { if (ag->ag_name != NULL) log_warnx("cannot mix \"chap-mutual\" authentication " "with other types for auth-group \"%s\"", ag->ag_name); else log_warnx("cannot mix \"chap-mutual\" authentication " "with other types for target \"%s\"", ag->ag_target->t_name); return (NULL); } auth = auth_new(ag); auth->a_user = checked_strdup(user); auth->a_secret = checked_strdup(secret); auth->a_mutual_user = checked_strdup(user2); auth->a_mutual_secret = checked_strdup(secret2); auth_check_secret_length(auth); return (auth); } const struct auth_name * auth_name_new(struct auth_group *ag, const char *name) { struct auth_name *an; an = calloc(1, sizeof(*an)); if (an == NULL) log_err(1, "calloc"); an->an_auth_group = ag; an->an_initator_name = checked_strdup(name); TAILQ_INSERT_TAIL(&ag->ag_names, an, an_next); return (an); } static void auth_name_delete(struct auth_name *an) { TAILQ_REMOVE(&an->an_auth_group->ag_names, an, an_next); free(an->an_initator_name); free(an); } bool auth_name_defined(const struct auth_group *ag) { if (TAILQ_EMPTY(&ag->ag_names)) return (false); return (true); } const struct auth_name * auth_name_find(const struct auth_group *ag, const char *name) { const struct auth_name *auth_name; TAILQ_FOREACH(auth_name, &ag->ag_names, an_next) { if (strcmp(auth_name->an_initator_name, name) == 0) return (auth_name); } return (NULL); } int auth_name_check(const struct auth_group *ag, const char *initiator_name) { if (!auth_name_defined(ag)) return (0); if (auth_name_find(ag, initiator_name) == NULL) return (1); return (0); } const struct auth_portal * auth_portal_new(struct auth_group *ag, const char *portal) { struct auth_portal *ap; char *net, *mask, *str, *tmp; int len, dm, m; ap = calloc(1, sizeof(*ap)); if (ap == NULL) log_err(1, "calloc"); 
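	/*
	 * Parse "address[/prefixlen]" into ap_sa/ap_mask below; both
	 * the bracketed IPv6 form "[2001:db8::1]/64" and the plain
	 * IPv4 form "10.0.0.0/8" are accepted (hypothetical example
	 * values), with the prefix length defaulting to the full
	 * address length when no "/" is given.
	 */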
ap->ap_auth_group = ag; ap->ap_initator_portal = checked_strdup(portal); mask = str = checked_strdup(portal); net = strsep(&mask, "/"); if (net[0] == '[') net++; len = strlen(net); if (len == 0) goto error; if (net[len - 1] == ']') net[len - 1] = 0; if (strchr(net, ':') != NULL) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ap->ap_sa; sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; if (inet_pton(AF_INET6, net, &sin6->sin6_addr) <= 0) goto error; dm = 128; } else { struct sockaddr_in *sin = (struct sockaddr_in *)&ap->ap_sa; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; if (inet_pton(AF_INET, net, &sin->sin_addr) <= 0) goto error; dm = 32; } if (mask != NULL) { m = strtol(mask, &tmp, 0); if (m < 0 || m > dm || tmp[0] != 0) goto error; } else m = dm; ap->ap_mask = m; free(str); TAILQ_INSERT_TAIL(&ag->ag_portals, ap, ap_next); return (ap); error: + free(str); free(ap); log_warnx("incorrect initiator portal \"%s\"", portal); return (NULL); } static void auth_portal_delete(struct auth_portal *ap) { TAILQ_REMOVE(&ap->ap_auth_group->ag_portals, ap, ap_next); free(ap->ap_initator_portal); free(ap); } bool auth_portal_defined(const struct auth_group *ag) { if (TAILQ_EMPTY(&ag->ag_portals)) return (false); return (true); } const struct auth_portal * auth_portal_find(const struct auth_group *ag, const struct sockaddr_storage *ss) { const struct auth_portal *ap; const uint8_t *a, *b; int i; uint8_t bmask; TAILQ_FOREACH(ap, &ag->ag_portals, ap_next) { if (ap->ap_sa.ss_family != ss->ss_family) continue; if (ss->ss_family == AF_INET) { a = (const uint8_t *) &((const struct sockaddr_in *)ss)->sin_addr; b = (const uint8_t *) &((const struct sockaddr_in *)&ap->ap_sa)->sin_addr; } else { a = (const uint8_t *) &((const struct sockaddr_in6 *)ss)->sin6_addr; b = (const uint8_t *) &((const struct sockaddr_in6 *)&ap->ap_sa)->sin6_addr; } for (i = 0; i < ap->ap_mask / 8; i++) { if (a[i] != b[i]) goto next; } if (ap->ap_mask % 8) { bmask = 0xff << (8 - (ap->ap_mask % 8)); if ((a[i] & bmask) != (b[i] & bmask)) goto next; } return (ap); next: ; } return (NULL); } int auth_portal_check(const struct auth_group *ag, const struct sockaddr_storage *sa) { if (!auth_portal_defined(ag)) return (0); if (auth_portal_find(ag, sa) == NULL) return (1); return (0); } struct auth_group * auth_group_new(struct conf *conf, const char *name) { struct auth_group *ag; if (name != NULL) { ag = auth_group_find(conf, name); if (ag != NULL) { log_warnx("duplicated auth-group \"%s\"", name); return (NULL); } } ag = calloc(1, sizeof(*ag)); if (ag == NULL) log_err(1, "calloc"); if (name != NULL) ag->ag_name = checked_strdup(name); TAILQ_INIT(&ag->ag_auths); TAILQ_INIT(&ag->ag_names); TAILQ_INIT(&ag->ag_portals); ag->ag_conf = conf; TAILQ_INSERT_TAIL(&conf->conf_auth_groups, ag, ag_next); return (ag); } void auth_group_delete(struct auth_group *ag) { struct auth *auth, *auth_tmp; struct auth_name *auth_name, *auth_name_tmp; struct auth_portal *auth_portal, *auth_portal_tmp; TAILQ_REMOVE(&ag->ag_conf->conf_auth_groups, ag, ag_next); TAILQ_FOREACH_SAFE(auth, &ag->ag_auths, a_next, auth_tmp) auth_delete(auth); TAILQ_FOREACH_SAFE(auth_name, &ag->ag_names, an_next, auth_name_tmp) auth_name_delete(auth_name); TAILQ_FOREACH_SAFE(auth_portal, &ag->ag_portals, ap_next, auth_portal_tmp) auth_portal_delete(auth_portal); free(ag->ag_name); free(ag); } struct auth_group * auth_group_find(const struct conf *conf, const char *name) { struct auth_group *ag; TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { if (ag->ag_name != 
NULL && strcmp(ag->ag_name, name) == 0) return (ag); } return (NULL); } int auth_group_set_type(struct auth_group *ag, const char *str) { int type; if (strcmp(str, "none") == 0) { type = AG_TYPE_NO_AUTHENTICATION; } else if (strcmp(str, "deny") == 0) { type = AG_TYPE_DENY; } else if (strcmp(str, "chap") == 0) { type = AG_TYPE_CHAP; } else if (strcmp(str, "chap-mutual") == 0) { type = AG_TYPE_CHAP_MUTUAL; } else { if (ag->ag_name != NULL) log_warnx("invalid auth-type \"%s\" for auth-group " "\"%s\"", str, ag->ag_name); else log_warnx("invalid auth-type \"%s\" for target " "\"%s\"", str, ag->ag_target->t_name); return (1); } if (ag->ag_type != AG_TYPE_UNKNOWN && ag->ag_type != type) { if (ag->ag_name != NULL) { log_warnx("cannot set auth-type to \"%s\" for " "auth-group \"%s\"; already has a different " "type", str, ag->ag_name); } else { log_warnx("cannot set auth-type to \"%s\" for target " "\"%s\"; already has a different type", str, ag->ag_target->t_name); } return (1); } ag->ag_type = type; return (0); } static struct portal * portal_new(struct portal_group *pg) { struct portal *portal; portal = calloc(1, sizeof(*portal)); if (portal == NULL) log_err(1, "calloc"); TAILQ_INIT(&portal->p_targets); portal->p_portal_group = pg; TAILQ_INSERT_TAIL(&pg->pg_portals, portal, p_next); return (portal); } static void portal_delete(struct portal *portal) { TAILQ_REMOVE(&portal->p_portal_group->pg_portals, portal, p_next); if (portal->p_ai != NULL) freeaddrinfo(portal->p_ai); free(portal->p_listen); free(portal); } struct portal_group * portal_group_new(struct conf *conf, const char *name) { struct portal_group *pg; pg = portal_group_find(conf, name); if (pg != NULL) { log_warnx("duplicated portal-group \"%s\"", name); return (NULL); } pg = calloc(1, sizeof(*pg)); if (pg == NULL) log_err(1, "calloc"); pg->pg_name = checked_strdup(name); TAILQ_INIT(&pg->pg_options); TAILQ_INIT(&pg->pg_portals); TAILQ_INIT(&pg->pg_ports); pg->pg_conf = conf; pg->pg_tag = 0; /* Assigned later in conf_apply(). */ TAILQ_INSERT_TAIL(&conf->conf_portal_groups, pg, pg_next); return (pg); } void portal_group_delete(struct portal_group *pg) { struct portal *portal, *tmp; struct port *port, *tport; struct option *o, *otmp; TAILQ_FOREACH_SAFE(port, &pg->pg_ports, p_pgs, tport) port_delete(port); TAILQ_REMOVE(&pg->pg_conf->conf_portal_groups, pg, pg_next); TAILQ_FOREACH_SAFE(portal, &pg->pg_portals, p_next, tmp) portal_delete(portal); TAILQ_FOREACH_SAFE(o, &pg->pg_options, o_next, otmp) option_delete(&pg->pg_options, o); free(pg->pg_name); free(pg->pg_offload); free(pg->pg_redirection); free(pg); } struct portal_group * portal_group_find(const struct conf *conf, const char *name) { struct portal_group *pg; TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (strcmp(pg->pg_name, name) == 0) return (pg); } return (NULL); } static int parse_addr_port(char *arg, const char *def_port, struct addrinfo **ai) { struct addrinfo hints; char *str, *addr, *ch; const char *port; int error, colons = 0; str = arg = strdup(arg); if (arg[0] == '[') { /* * IPv6 address in square brackets, perhaps with port. */ arg++; addr = strsep(&arg, "]"); - if (arg == NULL) + if (arg == NULL) { + free(str); return (1); + } if (arg[0] == '\0') { port = def_port; } else if (arg[0] == ':') { port = arg + 1; } else { free(str); return (1); } } else { /* * Either IPv6 address without brackets - and without * a port - or IPv4 address. Just count the colons. 
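		 * Hypothetical examples of what reaches this branch:
		 * "fe80::1" contains more than one colon and is taken
		 * as a bare IPv6 address with the default port;
		 * "10.0.0.1:3261" is split at its single colon into
		 * address and port; "10.0.0.1" has no colon and gets
		 * the default port.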
*/ for (ch = arg; *ch != '\0'; ch++) { if (*ch == ':') colons++; } if (colons > 1) { addr = arg; port = def_port; } else { addr = strsep(&arg, ":"); if (arg == NULL) port = def_port; else port = arg; } } memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_PASSIVE; error = getaddrinfo(addr, port, &hints, ai); free(str); return ((error != 0) ? 1 : 0); } int portal_group_add_listen(struct portal_group *pg, const char *value, bool iser) { struct portal *portal; portal = portal_new(pg); portal->p_listen = checked_strdup(value); portal->p_iser = iser; if (parse_addr_port(portal->p_listen, "3260", &portal->p_ai)) { log_warnx("invalid listen address %s", portal->p_listen); portal_delete(portal); return (1); } /* * XXX: getaddrinfo(3) may return multiple addresses; we should turn * those into multiple portals. */ return (0); } int isns_new(struct conf *conf, const char *addr) { struct isns *isns; isns = calloc(1, sizeof(*isns)); if (isns == NULL) log_err(1, "calloc"); isns->i_conf = conf; TAILQ_INSERT_TAIL(&conf->conf_isns, isns, i_next); isns->i_addr = checked_strdup(addr); if (parse_addr_port(isns->i_addr, "3205", &isns->i_ai)) { log_warnx("invalid iSNS address %s", isns->i_addr); isns_delete(isns); return (1); } /* * XXX: getaddrinfo(3) may return multiple addresses; we should turn * those into multiple servers. */ return (0); } void isns_delete(struct isns *isns) { TAILQ_REMOVE(&isns->i_conf->conf_isns, isns, i_next); free(isns->i_addr); if (isns->i_ai != NULL) freeaddrinfo(isns->i_ai); free(isns); } static int isns_do_connect(struct isns *isns) { int s; s = socket(isns->i_ai->ai_family, isns->i_ai->ai_socktype, isns->i_ai->ai_protocol); if (s < 0) { log_warn("socket(2) failed for %s", isns->i_addr); return (-1); } if (connect(s, isns->i_ai->ai_addr, isns->i_ai->ai_addrlen)) { log_warn("connect(2) failed for %s", isns->i_addr); close(s); return (-1); } return(s); } static int isns_do_register(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct target *target; struct portal *portal; struct portal_group *pg; struct port *port; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVATTRREG, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_delim(req); isns_req_add_str(req, 1, hostname); isns_req_add_32(req, 2, 2); /* 2 -- iSCSI */ isns_req_add_32(req, 6, conf->conf_isns_period); TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (pg->pg_unassigned) continue; TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { isns_req_add_addr(req, 16, portal->p_ai); isns_req_add_port(req, 17, portal->p_ai); } } TAILQ_FOREACH(target, &conf->conf_targets, t_next) { isns_req_add_str(req, 32, target->t_name); isns_req_add_32(req, 33, 1); /* 1 -- Target*/ if (target->t_alias != NULL) isns_req_add_str(req, 34, target->t_alias); TAILQ_FOREACH(port, &target->t_ports, p_ts) { if ((pg = port->p_portal_group) == NULL) continue; isns_req_add_32(req, 51, pg->pg_tag); TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { isns_req_add_addr(req, 49, portal->p_ai); isns_req_add_port(req, 50, portal->p_ai); } } } res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS register error %d for %s", error, isns->i_addr); res = 
-1; } quit: isns_req_free(req); return (res); } static int isns_do_check(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVATTRQRY, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_str(req, 1, hostname); isns_req_add_delim(req); isns_req_add(req, 2, 0, NULL); res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS check error %d for %s", error, isns->i_addr); res = -1; } quit: isns_req_free(req); return (res); } static int isns_do_deregister(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVDEREG, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_delim(req); isns_req_add_str(req, 1, hostname); res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS deregister error %d for %s", error, isns->i_addr); res = -1; } quit: isns_req_free(req); return (res); } void isns_register(struct isns *isns, struct isns *oldisns) { struct conf *conf = isns->i_conf; int s; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) { set_timeout(0, false); return; } gethostname(hostname, sizeof(hostname)); if (oldisns == NULL || TAILQ_EMPTY(&oldisns->i_conf->conf_targets)) oldisns = isns; isns_do_deregister(oldisns, s, hostname); isns_do_register(isns, s, hostname); close(s); set_timeout(0, false); } void isns_check(struct isns *isns) { struct conf *conf = isns->i_conf; int s, res; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) { set_timeout(0, false); return; } gethostname(hostname, sizeof(hostname)); res = isns_do_check(isns, s, hostname); if (res < 0) { isns_do_deregister(isns, s, hostname); isns_do_register(isns, s, hostname); } close(s); set_timeout(0, false); } void isns_deregister(struct isns *isns) { struct conf *conf = isns->i_conf; int s; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) { /* Disarm the timeout on failure, as the other isns_*() callers do. */ set_timeout(0, false); return; } gethostname(hostname, sizeof(hostname)); isns_do_deregister(isns, s, hostname); close(s); set_timeout(0, false); } int portal_group_set_filter(struct portal_group *pg, const char *str) { int filter; if (strcmp(str, "none") == 0) { filter = PG_FILTER_NONE; } else if (strcmp(str, "portal") == 0) { filter = PG_FILTER_PORTAL; } else if (strcmp(str, "portal-name") == 0) { filter = PG_FILTER_PORTAL_NAME; } else if (strcmp(str, "portal-name-auth") == 0) { filter = PG_FILTER_PORTAL_NAME_AUTH; } else { log_warnx("invalid discovery-filter \"%s\" for portal-group " "\"%s\"; valid values are \"none\", \"portal\", 
"\"portal-name\", and \"portal-name-auth\"", str, pg->pg_name); return (1); } if (pg->pg_discovery_filter != PG_FILTER_UNKNOWN && pg->pg_discovery_filter != filter) { log_warnx("cannot set discovery-filter to \"%s\" for " "portal-group \"%s\"; already has a different " "value", str, pg->pg_name); return (1); } pg->pg_discovery_filter = filter; return (0); } int portal_group_set_offload(struct portal_group *pg, const char *offload) { if (pg->pg_offload != NULL) { log_warnx("cannot set offload to \"%s\" for " "portal-group \"%s\"; already defined", offload, pg->pg_name); return (1); } pg->pg_offload = checked_strdup(offload); return (0); } int portal_group_set_redirection(struct portal_group *pg, const char *addr) { if (pg->pg_redirection != NULL) { log_warnx("cannot set redirection to \"%s\" for " "portal-group \"%s\"; already defined", addr, pg->pg_name); return (1); } pg->pg_redirection = checked_strdup(addr); return (0); } static bool valid_hex(const char ch) { switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': return (true); default: return (false); } } bool valid_iscsi_name(const char *name) { int i; if (strlen(name) >= MAX_NAME_LEN) { log_warnx("overlong name for target \"%s\"; max length allowed " "by iSCSI specification is %d characters", name, MAX_NAME_LEN); return (false); } /* * In the cases below, we don't return an error, just in case the admin * was right, and we're wrong. */ if (strncasecmp(name, "iqn.", strlen("iqn.")) == 0) { for (i = strlen("iqn."); name[i] != '\0'; i++) { /* * XXX: We should verify UTF-8 normalisation, as defined * by 3.2.6.2: iSCSI Name Encoding. */ if (isalnum(name[i])) continue; if (name[i] == '-' || name[i] == '.' || name[i] == ':') continue; log_warnx("invalid character \"%c\" in target name " "\"%s\"; allowed characters are letters, digits, " "'-', '.', and ':'", name[i], name); break; } /* * XXX: Check more stuff: valid date and a valid reversed domain. 
*/ } else if (strncasecmp(name, "eui.", strlen("eui.")) == 0) { if (strlen(name) != strlen("eui.") + 16) log_warnx("invalid target name \"%s\"; the \"eui.\" " "should be followed by exactly 16 hexadecimal " "digits", name); for (i = strlen("eui."); name[i] != '\0'; i++) { if (!valid_hex(name[i])) { log_warnx("invalid character \"%c\" in target " "name \"%s\"; allowed characters are 0-9 " "and A-F", name[i], name); break; } } } else if (strncasecmp(name, "naa.", strlen("naa.")) == 0) { if (strlen(name) > strlen("naa.") + 32) log_warnx("invalid target name \"%s\"; the \"naa.\" " "should be followed by at most 32 hexadecimal " "digits", name); for (i = strlen("naa."); name[i] != '\0'; i++) { if (!valid_hex(name[i])) { log_warnx("invalid character \"%c\" in target " "name \"%s\"; allowed characters are 0-9 " "and A-F", name[i], name); break; } } } else { log_warnx("invalid target name \"%s\"; should start with " "either \"iqn.\", \"eui.\", or \"naa.\"", name); } return (true); } struct pport * pport_new(struct conf *conf, const char *name, uint32_t ctl_port) { struct pport *pp; pp = calloc(1, sizeof(*pp)); if (pp == NULL) log_err(1, "calloc"); pp->pp_conf = conf; pp->pp_name = checked_strdup(name); pp->pp_ctl_port = ctl_port; TAILQ_INIT(&pp->pp_ports); TAILQ_INSERT_TAIL(&conf->conf_pports, pp, pp_next); return (pp); } struct pport * pport_find(const struct conf *conf, const char *name) { struct pport *pp; TAILQ_FOREACH(pp, &conf->conf_pports, pp_next) { if (strcasecmp(pp->pp_name, name) == 0) return (pp); } return (NULL); } struct pport * pport_copy(struct pport *pp, struct conf *conf) { struct pport *ppnew; ppnew = pport_new(conf, pp->pp_name, pp->pp_ctl_port); return (ppnew); } void pport_delete(struct pport *pp) { struct port *port, *tport; TAILQ_FOREACH_SAFE(port, &pp->pp_ports, p_ts, tport) port_delete(port); TAILQ_REMOVE(&pp->pp_conf->conf_pports, pp, pp_next); free(pp->pp_name); free(pp); } struct port * port_new(struct conf *conf, struct target *target, struct portal_group *pg) { struct port *port; char *name; int ret; ret = asprintf(&name, "%s-%s", pg->pg_name, target->t_name); if (ret <= 0) log_err(1, "asprintf"); if (port_find(conf, name) != NULL) { log_warnx("duplicate port \"%s\"", name); free(name); return (NULL); } port = calloc(1, sizeof(*port)); if (port == NULL) log_err(1, "calloc"); port->p_conf = conf; port->p_name = name; TAILQ_INSERT_TAIL(&conf->conf_ports, port, p_next); TAILQ_INSERT_TAIL(&target->t_ports, port, p_ts); port->p_target = target; TAILQ_INSERT_TAIL(&pg->pg_ports, port, p_pgs); port->p_portal_group = pg; port->p_foreign = pg->pg_foreign; return (port); } struct port * port_new_pp(struct conf *conf, struct target *target, struct pport *pp) { struct port *port; char *name; int ret; ret = asprintf(&name, "%s-%s", pp->pp_name, target->t_name); if (ret <= 0) log_err(1, "asprintf"); if (port_find(conf, name) != NULL) { log_warnx("duplicate port \"%s\"", name); free(name); return (NULL); } port = calloc(1, sizeof(*port)); if (port == NULL) log_err(1, "calloc"); port->p_conf = conf; port->p_name = name; TAILQ_INSERT_TAIL(&conf->conf_ports, port, p_next); TAILQ_INSERT_TAIL(&target->t_ports, port, p_ts); port->p_target = target; TAILQ_INSERT_TAIL(&pp->pp_ports, port, p_pps); port->p_pport = pp; return (port); } struct port * port_find(const struct conf *conf, const char *name) { struct port *port; TAILQ_FOREACH(port, &conf->conf_ports, p_next) { if (strcasecmp(port->p_name, name) == 0) return (port); } return (NULL); } struct port * port_find_in_pg(const struct 
portal_group *pg, const char *target) { struct port *port; TAILQ_FOREACH(port, &pg->pg_ports, p_pgs) { if (strcasecmp(port->p_target->t_name, target) == 0) return (port); } return (NULL); } void port_delete(struct port *port) { if (port->p_portal_group) TAILQ_REMOVE(&port->p_portal_group->pg_ports, port, p_pgs); if (port->p_pport) TAILQ_REMOVE(&port->p_pport->pp_ports, port, p_pps); if (port->p_target) TAILQ_REMOVE(&port->p_target->t_ports, port, p_ts); TAILQ_REMOVE(&port->p_conf->conf_ports, port, p_next); free(port->p_name); free(port); } struct target * target_new(struct conf *conf, const char *name) { struct target *targ; int i, len; targ = target_find(conf, name); if (targ != NULL) { log_warnx("duplicated target \"%s\"", name); return (NULL); } if (valid_iscsi_name(name) == false) { log_warnx("target name \"%s\" is invalid", name); return (NULL); } targ = calloc(1, sizeof(*targ)); if (targ == NULL) log_err(1, "calloc"); targ->t_name = checked_strdup(name); /* * RFC 3722 requires us to normalize the name to lowercase. */ len = strlen(name); for (i = 0; i < len; i++) targ->t_name[i] = tolower(targ->t_name[i]); targ->t_conf = conf; TAILQ_INIT(&targ->t_ports); TAILQ_INSERT_TAIL(&conf->conf_targets, targ, t_next); return (targ); } void target_delete(struct target *targ) { struct port *port, *tport; TAILQ_FOREACH_SAFE(port, &targ->t_ports, p_ts, tport) port_delete(port); TAILQ_REMOVE(&targ->t_conf->conf_targets, targ, t_next); free(targ->t_name); free(targ->t_redirection); free(targ); } struct target * target_find(struct conf *conf, const char *name) { struct target *targ; TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (strcasecmp(targ->t_name, name) == 0) return (targ); } return (NULL); } int target_set_redirection(struct target *target, const char *addr) { if (target->t_redirection != NULL) { log_warnx("cannot set redirection to \"%s\" for " "target \"%s\"; already defined", addr, target->t_name); return (1); } target->t_redirection = checked_strdup(addr); return (0); } struct lun * lun_new(struct conf *conf, const char *name) { struct lun *lun; lun = lun_find(conf, name); if (lun != NULL) { log_warnx("duplicated lun \"%s\"", name); return (NULL); } lun = calloc(1, sizeof(*lun)); if (lun == NULL) log_err(1, "calloc"); lun->l_conf = conf; lun->l_name = checked_strdup(name); TAILQ_INIT(&lun->l_options); TAILQ_INSERT_TAIL(&conf->conf_luns, lun, l_next); lun->l_ctl_lun = -1; return (lun); } void lun_delete(struct lun *lun) { struct target *targ; struct option *o, *tmp; int i; TAILQ_FOREACH(targ, &lun->l_conf->conf_targets, t_next) { for (i = 0; i < MAX_LUNS; i++) { if (targ->t_luns[i] == lun) targ->t_luns[i] = NULL; } } TAILQ_REMOVE(&lun->l_conf->conf_luns, lun, l_next); TAILQ_FOREACH_SAFE(o, &lun->l_options, o_next, tmp) option_delete(&lun->l_options, o); free(lun->l_name); free(lun->l_backend); free(lun->l_device_id); free(lun->l_path); free(lun->l_scsiname); free(lun->l_serial); free(lun); } struct lun * lun_find(const struct conf *conf, const char *name) { struct lun *lun; TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { if (strcmp(lun->l_name, name) == 0) return (lun); } return (NULL); } void lun_set_backend(struct lun *lun, const char *value) { free(lun->l_backend); lun->l_backend = checked_strdup(value); } void lun_set_blocksize(struct lun *lun, size_t value) { lun->l_blocksize = value; } void lun_set_device_type(struct lun *lun, uint8_t value) { lun->l_device_type = value; } void lun_set_device_id(struct lun *lun, const char *value) { free(lun->l_device_id); lun->l_device_id = 
checked_strdup(value); } void lun_set_path(struct lun *lun, const char *value) { free(lun->l_path); lun->l_path = checked_strdup(value); } void lun_set_scsiname(struct lun *lun, const char *value) { free(lun->l_scsiname); lun->l_scsiname = checked_strdup(value); } void lun_set_serial(struct lun *lun, const char *value) { free(lun->l_serial); lun->l_serial = checked_strdup(value); } void lun_set_size(struct lun *lun, size_t value) { lun->l_size = value; } void lun_set_ctl_lun(struct lun *lun, uint32_t value) { lun->l_ctl_lun = value; } struct option * option_new(struct options *options, const char *name, const char *value) { struct option *o; o = option_find(options, name); if (o != NULL) { log_warnx("duplicated option \"%s\"", name); return (NULL); } o = calloc(1, sizeof(*o)); if (o == NULL) log_err(1, "calloc"); o->o_name = checked_strdup(name); o->o_value = checked_strdup(value); TAILQ_INSERT_TAIL(options, o, o_next); return (o); } void option_delete(struct options *options, struct option *o) { TAILQ_REMOVE(options, o, o_next); free(o->o_name); free(o->o_value); free(o); } struct option * option_find(const struct options *options, const char *name) { struct option *o; TAILQ_FOREACH(o, options, o_next) { if (strcmp(o->o_name, name) == 0) return (o); } return (NULL); } void option_set(struct option *o, const char *value) { free(o->o_value); o->o_value = checked_strdup(value); } static struct connection * connection_new(struct portal *portal, int fd, const char *host, const struct sockaddr *client_sa) { struct connection *conn; conn = calloc(1, sizeof(*conn)); if (conn == NULL) log_err(1, "calloc"); conn->conn_portal = portal; conn->conn_socket = fd; conn->conn_initiator_addr = checked_strdup(host); memcpy(&conn->conn_initiator_sa, client_sa, client_sa->sa_len); /* * Default values, from RFC 3720, section 12. 
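 * These are the declared defaults for MaxRecvDataSegmentLength (8192), MaxBurstLength (262144), FirstBurstLength (65536), and ImmediateData (Yes); login negotiation may override them later.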
*/ conn->conn_max_recv_data_segment_length = 8192; conn->conn_max_burst_length = 262144; conn->conn_first_burst_length = 65536; conn->conn_immediate_data = true; return (conn); } #if 0 static void conf_print(struct conf *conf) { struct auth_group *ag; struct auth *auth; struct auth_name *auth_name; struct auth_portal *auth_portal; struct portal_group *pg; struct portal *portal; struct target *targ; struct lun *lun; struct option *o; TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { fprintf(stderr, "auth-group %s {\n", ag->ag_name); TAILQ_FOREACH(auth, &ag->ag_auths, a_next) fprintf(stderr, "\t chap-mutual %s %s %s %s\n", auth->a_user, auth->a_secret, auth->a_mutual_user, auth->a_mutual_secret); TAILQ_FOREACH(auth_name, &ag->ag_names, an_next) fprintf(stderr, "\t initiator-name %s\n", auth_name->an_initator_name); TAILQ_FOREACH(auth_portal, &ag->ag_portals, an_next) fprintf(stderr, "\t initiator-portal %s\n", auth_portal->an_initator_portal); fprintf(stderr, "}\n"); } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { fprintf(stderr, "portal-group %s {\n", pg->pg_name); TAILQ_FOREACH(portal, &pg->pg_portals, p_next) fprintf(stderr, "\t listen %s\n", portal->p_listen); fprintf(stderr, "}\n"); } TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { fprintf(stderr, "\tlun %s {\n", lun->l_name); fprintf(stderr, "\t\tpath %s\n", lun->l_path); TAILQ_FOREACH(o, &lun->l_options, o_next) fprintf(stderr, "\t\toption %s %s\n", o->o_name, o->o_value); fprintf(stderr, "\t}\n"); } TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { fprintf(stderr, "target %s {\n", targ->t_name); if (targ->t_alias != NULL) fprintf(stderr, "\t alias %s\n", targ->t_alias); fprintf(stderr, "}\n"); } } #endif static int conf_verify_lun(struct lun *lun) { const struct lun *lun2; if (lun->l_backend == NULL) lun_set_backend(lun, "block"); if (strcmp(lun->l_backend, "block") == 0) { if (lun->l_path == NULL) { log_warnx("missing path for lun \"%s\"", lun->l_name); return (1); } } else if (strcmp(lun->l_backend, "ramdisk") == 0) { if (lun->l_size == 0) { log_warnx("missing size for ramdisk-backed lun \"%s\"", lun->l_name); return (1); } if (lun->l_path != NULL) { log_warnx("path must not be specified " "for ramdisk-backed lun \"%s\"", lun->l_name); return (1); } } if (lun->l_blocksize == 0) { if (lun->l_device_type == 5) lun_set_blocksize(lun, DEFAULT_CD_BLOCKSIZE); else lun_set_blocksize(lun, DEFAULT_BLOCKSIZE); } else if (lun->l_blocksize < 0) { log_warnx("invalid blocksize for lun \"%s\"; " "must be larger than 0", lun->l_name); return (1); } if (lun->l_size != 0 && lun->l_size % lun->l_blocksize != 0) { log_warnx("invalid size for lun \"%s\"; " "must be multiple of blocksize", lun->l_name); return (1); } TAILQ_FOREACH(lun2, &lun->l_conf->conf_luns, l_next) { if (lun == lun2) continue; if (lun->l_path != NULL && lun2->l_path != NULL && strcmp(lun->l_path, lun2->l_path) == 0) { log_debugx("WARNING: path \"%s\" duplicated " "between lun \"%s\", and " "lun \"%s\"", lun->l_path, lun->l_name, lun2->l_name); } } return (0); } int conf_verify(struct conf *conf) { struct auth_group *ag; struct portal_group *pg; struct port *port; struct target *targ; struct lun *lun; bool found; int error, i; if (conf->conf_pidfile_path == NULL) conf->conf_pidfile_path = checked_strdup(DEFAULT_PIDFILE); TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { error = conf_verify_lun(lun); if (error != 0) return (error); } TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (targ->t_auth_group == NULL) { targ->t_auth_group = auth_group_find(conf, "default"); 
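/* The "default" auth-group is created in conf_new_from_file() before parsing, so this lookup is expected to succeed. */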
assert(targ->t_auth_group != NULL); } if (TAILQ_EMPTY(&targ->t_ports)) { pg = portal_group_find(conf, "default"); assert(pg != NULL); port_new(conf, targ, pg); } found = false; for (i = 0; i < MAX_LUNS; i++) { if (targ->t_luns[i] != NULL) found = true; } if (!found && targ->t_redirection == NULL) { log_warnx("no LUNs defined for target \"%s\"", targ->t_name); } if (found && targ->t_redirection != NULL) { log_debugx("target \"%s\" contains LUNs, " "but configured for redirection", targ->t_name); } } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { assert(pg->pg_name != NULL); if (pg->pg_discovery_auth_group == NULL) { pg->pg_discovery_auth_group = auth_group_find(conf, "default"); assert(pg->pg_discovery_auth_group != NULL); } if (pg->pg_discovery_filter == PG_FILTER_UNKNOWN) pg->pg_discovery_filter = PG_FILTER_NONE; if (pg->pg_redirection != NULL) { if (!TAILQ_EMPTY(&pg->pg_ports)) { log_debugx("portal-group \"%s\" assigned " "to target, but configured " "for redirection", pg->pg_name); } pg->pg_unassigned = false; } else if (!TAILQ_EMPTY(&pg->pg_ports)) { pg->pg_unassigned = false; } else { if (strcmp(pg->pg_name, "default") != 0) log_warnx("portal-group \"%s\" not assigned " "to any target", pg->pg_name); pg->pg_unassigned = true; } } TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { if (ag->ag_name == NULL) assert(ag->ag_target != NULL); else assert(ag->ag_target == NULL); found = false; TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (targ->t_auth_group == ag) { found = true; break; } } TAILQ_FOREACH(port, &conf->conf_ports, p_next) { if (port->p_auth_group == ag) { found = true; break; } } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (pg->pg_discovery_auth_group == ag) { found = true; break; } } if (!found && ag->ag_name != NULL && strcmp(ag->ag_name, "default") != 0 && strcmp(ag->ag_name, "no-authentication") != 0 && strcmp(ag->ag_name, "no-access") != 0) { log_warnx("auth-group \"%s\" not assigned " "to any target", ag->ag_name); } } return (0); } static int conf_apply(struct conf *oldconf, struct conf *newconf) { struct lun *oldlun, *newlun, *tmplun; struct portal_group *oldpg, *newpg; struct portal *oldp, *newp; struct port *oldport, *newport, *tmpport; struct isns *oldns, *newns; pid_t otherpid; int changed, cumulated_error = 0, error, sockbuf; int one = 1; if (oldconf->conf_debug != newconf->conf_debug) { log_debugx("changing debug level to %d", newconf->conf_debug); log_init(newconf->conf_debug); } if (oldconf->conf_pidfh != NULL) { assert(oldconf->conf_pidfile_path != NULL); if (newconf->conf_pidfile_path != NULL && strcmp(oldconf->conf_pidfile_path, newconf->conf_pidfile_path) == 0) { newconf->conf_pidfh = oldconf->conf_pidfh; oldconf->conf_pidfh = NULL; } else { log_debugx("removing pidfile %s", oldconf->conf_pidfile_path); pidfile_remove(oldconf->conf_pidfh); oldconf->conf_pidfh = NULL; } } if (newconf->conf_pidfh == NULL && newconf->conf_pidfile_path != NULL) { log_debugx("opening pidfile %s", newconf->conf_pidfile_path); newconf->conf_pidfh = pidfile_open(newconf->conf_pidfile_path, 0600, &otherpid); if (newconf->conf_pidfh == NULL) { if (errno == EEXIST) log_errx(1, "daemon already running, pid: %jd.", (intmax_t)otherpid); log_err(1, "cannot open or create pidfile \"%s\"", newconf->conf_pidfile_path); } } /* * Go through the new portal groups, assigning tags or preserving old. 
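 * Reusing the old tag keeps the portal group tag (TPGT) that initiators have already seen stable across configuration reloads.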
*/ TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) { if (newpg->pg_tag != 0) continue; oldpg = portal_group_find(oldconf, newpg->pg_name); if (oldpg != NULL) newpg->pg_tag = oldpg->pg_tag; else newpg->pg_tag = ++last_portal_group_tag; } /* Deregister on removed iSNS servers. */ TAILQ_FOREACH(oldns, &oldconf->conf_isns, i_next) { TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) { if (strcmp(oldns->i_addr, newns->i_addr) == 0) break; } if (newns == NULL) isns_deregister(oldns); } /* * XXX: If target or lun removal fails, we should somehow "move" * the old lun or target into newconf, so that subsequent * conf_apply() would try to remove them again. That would * be somewhat hairy, though, and lun deletion failures don't * really happen, so leave it as it is for now. */ /* * First, remove any ports present in the old configuration * and missing in the new one. */ TAILQ_FOREACH_SAFE(oldport, &oldconf->conf_ports, p_next, tmpport) { if (oldport->p_foreign) continue; newport = port_find(newconf, oldport->p_name); if (newport != NULL && !newport->p_foreign) continue; log_debugx("removing port \"%s\"", oldport->p_name); error = kernel_port_remove(oldport); if (error != 0) { log_warnx("failed to remove port %s", oldport->p_name); /* * XXX: Uncomment after fixing the root cause. * * cumulated_error++; */ } } /* * Second, remove any LUNs present in the old configuration * and missing in the new one. */ TAILQ_FOREACH_SAFE(oldlun, &oldconf->conf_luns, l_next, tmplun) { newlun = lun_find(newconf, oldlun->l_name); if (newlun == NULL) { log_debugx("lun \"%s\", CTL lun %d " "not found in new configuration; " "removing", oldlun->l_name, oldlun->l_ctl_lun); error = kernel_lun_remove(oldlun); if (error != 0) { log_warnx("failed to remove lun \"%s\", " "CTL lun %d", oldlun->l_name, oldlun->l_ctl_lun); cumulated_error++; } continue; } /* * Also remove the LUNs changed by more than size. 
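 * A change of size alone can be applied in place by kernel_lun_modify() below; a change of backend, blocksize, device-id, path, or serial requires the LUN to be removed and recreated.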
*/ changed = 0; assert(oldlun->l_backend != NULL); assert(newlun->l_backend != NULL); if (strcmp(newlun->l_backend, oldlun->l_backend) != 0) { log_debugx("backend for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (oldlun->l_blocksize != newlun->l_blocksize) { log_debugx("blocksize for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_device_id != NULL && (oldlun->l_device_id == NULL || strcmp(oldlun->l_device_id, newlun->l_device_id) != 0)) { log_debugx("device-id for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_path != NULL && (oldlun->l_path == NULL || strcmp(oldlun->l_path, newlun->l_path) != 0)) { log_debugx("path for lun \"%s\", " "CTL lun %d, changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_serial != NULL && (oldlun->l_serial == NULL || strcmp(oldlun->l_serial, newlun->l_serial) != 0)) { log_debugx("serial for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (changed) { error = kernel_lun_remove(oldlun); if (error != 0) { log_warnx("failed to remove lun \"%s\", " "CTL lun %d", oldlun->l_name, oldlun->l_ctl_lun); cumulated_error++; } lun_delete(oldlun); continue; } lun_set_ctl_lun(newlun, oldlun->l_ctl_lun); } TAILQ_FOREACH_SAFE(newlun, &newconf->conf_luns, l_next, tmplun) { oldlun = lun_find(oldconf, newlun->l_name); if (oldlun != NULL) { log_debugx("modifying lun \"%s\", CTL lun %d", newlun->l_name, newlun->l_ctl_lun); error = kernel_lun_modify(newlun); if (error != 0) { log_warnx("failed to " "modify lun \"%s\", CTL lun %d", newlun->l_name, newlun->l_ctl_lun); cumulated_error++; } continue; } log_debugx("adding lun \"%s\"", newlun->l_name); error = kernel_lun_add(newlun); if (error != 0) { log_warnx("failed to add lun \"%s\"", newlun->l_name); lun_delete(newlun); cumulated_error++; } } /* * Now add new ports or modify existing ones. */ TAILQ_FOREACH(newport, &newconf->conf_ports, p_next) { if (newport->p_foreign) continue; oldport = port_find(oldconf, newport->p_name); if (oldport == NULL || oldport->p_foreign) { log_debugx("adding port \"%s\"", newport->p_name); error = kernel_port_add(newport); } else { log_debugx("updating port \"%s\"", newport->p_name); newport->p_ctl_port = oldport->p_ctl_port; error = kernel_port_update(newport, oldport); } if (error != 0) { log_warnx("failed to %s port %s", (oldport == NULL) ? "add" : "update", newport->p_name); /* * XXX: Uncomment after fixing the root cause. * * cumulated_error++; */ } } /* * Go through the new portals, opening the sockets as necessary. */ TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) { if (newpg->pg_foreign) continue; if (newpg->pg_unassigned) { log_debugx("not listening on portal-group \"%s\", " "not assigned to any target", newpg->pg_name); continue; } TAILQ_FOREACH(newp, &newpg->pg_portals, p_next) { /* * Try to find already open portal and reuse * the listening socket. We don't care about * what portal or portal group that was, what * matters is the listening address. */ TAILQ_FOREACH(oldpg, &oldconf->conf_portal_groups, pg_next) { TAILQ_FOREACH(oldp, &oldpg->pg_portals, p_next) { if (strcmp(newp->p_listen, oldp->p_listen) == 0 && oldp->p_socket > 0) { newp->p_socket = oldp->p_socket; oldp->p_socket = 0; break; } } } if (newp->p_socket > 0) { /* * We're done with this portal. 
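 * Its listening socket was taken over from the old configuration, so no new socket has to be opened or bound.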
*/ continue; } #ifdef ICL_KERNEL_PROXY if (proxy_mode) { newpg->pg_conf->conf_portal_id++; newp->p_id = newpg->pg_conf->conf_portal_id; log_debugx("listening on %s, portal-group " "\"%s\", portal id %d, using ICL proxy", newp->p_listen, newpg->pg_name, newp->p_id); kernel_listen(newp->p_ai, newp->p_iser, newp->p_id); continue; } #endif assert(proxy_mode == false); assert(newp->p_iser == false); log_debugx("listening on %s, portal-group \"%s\"", newp->p_listen, newpg->pg_name); newp->p_socket = socket(newp->p_ai->ai_family, newp->p_ai->ai_socktype, newp->p_ai->ai_protocol); if (newp->p_socket < 0) { log_warn("socket(2) failed for %s", newp->p_listen); cumulated_error++; continue; } sockbuf = SOCKBUF_SIZE; if (setsockopt(newp->p_socket, SOL_SOCKET, SO_RCVBUF, &sockbuf, sizeof(sockbuf)) == -1) log_warn("setsockopt(SO_RCVBUF) failed " "for %s", newp->p_listen); sockbuf = SOCKBUF_SIZE; if (setsockopt(newp->p_socket, SOL_SOCKET, SO_SNDBUF, &sockbuf, sizeof(sockbuf)) == -1) log_warn("setsockopt(SO_SNDBUF) failed " "for %s", newp->p_listen); error = setsockopt(newp->p_socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); if (error != 0) { log_warn("setsockopt(SO_REUSEADDR) failed " "for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } error = bind(newp->p_socket, newp->p_ai->ai_addr, newp->p_ai->ai_addrlen); if (error != 0) { log_warn("bind(2) failed for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } error = listen(newp->p_socket, -1); if (error != 0) { log_warn("listen(2) failed for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } } } /* * Go through the no longer used sockets, closing them. */ TAILQ_FOREACH(oldpg, &oldconf->conf_portal_groups, pg_next) { TAILQ_FOREACH(oldp, &oldpg->pg_portals, p_next) { if (oldp->p_socket <= 0) continue; log_debugx("closing socket for %s, portal-group \"%s\"", oldp->p_listen, oldpg->pg_name); close(oldp->p_socket); oldp->p_socket = 0; } } /* (Re-)Register on remaining/new iSNS servers. */ TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) { TAILQ_FOREACH(oldns, &oldconf->conf_isns, i_next) { if (strcmp(oldns->i_addr, newns->i_addr) == 0) break; } isns_register(newns, oldns); } /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) set_timeout((newconf->conf_isns_period + 2) / 3, false); return (cumulated_error); } bool timed_out(void) { return (sigalrm_received); } static void sigalrm_handler_fatal(int dummy __unused) { /* * It would be easiest to just log an error and exit. We can't * do this, though, because log_errx() is not signal safe, since * it calls syslog(3). Instead, set a flag checked by pdu_send() * and pdu_receive(), to call log_errx() there. Should they fail * to notice, we'll exit here one second later. */ if (sigalrm_received) { /* * Oh well. Just give up and quit. 
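 * _exit(2) is async-signal-safe; exit(3) is not, as it may run atexit(3) handlers and stdio cleanup from signal context.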
_exit(2); } sigalrm_received = true; } static void sigalrm_handler(int dummy __unused) { sigalrm_received = true; } void set_timeout(int timeout, int fatal) { struct sigaction sa; struct itimerval itv; int error; if (timeout <= 0) { log_debugx("session timeout disabled"); bzero(&itv, sizeof(itv)); error = setitimer(ITIMER_REAL, &itv, NULL); if (error != 0) log_err(1, "setitimer"); sigalrm_received = false; return; } sigalrm_received = false; bzero(&sa, sizeof(sa)); if (fatal) sa.sa_handler = sigalrm_handler_fatal; else sa.sa_handler = sigalrm_handler; sigfillset(&sa.sa_mask); error = sigaction(SIGALRM, &sa, NULL); if (error != 0) log_err(1, "sigaction"); /* * First SIGALRM will arrive after conf_timeout seconds. * If we do nothing, another one will arrive a second later. */ log_debugx("setting session timeout to %d seconds", timeout); bzero(&itv, sizeof(itv)); itv.it_interval.tv_sec = 1; itv.it_value.tv_sec = timeout; error = setitimer(ITIMER_REAL, &itv, NULL); if (error != 0) log_err(1, "setitimer"); } static int wait_for_children(bool block) { pid_t pid; int status; int num = 0; for (;;) { /* * If "block" is true, wait for at least one process. */ if (block && num == 0) pid = wait4(-1, &status, 0, NULL); else pid = wait4(-1, &status, WNOHANG, NULL); if (pid <= 0) break; if (WIFSIGNALED(status)) { log_warnx("child process %d terminated with signal %d", pid, WTERMSIG(status)); } else if (WEXITSTATUS(status) != 0) { log_warnx("child process %d terminated with exit status %d", pid, WEXITSTATUS(status)); } else { log_debugx("child process %d terminated gracefully", pid); } num++; } return (num); } static void handle_connection(struct portal *portal, int fd, const struct sockaddr *client_sa, bool dont_fork) { struct connection *conn; int error; pid_t pid; char host[NI_MAXHOST + 1]; struct conf *conf; conf = portal->p_portal_group->pg_conf; if (dont_fork) { log_debugx("incoming connection; not forking due to -d flag"); } else { nchildren -= wait_for_children(false); assert(nchildren >= 0); while (conf->conf_maxproc > 0 && nchildren >= conf->conf_maxproc) { log_debugx("maxproc limit of %d child processes hit; " "waiting for child process to exit", conf->conf_maxproc); nchildren -= wait_for_children(true); assert(nchildren >= 0); } log_debugx("incoming connection; forking child process #%d", nchildren); nchildren++; pid = fork(); if (pid < 0) log_err(1, "fork"); if (pid > 0) { close(fd); return; } } pidfile_close(conf->conf_pidfh); error = getnameinfo(client_sa, client_sa->sa_len, host, sizeof(host), NULL, 0, NI_NUMERICHOST); if (error != 0) log_errx(1, "getnameinfo: %s", gai_strerror(error)); log_debugx("accepted connection from %s; portal group \"%s\"", host, portal->p_portal_group->pg_name); log_set_peer_addr(host); setproctitle("%s", host); conn = connection_new(portal, fd, host, client_sa); set_timeout(conf->conf_timeout, true); kernel_capsicate(); login(conn); if (conn->conn_session_type == CONN_SESSION_TYPE_NORMAL) { kernel_handoff(conn); log_debugx("connection handed off to the kernel"); } else { assert(conn->conn_session_type == CONN_SESSION_TYPE_DISCOVERY); discovery(conn); } log_debugx("nothing more to do; exiting"); exit(0); } static int fd_add(int fd, fd_set *fdset, int nfds) { /* * Skip sockets which we failed to bind. 
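 * conf_apply() resets p_socket to 0 whenever socket setup fails, so a non-positive descriptor here simply means there is nothing to poll.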
*/ if (fd <= 0) return (nfds); FD_SET(fd, fdset); if (fd > nfds) nfds = fd; return (nfds); } static void main_loop(struct conf *conf, bool dont_fork) { struct portal_group *pg; struct portal *portal; struct sockaddr_storage client_sa; socklen_t client_salen; #ifdef ICL_KERNEL_PROXY int connection_id; int portal_id; #endif fd_set fdset; int error, nfds, client_fd; pidfile_write(conf->conf_pidfh); for (;;) { if (sighup_received || sigterm_received || timed_out()) return; #ifdef ICL_KERNEL_PROXY if (proxy_mode) { client_salen = sizeof(client_sa); kernel_accept(&connection_id, &portal_id, (struct sockaddr *)&client_sa, &client_salen); assert(client_salen >= client_sa.ss_len); log_debugx("incoming connection, id %d, portal id %d", connection_id, portal_id); TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { if (portal->p_id == portal_id) { goto found; } } } log_errx(1, "kernel returned invalid portal_id %d", portal_id); found: handle_connection(portal, connection_id, (struct sockaddr *)&client_sa, dont_fork); } else { #endif assert(proxy_mode == false); FD_ZERO(&fdset); nfds = 0; TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) nfds = fd_add(portal->p_socket, &fdset, nfds); } error = select(nfds + 1, &fdset, NULL, NULL, NULL); if (error <= 0) { if (errno == EINTR) return; log_err(1, "select"); } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { if (!FD_ISSET(portal->p_socket, &fdset)) continue; client_salen = sizeof(client_sa); client_fd = accept(portal->p_socket, (struct sockaddr *)&client_sa, &client_salen); if (client_fd < 0) { if (errno == ECONNABORTED) continue; log_err(1, "accept"); } assert(client_salen >= client_sa.ss_len); handle_connection(portal, client_fd, (struct sockaddr *)&client_sa, dont_fork); break; } } #ifdef ICL_KERNEL_PROXY } #endif } } static void sighup_handler(int dummy __unused) { sighup_received = true; } static void sigterm_handler(int dummy __unused) { sigterm_received = true; } static void sigchld_handler(int dummy __unused) { /* * The only purpose of this handler is to make SIGCHLD * interrupt the ISCSIDWAIT ioctl(2), so we can call * wait_for_children(). */ } static void register_signals(void) { struct sigaction sa; int error; bzero(&sa, sizeof(sa)); sa.sa_handler = sighup_handler; sigfillset(&sa.sa_mask); error = sigaction(SIGHUP, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigterm_handler; error = sigaction(SIGTERM, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigterm_handler; error = sigaction(SIGINT, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigchld_handler; error = sigaction(SIGCHLD, &sa, NULL); if (error != 0) log_err(1, "sigaction"); } static void check_perms(const char *path) { struct stat sb; int error; error = stat(path, &sb); if (error != 0) { log_warn("stat"); return; } if (sb.st_mode & S_IWOTH) { log_warnx("%s is world-writable", path); } else if (sb.st_mode & S_IROTH) { log_warnx("%s is world-readable", path); } else if (sb.st_mode & S_IXOTH) { /* * Ok, this one doesn't matter, but still do it, * just for consistency. */ log_warnx("%s is world-executable", path); } /* * XXX: Should we also check for owner != 0? 
*/ } static struct conf * conf_new_from_file(const char *path, struct conf *oldconf, bool ucl) { struct conf *conf; struct auth_group *ag; struct portal_group *pg; struct pport *pp; int error; log_debugx("obtaining configuration from %s", path); conf = conf_new(); TAILQ_FOREACH(pp, &oldconf->conf_pports, pp_next) pport_copy(pp, conf); ag = auth_group_new(conf, "default"); assert(ag != NULL); ag = auth_group_new(conf, "no-authentication"); assert(ag != NULL); ag->ag_type = AG_TYPE_NO_AUTHENTICATION; ag = auth_group_new(conf, "no-access"); assert(ag != NULL); ag->ag_type = AG_TYPE_DENY; pg = portal_group_new(conf, "default"); assert(pg != NULL); if (ucl) error = uclparse_conf(conf, path); else error = parse_conf(conf, path); if (error != 0) { conf_delete(conf); return (NULL); } check_perms(path); if (conf->conf_default_ag_defined == false) { log_debugx("auth-group \"default\" not defined; " "going with defaults"); ag = auth_group_find(conf, "default"); assert(ag != NULL); ag->ag_type = AG_TYPE_DENY; } if (conf->conf_default_pg_defined == false) { log_debugx("portal-group \"default\" not defined; " "going with defaults"); pg = portal_group_find(conf, "default"); assert(pg != NULL); portal_group_add_listen(pg, "0.0.0.0:3260", false); portal_group_add_listen(pg, "[::]:3260", false); } conf->conf_kernel_port_on = true; error = conf_verify(conf); if (error != 0) { conf_delete(conf); return (NULL); } return (conf); } int main(int argc, char **argv) { struct conf *oldconf, *newconf, *tmpconf; struct isns *newns; const char *config_path = DEFAULT_CONFIG_PATH; int debug = 0, ch, error; bool dont_daemonize = false; bool use_ucl = false; while ((ch = getopt(argc, argv, "duf:R")) != -1) { switch (ch) { case 'd': dont_daemonize = true; debug++; break; case 'u': use_ucl = true; break; case 'f': config_path = optarg; break; case 'R': #ifndef ICL_KERNEL_PROXY log_errx(1, "ctld(8) compiled without ICL_KERNEL_PROXY " "does not support iSER protocol"); #endif proxy_mode = true; break; case '?': default: usage(); } } argc -= optind; if (argc != 0) usage(); log_init(debug); kernel_init(); oldconf = conf_new_from_kernel(); newconf = conf_new_from_file(config_path, oldconf, use_ucl); if (newconf == NULL) log_errx(1, "configuration error; exiting"); if (debug > 0) { oldconf->conf_debug = debug; newconf->conf_debug = debug; } error = conf_apply(oldconf, newconf); if (error != 0) log_errx(1, "failed to apply configuration; exiting"); conf_delete(oldconf); oldconf = NULL; register_signals(); if (dont_daemonize == false) { log_debugx("daemonizing"); if (daemon(0, 0) == -1) { log_warn("cannot daemonize"); pidfile_remove(newconf->conf_pidfh); exit(1); } } /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) set_timeout((newconf->conf_isns_period + 2) / 3, false); for (;;) { main_loop(newconf, dont_daemonize); if (sighup_received) { sighup_received = false; log_debugx("received SIGHUP, reloading configuration"); tmpconf = conf_new_from_file(config_path, newconf, use_ucl); if (tmpconf == NULL) { log_warnx("configuration error, " "continuing with old configuration"); } else { if (debug > 0) tmpconf->conf_debug = debug; oldconf = newconf; newconf = tmpconf; error = conf_apply(oldconf, newconf); if (error != 0) log_warnx("failed to reload " "configuration"); conf_delete(oldconf); oldconf = NULL; } } else if (sigterm_received) { log_debugx("exiting on signal; " "reloading empty configuration"); log_debugx("removing CTL iSCSI ports " "and terminating all connections"); oldconf = newconf; newconf = conf_new(); if 
(debug > 0) newconf->conf_debug = debug; error = conf_apply(oldconf, newconf); if (error != 0) log_warnx("failed to apply configuration"); conf_delete(oldconf); oldconf = NULL; log_warnx("exiting on signal"); exit(0); } else { nchildren -= wait_for_children(false); assert(nchildren >= 0); if (timed_out()) { set_timeout(0, false); TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) isns_check(newns); /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) { set_timeout((newconf->conf_isns_period + 2) / 3, false); } } } } /* NOTREACHED */ } Index: projects/clang391-import/usr.sbin/syslogd/syslogd.c =================================================================== --- projects/clang391-import/usr.sbin/syslogd/syslogd.c (revision 309262) +++ projects/clang391-import/usr.sbin/syslogd/syslogd.c (revision 309263) @@ -1,2923 +1,2923 @@ /* * Copyright (c) 1983, 1988, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1983, 1988, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)syslogd.c 8.3 (Berkeley) 4/4/94"; #endif #endif /* not lint */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); /* * syslogd -- log system messages * * This program implements a system log. It takes a series of lines. * Each line may have a priority, signified as "<n>" as * the first characters of the line. If this is * not present, a default priority is used. * * To kill syslogd, send a signal 15 (terminate). A signal 1 (hup) will * cause it to reread its configuration file. * * Defined Constants: * * MAXLINE -- the maximum line length that can be handled. * DEFUPRI -- the default priority for user messages * DEFSPRI -- the default priority for kernel messages * * Author: Eric Allman * extensive changes by Ralph Campbell * more extensive changes by Eric Allman (again) * Extension to log by program name as well as facility and priority * by Peter da Silva. * -u and -v by Harlan Stenn. 
* Priority comparison code by Harlan Stenn. */ #define MAXLINE 1024 /* maximum line length */ #define MAXSVLINE MAXLINE /* maximum saved line length */ #define DEFUPRI (LOG_USER|LOG_NOTICE) #define DEFSPRI (LOG_KERN|LOG_CRIT) #define TIMERINTVL 30 /* interval for checking flush, mark */ #define TTYMSGTIME 1 /* timeout passed to ttymsg */ #define RCVBUF_MINSIZE (80 * 1024) /* minimum size of dgram rcv buffer */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pathnames.h" #include "ttymsg.h" #define SYSLOG_NAMES #include const char *ConfFile = _PATH_LOGCONF; const char *PidFile = _PATH_LOGPID; const char ctty[] = _PATH_CONSOLE; static const char include_str[] = "include"; static const char include_ext[] = ".conf"; #define dprintf if (Debug) printf #define MAXUNAMES 20 /* maximum number of user names */ /* * List of hosts for binding. */ static STAILQ_HEAD(, host) hqueue; struct host { char *name; STAILQ_ENTRY(host) next; }; /* * Unix sockets. * We have two default sockets, one with 666 permissions, * and one for privileged programs. */ struct funix { int s; const char *name; mode_t mode; STAILQ_ENTRY(funix) next; }; struct funix funix_secure = { -1, _PATH_LOG_PRIV, S_IRUSR | S_IWUSR, { NULL } }; struct funix funix_default = { -1, _PATH_LOG, DEFFILEMODE, { &funix_secure } }; STAILQ_HEAD(, funix) funixes = { &funix_default, &(funix_secure.next.stqe_next) }; /* * Flags to logmsg(). */ #define IGN_CONS 0x001 /* don't print on console */ #define SYNC_FILE 0x002 /* do fsync on file after printing */ #define ADDDATE 0x004 /* add a date to the message */ #define MARK 0x008 /* this message is a mark */ #define ISKERNEL 0x010 /* kernel generated message */ /* * This structure represents the files that will have log * copies printed. * We require f_file to be valid if f_type is F_FILE, F_CONSOLE, F_TTY * or if f_type is F_PIPE and f_pid > 0. */ struct filed { struct filed *f_next; /* next in linked list */ short f_type; /* entry type, see below */ short f_file; /* file descriptor */ time_t f_time; /* time this was last written */ char *f_host; /* host from which to recd. */ u_char f_pmask[LOG_NFACILITIES+1]; /* priority mask */ u_char f_pcmp[LOG_NFACILITIES+1]; /* compare priority */ #define PRI_LT 0x1 #define PRI_EQ 0x2 #define PRI_GT 0x4 char *f_program; /* program this applies to */ union { char f_uname[MAXUNAMES][MAXLOGNAME]; struct { char f_hname[MAXHOSTNAMELEN]; struct addrinfo *f_addr; } f_forw; /* forwarding address */ char f_fname[MAXPATHLEN]; struct { char f_pname[MAXPATHLEN]; pid_t f_pid; } f_pipe; } f_un; char f_prevline[MAXSVLINE]; /* last message logged */ char f_lasttime[16]; /* time of last occurrence */ char f_prevhost[MAXHOSTNAMELEN]; /* host from which recd. */ int f_prevpri; /* pri of f_prevline */ int f_prevlen; /* length of f_prevline */ int f_prevcount; /* repetition cnt of prevline */ u_int f_repeatcount; /* number of "repeated" msgs */ int f_flags; /* file-specific flags */ #define FFLAG_SYNC 0x01 #define FFLAG_NEEDSYNC 0x02 }; /* * Queue of about-to-be dead processes we should watch out for. */ TAILQ_HEAD(stailhead, deadq_entry) deadq_head; struct stailhead *deadq_headp; struct deadq_entry { pid_t dq_pid; int dq_timeout; TAILQ_ENTRY(deadq_entry) dq_entries; }; /* * The timeout to apply to processes waiting on the dead queue. 
Unit * of measure is `mark intervals', i.e. 20 minutes by default. * Processes on the dead queue will be terminated after that time. */ #define DQ_TIMO_INIT 2 typedef struct deadq_entry *dq_t; /* * Struct to hold records of network addresses that are allowed to log * to us. */ struct allowedpeer { int isnumeric; u_short port; union { struct { struct sockaddr_storage addr; struct sockaddr_storage mask; } numeric; char *name; } u; #define a_addr u.numeric.addr #define a_mask u.numeric.mask #define a_name u.name }; /* * Intervals at which we flush out "message repeated" messages, * in seconds after previous message is logged. After each flush, * we move to the next interval until we reach the largest. */ int repeatinterval[] = { 30, 120, 600 }; /* # of secs before flush */ #define MAXREPEAT ((sizeof(repeatinterval) / sizeof(repeatinterval[0])) - 1) #define REPEATTIME(f) ((f)->f_time + repeatinterval[(f)->f_repeatcount]) #define BACKOFF(f) { if (++(f)->f_repeatcount > MAXREPEAT) \ (f)->f_repeatcount = MAXREPEAT; \ } /* values for f_type */ #define F_UNUSED 0 /* unused entry */ #define F_FILE 1 /* regular file */ #define F_TTY 2 /* terminal */ #define F_CONSOLE 3 /* console terminal */ #define F_FORW 4 /* remote machine */ #define F_USERS 5 /* list of users */ #define F_WALL 6 /* everyone logged on */ #define F_PIPE 7 /* pipe to program */ const char *TypeNames[8] = { "UNUSED", "FILE", "TTY", "CONSOLE", "FORW", "USERS", "WALL", "PIPE" }; static struct filed *Files; /* Log files that we write to */ static struct filed consfile; /* Console */ static int Debug; /* debug flag */ static int Foreground = 0; /* Run in foreground, instead of daemonizing */ static int resolve = 1; /* resolve hostname */ static char LocalHostName[MAXHOSTNAMELEN]; /* our hostname */ static const char *LocalDomain; /* our local domain name */ static int *finet; /* Internet datagram sockets */ static int fklog = -1; /* /dev/klog */ static int Initialized; /* set when we have initialized ourselves */ static int MarkInterval = 20 * 60; /* interval between marks in seconds */ static int MarkSeq; /* mark sequence number */ static int NoBind; /* don't bind() as suggested by RFC 3164 */ static int SecureMode; /* when true, receive only unix domain socks */ #ifdef INET6 static int family = PF_UNSPEC; /* protocol family (IPv4, IPv6 or both) */ #else static int family = PF_INET; /* protocol family (IPv4 only) */ #endif static int mask_C1 = 1; /* mask characters from 0x80 - 0x9F */ static int send_to_all; /* send message to all IPv4/IPv6 addresses */ static int use_bootfile; /* log entire bootfile for every kern msg */ static int no_compress; /* don't compress messages (1=pipes, 2=all) */ static int logflags = O_WRONLY|O_APPEND; /* flags used to open log files */ static char bootfile[MAXLINE+1]; /* booted kernel file */ struct allowedpeer *AllowedPeers; /* List of allowed peers */ static int NumAllowed; /* Number of entries in AllowedPeers */ static int RemoteAddDate; /* Always set the date on remote messages */ static int UniquePriority; /* Only log specified priority? */ static int LogFacPri; /* Put facility and priority in log message: */ /* 0=no, 1=numeric, 2=names */ static int KeepKernFac; /* Keep remotely logged kernel facility */ static int needdofsync = 0; /* Are any file(s) waiting to be fsynced? 
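 * When set, the main select() loop polls with a zero timeout and calls dofsync() as soon as it finds no pending input.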
*/ static struct pidfh *pfh; volatile sig_atomic_t MarkSet, WantDie; static int allowaddr(char *); static void cfline(const char *, struct filed *, const char *, const char *); static const char *cvthname(struct sockaddr *); static void deadq_enter(pid_t, const char *); static int deadq_remove(pid_t); static int decode(const char *, const CODE *); static void die(int) __dead2; static void dodie(int); static void dofsync(void); static void domark(int); static void fprintlog(struct filed *, int, const char *); static int *socksetup(int, char *); static void init(int); static void logerror(const char *); static void logmsg(int, const char *, const char *, int); static void log_deadchild(pid_t, int, const char *); static void markit(void); static int skip_message(const char *, const char *, int); static void printline(const char *, char *, int); static void printsys(char *); static int p_open(const char *, pid_t *); static void readklog(void); static void reapchild(int); static const char *ttymsg_check(struct iovec *, int, char *, int); static void usage(void); static int validate(struct sockaddr *, const char *); static void unmapped(struct sockaddr *); static void wallmsg(struct filed *, struct iovec *, const int iovlen); static int waitdaemon(int, int, int); static void timedout(int); static void increase_rcvbuf(int); static void close_filed(struct filed *f) { if (f == NULL || f->f_file == -1) return; (void)close(f->f_file); f->f_file = -1; f->f_type = F_UNUSED; } int main(int argc, char *argv[]) { int ch, i, fdsrmax = 0, l; struct sockaddr_un sunx, fromunix; struct sockaddr_storage frominet; fd_set *fdsr = NULL; char line[MAXLINE + 1]; const char *hname; struct timeval tv, *tvp; struct sigaction sact; struct host *host; struct funix *fx, *fx1; sigset_t mask; pid_t ppid = 1, spid; socklen_t len; if (madvise(NULL, 0, MADV_PROTECT) != 0) dprintf("madvise() failed: %s\n", strerror(errno)); STAILQ_INIT(&hqueue); while ((ch = getopt(argc, argv, "468Aa:b:cCdf:Fkl:m:nNop:P:sS:Tuv")) != -1) switch (ch) { case '4': family = PF_INET; break; #ifdef INET6 case '6': family = PF_INET6; break; #endif case '8': mask_C1 = 0; break; case 'A': send_to_all++; break; case 'a': /* allow specific network addresses only */ if (allowaddr(optarg) == -1) usage(); break; case 'b': { if ((host = malloc(sizeof(struct host))) == NULL) err(1, "malloc failed"); host->name = optarg; STAILQ_INSERT_TAIL(&hqueue, host, next); break; } case 'c': no_compress++; break; case 'C': logflags |= O_CREAT; break; case 'd': /* debug */ Debug++; break; case 'f': /* configuration file */ ConfFile = optarg; break; case 'F': /* run in foreground instead of daemon */ Foreground++; break; case 'k': /* keep remote kern fac */ KeepKernFac = 1; break; case 'l': { long perml; mode_t mode; char *name, *ep; if (optarg[0] == '/') { mode = DEFFILEMODE; name = optarg; } else if ((name = strchr(optarg, ':')) != NULL) { *name++ = '\0'; if (name[0] != '/') errx(1, "socket name must be absolute " "path"); if (isdigit(*optarg)) { perml = strtol(optarg, &ep, 8); if (*ep || perml < 0 || perml & ~(S_IRWXU|S_IRWXG|S_IRWXO)) errx(1, "invalid mode %s, exiting", optarg); mode = (mode_t )perml; } else errx(1, "invalid mode %s, exiting", optarg); } else /* doesn't begin with '/', and no ':' */ errx(1, "can't parse path %s", optarg); if (strlen(name) >= sizeof(sunx.sun_path)) errx(1, "%s path too long, exiting", name); if ((fx = malloc(sizeof(struct funix))) == NULL) err(1, "malloc failed"); fx->s = -1; fx->name = name; fx->mode = mode; STAILQ_INSERT_TAIL(&funixes, 
fx, next); break; } case 'm': /* mark interval */ MarkInterval = atoi(optarg) * 60; break; case 'N': NoBind = 1; SecureMode = 1; break; case 'n': resolve = 0; break; case 'o': use_bootfile = 1; break; case 'p': /* path */ if (strlen(optarg) >= sizeof(sunx.sun_path)) errx(1, "%s path too long, exiting", optarg); funix_default.name = optarg; break; case 'P': /* path for alt. PID */ PidFile = optarg; break; case 's': /* no network mode */ SecureMode++; break; case 'S': /* path for privileged originator */ if (strlen(optarg) >= sizeof(sunx.sun_path)) errx(1, "%s path too long, exiting", optarg); funix_secure.name = optarg; break; case 'T': RemoteAddDate = 1; break; case 'u': /* only log specified priority */ UniquePriority++; break; case 'v': /* log facility and priority */ LogFacPri++; break; default: usage(); } if ((argc -= optind) != 0) usage(); pfh = pidfile_open(PidFile, 0600, &spid); if (pfh == NULL) { if (errno == EEXIST) errx(1, "syslogd already running, pid: %d", spid); warn("cannot open pid file"); } if ((!Foreground) && (!Debug)) { ppid = waitdaemon(0, 0, 30); if (ppid < 0) { warn("could not become daemon"); pidfile_remove(pfh); exit(1); } } else if (Debug) { setlinebuf(stdout); } if (NumAllowed) endservent(); consfile.f_type = F_CONSOLE; (void)strlcpy(consfile.f_un.f_fname, ctty + sizeof _PATH_DEV - 1, sizeof(consfile.f_un.f_fname)); (void)strlcpy(bootfile, getbootfile(), sizeof(bootfile)); (void)signal(SIGTERM, dodie); (void)signal(SIGINT, Debug ? dodie : SIG_IGN); (void)signal(SIGQUIT, Debug ? dodie : SIG_IGN); /* * We don't want the SIGCHLD and SIGHUP handlers to interfere * with each other; they are likely candidates for being called * simultaneously (SIGHUP closes pipe descriptor, process dies, * SIGCHLD happens). */ sigemptyset(&mask); sigaddset(&mask, SIGHUP); sact.sa_handler = reapchild; sact.sa_mask = mask; sact.sa_flags = SA_RESTART; (void)sigaction(SIGCHLD, &sact, NULL); (void)signal(SIGALRM, domark); (void)signal(SIGPIPE, SIG_IGN); /* We'll catch EPIPE instead. 
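 * With SIGPIPE ignored, a write to a pipe whose reader has exited fails with EPIPE rather than killing the daemon, and the filed entry can then be cleaned up.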
*/ (void)alarm(TIMERINTVL); TAILQ_INIT(&deadq_head); #ifndef SUN_LEN #define SUN_LEN(unp) (strlen((unp)->sun_path) + 2) #endif STAILQ_FOREACH_SAFE(fx, &funixes, next, fx1) { (void)unlink(fx->name); memset(&sunx, 0, sizeof(sunx)); sunx.sun_family = AF_LOCAL; (void)strlcpy(sunx.sun_path, fx->name, sizeof(sunx.sun_path)); fx->s = socket(PF_LOCAL, SOCK_DGRAM, 0); if (fx->s < 0 || bind(fx->s, (struct sockaddr *)&sunx, SUN_LEN(&sunx)) < 0 || chmod(fx->name, fx->mode) < 0) { (void)snprintf(line, sizeof line, "cannot create %s", fx->name); logerror(line); dprintf("cannot create %s (%d)\n", fx->name, errno); if (fx == &funix_default || fx == &funix_secure) die(0); else { STAILQ_REMOVE(&funixes, fx, funix, next); continue; } } increase_rcvbuf(fx->s); } if (SecureMode <= 1) { if (STAILQ_EMPTY(&hqueue)) finet = socksetup(family, NULL); STAILQ_FOREACH(host, &hqueue, next) { int *finet0, total; finet0 = socksetup(family, host->name); if (finet0 && !finet) { finet = finet0; } else if (finet0 && finet) { total = *finet0 + *finet + 1; finet = realloc(finet, total * sizeof(int)); if (finet == NULL) err(1, "realloc failed"); for (i = 1; i <= *finet0; i++) { finet[(*finet)+i] = finet0[i]; } *finet = total - 1; free(finet0); } } } if (finet) { if (SecureMode) { for (i = 0; i < *finet; i++) { if (shutdown(finet[i+1], SHUT_RD) < 0 && errno != ENOTCONN) { logerror("shutdown"); if (!Debug) die(0); } } } else { dprintf("listening on inet and/or inet6 socket\n"); } dprintf("sending on inet and/or inet6 socket\n"); } if ((fklog = open(_PATH_KLOG, O_RDONLY|O_NONBLOCK, 0)) < 0) dprintf("can't open %s (%d)\n", _PATH_KLOG, errno); /* tuck my process id away */ pidfile_write(pfh); dprintf("off & running....\n"); init(0); /* prevent SIGHUP and SIGCHLD handlers from running in parallel */ sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sact.sa_handler = init; sact.sa_mask = mask; sact.sa_flags = SA_RESTART; (void)sigaction(SIGHUP, &sact, NULL); tvp = &tv; tv.tv_sec = tv.tv_usec = 0; if (fklog != -1 && fklog > fdsrmax) fdsrmax = fklog; if (finet && !SecureMode) { for (i = 0; i < *finet; i++) { if (finet[i+1] != -1 && finet[i+1] > fdsrmax) fdsrmax = finet[i+1]; } } STAILQ_FOREACH(fx, &funixes, next) if (fx->s > fdsrmax) fdsrmax = fx->s; fdsr = (fd_set *)calloc(howmany(fdsrmax+1, NFDBITS), sizeof(fd_mask)); if (fdsr == NULL) errx(1, "calloc fd_set"); for (;;) { if (MarkSet) markit(); if (WantDie) die(WantDie); bzero(fdsr, howmany(fdsrmax+1, NFDBITS) * sizeof(fd_mask)); if (fklog != -1) FD_SET(fklog, fdsr); if (finet && !SecureMode) { for (i = 0; i < *finet; i++) { if (finet[i+1] != -1) FD_SET(finet[i+1], fdsr); } } STAILQ_FOREACH(fx, &funixes, next) FD_SET(fx->s, fdsr); i = select(fdsrmax+1, fdsr, NULL, NULL, needdofsync ? &tv : tvp); switch (i) { case 0: dofsync(); needdofsync = 0; if (tvp) { tvp = NULL; if (ppid != 1) kill(ppid, SIGALRM); } continue; case -1: if (errno != EINTR) logerror("select"); continue; } if (fklog != -1 && FD_ISSET(fklog, fdsr)) readklog(); if (finet && !SecureMode) { for (i = 0; i < *finet; i++) { if (FD_ISSET(finet[i+1], fdsr)) { len = sizeof(frominet); l = recvfrom(finet[i+1], line, MAXLINE, 0, (struct sockaddr *)&frominet, &len); if (l > 0) { line[l] = '\0'; hname = cvthname((struct sockaddr *)&frominet); unmapped((struct sockaddr *)&frominet); if (validate((struct sockaddr *)&frominet, hname)) printline(hname, line, RemoteAddDate ? 
ADDDATE : 0); } else if (l < 0 && errno != EINTR) logerror("recvfrom inet"); } } } STAILQ_FOREACH(fx, &funixes, next) { if (FD_ISSET(fx->s, fdsr)) { len = sizeof(fromunix); l = recvfrom(fx->s, line, MAXLINE, 0, (struct sockaddr *)&fromunix, &len); if (l > 0) { line[l] = '\0'; printline(LocalHostName, line, 0); } else if (l < 0 && errno != EINTR) logerror("recvfrom unix"); } } } if (fdsr) free(fdsr); } static void unmapped(struct sockaddr *sa) { struct sockaddr_in6 *sin6; struct sockaddr_in sin4; if (sa->sa_family != AF_INET6) return; if (sa->sa_len != sizeof(struct sockaddr_in6) || sizeof(sin4) > sa->sa_len) return; sin6 = (struct sockaddr_in6 *)sa; if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) return; memset(&sin4, 0, sizeof(sin4)); sin4.sin_family = AF_INET; sin4.sin_len = sizeof(struct sockaddr_in); memcpy(&sin4.sin_addr, &sin6->sin6_addr.s6_addr[12], sizeof(sin4.sin_addr)); sin4.sin_port = sin6->sin6_port; memcpy(sa, &sin4, sin4.sin_len); } static void usage(void) { fprintf(stderr, "%s\n%s\n%s\n%s\n", "usage: syslogd [-468ACcdFknosTuv] [-a allowed_peer]", " [-b bind_address] [-f config_file]", " [-l [mode:]path] [-m mark_interval]", " [-P pid_file] [-p log_socket]"); exit(1); } /* * Take a raw input line, decode the message, and print the message * on the appropriate log files. */ static void printline(const char *hname, char *msg, int flags) { char *p, *q; long n; int c, pri; char line[MAXLINE + 1]; /* test for special codes */ p = msg; pri = DEFUPRI; if (*p == '<') { errno = 0; n = strtol(p + 1, &q, 10); if (*q == '>' && n >= 0 && n < INT_MAX && errno == 0) { p = q + 1; pri = n; } } if (pri &~ (LOG_FACMASK|LOG_PRIMASK)) pri = DEFUPRI; /* * Don't allow users to log kernel messages. * NOTE: since LOG_KERN == 0 this will also match * messages with no facility specified. */ if ((pri & LOG_FACMASK) == LOG_KERN && !KeepKernFac) pri = LOG_MAKEPRI(LOG_USER, LOG_PRI(pri)); q = line; while ((c = (unsigned char)*p++) != '\0' && q < &line[sizeof(line) - 4]) { if (mask_C1 && (c & 0x80) && c < 0xA0) { c &= 0x7F; *q++ = 'M'; *q++ = '-'; } if (isascii(c) && iscntrl(c)) { if (c == '\n') { *q++ = ' '; } else if (c == '\t') { *q++ = '\t'; } else { *q++ = '^'; *q++ = c ^ 0100; } } else { *q++ = c; } } *q = '\0'; logmsg(pri, line, hname, flags); } /* * Read /dev/klog while data are available, split into lines. */ static void readklog(void) { char *p, *q, line[MAXLINE + 1]; int len, i; len = 0; for (;;) { i = read(fklog, line + len, MAXLINE - 1 - len); if (i > 0) { line[i + len] = '\0'; } else { if (i < 0 && errno != EINTR && errno != EAGAIN) { logerror("klog"); fklog = -1; } break; } for (p = line; (q = strchr(p, '\n')) != NULL; p = q + 1) { *q = '\0'; printsys(p); } len = strlen(p); if (len >= MAXLINE - 1) { printsys(p); len = 0; } if (len > 0) memmove(line, p, len + 1); } if (len > 0) printsys(line); } /* * Take a raw input line from /dev/klog, format similar to syslog(). */ static void printsys(char *msg) { char *p, *q; long n; int flags, isprintf, pri; flags = ISKERNEL | SYNC_FILE | ADDDATE; /* fsync after write */ p = msg; pri = DEFSPRI; isprintf = 1; if (*p == '<') { errno = 0; n = strtol(p + 1, &q, 10); if (*q == '>' && n >= 0 && n < INT_MAX && errno == 0) { p = q + 1; pri = n; isprintf = 0; } } /* * Kernel printf's and LOG_CONSOLE messages have been displayed * on the console already. 
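 *
 * The "<N>" prefix decoded above uses the standard syslog(3)
 * encoding, pri = (facility << 3) | severity.  A hypothetical
 * sketch of splitting such a value back apart:
 */
#if 0
	int pri = 26;			/* as carried in a "<26>" prefix */
	int fac = LOG_FAC(pri);		/* 3 == daemon */
	int sev = LOG_PRI(pri);		/* 2 == crit */
#endif
	/* end of illustrative sketch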
*/ if (isprintf || (pri & LOG_FACMASK) == LOG_CONSOLE) flags |= IGN_CONS; if (pri &~ (LOG_FACMASK|LOG_PRIMASK)) pri = DEFSPRI; logmsg(pri, p, LocalHostName, flags); } static time_t now; /* * Match a program or host name against a specification. * Return a non-0 value if the message must be ignored * based on the specification. */ static int skip_message(const char *name, const char *spec, int checkcase) { const char *s; char prev, next; int exclude = 0; /* Behaviour on explicit match */ if (spec == NULL) return 0; switch (*spec) { case '-': exclude = 1; /*FALLTHROUGH*/ case '+': spec++; break; default: break; } if (checkcase) s = strstr (spec, name); else s = strcasestr (spec, name); if (s != NULL) { prev = (s == spec ? ',' : *(s - 1)); next = *(s + strlen (name)); if (prev == ',' && (next == '\0' || next == ',')) /* Explicit match: skip iff the spec is an exclusive one. */ return exclude; } /* No explicit match for this name: skip the message iff the spec is an inclusive one. */ return !exclude; } /* * Log a message to the appropriate log files, users, etc. based on * the priority. */ static void logmsg(int pri, const char *msg, const char *from, int flags) { struct filed *f; int i, fac, msglen, omask, prilev; const char *timestamp; char prog[NAME_MAX+1]; char buf[MAXLINE+1]; dprintf("logmsg: pri %o, flags %x, from %s, msg %s\n", pri, flags, from, msg); omask = sigblock(sigmask(SIGHUP)|sigmask(SIGALRM)); /* * Check to see if msg looks non-standard. */ msglen = strlen(msg); if (msglen < 16 || msg[3] != ' ' || msg[6] != ' ' || msg[9] != ':' || msg[12] != ':' || msg[15] != ' ') flags |= ADDDATE; (void)time(&now); if (flags & ADDDATE) { timestamp = ctime(&now) + 4; } else { timestamp = msg; msg += 16; msglen -= 16; } /* skip leading blanks */ while (isspace(*msg)) { msg++; msglen--; } /* extract facility and priority level */ if (flags & MARK) fac = LOG_NFACILITIES; else fac = LOG_FAC(pri); /* Check maximum facility number. */ if (fac > LOG_NFACILITIES) { (void)sigsetmask(omask); return; } prilev = LOG_PRI(pri); /* extract program name */ for (i = 0; i < NAME_MAX; i++) { if (!isprint(msg[i]) || msg[i] == ':' || msg[i] == '[' || msg[i] == '/' || isspace(msg[i])) break; prog[i] = msg[i]; } prog[i] = 0; /* add kernel prefix for kernel messages */ if (flags & ISKERNEL) { snprintf(buf, sizeof(buf), "%s: %s", use_bootfile ? bootfile : "kernel", msg); msg = buf; msglen = strlen(buf); } /* log the message to the particular outputs */ if (!Initialized) { f = &consfile; /* * Open in non-blocking mode to avoid hangs during open * and close(waiting for the port to drain). 
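 *
 * On a serial console a plain open(2) may block waiting for carrier,
 * so the descriptor is opened O_NONBLOCK and discarded right after
 * use.  A hypothetical sketch of the pattern (msg is a stand-in):
 */
#if 0
	int fd = open(_PATH_CONSOLE, O_WRONLY | O_NONBLOCK, 0);
	if (fd >= 0) {
		(void)write(fd, msg, strlen(msg));
		(void)close(fd);
	}
#endif
	/* end of illustrative sketch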
*/ f->f_file = open(ctty, O_WRONLY | O_NONBLOCK, 0); if (f->f_file >= 0) { (void)strlcpy(f->f_lasttime, timestamp, sizeof(f->f_lasttime)); fprintlog(f, flags, msg); close(f->f_file); f->f_file = -1; } (void)sigsetmask(omask); return; } for (f = Files; f; f = f->f_next) { /* skip messages that are incorrect priority */ if (!(((f->f_pcmp[fac] & PRI_EQ) && (f->f_pmask[fac] == prilev)) ||((f->f_pcmp[fac] & PRI_LT) && (f->f_pmask[fac] < prilev)) ||((f->f_pcmp[fac] & PRI_GT) && (f->f_pmask[fac] > prilev)) ) || f->f_pmask[fac] == INTERNAL_NOPRI) continue; /* skip messages with the incorrect hostname */ if (skip_message(from, f->f_host, 0)) continue; /* skip messages with the incorrect program name */ if (skip_message(prog, f->f_program, 1)) continue; /* skip message to console if it has already been printed */ if (f->f_type == F_CONSOLE && (flags & IGN_CONS)) continue; /* don't output marks to recently written files */ if ((flags & MARK) && (now - f->f_time) < MarkInterval / 2) continue; /* * suppress duplicate lines to this file */ if (no_compress - (f->f_type != F_PIPE) < 1 && (flags & MARK) == 0 && msglen == f->f_prevlen && !strcmp(msg, f->f_prevline) && !strcasecmp(from, f->f_prevhost)) { (void)strlcpy(f->f_lasttime, timestamp, sizeof(f->f_lasttime)); f->f_prevcount++; dprintf("msg repeated %d times, %ld sec of %d\n", f->f_prevcount, (long)(now - f->f_time), repeatinterval[f->f_repeatcount]); /* * If domark would have logged this by now, * flush it now (so we don't hold isolated messages), * but back off so we'll flush less often * in the future. */ if (now > REPEATTIME(f)) { fprintlog(f, flags, (char *)NULL); BACKOFF(f); } } else { /* new line, save it */ if (f->f_prevcount) fprintlog(f, 0, (char *)NULL); f->f_repeatcount = 0; f->f_prevpri = pri; (void)strlcpy(f->f_lasttime, timestamp, sizeof(f->f_lasttime)); (void)strlcpy(f->f_prevhost, from, sizeof(f->f_prevhost)); if (msglen < MAXSVLINE) { f->f_prevlen = msglen; (void)strlcpy(f->f_prevline, msg, sizeof(f->f_prevline)); fprintlog(f, flags, (char *)NULL); } else { f->f_prevline[0] = 0; f->f_prevlen = 0; fprintlog(f, flags, msg); } } } (void)sigsetmask(omask); } static void dofsync(void) { struct filed *f; for (f = Files; f; f = f->f_next) { if ((f->f_type == F_FILE) && (f->f_flags & FFLAG_NEEDSYNC)) { f->f_flags &= ~FFLAG_NEEDSYNC; (void)fsync(f->f_file); } } } #define IOV_SIZE 7 static void fprintlog(struct filed *f, int flags, const char *msg) { struct iovec iov[IOV_SIZE]; struct iovec *v; struct addrinfo *r; int i, l, lsent = 0; char line[MAXLINE + 1], repbuf[80], greetings[200], *wmsg = NULL; char nul[] = "", space[] = " ", lf[] = "\n", crlf[] = "\r\n"; const char *msgret; v = iov; if (f->f_type == F_WALL) { v->iov_base = greetings; /* The time displayed is not synchronized with the other log * destinations (like messages). The following fragment previously used * ctime(&now), which updated the time every 30 sec. * With f_lasttime, time is synchronized correctly.
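 *
 * Note that snprintf(3) returns the length the formatted string
 * would have had, which may exceed the buffer, so iov_len is
 * clamped just below.  A hypothetical sketch of that clamp
 * (longtext is a stand-in):
 */
#if 0
	char buf[16];
	int n = snprintf(buf, sizeof buf, "%s", longtext);
	if (n >= (int)sizeof buf)
		n = sizeof buf - 1;	/* bytes actually stored */
#endif
	/* end of illustrative sketch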
*/ v->iov_len = snprintf(greetings, sizeof greetings, "\r\n\7Message from syslogd@%s at %.24s ...\r\n", f->f_prevhost, f->f_lasttime); if (v->iov_len >= sizeof greetings) v->iov_len = sizeof greetings - 1; v++; v->iov_base = nul; v->iov_len = 0; v++; } else { v->iov_base = f->f_lasttime; v->iov_len = strlen(f->f_lasttime); v++; v->iov_base = space; v->iov_len = 1; v++; } if (LogFacPri) { static char fp_buf[30]; /* Hollow laugh */ int fac = f->f_prevpri & LOG_FACMASK; int pri = LOG_PRI(f->f_prevpri); const char *f_s = NULL; char f_n[5]; /* Hollow laugh */ const char *p_s = NULL; char p_n[5]; /* Hollow laugh */ if (LogFacPri > 1) { const CODE *c; for (c = facilitynames; c->c_name; c++) { if (c->c_val == fac) { f_s = c->c_name; break; } } for (c = prioritynames; c->c_name; c++) { if (c->c_val == pri) { p_s = c->c_name; break; } } } if (!f_s) { snprintf(f_n, sizeof f_n, "%d", LOG_FAC(fac)); f_s = f_n; } if (!p_s) { snprintf(p_n, sizeof p_n, "%d", pri); p_s = p_n; } snprintf(fp_buf, sizeof fp_buf, "<%s.%s> ", f_s, p_s); v->iov_base = fp_buf; v->iov_len = strlen(fp_buf); } else { v->iov_base = nul; v->iov_len = 0; } v++; v->iov_base = f->f_prevhost; v->iov_len = strlen(v->iov_base); v++; v->iov_base = space; v->iov_len = 1; v++; if (msg) { wmsg = strdup(msg); /* XXX iov_base needs a `const' sibling. */ if (wmsg == NULL) { logerror("strdup"); exit(1); } v->iov_base = wmsg; v->iov_len = strlen(msg); } else if (f->f_prevcount > 1) { v->iov_base = repbuf; v->iov_len = snprintf(repbuf, sizeof repbuf, "last message repeated %d times", f->f_prevcount); } else { v->iov_base = f->f_prevline; v->iov_len = f->f_prevlen; } v++; dprintf("Logging to %s", TypeNames[f->f_type]); f->f_time = now; switch (f->f_type) { int port; case F_UNUSED: dprintf("\n"); break; case F_FORW: port = (int)ntohs(((struct sockaddr_in *) (f->f_un.f_forw.f_addr->ai_addr))->sin_port); if (port != 514) { dprintf(" %s:%d\n", f->f_un.f_forw.f_hname, port); } else { dprintf(" %s\n", f->f_un.f_forw.f_hname); } /* check for local vs remote messages */ if (strcasecmp(f->f_prevhost, LocalHostName)) l = snprintf(line, sizeof line - 1, "<%d>%.15s Forwarded from %s: %s", f->f_prevpri, (char *)iov[0].iov_base, f->f_prevhost, (char *)iov[5].iov_base); else l = snprintf(line, sizeof line - 1, "<%d>%.15s %s", f->f_prevpri, (char *)iov[0].iov_base, (char *)iov[5].iov_base); if (l < 0) l = 0; else if (l > MAXLINE) l = MAXLINE; if (finet) { for (r = f->f_un.f_forw.f_addr; r; r = r->ai_next) { for (i = 0; i < *finet; i++) { #if 0 /* * should we check AF first, or just * trial and error? FWD */ if (r->ai_family == address_family_of(finet[i+1])) #endif lsent = sendto(finet[i+1], line, l, 0, r->ai_addr, r->ai_addrlen); if (lsent == l) break; } if (lsent == l && !send_to_all) break; } dprintf("lsent/l: %d/%d\n", lsent, l); if (lsent != l) { int e = errno; logerror("sendto"); errno = e; switch (errno) { case ENOBUFS: case ENETDOWN: case ENETUNREACH: case EHOSTUNREACH: case EHOSTDOWN: case EADDRNOTAVAIL: break; /* case EBADF: */ /* case EACCES: */ /* case ENOTSOCK: */ /* case EFAULT: */ /* case EMSGSIZE: */ /* case EAGAIN: */ /* case ENOBUFS: */ /* case ECONNREFUSED: */ default: dprintf("removing entry: errno=%d\n", e); f->f_type = F_UNUSED; break; } } } break; case F_FILE: dprintf(" %s\n", f->f_un.f_fname); v->iov_base = lf; v->iov_len = 1; if (writev(f->f_file, iov, IOV_SIZE) < 0) { /* * If writev(2) fails for potentially transient errors * like the filesystem being full, ignore it. * Otherwise remove this logfile from the list. 
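 *
 * Each record is assembled as an iovec array (timestamp, host,
 * message, terminator) and committed with one gather write.  A
 * hypothetical sketch of the same writev(2) pattern:
 */
#if 0
	struct iovec v[2];
	char ts[] = "Dec  6 10:00:00 ", nl[] = "\n";
	v[0].iov_base = ts;	v[0].iov_len = sizeof ts - 1;
	v[1].iov_base = nl;	v[1].iov_len = 1;
	if (writev(fd, v, 2) < 0 && errno != ENOSPC)
		logerror("writev");	/* ENOSPC is treated as transient */
#endif
	/* end of illustrative sketch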
*/ if (errno != ENOSPC) { int e = errno; close_filed(f); errno = e; logerror(f->f_un.f_fname); } } else if ((flags & SYNC_FILE) && (f->f_flags & FFLAG_SYNC)) { f->f_flags |= FFLAG_NEEDSYNC; needdofsync = 1; } break; case F_PIPE: dprintf(" %s\n", f->f_un.f_pipe.f_pname); v->iov_base = lf; v->iov_len = 1; if (f->f_un.f_pipe.f_pid == 0) { if ((f->f_file = p_open(f->f_un.f_pipe.f_pname, &f->f_un.f_pipe.f_pid)) < 0) { f->f_type = F_UNUSED; logerror(f->f_un.f_pipe.f_pname); break; } } if (writev(f->f_file, iov, IOV_SIZE) < 0) { int e = errno; close_filed(f); if (f->f_un.f_pipe.f_pid > 0) deadq_enter(f->f_un.f_pipe.f_pid, f->f_un.f_pipe.f_pname); f->f_un.f_pipe.f_pid = 0; errno = e; logerror(f->f_un.f_pipe.f_pname); } break; case F_CONSOLE: if (flags & IGN_CONS) { dprintf(" (ignored)\n"); break; } /* FALLTHROUGH */ case F_TTY: dprintf(" %s%s\n", _PATH_DEV, f->f_un.f_fname); v->iov_base = crlf; v->iov_len = 2; errno = 0; /* ttymsg() only sometimes returns an errno */ if ((msgret = ttymsg(iov, IOV_SIZE, f->f_un.f_fname, 10))) { f->f_type = F_UNUSED; logerror(msgret); } break; case F_USERS: case F_WALL: dprintf("\n"); v->iov_base = crlf; v->iov_len = 2; wallmsg(f, iov, IOV_SIZE); break; } f->f_prevcount = 0; free(wmsg); } /* * WALLMSG -- Write a message to the world at large * * Write the specified message to either the entire * world, or a list of approved users. */ static void wallmsg(struct filed *f, struct iovec *iov, const int iovlen) { static int reenter; /* avoid calling ourselves */ struct utmpx *ut; int i; const char *p; if (reenter++) return; setutxent(); /* NOSTRICT */ while ((ut = getutxent()) != NULL) { if (ut->ut_type != USER_PROCESS) continue; if (f->f_type == F_WALL) { if ((p = ttymsg(iov, iovlen, ut->ut_line, TTYMSGTIME)) != NULL) { errno = 0; /* already in msg */ logerror(p); } continue; } /* should we send the message to this user? */ for (i = 0; i < MAXUNAMES; i++) { if (!f->f_un.f_uname[i][0]) break; if (!strcmp(f->f_un.f_uname[i], ut->ut_user)) { if ((p = ttymsg_check(iov, iovlen, ut->ut_line, TTYMSGTIME)) != NULL) { errno = 0; /* already in msg */ logerror(p); } break; } } } endutxent(); reenter = 0; } /* * Wrapper routine for ttymsg() that checks the terminal for messages enabled. */ static const char * ttymsg_check(struct iovec *iov, int iovcnt, char *line, int tmout) { static char device[1024]; static char errbuf[1024]; struct stat sb; (void) snprintf(device, sizeof(device), "%s%s", _PATH_DEV, line); if (stat(device, &sb) < 0) { (void) snprintf(errbuf, sizeof(errbuf), "%s: %s", device, strerror(errno)); return (errbuf); } if ((sb.st_mode & S_IWGRP) == 0) /* Messages disabled. */ return (NULL); return ttymsg(iov, iovcnt, line, tmout); } static void reapchild(int signo __unused) { int status; pid_t pid; struct filed *f; while ((pid = wait3(&status, WNOHANG, (struct rusage *)NULL)) > 0) { if (!Initialized) /* Don't tell while we are initting. */ continue; /* First, look if it's a process from the dead queue. */ if (deadq_remove(pid)) goto oncemore; /* Now, look in list of active processes. */ for (f = Files; f; f = f->f_next) if (f->f_type == F_PIPE && f->f_un.f_pipe.f_pid == pid) { close_filed(f); f->f_un.f_pipe.f_pid = 0; log_deadchild(pid, status, f->f_un.f_pipe.f_pname); break; } oncemore: continue; } } /* * Return a printable representation of a host address. 
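 *
 * The numeric form is obtained via getnameinfo(3) with
 * NI_NUMERICHOST; only when -n was not given is a second, resolving
 * lookup attempted under a blocked SIGHUP.  A hypothetical sketch of
 * the numeric conversion alone (sa is a stand-in):
 */
#if 0
	char ip[NI_MAXHOST];
	if (getnameinfo(sa, sa->sa_len, ip, sizeof ip, NULL, 0,
	    NI_NUMERICHOST) == 0)
		dprintf("peer address is %s\n", ip);
#endif
	/* end of illustrative sketch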
*/ static const char * cvthname(struct sockaddr *f) { int error, hl; sigset_t omask, nmask; static char hname[NI_MAXHOST], ip[NI_MAXHOST]; error = getnameinfo((struct sockaddr *)f, ((struct sockaddr *)f)->sa_len, ip, sizeof ip, NULL, 0, NI_NUMERICHOST); dprintf("cvthname(%s)\n", ip); if (error) { dprintf("Malformed from address %s\n", gai_strerror(error)); return ("???"); } if (!resolve) return (ip); sigemptyset(&nmask); sigaddset(&nmask, SIGHUP); sigprocmask(SIG_BLOCK, &nmask, &omask); error = getnameinfo((struct sockaddr *)f, ((struct sockaddr *)f)->sa_len, hname, sizeof hname, NULL, 0, NI_NAMEREQD); sigprocmask(SIG_SETMASK, &omask, NULL); if (error) { dprintf("Host name for your address (%s) unknown\n", ip); return (ip); } hl = strlen(hname); if (hl > 0 && hname[hl-1] == '.') hname[--hl] = '\0'; trimdomain(hname, hl); return (hname); } static void dodie(int signo) { WantDie = signo; } static void domark(int signo __unused) { MarkSet = 1; } /* * Print syslogd errors some place. */ static void logerror(const char *type) { char buf[512]; static int recursed = 0; /* If there's an error while trying to log an error, give up. */ if (recursed) return; recursed++; if (errno) (void)snprintf(buf, sizeof buf, "syslogd: %s: %s", type, strerror(errno)); else (void)snprintf(buf, sizeof buf, "syslogd: %s", type); errno = 0; dprintf("%s\n", buf); logmsg(LOG_SYSLOG|LOG_ERR, buf, LocalHostName, ADDDATE); recursed--; } static void die(int signo) { struct filed *f; struct funix *fx; int was_initialized; char buf[100]; was_initialized = Initialized; Initialized = 0; /* Don't log SIGCHLDs. */ for (f = Files; f != NULL; f = f->f_next) { /* flush any pending output */ if (f->f_prevcount) fprintlog(f, 0, (char *)NULL); if (f->f_type == F_PIPE && f->f_un.f_pipe.f_pid > 0) { close_filed(f); f->f_un.f_pipe.f_pid = 0; } } Initialized = was_initialized; if (signo) { dprintf("syslogd: exiting on signal %d\n", signo); (void)snprintf(buf, sizeof(buf), "exiting on signal %d", signo); errno = 0; logerror(buf); } STAILQ_FOREACH(fx, &funixes, next) (void)unlink(fx->name); pidfile_remove(pfh); exit(1); } static int configfiles(const struct dirent *dp) { const char *p; size_t ext_len; if (dp->d_name[0] == '.') return (0); ext_len = sizeof(include_ext) -1; if (dp->d_namlen <= ext_len) return (0); p = &dp->d_name[dp->d_namlen - ext_len]; if (strcmp(p, include_ext) != 0) return (0); return (1); } static void readconfigfile(FILE *cf, struct filed **nextp, int allow_includes) { FILE *cf2; struct filed *f; struct dirent **ent; char cline[LINE_MAX]; char host[MAXHOSTNAMELEN]; char prog[LINE_MAX]; char file[MAXPATHLEN]; char *p, *tmp; int i, nents; size_t include_len; /* * Foreach line in the conf table, open that file. */ f = NULL; include_len = sizeof(include_str) -1; (void)strlcpy(host, "*", sizeof(host)); (void)strlcpy(prog, "*", sizeof(prog)); while (fgets(cline, sizeof(cline), cf) != NULL) { /* * check for end-of-section, comments, strip off trailing * spaces and newline character. #!prog is treated specially: * following lines apply only to that program. 
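 *
 * For example, a hypothetical syslog.conf fragment exercising the
 * program and host sections handled below:
 *
 *	!ppp
 *	*.*					/var/log/ppp.log
 *	+buildbox
 *	*.err					/var/log/buildbox.log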
*/ for (p = cline; isspace(*p); ++p) continue; if (*p == 0) continue; if (allow_includes && strncmp(p, include_str, include_len) == 0 && isspace(p[include_len])) { p += include_len; while (isspace(*p)) p++; tmp = p; while (*tmp != '\0' && !isspace(*tmp)) tmp++; *tmp = '\0'; dprintf("Trying to include files in '%s'\n", p); nents = scandir(p, &ent, configfiles, alphasort); if (nents == -1) { dprintf("Unable to open '%s': %s\n", p, strerror(errno)); continue; } for (i = 0; i < nents; i++) { if (snprintf(file, sizeof(file), "%s/%s", p, ent[i]->d_name) >= (int)sizeof(file)) { dprintf("ignoring path too long: " "'%s/%s'\n", p, ent[i]->d_name); free(ent[i]); continue; } free(ent[i]); cf2 = fopen(file, "r"); if (cf2 == NULL) continue; dprintf("reading %s\n", file); readconfigfile(cf2, nextp, 0); fclose(cf2); } free(ent); continue; } if (*p == '#') { p++; if (*p != '!' && *p != '+' && *p != '-') continue; } if (*p == '+' || *p == '-') { host[0] = *p++; while (isspace(*p)) p++; if ((!*p) || (*p == '*')) { (void)strlcpy(host, "*", sizeof(host)); continue; } if (*p == '@') p = LocalHostName; for (i = 1; i < MAXHOSTNAMELEN - 1; i++) { if (!isalnum(*p) && *p != '.' && *p != '-' && *p != ',' && *p != ':' && *p != '%') break; host[i] = *p++; } host[i] = '\0'; continue; } if (*p == '!') { p++; while (isspace(*p)) p++; if ((!*p) || (*p == '*')) { (void)strlcpy(prog, "*", sizeof(prog)); continue; } for (i = 0; i < LINE_MAX - 1; i++) { if (!isprint(p[i]) || isspace(p[i])) break; prog[i] = p[i]; } prog[i] = 0; continue; } for (p = cline + 1; *p != '\0'; p++) { if (*p != '#') continue; if (*(p - 1) == '\\') { strcpy(p - 1, p); p--; continue; } *p = '\0'; break; } for (i = strlen(cline) - 1; i >= 0 && isspace(cline[i]); i--) cline[i] = '\0'; f = (struct filed *)calloc(1, sizeof(*f)); if (f == NULL) { logerror("calloc"); exit(1); } *nextp = f; nextp = &f->f_next; cfline(cline, f, prog, host); } } /* * INIT -- Initialize syslogd from configuration table */ static void init(int signo) { int i; FILE *cf; struct filed *f, *next, **nextp; char *p; char oldLocalHostName[MAXHOSTNAMELEN]; char hostMsg[2*MAXHOSTNAMELEN+40]; char bootfileMsg[LINE_MAX]; dprintf("init\n"); /* * Load hostname (may have changed). */ if (signo != 0) (void)strlcpy(oldLocalHostName, LocalHostName, sizeof(oldLocalHostName)); if (gethostname(LocalHostName, sizeof(LocalHostName))) err(EX_OSERR, "gethostname() failed"); if ((p = strchr(LocalHostName, '.')) != NULL) { *p++ = '\0'; LocalDomain = p; } else { LocalDomain = ""; } /* * Load / reload timezone data (in case it changed). * * Just calling tzset() again does not work, the timezone code * caches the result. However, by setting the TZ variable, one * can defeat the caching and have the timezone code really * reload the timezone data. Respect any initial setting of * TZ, in case the system is configured specially. */ dprintf("loading timezone data via tzset()\n"); if (getenv("TZ")) { tzset(); } else { setenv("TZ", ":/etc/localtime", 1); tzset(); unsetenv("TZ"); } /* * Close all open log files. 
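 *
 * Each output is flushed before it is closed, so a suppressed
 * "last message repeated N times" line is not lost across a
 * reconfiguration.  A hypothetical sketch of the per-entry teardown
 * performed below:
 */
#if 0
	if (f->f_prevcount)
		fprintlog(f, 0, (char *)NULL);	/* emit pending repeats */
	close_filed(f);				/* then drop the descriptor */
#endif
	/* end of illustrative sketch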
*/ Initialized = 0; for (f = Files; f != NULL; f = next) { /* flush any pending output */ if (f->f_prevcount) fprintlog(f, 0, (char *)NULL); switch (f->f_type) { case F_FILE: case F_FORW: case F_CONSOLE: case F_TTY: close_filed(f); break; case F_PIPE: if (f->f_un.f_pipe.f_pid > 0) { close_filed(f); deadq_enter(f->f_un.f_pipe.f_pid, f->f_un.f_pipe.f_pname); } f->f_un.f_pipe.f_pid = 0; break; } next = f->f_next; if (f->f_program) free(f->f_program); if (f->f_host) free(f->f_host); free((char *)f); } Files = NULL; - *nextp = NULL; + nextp = &Files; /* open the configuration file */ if ((cf = fopen(ConfFile, "r")) == NULL) { dprintf("cannot open %s\n", ConfFile); *nextp = (struct filed *)calloc(1, sizeof(*f)); if (*nextp == NULL) { logerror("calloc"); exit(1); } cfline("*.ERR\t/dev/console", *nextp, "*", "*"); (*nextp)->f_next = (struct filed *)calloc(1, sizeof(*f)); if ((*nextp)->f_next == NULL) { logerror("calloc"); exit(1); } cfline("*.PANIC\t*", (*nextp)->f_next, "*", "*"); Initialized = 1; return; } readconfigfile(cf, &Files, 1); /* close the configuration file */ (void)fclose(cf); Initialized = 1; if (Debug) { int port; for (f = Files; f; f = f->f_next) { for (i = 0; i <= LOG_NFACILITIES; i++) if (f->f_pmask[i] == INTERNAL_NOPRI) printf("X "); else printf("%d ", f->f_pmask[i]); printf("%s: ", TypeNames[f->f_type]); switch (f->f_type) { case F_FILE: printf("%s", f->f_un.f_fname); break; case F_CONSOLE: case F_TTY: printf("%s%s", _PATH_DEV, f->f_un.f_fname); break; case F_FORW: port = (int)ntohs(((struct sockaddr_in *) (f->f_un.f_forw.f_addr->ai_addr))->sin_port); if (port != 514) { printf("%s:%d", f->f_un.f_forw.f_hname, port); } else { printf("%s", f->f_un.f_forw.f_hname); } break; case F_PIPE: printf("%s", f->f_un.f_pipe.f_pname); break; case F_USERS: for (i = 0; i < MAXUNAMES && *f->f_un.f_uname[i]; i++) printf("%s, ", f->f_un.f_uname[i]); break; } if (f->f_program) printf(" (%s)", f->f_program); printf("\n"); } } logmsg(LOG_SYSLOG|LOG_INFO, "syslogd: restart", LocalHostName, ADDDATE); dprintf("syslogd: restarted\n"); /* * Log a change in hostname, but only on a restart. */ if (signo != 0 && strcmp(oldLocalHostName, LocalHostName) != 0) { (void)snprintf(hostMsg, sizeof(hostMsg), "syslogd: hostname changed, \"%s\" to \"%s\"", oldLocalHostName, LocalHostName); logmsg(LOG_SYSLOG|LOG_INFO, hostMsg, LocalHostName, ADDDATE); dprintf("%s\n", hostMsg); } /* * Log the kernel boot file if we aren't going to use it as * the prefix, and if this is *not* a restart. 
*/ if (signo == 0 && !use_bootfile) { (void)snprintf(bootfileMsg, sizeof(bootfileMsg), "syslogd: kernel boot file is %s", bootfile); logmsg(LOG_KERN|LOG_INFO, bootfileMsg, LocalHostName, ADDDATE); dprintf("%s\n", bootfileMsg); } } /* * Crack a configuration file line */ static void cfline(const char *line, struct filed *f, const char *prog, const char *host) { struct addrinfo hints, *res; int error, i, pri, syncfile; const char *p, *q; char *bp; char buf[MAXLINE], ebuf[100]; dprintf("cfline(\"%s\", f, \"%s\", \"%s\")\n", line, prog, host); errno = 0; /* keep strerror() stuff out of logerror messages */ /* clear out file entry */ memset(f, 0, sizeof(*f)); for (i = 0; i <= LOG_NFACILITIES; i++) f->f_pmask[i] = INTERNAL_NOPRI; /* save hostname if any */ if (host && *host == '*') host = NULL; if (host) { int hl; f->f_host = strdup(host); if (f->f_host == NULL) { logerror("strdup"); exit(1); } hl = strlen(f->f_host); if (hl > 0 && f->f_host[hl-1] == '.') f->f_host[--hl] = '\0'; trimdomain(f->f_host, hl); } /* save program name if any */ if (prog && *prog == '*') prog = NULL; if (prog) { f->f_program = strdup(prog); if (f->f_program == NULL) { logerror("strdup"); exit(1); } } /* scan through the list of selectors */ for (p = line; *p && *p != '\t' && *p != ' ';) { int pri_done; int pri_cmp; int pri_invert; /* find the end of this facility name list */ for (q = p; *q && *q != '\t' && *q != ' ' && *q++ != '.'; ) continue; /* get the priority comparison */ pri_cmp = 0; pri_done = 0; pri_invert = 0; if (*q == '!') { pri_invert = 1; q++; } while (!pri_done) { switch (*q) { case '<': pri_cmp |= PRI_LT; q++; break; case '=': pri_cmp |= PRI_EQ; q++; break; case '>': pri_cmp |= PRI_GT; q++; break; default: pri_done++; break; } } /* collect priority name */ for (bp = buf; *q && !strchr("\t,; ", *q); ) *bp++ = *q++; *bp = '\0'; /* skip cruft */ while (strchr(",;", *q)) q++; /* decode priority name */ if (*buf == '*') { pri = LOG_PRIMASK; pri_cmp = PRI_LT | PRI_EQ | PRI_GT; } else { /* Ignore trailing spaces. */ for (i = strlen(buf) - 1; i >= 0 && buf[i] == ' '; i--) buf[i] = '\0'; pri = decode(buf, prioritynames); if (pri < 0) { errno = 0; (void)snprintf(ebuf, sizeof ebuf, "unknown priority name \"%s\"", buf); logerror(ebuf); return; } } if (!pri_cmp) pri_cmp = (UniquePriority) ? (PRI_EQ) : (PRI_EQ | PRI_GT) ; if (pri_invert) pri_cmp ^= PRI_LT | PRI_EQ | PRI_GT; /* scan facilities */ while (*p && !strchr("\t.; ", *p)) { for (bp = buf; *p && !strchr("\t,;. ", *p); ) *bp++ = *p++; *bp = '\0'; if (*buf == '*') { for (i = 0; i < LOG_NFACILITIES; i++) { f->f_pmask[i] = pri; f->f_pcmp[i] = pri_cmp; } } else { i = decode(buf, facilitynames); if (i < 0) { errno = 0; (void)snprintf(ebuf, sizeof ebuf, "unknown facility name \"%s\"", buf); logerror(ebuf); return; } f->f_pmask[i >> 3] = pri; f->f_pcmp[i >> 3] = pri_cmp; } while (*p == ',' || *p == ' ') p++; } p = q; } /* skip to action part */ while (*p == '\t' || *p == ' ') p++; if (*p == '-') { syncfile = 0; p++; } else syncfile = 1; switch (*p) { case '@': { char *tp; char endkey = ':'; /* * scan forward to see if there is a port defined. * so we can't use strlcpy.. 
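 *
 * Hypothetical examples of the forwarding targets this scanner
 * accepts:
 *
 *	@loghost			(port defaults to "syslog")
 *	@loghost:5514			(explicit port)
 *	@[2001:db8::1]:5514		(IPv6 literal, hence the ']' scan)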
*/ i = sizeof(f->f_un.f_forw.f_hname); tp = f->f_un.f_forw.f_hname; p++; /* * an ipv6 address should start with a '[' in that case * we should scan for a ']' */ if (*p == '[') { p++; endkey = ']'; } while (*p && (*p != endkey) && (i-- > 0)) { *tp++ = *p++; } if (endkey == ']' && *p == endkey) p++; *tp = '\0'; } /* See if we copied a domain and have a port */ if (*p == ':') p++; else p = NULL; memset(&hints, 0, sizeof(hints)); hints.ai_family = family; hints.ai_socktype = SOCK_DGRAM; error = getaddrinfo(f->f_un.f_forw.f_hname, p ? p : "syslog", &hints, &res); if (error) { logerror(gai_strerror(error)); break; } f->f_un.f_forw.f_addr = res; f->f_type = F_FORW; break; case '/': if ((f->f_file = open(p, logflags, 0600)) < 0) { f->f_type = F_UNUSED; logerror(p); break; } if (syncfile) f->f_flags |= FFLAG_SYNC; if (isatty(f->f_file)) { if (strcmp(p, ctty) == 0) f->f_type = F_CONSOLE; else f->f_type = F_TTY; (void)strlcpy(f->f_un.f_fname, p + sizeof(_PATH_DEV) - 1, sizeof(f->f_un.f_fname)); } else { (void)strlcpy(f->f_un.f_fname, p, sizeof(f->f_un.f_fname)); f->f_type = F_FILE; } break; case '|': f->f_un.f_pipe.f_pid = 0; (void)strlcpy(f->f_un.f_pipe.f_pname, p + 1, sizeof(f->f_un.f_pipe.f_pname)); f->f_type = F_PIPE; break; case '*': f->f_type = F_WALL; break; default: for (i = 0; i < MAXUNAMES && *p; i++) { for (q = p; *q && *q != ','; ) q++; (void)strncpy(f->f_un.f_uname[i], p, MAXLOGNAME - 1); if ((q - p) >= MAXLOGNAME) f->f_un.f_uname[i][MAXLOGNAME - 1] = '\0'; else f->f_un.f_uname[i][q - p] = '\0'; while (*q == ',' || *q == ' ') q++; p = q; } f->f_type = F_USERS; break; } } /* * Decode a symbolic name to a numeric value */ static int decode(const char *name, const CODE *codetab) { const CODE *c; char *p, buf[40]; if (isdigit(*name)) return (atoi(name)); for (p = buf; *name && p < &buf[sizeof(buf) - 1]; p++, name++) { if (isupper(*name)) *p = tolower(*name); else *p = *name; } *p = '\0'; for (c = codetab; c->c_name; c++) if (!strcmp(buf, c->c_name)) return (c->c_val); return (-1); } static void markit(void) { struct filed *f; dq_t q, next; now = time((time_t *)NULL); MarkSeq += TIMERINTVL; if (MarkSeq >= MarkInterval) { logmsg(LOG_INFO, "-- MARK --", LocalHostName, ADDDATE|MARK); MarkSeq = 0; } for (f = Files; f; f = f->f_next) { if (f->f_prevcount && now >= REPEATTIME(f)) { dprintf("flush %s: repeated %d times, %d sec.\n", TypeNames[f->f_type], f->f_prevcount, repeatinterval[f->f_repeatcount]); fprintlog(f, 0, (char *)NULL); BACKOFF(f); } } /* Walk the dead queue, and see if we should signal somebody. */ for (q = TAILQ_FIRST(&deadq_head); q != NULL; q = next) { next = TAILQ_NEXT(q, dq_entries); switch (q->dq_timeout) { case 0: /* Already signalled once, try harder now. */ if (kill(q->dq_pid, SIGKILL) != 0) (void)deadq_remove(q->dq_pid); break; case 1: /* * Timed out on dead queue, send terminate * signal. Note that we leave the removal * from the dead queue to reapchild(), which * will also log the event (unless the process * didn't even really exist, in which case we simply * drop it from the dead queue). */ if (kill(q->dq_pid, SIGTERM) != 0) (void)deadq_remove(q->dq_pid); /* FALLTHROUGH */ default: q->dq_timeout--; } } MarkSet = 0; (void)alarm(TIMERINTVL); } /* * fork off and become a daemon, but wait for the child to come online * before returning to the parent, or we get disk thrashing at boot etc. * Set a timer so we don't hang forever if it wedges.
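 *
 * The handshake is one-directional: after the child's first init()
 * pass the main loop sends the parent SIGALRM (the "kill(ppid,
 * SIGALRM)" in the select loop above), and the parent _exits 0 from
 * timedout().  A hypothetical sketch of the child's side:
 */
#if 0
	init(0);			/* first configuration pass */
	if (ppid != 1)
		kill(ppid, SIGALRM);	/* tell the waiting parent we are up */
#endif
	/* end of illustrative sketch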
*/ static int waitdaemon(int nochdir, int noclose, int maxwait) { int fd; int status; pid_t pid, childpid; switch (childpid = fork()) { case -1: return (-1); case 0: break; default: signal(SIGALRM, timedout); alarm(maxwait); while ((pid = wait3(&status, 0, NULL)) != -1) { if (WIFEXITED(status)) errx(1, "child pid %d exited with return code %d", pid, WEXITSTATUS(status)); if (WIFSIGNALED(status)) errx(1, "child pid %d exited on signal %d%s", pid, WTERMSIG(status), WCOREDUMP(status) ? " (core dumped)" : ""); if (pid == childpid) /* it's gone... */ break; } exit(0); } if (setsid() == -1) return (-1); if (!nochdir) (void)chdir("/"); if (!noclose && (fd = open(_PATH_DEVNULL, O_RDWR, 0)) != -1) { (void)dup2(fd, STDIN_FILENO); (void)dup2(fd, STDOUT_FILENO); (void)dup2(fd, STDERR_FILENO); if (fd > 2) (void)close (fd); } return (getppid()); } /* * We get a SIGALRM from the child when it's running and finished doing its * fsync()'s or O_SYNC writes for all the boot messages. * * We also get a signal from the kernel if the timer expires, so check to * see what happened. */ static void timedout(int sig __unused) { int left; left = alarm(0); signal(SIGALRM, SIG_DFL); if (left == 0) errx(1, "timed out waiting for child"); else _exit(0); } /* * Add `s' to the list of allowable peer addresses to accept messages * from. * * `s' is a string in the form: * * [*]domainname[:{servicename|portnumber|*}] * * or * * netaddr/maskbits[:{servicename|portnumber|*}] * * Returns -1 on error, 0 if the argument was valid. */ static int allowaddr(char *s) { char *cp1, *cp2; struct allowedpeer ap; struct servent *se; int masklen = -1; struct addrinfo hints, *res; struct in_addr *addrp, *maskp; #ifdef INET6 int i; u_int32_t *addr6p, *mask6p; #endif char ip[NI_MAXHOST]; #ifdef INET6 if (*s != '[' || (cp1 = strchr(s + 1, ']')) == NULL) #endif cp1 = s; if ((cp1 = strrchr(cp1, ':'))) { /* service/port provided */ *cp1++ = '\0'; if (strlen(cp1) == 1 && *cp1 == '*') /* any port allowed */ ap.port = 0; else if ((se = getservbyname(cp1, "udp"))) { ap.port = ntohs(se->s_port); } else { ap.port = strtol(cp1, &cp2, 0); if (*cp2 != '\0') return (-1); /* port not numeric */ } } else { if ((se = getservbyname("syslog", "udp"))) ap.port = ntohs(se->s_port); else /* sanity, should not happen */ ap.port = 514; } if ((cp1 = strchr(s, '/')) != NULL && strspn(cp1 + 1, "0123456789") == strlen(cp1 + 1)) { *cp1 = '\0'; if ((masklen = atoi(cp1 + 1)) < 0) return (-1); } #ifdef INET6 if (*s == '[') { cp2 = s + strlen(s) - 1; if (*cp2 == ']') { ++s; *cp2 = '\0'; } else { cp2 = NULL; } } else { cp2 = NULL; } #endif memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_DGRAM; hints.ai_flags = AI_PASSIVE | AI_NUMERICHOST; if (getaddrinfo(s, NULL, &hints, &res) == 0) { ap.isnumeric = 1; memcpy(&ap.a_addr, res->ai_addr, res->ai_addrlen); memset(&ap.a_mask, 0, sizeof(ap.a_mask)); ap.a_mask.ss_family = res->ai_family; if (res->ai_family == AF_INET) { ap.a_mask.ss_len = sizeof(struct sockaddr_in); maskp = &((struct sockaddr_in *)&ap.a_mask)->sin_addr; addrp = &((struct sockaddr_in *)&ap.a_addr)->sin_addr; if (masklen < 0) { /* use default netmask */ if (IN_CLASSA(ntohl(addrp->s_addr))) maskp->s_addr = htonl(IN_CLASSA_NET); else if (IN_CLASSB(ntohl(addrp->s_addr))) maskp->s_addr = htonl(IN_CLASSB_NET); else maskp->s_addr = htonl(IN_CLASSC_NET); } else if (masklen <= 32) { /* convert masklen to netmask */ if (masklen == 0) maskp->s_addr = 0; else maskp->s_addr = htonl(~((1 << (32 - masklen)) - 1)); } else { freeaddrinfo(res);
return (-1); } /* Lose any host bits in the network number. */ addrp->s_addr &= maskp->s_addr; } #ifdef INET6 else if (res->ai_family == AF_INET6 && masklen <= 128) { ap.a_mask.ss_len = sizeof(struct sockaddr_in6); if (masklen < 0) masklen = 128; mask6p = (u_int32_t *)&((struct sockaddr_in6 *)&ap.a_mask)->sin6_addr; /* convert masklen to netmask */ while (masklen > 0) { if (masklen < 32) { *mask6p = htonl(~(0xffffffff >> masklen)); break; } *mask6p++ = 0xffffffff; masklen -= 32; } /* Lose any host bits in the network number. */ mask6p = (u_int32_t *)&((struct sockaddr_in6 *)&ap.a_mask)->sin6_addr; addr6p = (u_int32_t *)&((struct sockaddr_in6 *)&ap.a_addr)->sin6_addr; for (i = 0; i < 4; i++) addr6p[i] &= mask6p[i]; } #endif else { freeaddrinfo(res); return (-1); } freeaddrinfo(res); } else { /* arg `s' is domain name */ ap.isnumeric = 0; ap.a_name = s; if (cp1) *cp1 = '/'; #ifdef INET6 if (cp2) { *cp2 = ']'; --s; } #endif } if (Debug) { printf("allowaddr: rule %d: ", NumAllowed); if (ap.isnumeric) { printf("numeric, "); getnameinfo((struct sockaddr *)&ap.a_addr, ((struct sockaddr *)&ap.a_addr)->sa_len, ip, sizeof ip, NULL, 0, NI_NUMERICHOST); printf("addr = %s, ", ip); getnameinfo((struct sockaddr *)&ap.a_mask, ((struct sockaddr *)&ap.a_mask)->sa_len, ip, sizeof ip, NULL, 0, NI_NUMERICHOST); printf("mask = %s; ", ip); } else { printf("domainname = %s; ", ap.a_name); } printf("port = %d\n", ap.port); } if ((AllowedPeers = realloc(AllowedPeers, ++NumAllowed * sizeof(struct allowedpeer))) == NULL) { logerror("realloc"); exit(1); } memcpy(&AllowedPeers[NumAllowed - 1], &ap, sizeof(struct allowedpeer)); return (0); } /* * Validate that the remote peer has permission to log to us. */ static int validate(struct sockaddr *sa, const char *hname) { int i; size_t l1, l2; char *cp, name[NI_MAXHOST], ip[NI_MAXHOST], port[NI_MAXSERV]; struct allowedpeer *ap; struct sockaddr_in *sin4, *a4p = NULL, *m4p = NULL; #ifdef INET6 int j, reject; struct sockaddr_in6 *sin6, *a6p = NULL, *m6p = NULL; #endif struct addrinfo hints, *res; u_short sport; if (NumAllowed == 0) /* traditional behaviour, allow everything */ return (1); (void)strlcpy(name, hname, sizeof(name)); memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_DGRAM; hints.ai_flags = AI_PASSIVE | AI_NUMERICHOST; if (getaddrinfo(name, NULL, &hints, &res) == 0) freeaddrinfo(res); else if (strchr(name, '.') == NULL) { strlcat(name, ".", sizeof name); strlcat(name, LocalDomain, sizeof name); } if (getnameinfo(sa, sa->sa_len, ip, sizeof ip, port, sizeof port, NI_NUMERICHOST | NI_NUMERICSERV) != 0) return (0); /* for safety, should not occur */ dprintf("validate: dgram from IP %s, port %s, name %s;\n", ip, port, name); sport = atoi(port); /* now, walk down the list */ for (i = 0, ap = AllowedPeers; i < NumAllowed; i++, ap++) { if (ap->port != 0 && ap->port != sport) { dprintf("rejected in rule %d due to port mismatch.\n", i); continue; } if (ap->isnumeric) { if (ap->a_addr.ss_family != sa->sa_family) { dprintf("rejected in rule %d due to address family mismatch.\n", i); continue; } if (ap->a_addr.ss_family == AF_INET) { sin4 = (struct sockaddr_in *)sa; a4p = (struct sockaddr_in *)&ap->a_addr; m4p = (struct sockaddr_in *)&ap->a_mask; if ((sin4->sin_addr.s_addr & m4p->sin_addr.s_addr) != a4p->sin_addr.s_addr) { dprintf("rejected in rule %d due to IP mismatch.\n", i); continue; } } #ifdef INET6 else if (ap->a_addr.ss_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)sa; a6p = (struct sockaddr_in6 *)&ap->a_addr; m6p = (struct 
sockaddr_in6 *)&ap->a_mask; if (a6p->sin6_scope_id != 0 && sin6->sin6_scope_id != a6p->sin6_scope_id) { dprintf("rejected in rule %d due to scope mismatch.\n", i); continue; } reject = 0; for (j = 0; j < 16; j += 4) { if ((*(u_int32_t *)&sin6->sin6_addr.s6_addr[j] & *(u_int32_t *)&m6p->sin6_addr.s6_addr[j]) != *(u_int32_t *)&a6p->sin6_addr.s6_addr[j]) { ++reject; break; } } if (reject) { dprintf("rejected in rule %d due to IP mismatch.\n", i); continue; } } #endif else continue; } else { cp = ap->a_name; l1 = strlen(name); if (*cp == '*') { /* allow wildmatch */ cp++; l2 = strlen(cp); if (l2 > l1 || memcmp(cp, &name[l1 - l2], l2) != 0) { dprintf("rejected in rule %d due to name mismatch.\n", i); continue; } } else { /* exact match */ l2 = strlen(cp); if (l2 != l1 || memcmp(cp, name, l1) != 0) { dprintf("rejected in rule %d due to name mismatch.\n", i); continue; } } } dprintf("accepted in rule %d.\n", i); return (1); /* hooray! */ } return (0); } /* * Fairly similar to popen(3), but returns an open descriptor, as * opposed to a FILE *. */ static int p_open(const char *prog, pid_t *rpid) { int pfd[2], nulldesc; pid_t pid; sigset_t omask, mask; char *argv[4]; /* sh -c cmd NULL */ char errmsg[200]; if (pipe(pfd) == -1) return (-1); if ((nulldesc = open(_PATH_DEVNULL, O_RDWR)) == -1) /* we are royally screwed anyway */ return (-1); sigemptyset(&mask); sigaddset(&mask, SIGALRM); sigaddset(&mask, SIGHUP); sigprocmask(SIG_BLOCK, &mask, &omask); switch ((pid = fork())) { case -1: sigprocmask(SIG_SETMASK, &omask, 0); close(nulldesc); return (-1); case 0: argv[0] = strdup("sh"); argv[1] = strdup("-c"); argv[2] = strdup(prog); argv[3] = NULL; if (argv[0] == NULL || argv[1] == NULL || argv[2] == NULL) { logerror("strdup"); exit(1); } alarm(0); (void)setsid(); /* Avoid catching SIGHUPs. */ /* * Throw away pending signals, and reset signal * behaviour to standard values. */ signal(SIGALRM, SIG_IGN); signal(SIGHUP, SIG_IGN); sigprocmask(SIG_SETMASK, &omask, 0); signal(SIGPIPE, SIG_DFL); signal(SIGQUIT, SIG_DFL); signal(SIGALRM, SIG_DFL); signal(SIGHUP, SIG_DFL); dup2(pfd[0], STDIN_FILENO); dup2(nulldesc, STDOUT_FILENO); dup2(nulldesc, STDERR_FILENO); closefrom(3); (void)execvp(_PATH_BSHELL, argv); _exit(255); } sigprocmask(SIG_SETMASK, &omask, 0); close(nulldesc); close(pfd[0]); /* * Avoid blocking on a hung pipe. With O_NONBLOCK, we are * supposed to get an EWOULDBLOCK on writev(2), which is * caught by the logic above anyway, which will in turn close * the pipe, and fork a new logging subprocess if necessary. * The stale subprocess will be killed some time later unless * it terminated itself due to closing its input pipe (so we * get rid of really dead puppies). */ if (fcntl(pfd[1], F_SETFL, O_NONBLOCK) == -1) { /* This is bad. */ (void)snprintf(errmsg, sizeof errmsg, "Warning: cannot change pipe to PID %d to " "non-blocking behaviour.", (int)pid); logerror(errmsg); } *rpid = pid; return (pfd[1]); } static void deadq_enter(pid_t pid, const char *name) { dq_t p; int status; /* * Be paranoid, if we can't signal the process, don't enter it * into the dead queue (perhaps it's already dead). If possible, * we try to fetch and log the child's status. 
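 *
 * kill(2) with signal number 0 performs only the validity checks,
 * which makes it a cheap "does this pid still exist" probe.  A
 * hypothetical sketch:
 */
#if 0
	if (kill(pid, 0) == 0)
		;	/* pid exists and we may signal it */
	else if (errno == ESRCH)
		;	/* no such process: reap instead of queueing */
#endif
	/* end of illustrative sketch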
*/ if (kill(pid, 0) != 0) { if (waitpid(pid, &status, WNOHANG) > 0) log_deadchild(pid, status, name); return; } p = malloc(sizeof(struct deadq_entry)); if (p == NULL) { logerror("malloc"); exit(1); } p->dq_pid = pid; p->dq_timeout = DQ_TIMO_INIT; TAILQ_INSERT_TAIL(&deadq_head, p, dq_entries); } static int deadq_remove(pid_t pid) { dq_t q; TAILQ_FOREACH(q, &deadq_head, dq_entries) { if (q->dq_pid == pid) { TAILQ_REMOVE(&deadq_head, q, dq_entries); free(q); return (1); } } return (0); } static void log_deadchild(pid_t pid, int status, const char *name) { int code; char buf[256]; const char *reason; errno = 0; /* Keep strerror() stuff out of logerror messages. */ if (WIFSIGNALED(status)) { reason = "due to signal"; code = WTERMSIG(status); } else { reason = "with status"; code = WEXITSTATUS(status); if (code == 0) return; } (void)snprintf(buf, sizeof buf, "Logging subprocess %d (%s) exited %s %d.", pid, name, reason, code); logerror(buf); } static int * socksetup(int af, char *bindhostname) { struct addrinfo hints, *res, *r; const char *bindservice; char *cp; int error, maxs, *s, *socks; /* * We have to handle this case for backwards compatibility: * If there are two (or more) colons but no '[' and ']', * assume this is an inet6 address without a service. */ bindservice = "syslog"; if (bindhostname != NULL) { #ifdef INET6 if (*bindhostname == '[' && (cp = strchr(bindhostname + 1, ']')) != NULL) { ++bindhostname; *cp = '\0'; if (cp[1] == ':' && cp[2] != '\0') bindservice = cp + 2; } else { #endif cp = strchr(bindhostname, ':'); if (cp != NULL && strchr(cp + 1, ':') == NULL) { *cp = '\0'; if (cp[1] != '\0') bindservice = cp + 1; if (cp == bindhostname) bindhostname = NULL; } #ifdef INET6 } #endif } memset(&hints, 0, sizeof(hints)); hints.ai_flags = AI_PASSIVE; hints.ai_family = af; hints.ai_socktype = SOCK_DGRAM; error = getaddrinfo(bindhostname, bindservice, &hints, &res); if (error) { logerror(gai_strerror(error)); errno = 0; die(0); } /* Count max number of sockets we may open */ for (maxs = 0, r = res; r; r = r->ai_next, maxs++); socks = malloc((maxs+1) * sizeof(int)); if (socks == NULL) { logerror("couldn't allocate memory for sockets"); die(0); } *socks = 0; /* num of sockets counter at start of array */ s = socks + 1; for (r = res; r; r = r->ai_next) { int on = 1; *s = socket(r->ai_family, r->ai_socktype, r->ai_protocol); if (*s < 0) { logerror("socket"); continue; } #ifdef INET6 if (r->ai_family == AF_INET6) { if (setsockopt(*s, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&on, sizeof (on)) < 0) { logerror("setsockopt"); close(*s); continue; } } #endif if (setsockopt(*s, SOL_SOCKET, SO_REUSEADDR, (char *)&on, sizeof (on)) < 0) { logerror("setsockopt"); close(*s); continue; } /* * RFC 3164 recommends that client side messages * should come from the privileged syslogd port. * * If the system administrator chooses not to obey * this, we can skip the bind() step so that the * system will choose a port for us.
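 *
 * In other words, with -N (NoBind) the socket is left unbound and
 * the kernel assigns an ephemeral source port on first use; without
 * it the socket is bound to the getaddrinfo(3) result, normally
 * port 514.  A hypothetical sketch of the unbound case (s and r are
 * stand-ins):
 */
#if 0
	/* No bind(2): the kernel picks a source port at the first send. */
	sendto(s, line, strlen(line), 0, r->ai_addr, r->ai_addrlen);
#endif
	/* end of illustrative sketch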
*/ if (!NoBind) { if (bind(*s, r->ai_addr, r->ai_addrlen) < 0) { logerror("bind"); close(*s); continue; } if (!SecureMode) increase_rcvbuf(*s); } (*socks)++; dprintf("socksetup: new socket fd is %d\n", *s); s++; } if (*socks == 0) { free(socks); if (Debug) return (NULL); else die(0); } if (res) freeaddrinfo(res); return (socks); } static void increase_rcvbuf(int fd) { socklen_t len, slen; slen = sizeof(len); if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &len, &slen) == 0) { if (len < RCVBUF_MINSIZE) { len = RCVBUF_MINSIZE; setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &len, sizeof(len)); } } } Index: projects/clang391-import =================================================================== --- projects/clang391-import (revision 309262) +++ projects/clang391-import (revision 309263) Property changes on: projects/clang391-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r309213-309262