Index: sys/amd64/include/atomic.h =================================================================== --- sys/amd64/include/atomic.h +++ sys/amd64/include/atomic.h @@ -96,7 +96,7 @@ * Kernel modules call real functions which are built into the kernel. * This allows kernel modules to be portable between UP and SMP systems. */ -#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM) +#if (defined(KLD_MODULE) && !defined(KLD_BASE)) || !defined(__GNUCLIKE_ASM) #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) Index: sys/conf/kern.post.mk =================================================================== --- sys/conf/kern.post.mk +++ sys/conf/kern.post.mk @@ -185,13 +185,19 @@ ${CC} ${HACK_EXTRA_FLAGS} -nostdlib hack.c -o hack.pico rm -f hack.c +offset.inc: $S/kern/genoffset.sh genoffset.o + NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genoffset.sh genoffset.o > ${.TARGET} + +genoffset.o: $S/kern/genoffset.c + ${CC} -c ${CFLAGS:N-flto:N-fno-common} $S/kern/genoffset.c + assym.inc: $S/kern/genassym.sh genassym.o NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genassym.sh genassym.o > ${.TARGET} -genassym.o: $S/$M/$M/genassym.c +genassym.o: $S/$M/$M/genassym.c offset.inc ${CC} -c ${CFLAGS:N-flto:N-fno-common} $S/$M/$M/genassym.c -${SYSTEM_OBJS} genassym.o vers.o: opt_global.h +${SYSTEM_OBJS} genoffset.o genassym.o vers.o: opt_global.h .if !empty(.MAKE.MODE:Unormal:Mmeta) && empty(.MAKE.MODE:Unormal:Mnofilemon) _meta_filemon= 1 @@ -213,10 +219,10 @@ .endif kernel-depend: .depend -SRCS= assym.inc vnode_if.h ${BEFORE_DEPEND} ${CFILES} \ +SRCS= assym.inc offset.inc vnode_if.h ${BEFORE_DEPEND} ${CFILES} \ ${SYSTEM_CFILES} ${GEN_CFILES} ${SFILES} \ ${MFILES:T:S/.m$/.h/} -DEPENDOBJS+= ${SYSTEM_OBJS} genassym.o +DEPENDOBJS+= ${SYSTEM_OBJS} genassym.o genoffset.o DEPENDFILES= ${DEPENDOBJS:O:u:C/^/.depend./} .if ${MAKE_VERSION} < 20160220 DEPEND_MP?= -MP Index: 
sys/conf/kern.pre.mk =================================================================== --- sys/conf/kern.pre.mk +++ sys/conf/kern.pre.mk @@ -195,7 +195,7 @@ OFED_C_NOIMP= ${CC} -c -o ${.TARGET} ${OFEDCFLAGS} ${WERROR} ${PROF} OFED_C= ${OFED_C_NOIMP} ${.IMPSRC} -GEN_CFILES= $S/$M/$M/genassym.c ${MFILES:T:S/.m$/.c/} +GEN_CFILES= $S/$M/$M/genassym.c $S/kern/genoffset.c ${MFILES:T:S/.m$/.c/} SYSTEM_CFILES= config.c env.c hints.c vnode_if.c SYSTEM_DEP= Makefile ${SYSTEM_OBJS} SYSTEM_OBJS= locore.o ${MDOBJS} ${OBJS} Index: sys/dev/cxgbe/cxgbei/cxgbei.c =================================================================== --- sys/dev/cxgbe/cxgbei/cxgbei.c +++ sys/dev/cxgbe/cxgbei/cxgbei.c @@ -343,6 +343,7 @@ struct icl_cxgbei_pdu *icp = toep->ulpcb2; struct icl_pdu *ip; u_int pdu_len, val; + struct epoch_tracker et; MPASS(m == NULL); @@ -411,12 +412,12 @@ SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); icl_cxgbei_conn_pdu_free(NULL, ip); #ifdef INVARIANTS Index: sys/dev/cxgbe/tom/t4_connect.c =================================================================== --- sys/dev/cxgbe/tom/t4_connect.c +++ sys/dev/cxgbe/tom/t4_connect.c @@ -115,18 +115,19 @@ struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; struct toedev *tod = &toep->td->tod; + struct epoch_tracker et; free_atid(sc, atid); toep->tid = -1; CURVNET_SET(toep->vnet); if (status != EAGAIN) - INP_INFO_RLOCK(&V_tcbinfo); + NET_EPOCH_ENTER_ET(et); INP_WLOCK(inp); toe_connect_failed(tod, inp, status); final_cpl_received(toep); /* unlocks inp */ if (status != EAGAIN) - INP_INFO_RUNLOCK(&V_tcbinfo); + NET_EPOCH_EXIT_ET(et); CURVNET_RESTORE(); } Index: sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- sys/dev/cxgbe/tom/t4_cpl_io.c +++ 
sys/dev/cxgbe/tom/t4_cpl_io.c @@ -1235,6 +1235,7 @@ struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so; + struct epoch_tracker et; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif @@ -1268,7 +1269,7 @@ KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CURVNET_SET(toep->vnet); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); tp = intotcpcb(inp); @@ -1312,7 +1313,7 @@ case TCPS_FIN_WAIT_2: tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); INP_WLOCK(inp); @@ -1325,7 +1326,7 @@ } done: INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); return (0); } @@ -1344,6 +1345,7 @@ struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so = NULL; + struct epoch_tracker et; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif @@ -1354,7 +1356,7 @@ KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CURVNET_SET(toep->vnet); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); tp = intotcpcb(inp); @@ -1372,7 +1374,7 @@ tcp_twstart(tp); release: INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); INP_WLOCK(inp); @@ -1397,7 +1399,7 @@ } done: INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); return (0); } @@ -1452,6 +1454,7 @@ struct sge_wrq *ofld_txq = toep->ofld_txq; struct inpcb *inp; struct tcpcb *tp; + struct epoch_tracker et; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif @@ -1473,7 +1476,7 @@ inp = toep->inp; CURVNET_SET(toep->vnet); - INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ + INP_INFO_RLOCK_ET(&V_tcbinfo, 
et); /* for tcp_close */ INP_WLOCK(inp); tp = intotcpcb(inp); @@ -1507,7 +1510,7 @@ final_cpl_received(toep); done: - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); @@ -1560,6 +1563,7 @@ struct tcpcb *tp; struct socket *so; struct sockbuf *sb; + struct epoch_tracker et; int len; uint32_t ddp_placed = 0; @@ -1631,12 +1635,12 @@ INP_WUNLOCK(inp); CURVNET_SET(toep->vnet); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); return (0); Index: sys/dev/cxgbe/tom/t4_listen.c =================================================================== --- sys/dev/cxgbe/tom/t4_listen.c +++ sys/dev/cxgbe/tom/t4_listen.c @@ -1255,6 +1255,7 @@ int reject_reason, v, ntids; uint16_t vid; u_int wnd; + struct epoch_tracker et; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif @@ -1369,15 +1370,15 @@ REJECT_PASS_ACCEPT(); rpl = wrtod(wr); - INP_INFO_RLOCK(&V_tcbinfo); /* for 4-tuple check */ + INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for 4-tuple check */ /* Don't offload if the 4-tuple is already in use */ if (toe_4tuple_check(&inc, &th, ifp) != 0) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); free(wr, M_CXGBE); REJECT_PASS_ACCEPT(); } - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); inp = lctx->inp; /* listening socket, not owned by TOE */ INP_WLOCK(inp); @@ -1574,6 +1575,7 @@ struct tcpopt to; struct in_conninfo inc; struct toepcb *toep; + struct epoch_tracker et; u_int txqid, rxqid; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); @@ -1587,7 +1589,7 @@ ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe)); CURVNET_SET(lctx->vnet); - INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */ + 
INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for syncache_expand */ INP_WLOCK(inp); CTR6(KTR_CXGBE, @@ -1603,7 +1605,7 @@ } INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); return (0); } @@ -1629,7 +1631,7 @@ */ send_reset_synqe(TOEDEV(ifp), synqe); INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); return (0); } @@ -1695,7 +1697,7 @@ inp = release_lctx(sc, lctx); if (inp != NULL) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); release_synqe(synqe); Index: sys/dev/cxgbe/tom/t4_tls.c =================================================================== --- sys/dev/cxgbe/tom/t4_tls.c +++ sys/dev/cxgbe/tom/t4_tls.c @@ -1559,6 +1559,8 @@ SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { + struct epoch_tracker et; + CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, pdu_length); m_freem(m); @@ -1566,12 +1568,12 @@ INP_WUNLOCK(inp); CURVNET_SET(toep->vnet); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); return (0); Index: sys/dev/hwpmc/hwpmc_mod.c =================================================================== --- sys/dev/hwpmc/hwpmc_mod.c +++ sys/dev/hwpmc/hwpmc_mod.c @@ -85,6 +85,9 @@ #define free_domain(addr, type) free(addr, type) #endif +#define PMC_EPOCH_ENTER() struct epoch_tracker pmc_et; epoch_enter_preempt(global_epoch_preempt, &pmc_et) +#define PMC_EPOCH_EXIT() epoch_exit_preempt(global_epoch_preempt, &pmc_et) + /* * Types */ @@ -1752,12 +1755,12 @@ const struct pmc_process *pp; freepath = fullpath = NULL; - MPASS(!in_epoch()); + MPASS(!in_epoch(global_epoch_preempt)); pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); pid = td->td_proc->p_pid; - 
epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); /* Inform owners of all system-wide sampling PMCs. */ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) @@ -1778,7 +1781,7 @@ done: if (freepath) free(freepath, M_TEMP); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); } @@ -1797,12 +1800,12 @@ pid = td->td_proc->p_pid; - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; @@ -1824,7 +1827,7 @@ struct pmc_owner *po; struct pmckern_map_in *km, *kmbase; - MPASS(in_epoch() || sx_xlocked(&pmc_sx)); + MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx)); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); @@ -2106,13 +2109,13 @@ pk = (struct pmckern_procexec *) arg; - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); /* Inform owners of SS mode PMCs of the exec event. 
*/ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, PMC_ID_INVALID, p->p_pid, pk->pm_entryaddr, fullpath); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; @@ -2242,7 +2245,7 @@ break; case PMC_FN_MUNMAP: - MPASS(in_epoch() || sx_xlocked(&pmc_sx)); + MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx)); pmc_process_munmap(td, (struct pmckern_map_out *) arg); break; @@ -2479,7 +2482,7 @@ if (mode & PMC_FLAG_ALLOCATE) { if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) { wait_flag = M_WAITOK; - if ((mode & PMC_FLAG_NOWAIT) || in_epoch()) + if ((mode & PMC_FLAG_NOWAIT) || in_epoch(global_epoch_preempt)) wait_flag = M_NOWAIT; ptnew = malloc(THREADENTRY_SIZE, M_PMC, @@ -5070,11 +5073,11 @@ /* * Log a sysexit event to all SS PMC owners. */ - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_sysexit(po, p->p_pid); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); if (!is_using_hwpmcs) return; @@ -5255,13 +5258,13 @@ * If there are system-wide sampling PMCs active, we need to * log all fork events to their owner's logs. 
*/ - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); pmclog_process_proccreate(po, newproc, 1); } - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); if (!is_using_hwpmcs) return; @@ -5327,11 +5330,11 @@ { struct pmc_owner *po; - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_threadcreate(po, td, 1); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); } static void @@ -5339,11 +5342,11 @@ { struct pmc_owner *po; - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_threadexit(po, td); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); } static void @@ -5351,11 +5354,11 @@ { struct pmc_owner *po; - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_proccreate(po, p, 1 /* sync */); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); } static void @@ -5388,12 +5391,12 @@ /* * Notify owners of system sampling PMCs about KLD operations. */ - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, (pid_t) -1, (uintfptr_t) lf->address, lf->filename); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); /* * TODO: Notify owners of (all) process-sampling PMCs too. 
@@ -5406,12 +5409,12 @@ { struct pmc_owner *po; - epoch_enter_preempt(global_epoch_preempt); + PMC_EPOCH_ENTER(); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, (pid_t) -1, (uintfptr_t) address, (uintfptr_t) address + size); - epoch_exit_preempt(global_epoch_preempt); + PMC_EPOCH_EXIT(); /* * TODO: Notify owners of process-sampling PMCs. Index: sys/kern/genoffset.c =================================================================== --- /dev/null +++ sys/kern/genoffset.c @@ -0,0 +1,14 @@ + +#include +__FBSDID("$FreeBSD$"); +#include +#include +#include +#include + +OFFSYM(TD_PRE_EPOCH_PRIO, offsetof(struct thread, td_pre_epoch_prio), u_char, thread); +OFFSYM(TD_PRIORITY, offsetof(struct thread, td_priority), u_char, thread); +OFFSYM(TD_EPOCHNEST, offsetof(struct thread, td_epochnest), u_char, thread); +OFFSYM(TD_CRITNEST, offsetof(struct thread, td_critnest), int, thread); +OFFSYM(TD_PINNED, offsetof(struct thread, td_pinned), int, thread); +OFFSYM(TD_OWEPREEMPT, offsetof(struct thread, td_owepreempt), u_char, thread); Index: sys/kern/genoffset.sh =================================================================== --- /dev/null +++ sys/kern/genoffset.sh @@ -0,0 +1,112 @@ +#!/bin/sh +# $FreeBSD$ + +usage() +{ + echo "usage: genoffset [-o outfile] objfile" + exit 1 +} + + +work() +{ + echo "#ifndef _OFFSET_INC_" + echo "#define _OFFSET_INC_" + ${NM:='nm'} ${NMFLAGS} "$1" | ${AWK:='awk'} ' + / C .*_datatype_*/ { + type = substr($3, match($3, "_datatype_") + length("_datatype_")) + } + / C .*_parenttype_*/ { + parent = substr($3, match($3, "_parenttype_") + length("_parenttype_")) + } + / C .*sign$/ { + sign = substr($1, length($1) - 3, 4) + sub("^0*", "", sign) + if (sign != "") + sign = "-" + } + / C .*w0$/ { + w0 = substr($1, length($1) - 3, 4) + } + / C .*w1$/ { + w1 = substr($1, length($1) - 3, 4) + } + / C .*w2$/ { + w2 = substr($1, length($1) - 3, 4) + } + / C .*w3$/ { + w3 = substr($1, length($1) 
- 3, 4) + w = w3 w2 w1 w0 + sub("^0*", "", w) + if (w == "") + w = "0" + hex = "" + if (w != "0") + hex = "0x" + sub("w3$", "", $3) + member = tolower($3) + # This still has minor problems representing INT_MIN, etc. + # E.g., + # with 32-bit 2''s complement ints, this prints -0x80000000, + # which has the wrong type (unsigned int). + offset = sprintf("%s%s%s", sign, hex, w) + + structures[parent] = sprintf("%s%s %s %s\n", + structures[parent], offset, type, member) + } + END { + for (struct in structures) { + printf("struct %s_global {\n", struct); + n = split(structures[struct], members, "\n") + for (i = 1; i < n; i++) { + for (j = i + 1; j < n; j++) { + split(members[i], ivar, " ") + split(members[j], jvar, " ") + if (jvar[1] < ivar[1]) { + tmp = members[i] + members[i] = members[j] + members[j] = tmp + } + } + } + off = "0" + for (i = 1; i < n; i++) { + split(members[i], m, " ") + printf "\tu_char\tpad_%s[%s - %s];\n", m[3], m[1], off + printf "\t%s\t%s;\n", m[2], m[3] + off = sprintf("(%s + sizeof(%s))", m[1], m[2]) + } + printf("};\n"); + } + } + ' + + echo "#endif" +} + + +# +#MAIN PROGRAM +# +use_outfile="no" +while getopts "o:" option +do + case "$option" in + o) outfile="$OPTARG" + use_outfile="yes";; + *) usage;; + esac +done +shift $(($OPTIND - 1)) +case $# in +1) ;; +*) usage;; +esac + +if [ "$use_outfile" = "yes" ] +then + work $1 3>"$outfile" >&3 3>&- +else + work $1 +fi + Index: sys/kern/kern_switch.c =================================================================== --- sys/kern/kern_switch.c +++ sys/kern/kern_switch.c @@ -209,7 +209,7 @@ (long)td->td_proc->p_pid, td->td_name, td->td_critnest); } -static void __noinline +void __noinline critical_exit_preempt(void) { struct thread *td; Index: sys/kern/subr_epoch.c =================================================================== --- sys/kern/subr_epoch.c +++ sys/kern/subr_epoch.c @@ -58,17 +58,12 @@ #define MAX_ADAPTIVE_SPIN 1000 #define MAX_EPOCHS 64 -#ifdef __amd64__ -#define EPOCH_ALIGN
CACHE_LINE_SIZE*2 -#else -#define EPOCH_ALIGN CACHE_LINE_SIZE -#endif - -CTASSERT(sizeof(epoch_section_t) == sizeof(ck_epoch_section_t)); CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context)); SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW, 0, "epoch information"); SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW, 0, "epoch stats"); +CTASSERT(offsetof(struct thread, td_epochnest) == offsetof(struct thread_global, td_epochnest)); +CTASSERT(offsetof(struct thread, td_critnest) == offsetof(struct thread_global, td_critnest)); /* Stats. */ static counter_u64_t block_count; @@ -100,26 +95,8 @@ CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry, ck_epoch_entry_container) -typedef struct epoch_record { - ck_epoch_record_t er_record; - volatile struct threadlist er_tdlist; - volatile uint32_t er_gen; - uint32_t er_cpuid; -} *epoch_record_t; - -struct epoch_pcpu_state { - struct epoch_record eps_record; -} __aligned(EPOCH_ALIGN); - -struct epoch { - struct ck_epoch e_epoch __aligned(EPOCH_ALIGN); - struct epoch_pcpu_state *e_pcpu_dom[MAXMEMDOM] __aligned(EPOCH_ALIGN); - int e_idx; - int e_flags; - struct epoch_pcpu_state *e_pcpu[0]; -}; - -epoch_t allepochs[MAX_EPOCHS]; + + epoch_t allepochs[MAX_EPOCHS]; DPCPU_DEFINE(struct grouptask, epoch_cb_task); DPCPU_DEFINE(int, epoch_cb_count); @@ -192,17 +169,15 @@ epoch_init_numa(epoch_t epoch) { int domain, cpu_offset; - struct epoch_pcpu_state *eps; epoch_record_t er; for (domain = 0; domain < vm_ndomains; domain++) { - eps = malloc_domain(sizeof(*eps) * domcount[domain], M_EPOCH, + er = malloc_domain(sizeof(*er) * domcount[domain], M_EPOCH, domain, M_ZERO | M_WAITOK); - epoch->e_pcpu_dom[domain] = eps; + epoch->e_pcpu_dom[domain] = er; cpu_offset = domoffsets[domain]; - for (int i = 0; i < domcount[domain]; i++, eps++) { - epoch->e_pcpu[cpu_offset + i] = eps; - er = &eps->eps_record; + for (int i = 0; i < domcount[domain]; i++, er++) { + epoch->e_pcpu[cpu_offset + i] = er; ck_epoch_register(&epoch->e_epoch, 
&er->er_record, NULL); TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist); er->er_cpuid = cpu_offset + i; @@ -213,14 +188,12 @@ static void epoch_init_legacy(epoch_t epoch) { - struct epoch_pcpu_state *eps; epoch_record_t er; - eps = malloc(sizeof(*eps) * mp_ncpus, M_EPOCH, M_ZERO | M_WAITOK); - epoch->e_pcpu_dom[0] = eps; - for (int i = 0; i < mp_ncpus; i++, eps++) { - epoch->e_pcpu[i] = eps; - er = &eps->eps_record; + er = malloc(sizeof(*er) * mp_ncpus, M_EPOCH, M_ZERO | M_WAITOK); + epoch->e_pcpu_dom[0] = er; + for (int i = 0; i < mp_ncpus; i++, er++) { + epoch->e_pcpu[i] = er; ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL); TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist); er->er_cpuid = i; @@ -253,12 +226,12 @@ { int domain; #ifdef INVARIANTS - struct epoch_pcpu_state *eps; + struct epoch_record *er; int cpu; CPU_FOREACH(cpu) { - eps = epoch->e_pcpu[cpu]; - MPASS(TAILQ_EMPTY(&eps->eps_record.er_tdlist)); + er = epoch->e_pcpu[cpu]; + MPASS(TAILQ_EMPTY(&er->er_tdlist)); } #endif allepochs[epoch->e_idx] = NULL; @@ -271,95 +244,32 @@ free(epoch, M_EPOCH); } -#define INIT_CHECK(epoch) \ - do { \ - if (__predict_false((epoch) == NULL)) \ - return; \ - } while (0) - void -epoch_enter_preempt_internal(epoch_t epoch, struct thread *td) +epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et) { - struct epoch_pcpu_state *eps; - MPASS(cold || epoch != NULL); - INIT_CHECK(epoch); - MPASS(epoch->e_flags & EPOCH_PREEMPT); - critical_enter(); - td->td_pre_epoch_prio = td->td_priority; - eps = epoch->e_pcpu[curcpu]; -#ifdef INVARIANTS - MPASS(td->td_epochnest < UCHAR_MAX - 2); - if (td->td_epochnest > 1) { - struct thread *curtd; - int found = 0; - - TAILQ_FOREACH(curtd, &eps->eps_record.er_tdlist, td_epochq) - if (curtd == td) - found = 1; - KASSERT(found, ("recursing on a second epoch")); - critical_exit(); - return; - } -#endif - TAILQ_INSERT_TAIL(&eps->eps_record.er_tdlist, td, td_epochq); - sched_pin(); - 
ck_epoch_begin(&eps->eps_record.er_record, (ck_epoch_section_t *)&td->td_epoch_section); - critical_exit(); + epoch_enter_preempt(epoch, et); } - void -epoch_enter(epoch_t epoch) +epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et) { - ck_epoch_record_t *record; - struct thread *td; - - MPASS(cold || epoch != NULL); - INIT_CHECK(epoch); - td = curthread; - critical_enter(); - td->td_epochnest++; - record = &epoch->e_pcpu[curcpu]->eps_record.er_record; - ck_epoch_begin(record, NULL); + epoch_exit_preempt(epoch, et); } void -epoch_exit_preempt_internal(epoch_t epoch, struct thread *td) +epoch_enter_KBI(epoch_t epoch) { - struct epoch_pcpu_state *eps; - - MPASS(td->td_epochnest == 0); - INIT_CHECK(epoch); - critical_enter(); - eps = epoch->e_pcpu[curcpu]; - MPASS(epoch->e_flags & EPOCH_PREEMPT); - ck_epoch_end(&eps->eps_record.er_record, (ck_epoch_section_t *)&td->td_epoch_section); - TAILQ_REMOVE(&eps->eps_record.er_tdlist, td, td_epochq); - eps->eps_record.er_gen++; - sched_unpin(); - if (__predict_false(td->td_pre_epoch_prio != td->td_priority)) { - thread_lock(td); - sched_prio(td, td->td_pre_epoch_prio); - thread_unlock(td); - } - critical_exit(); + epoch_enter(epoch); } void -epoch_exit(epoch_t epoch) +epoch_exit_KBI(epoch_t epoch) { - ck_epoch_record_t *record; - struct thread *td; - INIT_CHECK(epoch); - td = curthread; - td->td_epochnest--; - record = &epoch->e_pcpu[curcpu]->eps_record.er_record; - ck_epoch_end(record, NULL); - critical_exit(); + epoch_exit(epoch); } /* @@ -371,7 +281,8 @@ void *arg __unused) { epoch_record_t record; - struct thread *td, *tdwait, *owner; + struct thread *td, *owner, *curwaittd; + struct epoch_thread *tdwait; struct turnstile *ts; struct lock_object *lock; int spincount, gen; @@ -389,13 +300,13 @@ * overhead of a migration */ if ((tdwait = TAILQ_FIRST(&record->er_tdlist)) != NULL && - TD_IS_RUNNING(tdwait)) { + TD_IS_RUNNING(tdwait->et_td)) { gen = record->er_gen; thread_unlock(td); do { cpu_spinwait(); } while (tdwait == 
TAILQ_FIRST(&record->er_tdlist) && - gen == record->er_gen && TD_IS_RUNNING(tdwait) && + gen == record->er_gen && TD_IS_RUNNING(tdwait->et_td) && spincount++ < MAX_ADAPTIVE_SPIN); thread_lock(td); return; @@ -426,28 +337,29 @@ * priority thread (highest prio value) and drop our priority * to match to allow it to run. */ - TAILQ_FOREACH(tdwait, &record->er_tdlist, td_epochq) { + TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) { /* * Propagate our priority to any other waiters to prevent us * from starving them. They will have their original priority * restore on exit from epoch_wait(). */ - if (!TD_IS_INHIBITED(tdwait) && tdwait->td_priority > td->td_priority) { + curwaittd = tdwait->et_td; + if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) { critical_enter(); thread_unlock(td); - thread_lock(tdwait); - sched_prio(tdwait, td->td_priority); - thread_unlock(tdwait); + thread_lock(curwaittd); + sched_prio(curwaittd, td->td_priority); + thread_unlock(curwaittd); thread_lock(td); critical_exit(); } - if (TD_IS_INHIBITED(tdwait) && TD_ON_LOCK(tdwait) && - ((ts = tdwait->td_blocked) != NULL)) { + if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) && + ((ts = curwaittd->td_blocked) != NULL)) { /* * We unlock td to allow turnstile_wait to reacquire the * the thread lock. Before unlocking it we enter a critical * section to prevent preemption after we reenable interrupts - * by dropping the thread lock in order to prevent tdwait + * by dropping the thread lock in order to prevent curwaittd * from getting to run. */ critical_enter(); @@ -456,15 +368,15 @@ /* * The owner pointer indicates that the lock succeeded. Only * in case we hold the lock and the turnstile we locked is still - * the one that tdwait is blocked on can we continue. Otherwise + * the one that curwaittd is blocked on can we continue. 
Otherwise * The turnstile pointer has been changed out from underneath - * us, as in the case where the lock holder has signalled tdwait, + * us, as in the case where the lock holder has signalled curwaittd, * and we need to continue. */ - if (owner != NULL && ts == tdwait->td_blocked) { - MPASS(TD_IS_INHIBITED(tdwait) && TD_ON_LOCK(tdwait)); + if (owner != NULL && ts == curwaittd->td_blocked) { + MPASS(TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd)); critical_exit(); - turnstile_wait(ts, owner, tdwait->td_tsqueue); + turnstile_wait(ts, owner, curwaittd->td_tsqueue); counter_u64_add(turnstile_count, 1); thread_lock(td); return; @@ -569,7 +481,7 @@ void epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t)) { - struct epoch_pcpu_state *eps; + epoch_record_t er; ck_epoch_entry_t *cb; cb = (void *)ctx; @@ -585,8 +497,8 @@ critical_enter(); *DPCPU_PTR(epoch_cb_count) += 1; - eps = epoch->e_pcpu[curcpu]; - ck_epoch_call(&eps->eps_record.er_record, cb, (ck_epoch_cb_t *)callback); + er = epoch->e_pcpu[curcpu]; + ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback); critical_exit(); return; boottime: @@ -608,7 +520,7 @@ for (total = i = 0; i < epoch_count; i++) { if (__predict_false((epoch = allepochs[i]) == NULL)) continue; - record = &epoch->e_pcpu[curcpu]->eps_record.er_record; + record = &epoch->e_pcpu[curcpu]->er_record; if ((npending = record->n_pending) == 0) continue; ck_epoch_poll_deferred(record, &cb_stack); @@ -632,7 +544,47 @@ } int -in_epoch(void) +in_epoch_verbose(epoch_t epoch, int dump_onfail) { - return (curthread->td_epochnest != 0); + struct epoch_thread *tdwait; + struct thread *td; + epoch_record_t er; + + td = curthread; + if (td->td_epochnest == 0) + return (0); + if (__predict_false((epoch) == NULL)) + return (0); + critical_enter(); + er = epoch->e_pcpu[curcpu]; + TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link) + if (tdwait->et_td == td) { + critical_exit(); + return (1); + } +#ifdef INVARIANTS + if 
(dump_onfail) { + MPASS(td->td_pinned); + printf("cpu: %d id: %d\n", curcpu, td->td_tid); + TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link) + printf("td_tid: %d ", tdwait->et_td->td_tid); + printf("\n"); + } +#endif + critical_exit(); + return (0); +} + +int +in_epoch(epoch_t epoch) +{ + return (in_epoch_verbose(epoch, 0)); +} + +void +epoch_adjust_prio(struct thread *td, u_char prio) +{ + thread_lock(td); + sched_prio(td, prio); + thread_unlock(td); } Index: sys/modules/tcp/fastpath/Makefile =================================================================== --- sys/modules/tcp/fastpath/Makefile +++ sys/modules/tcp/fastpath/Makefile @@ -14,5 +14,5 @@ # Enable full debugging # #CFLAGS += -g - +CFLAGS+= -DKLD_BASE .include Index: sys/modules/tcp/rack/Makefile =================================================================== --- sys/modules/tcp/rack/Makefile +++ sys/modules/tcp/rack/Makefile @@ -19,5 +19,5 @@ CFLAGS+= -DMODNAME=${KMOD} CFLAGS+= -DSTACKNAME=${STACKNAME} - +CFLAGS+= -DKLD_BASE .include Index: sys/net/if.c =================================================================== --- sys/net/if.c +++ sys/net/if.c @@ -1760,29 +1760,35 @@ void if_addr_rlock(struct ifnet *ifp) { - - IF_ADDR_RLOCK(ifp); + MPASS(*(uint64_t *)&ifp->if_addr_et == 0); + epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et); } void if_addr_runlock(struct ifnet *ifp) { - - IF_ADDR_RUNLOCK(ifp); + epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et); +#ifdef INVARIANTS + bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker)); +#endif } void if_maddr_rlock(if_t ifp) { - IF_ADDR_RLOCK((struct ifnet *)ifp); + MPASS(*(uint64_t *)&ifp->if_maddr_et == 0); + epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et); } void if_maddr_runlock(if_t ifp) { - IF_ADDR_RUNLOCK((struct ifnet *)ifp); + epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et); +#ifdef INVARIANTS + bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker)); +#endif } /* @@ -1926,7 +1932,7 @@ struct ifnet *ifp; struct 
ifaddr *ifa; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) @@ -1969,7 +1975,7 @@ struct ifnet *ifp; struct ifaddr *ifa; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; @@ -1999,7 +2005,7 @@ struct ifnet *ifp; struct ifaddr *ifa; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; @@ -2032,7 +2038,7 @@ u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. @@ -2069,7 +2075,6 @@ */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { - IF_ADDR_RUNLOCK(ifp); goto done; } } else { @@ -2128,7 +2133,8 @@ if (af >= AF_MAX) return (NULL); - MPASS(in_epoch()); + + MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; Index: sys/net/if_gif.h =================================================================== --- sys/net/if_gif.h +++ sys/net/if_gif.h @@ -96,8 +96,8 @@ /* mbuf adjust factor to force 32-bit alignment of IP header */ #define ETHERIP_ALIGN 2 -#define GIF_RLOCK() epoch_enter_preempt(net_epoch_preempt) -#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) +#define GIF_RLOCK() struct epoch_tracker gif_et; epoch_enter_preempt(net_epoch_preempt, &gif_et) +#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gif_et) #define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt) /* Prototypes */ Index: sys/net/if_gre.h =================================================================== --- sys/net/if_gre.h +++ 
sys/net/if_gre.h @@ -91,8 +91,8 @@ #endif #define GRE2IFP(sc) ((sc)->gre_ifp) -#define GRE_RLOCK() epoch_enter_preempt(net_epoch_preempt) -#define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) +#define GRE_RLOCK() struct epoch_tracker gre_et; epoch_enter_preempt(net_epoch_preempt, &gre_et) +#define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gre_et) #define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt) #define gre_hdr gre_uhdr.hdr Index: sys/net/if_lagg.c =================================================================== --- sys/net/if_lagg.c +++ sys/net/if_lagg.c @@ -73,10 +73,10 @@ #include #include -#define LAGG_RLOCK() epoch_enter_preempt(net_epoch_preempt) -#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) -#define LAGG_RLOCK_ASSERT() MPASS(in_epoch()) -#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch()) +#define LAGG_RLOCK() struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et) +#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &lagg_et) +#define LAGG_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt)) +#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch(net_epoch_preempt)) #define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx") #define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx) @@ -1791,6 +1791,7 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) { struct lagg_port *lp_next, *rval = NULL; + struct epoch_tracker net_et; /* * Search a port which reports an active link state. 
@@ -1809,15 +1810,14 @@ } search: - LAGG_RLOCK(); + epoch_enter_preempt(net_epoch_preempt, &net_et); CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (LAGG_PORTACTIVE(lp_next)) { - LAGG_RUNLOCK(); - rval = lp_next; - goto found; + epoch_exit_preempt(net_epoch_preempt, &net_et); + return (lp_next); } } - LAGG_RUNLOCK(); + epoch_exit_preempt(net_epoch_preempt, &net_et); found: return (rval); } Index: sys/net/if_me.c =================================================================== --- sys/net/if_me.c +++ sys/net/if_me.c @@ -87,8 +87,8 @@ CK_LIST_HEAD(me_list, me_softc); #define ME2IFP(sc) ((sc)->me_ifp) #define ME_READY(sc) ((sc)->me_src.s_addr != 0) -#define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt) -#define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) +#define ME_RLOCK() struct epoch_tracker me_et; epoch_enter_preempt(net_epoch_preempt, &me_et) +#define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &me_et) #define ME_WAIT() epoch_wait_preempt(net_epoch_preempt) #ifndef ME_HASH_SIZE @@ -315,7 +315,7 @@ if (V_me_hashtbl == NULL) return (0); - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); ip = mtod(m, const struct ip *); CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr, ip->ip_src.s_addr), chain) { Index: sys/net/if_var.h =================================================================== --- sys/net/if_var.h +++ sys/net/if_var.h @@ -381,6 +381,8 @@ */ struct netdump_methods *if_netdump_methods; struct epoch_context if_epoch_ctx; + struct epoch_tracker if_addr_et; + struct epoch_tracker if_maddr_et; /* * Spare fields to be added before branching a stable branch, so @@ -398,15 +400,17 @@ */ #define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF) #define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock) -#define IF_ADDR_RLOCK(if) epoch_enter_preempt(net_epoch_preempt); -#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt); +#define IF_ADDR_RLOCK(if) struct epoch_tracker 
if_addr_et; epoch_enter_preempt(net_epoch_preempt, &if_addr_et); +#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt, &if_addr_et); #define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock) -#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch() || mtx_owned(&(if)->if_addr_lock)) +#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock)) #define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED) -#define NET_EPOCH_ENTER() epoch_enter_preempt(net_epoch_preempt) -#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt) +#define NET_EPOCH_ENTER() struct epoch_tracker nep_et; epoch_enter_preempt(net_epoch_preempt, &nep_et) +#define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et)) +#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et) +#define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et)) /* @@ -482,16 +486,16 @@ mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF) #define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock) -#define IF_AFDATA_RLOCK(ifp) epoch_enter_preempt(net_epoch_preempt) +#define IF_AFDATA_RLOCK(ifp) struct epoch_tracker if_afdata_et; epoch_enter_preempt(net_epoch_preempt, &if_afdata_et) #define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock) -#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt) +#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt, &if_afdata_et) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock) #define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock) -#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch() || mtx_owned(&(ifp)->if_afdata_lock)) -#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch()); +#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || 
mtx_owned(&(ifp)->if_afdata_lock)) +#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt)); #define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED) @@ -573,16 +577,16 @@ * write, but also whether it was acquired with sleep support or not. */ #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) -#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch()) +#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt)) #define IFNET_WLOCK_ASSERT() do { \ sx_assert(&ifnet_sxlock, SA_XLOCKED); \ rw_assert(&ifnet_rwlock, RA_WLOCKED); \ } while (0) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) -#define IFNET_RLOCK_NOSLEEP() epoch_enter_preempt(net_epoch_preempt) +#define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) -#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt) +#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt, &ifnet_rlock_et) /* * Look up an ifnet given its index; the _ref variant also acquires a Index: sys/net/route.c =================================================================== --- sys/net/route.c +++ sys/net/route.c @@ -733,7 +733,7 @@ struct ifaddr *ifa; int not_found = 0; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, Index: sys/net/rtsock.c =================================================================== --- sys/net/rtsock.c +++ sys/net/rtsock.c @@ -1736,15 +1736,15 @@ struct rt_addrinfo info; int len, error = 0; struct sockaddr_storage ss; + struct epoch_tracker et; bzero((caddr_t)&info, sizeof(info)); bzero(&ifd, sizeof(ifd)); - IFNET_RLOCK_NOSLEEP(); + NET_EPOCH_ENTER_ET(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != 
ifp->if_index) continue; if_data_copy(ifp, &ifd); - IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len); @@ -1785,15 +1785,12 @@ goto done; } } - IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = NULL; info.rti_info[RTAX_NETMASK] = NULL; info.rti_info[RTAX_BRD] = NULL; } done: - if (ifp != NULL) - IF_ADDR_RUNLOCK(ifp); - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT_ET(et); return (error); } Index: sys/netinet/in_gif.c =================================================================== --- sys/netinet/in_gif.c +++ sys/netinet/in_gif.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -224,7 +225,7 @@ int len; /* prepend new IP header */ - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); len = sizeof(struct ip); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) @@ -263,7 +264,7 @@ struct ip *ip; uint8_t ecn; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); if (sc == NULL) { m_freem(m); KMOD_IPSTAT_INC(ips_nogif); @@ -292,7 +293,7 @@ if (V_ipv4_hashtbl == NULL) return (0); - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); ip = mtod(m, const struct ip *); /* * NOTE: it is safe to iterate without any locking here, because softc Index: sys/netinet/in_pcb.h =================================================================== --- sys/netinet/in_pcb.h +++ sys/netinet/in_pcb.h @@ -632,16 +632,17 @@ #define INP_INFO_LOCK_INIT(ipi, d) \ mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE) #define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock) -#define INP_INFO_RLOCK(ipi) NET_EPOCH_ENTER() +#define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER_ET((et)) #define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock) #define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock) #define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock) -#define INP_INFO_RUNLOCK(ipi) NET_EPOCH_EXIT() +#define INP_INFO_RUNLOCK_ET(ipi, et) 
NET_EPOCH_EXIT_ET((et)) +#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT_ET(*(tp)->t_inpcb->inp_et) #define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock) -#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_lock)) -#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch()) +#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock)) +#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt)) #define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED) -#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch() && !mtx_owned(&(ipi)->ipi_lock)) +#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock)) #define INP_LIST_LOCK_INIT(ipi, d) \ rw_init_flags(&(ipi)->ipi_list_lock, (d), 0) @@ -664,11 +665,13 @@ #define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF) #define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock) -#define INP_HASH_RLOCK(ipi) NET_EPOCH_ENTER() +#define INP_HASH_RLOCK(ipi) struct epoch_tracker inp_hash_et; epoch_enter_preempt(net_epoch_preempt, &inp_hash_et) +#define INP_HASH_RLOCK_ET(ipi, et) epoch_enter_preempt(net_epoch_preempt, &(et)) #define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock) -#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT() +#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT_ET(inp_hash_et) +#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et)) #define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch() || mtx_owned(&(ipi)->ipi_hash_lock)) +#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock)) #define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED); #define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \ Index: sys/netinet/in_pcb.c =================================================================== --- 
sys/netinet/in_pcb.c +++ sys/netinet/in_pcb.c @@ -1084,7 +1084,6 @@ ifp = ia->ia_ifp; ia = NULL; - IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; @@ -1098,10 +1097,8 @@ } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; - IF_ADDR_RUNLOCK(ifp); goto done; } - IF_ADDR_RUNLOCK(ifp); /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); @@ -1143,7 +1140,6 @@ */ ia = NULL; ifp = sro.ro_rt->rt_ifp; - IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) @@ -1156,10 +1152,8 @@ } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; - IF_ADDR_RUNLOCK(ifp); goto done; } - IF_ADDR_RUNLOCK(ifp); /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); @@ -1207,9 +1201,7 @@ ifp = ia->ia_ifp; ia = NULL; - IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; @@ -1222,10 +1214,8 @@ } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; - IF_ADDR_RUNLOCK(ifp); goto done; } - IF_ADDR_RUNLOCK(ifp); } /* 3. As a last resort return the 'default' jail address. */ @@ -1673,6 +1663,10 @@ { INP_WLOCK_ASSERT(inp); +#ifdef INVARIANTS + if (inp->inp_socket != NULL && inp->inp_ppcb != NULL) + MPASS(inp->inp_refcount > 1); +#endif /* * XXXRW: Possibly we should protect the setting of INP_DROPPED with @@ -2251,11 +2245,12 @@ struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; +#ifdef INVARIANTS KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - - INP_HASH_LOCK_ASSERT(pcbinfo); - + if (!mtx_owned(&pcbinfo->ipi_hash_lock)) + MPASS(in_epoch_verbose(net_epoch_preempt, 1)); +#endif /* * First look for an exact match. 
*/ Index: sys/netinet/ip_divert.c =================================================================== --- sys/netinet/ip_divert.c +++ sys/netinet/ip_divert.c @@ -192,6 +192,7 @@ u_int16_t nport; struct sockaddr_in divsrc; struct m_tag *mtag; + struct epoch_tracker et; mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); if (mtag == NULL) { @@ -272,7 +273,7 @@ /* Put packet on socket queue, if any */ sa = NULL; nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info)); - INP_INFO_RLOCK(&V_divcbinfo); + INP_INFO_RLOCK_ET(&V_divcbinfo, et); CK_LIST_FOREACH(inp, &V_divcb, inp_list) { /* XXX why does only one socket match? */ if (inp->inp_lport == nport) { @@ -290,7 +291,7 @@ break; } } - INP_INFO_RUNLOCK(&V_divcbinfo); + INP_INFO_RUNLOCK_ET(&V_divcbinfo, et); if (sa == NULL) { m_freem(m); KMOD_IPSTAT_INC(ips_noproto); @@ -634,6 +635,7 @@ struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; + struct epoch_tracker net_et; /* * The process of preparing the TCB list is too time-consuming and @@ -652,10 +654,10 @@ /* * OK, now we're committed to doing something. */ - INP_INFO_RLOCK(&V_divcbinfo); + epoch_enter_preempt(net_epoch_preempt, &net_et); gencnt = V_divcbinfo.ipi_gencnt; n = V_divcbinfo.ipi_count; - INP_INFO_RUNLOCK(&V_divcbinfo); + epoch_exit_preempt(net_epoch_preempt, &net_et); error = sysctl_wire_old_buffer(req, 2 * sizeof(xig) + n*sizeof(struct xinpcb)); @@ -674,7 +676,7 @@ if (inp_list == NULL) return ENOMEM; - INP_INFO_RLOCK(&V_divcbinfo); + epoch_enter_preempt(net_epoch_preempt, &net_et); for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n; inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); @@ -685,7 +687,7 @@ } INP_WUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_divcbinfo); + epoch_exit_preempt(net_epoch_preempt, &net_et); n = i; error = 0; @@ -711,6 +713,7 @@ INP_INFO_WUNLOCK(&V_divcbinfo); if (!error) { + struct epoch_tracker et; /* * Give the user an updated idea of our state. 
* If the generation differs from what we told @@ -718,11 +721,11 @@ * while we were processing this request, and it * might be necessary to retry. */ - INP_INFO_RLOCK(&V_divcbinfo); + INP_INFO_RLOCK_ET(&V_divcbinfo, et); xig.xig_gen = V_divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_divcbinfo.ipi_count; - INP_INFO_RUNLOCK(&V_divcbinfo); + INP_INFO_RUNLOCK_ET(&V_divcbinfo, et); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); Index: sys/netinet/ip_encap.c =================================================================== --- sys/netinet/ip_encap.c +++ sys/netinet/ip_encap.c @@ -112,8 +112,8 @@ MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF); #define ENCAP_WLOCK() mtx_lock(&encapmtx) #define ENCAP_WUNLOCK() mtx_unlock(&encapmtx) -#define ENCAP_RLOCK() epoch_enter_preempt(net_epoch_preempt) -#define ENCAP_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) +#define ENCAP_RLOCK() struct epoch_tracker encap_et; epoch_enter_preempt(net_epoch_preempt, &encap_et) +#define ENCAP_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &encap_et) #define ENCAP_WAIT() epoch_wait_preempt(net_epoch_preempt) static struct encaptab * Index: sys/netinet/ip_gre.c =================================================================== --- sys/netinet/ip_gre.c +++ sys/netinet/ip_gre.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -118,7 +119,7 @@ if (V_ipv4_hashtbl == NULL) return (0); - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); ip = mtod(m, const struct ip *); CK_LIST_FOREACH(sc, &GRE_HASH(ip->ip_dst.s_addr, ip->ip_src.s_addr), chain) { Index: sys/netinet/raw_ip.c =================================================================== --- sys/netinet/raw_ip.c +++ sys/netinet/raw_ip.c @@ -285,6 +285,7 @@ struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last; struct sockaddr_in ripsrc; + struct epoch_tracker et; int hash; *mp = NULL; @@ -299,7 +300,7 @@ hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, 
ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); - INP_INFO_RLOCK(&V_ripcbinfo); + INP_INFO_RLOCK_ET(&V_ripcbinfo, et); CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { if (inp->inp_ip_p != proto) continue; @@ -422,7 +423,7 @@ skip_2: INP_RUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_ripcbinfo); + INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); if (last != NULL) { if (rip_append(last, ip, m, &ripsrc) != 0) IPSTAT_INC(ips_delivered); @@ -1035,6 +1036,7 @@ struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; + struct epoch_tracker net_et; /* * The process of preparing the TCB list is too time-consuming and @@ -1053,10 +1055,10 @@ /* * OK, now we're committed to doing something. */ - INP_INFO_RLOCK(&V_ripcbinfo); + epoch_enter_preempt(net_epoch_preempt, &net_et); gencnt = V_ripcbinfo.ipi_gencnt; n = V_ripcbinfo.ipi_count; - INP_INFO_RUNLOCK(&V_ripcbinfo); + epoch_exit_preempt(net_epoch_preempt, &net_et); xig.xig_len = sizeof xig; xig.xig_count = n; @@ -1070,7 +1072,7 @@ if (inp_list == NULL) return (ENOMEM); - INP_INFO_RLOCK(&V_ripcbinfo); + epoch_enter_preempt(net_epoch_preempt, &net_et); for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); @@ -1081,7 +1083,7 @@ } INP_WUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_ripcbinfo); + epoch_exit_preempt(net_epoch_preempt, &net_et); n = i; error = 0; @@ -1107,17 +1109,18 @@ INP_INFO_WUNLOCK(&V_ripcbinfo); if (!error) { + struct epoch_tracker et; /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. 
*/ - INP_INFO_RLOCK(&V_ripcbinfo); + INP_INFO_RLOCK_ET(&V_ripcbinfo, et); xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_ripcbinfo.ipi_count; - INP_INFO_RUNLOCK(&V_ripcbinfo); + INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); Index: sys/netinet/tcp_hpts.h =================================================================== --- sys/netinet/tcp_hpts.h +++ sys/netinet/tcp_hpts.h @@ -238,10 +238,10 @@ #define tcp_queue_to_input_locked(a, b) __tcp_queue_to_input_locked(a, b, __LINE__); void tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, - int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked); + int32_t tlen, int32_t drop_hdrlen, uint8_t iptos); int __tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, - int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked, int32_t line); + int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line); #define tcp_queue_to_input(a, b, c, d, e, f, g) __tcp_queue_to_input(a, b, c, d, e, f, g, __LINE__) uint16_t tcp_hpts_delayedby(struct inpcb *inp); Index: sys/netinet/tcp_hpts.c =================================================================== --- sys/netinet/tcp_hpts.c +++ sys/netinet/tcp_hpts.c @@ -998,7 +998,7 @@ void tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, - int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked) + int32_t tlen, int32_t drop_hdrlen, uint8_t iptos) { /* Setup packet for input first */ INP_WLOCK_ASSERT(tp->t_inpcb); @@ -1006,7 +1006,7 @@ m->m_pkthdr.pace_tlen = (uint16_t) tlen; m->m_pkthdr.pace_drphdrlen = drop_hdrlen; m->m_pkthdr.pace_tos = iptos; - m->m_pkthdr.pace_lock = (uint8_t) ti_locked; + m->m_pkthdr.pace_lock = (curthread->td_epochnest != 0); if (tp->t_in_pkt == NULL) { tp->t_in_pkt = m; tp->t_tail_pkt = m; @@ -1019,11 +1019,11 @@ int32_t __tcp_queue_to_input(struct tcpcb *tp, struct mbuf 
*m, struct tcphdr *th, - int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked, int32_t line){ + int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line){ struct tcp_hpts_entry *hpts; int32_t ret; - tcp_queue_pkt_to_input(tp, m, th, tlen, drop_hdrlen, iptos, ti_locked); + tcp_queue_pkt_to_input(tp, m, th, tlen, drop_hdrlen, iptos); hpts = tcp_input_lock(tp->t_inpcb); ret = __tcp_queue_to_input_locked(tp->t_inpcb, hpts, line); mtx_unlock(&hpts->p_mtx); @@ -1145,6 +1145,7 @@ int16_t set_cpu; uint32_t did_prefetch = 0; int32_t ti_locked = TI_UNLOCKED; + struct epoch_tracker et; HPTS_MTX_ASSERT(hpts); while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) { @@ -1161,7 +1162,7 @@ mtx_unlock(&hpts->p_mtx); CURVNET_SET(inp->inp_vnet); if (drop_reason) { - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); ti_locked = TI_RLOCKED; } else { ti_locked = TI_UNLOCKED; @@ -1172,7 +1173,7 @@ out: hpts->p_inp = NULL; if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); } if (in_pcbrele_wlocked(inp) == 0) { INP_WUNLOCK(inp); @@ -1201,7 +1202,7 @@ n = m->m_nextpkt; } tp = tcp_drop(tp, drop_reason); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); if (tp == NULL) { INP_WLOCK(inp); } @@ -1234,7 +1235,7 @@ (m->m_pkthdr.pace_lock == TI_RLOCKED || tp->t_state != TCPS_ESTABLISHED)) { ti_locked = TI_RLOCKED; - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); m = tp->t_in_pkt; } if (in_newts_every_tcb) { @@ -1270,13 +1271,15 @@ /* Use the hpts specific do_segment */ (*tp->t_fb->tfb_tcp_hpts_do_segment) (m, th, inp->inp_socket, tp, drop_hdrlen, - tlen, iptos, ti_locked, nxt_pkt, tv); + tlen, iptos, nxt_pkt, tv); } else { /* Use the default do_segment */ (*tp->t_fb->tfb_tcp_do_segment) (m, th, inp->inp_socket, tp, drop_hdrlen, - tlen, iptos, ti_locked); + tlen, iptos); } + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); /* * Do segment returns unlocked we 
need the * lock again but we also need some kasserts @@ -1289,7 +1292,7 @@ n = m->m_nextpkt; if (m != NULL && m->m_pkthdr.pace_lock == TI_RLOCKED) { - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); ti_locked = TI_RLOCKED; } else ti_locked = TI_UNLOCKED; @@ -1316,14 +1319,14 @@ if (ti_locked == TI_UNLOCKED && (tp->t_state != TCPS_ESTABLISHED)) { ti_locked = TI_RLOCKED; - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); } } /** end while(m) */ } /** end if ((m != NULL) && (m == tp->t_in_pkt)) */ if (in_pcbrele_wlocked(inp) == 0) INP_WUNLOCK(inp); if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_UNLOCK_ASSERT(inp); ti_locked = TI_UNLOCKED; Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -583,6 +583,7 @@ int rstreason = 0; /* For badport_bandlim accounting purposes */ uint8_t iptos; struct m_tag *fwd_tag = NULL; + struct epoch_tracker et; #ifdef INET6 struct ip6_hdr *ip6 = NULL; int isipv6; @@ -773,7 +774,7 @@ * connection in TIMEWAIT and SYNs not targeting a listening socket. 
*/ if ((thflags & (TH_FIN | TH_RST)) != 0) { - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); ti_locked = TI_RLOCKED; } else ti_locked = TI_UNLOCKED; @@ -962,7 +963,7 @@ */ if (inp->inp_flags & INP_TIMEWAIT) { if (ti_locked == TI_UNLOCKED) { - INP_INFO_RLOCK(); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); ti_locked = TI_RLOCKED; } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); @@ -974,7 +975,7 @@ */ if (tcp_twcheck(inp, &to, th, m, tlen)) goto findpcb; - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (IPPROTO_DONE); } /* @@ -1011,7 +1012,7 @@ (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) && !IS_FASTOPEN(tp->t_flags)))) { if (ti_locked == TI_UNLOCKED) { - INP_INFO_RLOCK(); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); ti_locked = TI_RLOCKED; } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); @@ -1145,8 +1146,9 @@ * the mbuf chain and unlocks the inpcb. */ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, - iptos, ti_locked); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + iptos); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (IPPROTO_DONE); } /* @@ -1350,7 +1352,7 @@ * Only the listen socket is unlocked by syncache_add(). */ if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); ti_locked = TI_UNLOCKED; } INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); @@ -1384,15 +1386,16 @@ * state. tcp_do_segment() always consumes the mbuf chain, unlocks * the inpcb, and unlocks pcbinfo. 
*/ - tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (IPPROTO_DONE); dropwithreset: TCP_PROBE5(receive, NULL, tp, m, tp, th); if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS @@ -1416,7 +1419,7 @@ TCP_PROBE5(receive, NULL, tp, m, tp, th); if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS @@ -1503,8 +1506,7 @@ void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, - struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, - int ti_locked) + struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) { int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; int rstreason, todrop, win; @@ -1530,7 +1532,6 @@ tp->sackhint.last_sack_ack = 0; sack_changed = 0; nsegs = max(1, m->m_pkthdr.lro_nsegs); - /* * If this is either a state-changing packet or current state isn't * established, we require a write lock on tcbinfo. Otherwise, we @@ -1539,19 +1540,7 @@ */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { - KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " - "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } else { -#ifdef INVARIANTS - if (ti_locked == TI_RLOCKED) - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - else { - KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " - "ti_locked: %d", __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); - } -#endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", @@ -1760,10 +1749,6 @@ /* * This is a pure ack for outstanding data. 
*/ - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; - TCPSTAT_INC(tcps_predack); /* @@ -1867,10 +1852,6 @@ * nothing on the reassembly queue and we have enough * buffer space to take it. */ - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; - /* Clean receiver SACK report if present */ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) tcp_clean_sackreport(tp); @@ -2072,8 +2053,6 @@ tcp_state_change(tp, TCPS_SYN_RECEIVED); } - KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " - "ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); @@ -2148,9 +2127,6 @@ (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_RLOCKED, - ("%s: TH_RST ti_locked %d, th %p tp %p", - __func__, ti_locked, th, tp)); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); @@ -2193,8 +2169,6 @@ */ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT && tp->t_state != TCPS_SYN_RECEIVED) { - KASSERT(ti_locked == TI_RLOCKED, - ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); @@ -2308,8 +2282,6 @@ */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { - KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " - "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { @@ -2899,7 +2871,6 @@ if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); - INP_INFO_RUNLOCK(&V_tcbinfo); m_freem(m); return; } @@ -3131,19 +3102,11 @@ */ case TCPS_FIN_WAIT_2: INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata " - "TCP_FIN_WAIT_2 ti_locked: %d", __func__, - ti_locked)); tcp_twstart(tp); - INP_INFO_RUNLOCK(&V_tcbinfo); return; } } - if (ti_locked 
== TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; - #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, @@ -3158,9 +3121,6 @@ (void) tp->t_fb->tfb_tcp_output(tp); check_delack: - KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", - __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { @@ -3198,10 +3158,6 @@ &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; - tp->t_flags |= TF_ACKNOW; (void) tp->t_fb->tfb_tcp_output(tp); INP_WUNLOCK(tp->t_inpcb); @@ -3209,10 +3165,6 @@ return; dropwithreset: - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; - if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(tp->t_inpcb); @@ -3221,15 +3173,6 @@ return; drop: - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; - } -#ifdef INVARIANTS - else - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); -#endif - /* * Drop space held by incoming segment and return. */ Index: sys/netinet/tcp_stacks/fastpath.c =================================================================== --- sys/netinet/tcp_stacks/fastpath.c +++ sys/netinet/tcp_stacks/fastpath.c @@ -121,12 +121,10 @@ #include static void tcp_do_segment_fastslow(struct mbuf *, struct tcphdr *, - struct socket *, struct tcpcb *, int, int, uint8_t, - int); + struct socket *, struct tcpcb *, int, int, uint8_t); static void tcp_do_segment_fastack(struct mbuf *, struct tcphdr *, - struct socket *, struct tcpcb *, int, int, uint8_t, - int); + struct socket *, struct tcpcb *, int, int, uint8_t); /* * Indicate whether this ack should be delayed. 
We can delay the ack if @@ -154,7 +152,7 @@ static void tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, - int ti_locked, uint32_t tiwin) + uint32_t tiwin) { int acked; uint16_t nsegs; @@ -170,6 +168,7 @@ struct tcphdr tcp_savetcp; short ostate = 0; #endif + /* * The following if statement will be true if * we are doing the win_up_in_fp @@ -207,11 +206,6 @@ /* * This is a pure ack for outstanding data. */ - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } - ti_locked = TI_UNLOCKED; - TCPSTAT_INC(tcps_predack); /* @@ -310,9 +304,6 @@ sowwakeup(so); if (sbavail(&so->so_snd)) (void) tcp_output(tp); - KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", - __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { @@ -330,7 +321,7 @@ static void tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, - int ti_locked, uint32_t tiwin) + uint32_t tiwin) { int newsize = 0; /* automatic sockbuf scaling */ #ifdef TCPDEBUG @@ -354,16 +345,6 @@ tp->ts_recent = to->to_tsval; } - /* - * This is a pure, in-sequence data packet with - * nothing on the reassembly queue and we have enough - * buffer space to take it. 
- */ - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } - ti_locked = TI_UNLOCKED; - /* Clean receiver SACK report if present */ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) tcp_clean_sackreport(tp); @@ -413,9 +394,6 @@ tp->t_flags |= TF_ACKNOW; tcp_output(tp); } - KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", - __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { @@ -434,7 +412,7 @@ static void tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, - int ti_locked, uint32_t tiwin, int thflags) + uint32_t tiwin, int thflags) { int acked, ourfinisacked, needoutput = 0; int rstreason, todrop, win; @@ -464,7 +442,6 @@ if (win < 0) win = 0; tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); - switch (tp->t_state) { /* @@ -569,8 +546,6 @@ tcp_state_change(tp, TCPS_SYN_RECEIVED); } - KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " - "ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); @@ -644,9 +619,6 @@ SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_RLOCKED, - ("%s: TH_RST ti_locked %d, th %p tp %p", - __func__, ti_locked, th, tp)); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); @@ -688,8 +660,6 @@ * Send challenge ACK for any SYN in synchronized state. 
*/ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) { - KASSERT(ti_locked == TI_RLOCKED, - ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); @@ -803,8 +773,6 @@ */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { - KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " - "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { @@ -1333,7 +1301,6 @@ if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); - INP_INFO_RUNLOCK(&V_tcbinfo); m_freem(m); return; } @@ -1562,20 +1529,10 @@ */ case TCPS_FIN_WAIT_2: INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata " - "TCP_FIN_WAIT_2 ti_locked: %d", __func__, - ti_locked)); - tcp_twstart(tp); - INP_INFO_RUNLOCK(&V_tcbinfo); return; } } - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } - ti_locked = TI_UNLOCKED; - #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, @@ -1589,9 +1546,6 @@ if (needoutput || (tp->t_flags & TF_ACKNOW)) (void) tp->t_fb->tfb_tcp_output(tp); - KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", - __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { @@ -1629,11 +1583,6 @@ &tcp_savetcp, 0); #endif TCP_PROBE3(debug__drop, tp, th, m); - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } - ti_locked = TI_UNLOCKED; - tp->t_flags |= TF_ACKNOW; (void) tp->t_fb->tfb_tcp_output(tp); INP_WUNLOCK(tp->t_inpcb); @@ -1641,11 +1590,6 @@ return; dropwithreset: - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } - ti_locked = TI_UNLOCKED; - if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(tp->t_inpcb); @@ -1654,15 +1598,6 @@ return; drop: - if (ti_locked == TI_RLOCKED) { - 
INP_INFO_RUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; - } -#ifdef INVARIANTS - else - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); -#endif - /* * Drop space held by incoming segment and return. */ @@ -1687,8 +1622,8 @@ */ void tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so, - struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, - int ti_locked) + struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) + { int thflags; uint32_t tiwin; @@ -1701,6 +1636,7 @@ thflags = th->th_flags; inc = &tp->t_inpcb->inp_inc; nsegs = max(1, m->m_pkthdr.lro_nsegs); + /* * If this is either a state-changing packet or current state isn't * established, we require a write lock on tcbinfo. Otherwise, we @@ -1709,19 +1645,7 @@ */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { - KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " - "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } else { -#ifdef INVARIANTS - if (ti_locked == TI_RLOCKED) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } else { - KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " - "ti_locked: %d", __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); - } -#endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", @@ -1736,9 +1660,6 @@ "sysctl setting)\n", s, __func__); free(s, M_TCPLOG); } - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } INP_WUNLOCK(tp->t_inpcb); m_freem(m); return; @@ -1751,9 +1672,6 @@ if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_dropwithreset(m, th, tp, tlen, BANDLIM_UNLIMITED); - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } INP_WUNLOCK(tp->t_inpcb); return; } @@ -1919,19 +1837,19 @@ TAILQ_EMPTY(&tp->snd_holes)))) { /* We are done */ tcp_do_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, - ti_locked, tiwin); + tiwin); return; } else if 
((tlen) && (th->th_ack == tp->snd_una && tlen <= sbspace(&so->so_rcv))) { tcp_do_fastnewdata(m, th, so, tp, &to, drop_hdrlen, tlen, - ti_locked, tiwin); + tiwin); /* We are done */ return; } } tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen, - ti_locked, tiwin, thflags); + tiwin, thflags); } @@ -1947,7 +1865,7 @@ static int tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, - int ti_locked, uint32_t tiwin) + uint32_t tiwin) { int acked; uint16_t nsegs; @@ -2039,11 +1957,6 @@ /* * This is a pure ack for outstanding data. */ - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } - ti_locked = TI_UNLOCKED; - TCPSTAT_INC(tcps_predack); /* @@ -2138,9 +2051,6 @@ } if (sbavail(&so->so_snd)) (void) tcp_output(tp); - KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", - __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { @@ -2167,8 +2077,7 @@ */ void tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, - struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, - int ti_locked) + struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) { int thflags; uint32_t tiwin; @@ -2186,19 +2095,7 @@ */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { - KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " - "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } else { -#ifdef INVARIANTS - if (ti_locked == TI_RLOCKED) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } else { - KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " - "ti_locked: %d", __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); - } -#endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", @@ -2213,9 +2110,6 @@ "sysctl setting)\n", s, __func__); free(s, M_TCPLOG); } - if (ti_locked == TI_RLOCKED) { - 
INP_INFO_RUNLOCK(&V_tcbinfo); - } INP_WUNLOCK(tp->t_inpcb); m_freem(m); return; @@ -2228,9 +2122,6 @@ if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_dropwithreset(m, th, tp, tlen, BANDLIM_UNLIMITED); - if (ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - } INP_WUNLOCK(tp->t_inpcb); return; } @@ -2367,12 +2258,12 @@ __predict_true(LIST_EMPTY(&tp->t_segq)) && __predict_true(th->th_seq == tp->rcv_nxt)) { if (tcp_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, - ti_locked, tiwin)) { + tiwin)) { return; } } tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen, - ti_locked, tiwin, thflags); + tiwin, thflags); } struct tcp_function_block __tcp_fastslow = { Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -251,12 +251,12 @@ static int rack_process_ack(struct mbuf *m, struct tcphdr *th, - struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t * ti_locked, + struct socket *so, struct tcpcb *tp, struct tcpopt *to, uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val); static int rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static void rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery); @@ -275,7 +275,7 @@ static void rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, - uint8_t iptos, int32_t ti_locked); + uint8_t iptos); static void rack_dtor(void *mem, int32_t size, void *arg); static void rack_earlier_retran(struct tcpcb *tp, struct rack_sendmap *rsm, @@ -307,7 +307,7 
@@ static void rack_hpts_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, - uint8_t iptos, int32_t ti_locked, int32_t nxt_pkt, struct timeval *tv); + uint8_t iptos, int32_t nxt_pkt, struct timeval *tv); static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, @@ -338,57 +338,58 @@ static int32_t tcp_addrack(module_t mod, int32_t type, void *data); static void rack_challenge_ack(struct mbuf *m, struct tcphdr *th, - struct tcpcb *tp, int32_t * ti_locked, int32_t * ret_val); + struct tcpcb *tp, int32_t * ret_val); static int rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); + int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); -static void rack_do_drop(struct mbuf *m, struct tcpcb *tp, int32_t * ti_locked); + int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); +static void +rack_do_drop(struct mbuf *m, struct tcpcb *tp); static void rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp, - struct tcphdr *th, int32_t * ti_locked, int32_t thflags, int32_t tlen, int32_t * ret_val); + struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val); static void rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, - struct tcphdr *th, int32_t * ti_locked, int32_t rstreason, int32_t tlen); + struct tcphdr *th, int32_t rstreason, int32_t tlen); static int rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t 
nxt_pkt); + int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t nxt_pkt); + int32_t tlen, uint32_t tiwin, int32_t nxt_pkt); static int rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); + int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); + int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); + int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); + int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, - int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); + int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_drop_checks(struct tcpopt *to, struct mbuf *m, - struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * ti_locked, int32_t * thf, + struct tcphdr *th, 
struct tcpcb *tp, int32_t * tlenp, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val); static int rack_process_rst(struct mbuf *m, struct tcphdr *th, - struct socket *so, struct tcpcb *tp, int32_t * ti_locked); + struct socket *so, struct tcpcb *tp); struct rack_sendmap * tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tsused); @@ -398,7 +399,7 @@ static int rack_ts_check(struct mbuf *m, struct tcphdr *th, - struct tcpcb *tp, int32_t * ti_locked, int32_t tlen, int32_t thflags, int32_t * ret_val); + struct tcpcb *tp, int32_t tlen, int32_t thflags, int32_t * ret_val); int32_t rack_clear_counter=0; @@ -1492,12 +1493,8 @@ } static void -rack_do_drop(struct mbuf *m, struct tcpcb *tp, int32_t * ti_locked) +rack_do_drop(struct mbuf *m, struct tcpcb *tp) { - if (*ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - *ti_locked = TI_UNLOCKED; - } /* * Drop space held by incoming segment and return. */ @@ -1508,12 +1505,9 @@ } static void -rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t * ti_locked, int32_t rstreason, int32_t tlen) +rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, + int32_t rstreason, int32_t tlen) { - if (*ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - *ti_locked = TI_UNLOCKED; - } if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(tp->t_inpcb); @@ -1528,7 +1522,7 @@ * and valid. 
*/ static void -rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t * ti_locked, int32_t thflags, int32_t tlen, int32_t * ret_val) +rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val) { /* * Generate an ACK dropping incoming segment if it occupies sequence @@ -1550,14 +1544,10 @@ (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max))) { *ret_val = 1; - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return; } else *ret_val = 0; - if (*ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - *ti_locked = TI_UNLOCKED; - } rack = (struct tcp_rack *)tp->t_fb_ptr; rack->r_wanted_output++; tp->t_flags |= TF_ACKNOW; @@ -1567,7 +1557,7 @@ static int -rack_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t * ti_locked) +rack_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp) { /* * RFC5961 Section 3.2 @@ -1586,9 +1576,6 @@ (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - KASSERT(*ti_locked == TI_RLOCKED, - ("%s: TH_RST ti_locked %d, th %p tp %p", - __func__, *ti_locked, th, tp)); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); @@ -1617,7 +1604,7 @@ tp = tcp_close(tp); } dropped = 1; - rack_do_drop(m, tp, ti_locked); + rack_do_drop(m, tp); } else { TCPSTAT_INC(tcps_badrst); /* Send challenge ACK. */ @@ -1638,10 +1625,9 @@ * and valid. 
*/ static void -rack_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ti_locked, int32_t * ret_val) +rack_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val) { - KASSERT(*ti_locked == TI_RLOCKED, - ("tcp_do_segment: TH_SYN ti_locked %d", *ti_locked)); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); @@ -1650,7 +1636,7 @@ SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { tp = tcp_drop(tp, ECONNRESET); *ret_val = 1; - rack_do_drop(m, tp, ti_locked); + rack_do_drop(m, tp); } else { /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, @@ -1658,7 +1644,7 @@ tp->last_ack_sent = tp->rcv_nxt; m = NULL; *ret_val = 0; - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); } } @@ -1669,7 +1655,7 @@ * TCB is still valid and locked. */ static int -rack_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ti_locked, int32_t tlen, int32_t thflags, int32_t * ret_val) +rack_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t tlen, int32_t thflags, int32_t * ret_val) { /* Check to see if ts_recent is over 24 days old. */ @@ -1691,9 +1677,9 @@ TCPSTAT_INC(tcps_pawsdrop); *ret_val = 0; if (tlen) { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, ret_val); } else { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); } return (1); } @@ -1707,7 +1693,7 @@ * TCB is still valid and locked. 
*/ static int -rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * ti_locked, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val) +rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val) { int32_t todrop; int32_t thflags; @@ -1779,7 +1765,7 @@ tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_rcvwinprobe); } else { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, ret_val); return (1); } } else @@ -4482,7 +4468,7 @@ static int rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, - int32_t * ti_locked, uint32_t tiwin, int32_t tlen, + uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val) { int32_t ourfinisacked = 0; @@ -4494,7 +4480,7 @@ rack = (struct tcp_rack *)tp->t_fb_ptr; if (SEQ_GT(th->th_ack, tp->snd_max)) { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, ret_val); return (1); } if (SEQ_GEQ(th->th_ack, tp->snd_una) || to->to_nsacks) { @@ -4642,7 +4628,7 @@ */ *ret_val = 1; tp = tcp_close(tp); - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_UNLIMITED, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, tlen); return (1); } } @@ -4660,7 +4646,7 @@ static int rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { /* * Update window information. 
Don't look at window if no ACK: TAC's @@ -4883,28 +4869,16 @@ case TCPS_FIN_WAIT_2: rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - KASSERT(*ti_locked == TI_RLOCKED, ("%s: dodata " - "TCP_FIN_WAIT_2 ti_locked: %d", __func__, - *ti_locked)); tcp_twstart(tp); - *ti_locked = TI_UNLOCKED; - INP_INFO_RUNLOCK(&V_tcbinfo); return (1); } } - if (*ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - *ti_locked = TI_UNLOCKED; - } /* * Return any desired output. */ if ((tp->t_flags & TF_ACKNOW) || (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) { rack->r_wanted_output++; } - KASSERT(*ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", - __func__, *ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); return (0); } @@ -4917,7 +4891,7 @@ static int rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t nxt_pkt) + uint32_t tiwin, int32_t nxt_pkt) { int32_t nsegs; int32_t newsize = 0; /* automatic sockbuf scaling */ @@ -4969,10 +4943,6 @@ * This is a pure, in-sequence data packet with nothing on the * reassembly queue and we have enough buffer space to take it. */ - if (*ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - *ti_locked = TI_UNLOCKED; - } nsegs = max(1, m->m_pkthdr.lro_nsegs); @@ -5041,7 +5011,7 @@ static int rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t nxt_pkt, uint32_t cts) + uint32_t tiwin, int32_t nxt_pkt, uint32_t cts) { int32_t acked; int32_t nsegs; @@ -5117,10 +5087,6 @@ /* * This is a pure ack for outstanding data. 
*/ - if (*ti_locked == TI_RLOCKED) { - INP_INFO_RUNLOCK(&V_tcbinfo); - *ti_locked = TI_UNLOCKED; - } TCPSTAT_INC(tcps_predack); /* @@ -5199,7 +5165,7 @@ static int rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t todrop; @@ -5220,22 +5186,22 @@ if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { TCP_PROBE5(connect__refused, NULL, tp, mtod(m, const char *), tp, th); tp = tcp_drop(tp, ECONNREFUSED); - rack_do_drop(m, tp, ti_locked); + rack_do_drop(m, tp); return (1); } if (thflags & TH_RST) { - rack_do_drop(m, tp, ti_locked); + rack_do_drop(m, tp); return (1); } if (!(thflags & TH_SYN)) { - rack_do_drop(m, tp, ti_locked); + rack_do_drop(m, tp); return (1); } tp->irs = th->th_seq; @@ -5323,8 +5289,6 @@ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); tcp_state_change(tp, TCPS_SYN_RECEIVED); } - KASSERT(*ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " - "ti_locked %d", __func__, *ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -5349,7 +5313,7 @@ * of step 5, ack processing. Otherwise, goto step 6. 
*/ if (thflags & TH_ACK) { - if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) return (ret_val); /* We may have changed to FIN_WAIT_1 above */ if (tp->t_state == TCPS_FIN_WAIT_1) { @@ -5381,7 +5345,7 @@ } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } /* @@ -5392,7 +5356,7 @@ static int rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; @@ -5402,7 +5366,7 @@ if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } if (IS_FASTOPEN(tp->t_flags)) { @@ -5414,7 +5378,7 @@ * FIN, or a RST. 
*/ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -5424,22 +5388,22 @@ if ((rack->r_ctl.rc_hpts_flags & PACE_TMR_RXT) || (rack->r_ctl.rc_hpts_flags & PACE_TMR_TLP) || (rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK)) { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); return (0); } } else if (!(thflags & (TH_ACK | TH_FIN | TH_RST))) { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); return (0); } } if (thflags & TH_RST) - return (rack_process_rst(m, th, so, tp, ti_locked)); + return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { - rack_challenge_ack(m, th, tp, ti_locked, &ret_val); + rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* @@ -5448,7 +5412,7 @@ */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { - if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) + if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } /* @@ -5459,10 +5423,10 @@ * "LAND" DoS attack. 
*/ if (SEQ_LT(th->th_seq, tp->irs)) { - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } - if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { + if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -5497,7 +5461,7 @@ cc_conn_init(tp); } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } TCPSTAT_INC(tcps_connects); soisconnected(so); @@ -5546,7 +5510,7 @@ (void)tcp_reass(tp, (struct tcphdr *)0, 0, (struct mbuf *)0); tp->snd_wl1 = th->th_seq - 1; - if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (tp->t_state == TCPS_FIN_WAIT_1) { @@ -5577,7 +5541,7 @@ } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } /* @@ -5588,7 +5552,7 @@ static int rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; @@ -5615,12 +5579,12 @@ rack = (struct tcp_rack *)tp->t_fb_ptr; if (tlen == 0) { if (rack_fastack(m, th, so, tp, to, drop_hdrlen, tlen, - ti_locked, tiwin, nxt_pkt, rack->r_ctl.rc_rcvtime)) { + tiwin, nxt_pkt, rack->r_ctl.rc_rcvtime)) { return (0); } } else { if (rack_do_fastnewdata(m, th, so, tp, to, drop_hdrlen, tlen, - ti_locked, tiwin, nxt_pkt)) { + tiwin, nxt_pkt)) { return (0); } } @@ -5628,14 +5592,14 @@ rack_calc_rwin(so, tp); if (thflags & TH_RST) - return (rack_process_rst(m, th, so, tp, ti_locked)); + return 
(rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { - rack_challenge_ack(m, th, tp, ti_locked, &ret_val); + rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* @@ -5644,10 +5608,10 @@ */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { - if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) + if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { + if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -5680,32 +5644,32 @@ if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); return (0); } } /* * Ack processing. 
*/ - if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, NULL, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val)) { return (ret_val); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } /* State changes only happen in rack_process_data() */ return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } /* @@ -5716,19 +5680,19 @@ static int rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) - return (rack_process_rst(m, th, so, tp, ti_locked)); + return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. 
*/ if (thflags & TH_SYN) { - rack_challenge_ack(m, th, tp, ti_locked, &ret_val); + rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* @@ -5737,10 +5701,10 @@ */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { - if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) + if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { + if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -5772,48 +5736,46 @@ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); return (0); } } /* * Ack processing. 
*/ - if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, NULL, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val)) { return (ret_val); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } static int rack_check_data_after_close(struct mbuf *m, - struct tcpcb *tp, int32_t *ti_locked, int32_t *tlen, struct tcphdr *th, struct socket *so) + struct tcpcb *tp, int32_t *tlen, struct tcphdr *th, struct socket *so) { - struct tcp_rack *rack; + struct tcp_rack *rack; - KASSERT(*ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " - "CLOSE_WAIT && tlen ti_locked %d", __func__, *ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->rc_allow_data_af_clo == 0) { close_now: tp = tcp_close(tp); TCPSTAT_INC(tcps_rcvafterclose); - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_UNLIMITED, (*tlen)); + rack_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, (*tlen)); return (1); } if (sbavail(&so->so_snd) == 0) @@ -5834,7 +5796,7 @@ static int rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; @@ -5842,13 +5804,13 @@ rack_calc_rwin(so, tp); if (thflags & TH_RST) - return (rack_process_rst(m, th, so, tp, ti_locked)); + return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. 
*/ if (thflags & TH_SYN) { - rack_challenge_ack(m, th, tp, ti_locked, &ret_val); + rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* @@ -5857,10 +5819,10 @@ */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { - if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) + if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { + if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -5868,7 +5830,7 @@ * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { - if (rack_check_data_after_close(m, tp, ti_locked, &tlen, th, so)) + if (rack_check_data_after_close(m, tp, &tlen, th, so)) return (1); } /* @@ -5900,19 +5862,19 @@ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); return (0); } } /* * Ack processing. 
*/ - if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { @@ -5937,12 +5899,12 @@ if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } /* @@ -5953,7 +5915,7 @@ static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; @@ -5961,13 +5923,13 @@ rack_calc_rwin(so, tp); if (thflags & TH_RST) - return (rack_process_rst(m, th, so, tp, ti_locked)); + return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { - rack_challenge_ack(m, th, tp, ti_locked, &ret_val); + rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* @@ -5976,10 +5938,10 @@ */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { - if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) + if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { + if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -5987,7 +5949,7 @@ * are gone, then RST the other end. 
*/ if ((so->so_state & SS_NOFDREF) && tlen) { - if (rack_check_data_after_close(m, tp, ti_locked, &tlen, th, so)) + if (rack_check_data_after_close(m, tp, &tlen, th, so)) return (1); } /* @@ -6019,38 +5981,36 @@ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); return (0); } } /* * Ack processing. */ - if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); - INP_INFO_RUNLOCK(&V_tcbinfo); - *ti_locked = TI_UNLOCKED; m_freem(m); return (1); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } /* @@ -6061,7 +6021,7 @@ static int rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; @@ -6069,13 +6029,13 @@ rack_calc_rwin(so, tp); if (thflags & TH_RST) - return (rack_process_rst(m, th, so, tp, ti_locked)); + return (rack_process_rst(m, th, so, tp)); /* 
* RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { - rack_challenge_ack(m, th, tp, ti_locked, &ret_val); + rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* @@ -6084,10 +6044,10 @@ */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { - if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) + if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { + if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -6095,7 +6055,7 @@ * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { - if (rack_check_data_after_close(m, tp, ti_locked, &tlen, th, so)) + if (rack_check_data_after_close(m, tp, &tlen, th, so)) return (1); } /* @@ -6127,36 +6087,37 @@ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); return (0); } } /* * case TCPS_LAST_ACK: Ack processing. 
*/ - if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); - rack_do_drop(m, tp, ti_locked); + rack_do_drop(m, tp); return (1); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } @@ -6168,7 +6129,7 @@ static int rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, - int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) + uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; @@ -6177,13 +6138,13 @@ /* Reset receive buffer auto scaling when not in bulk receive mode. */ if (thflags & TH_RST) - return (rack_process_rst(m, th, so, tp, ti_locked)); + return (rack_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. 
*/ if (thflags & TH_SYN) { - rack_challenge_ack(m, th, tp, ti_locked, &ret_val); + rack_challenge_ack(m, th, tp, &ret_val); return (ret_val); } /* @@ -6192,10 +6153,10 @@ */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { - if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) + if (rack_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { + if (rack_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -6204,7 +6165,7 @@ */ if ((so->so_state & SS_NOFDREF) && tlen) { - if (rack_check_data_after_close(m, tp, ti_locked, &tlen, th, so)) + if (rack_check_data_after_close(m, tp, &tlen, th, so)) return (1); } /* @@ -6236,30 +6197,30 @@ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { - rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); + rack_do_dropafterack(m, tp, th, thflags, tlen, &ret_val); return (ret_val); } else { - rack_do_drop(m, NULL, ti_locked); + rack_do_drop(m, NULL); return (0); } } /* * Ack processing. 
*/ - if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); - rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); + rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, - ti_locked, tiwin, thflags, nxt_pkt)); + tiwin, thflags, nxt_pkt)); } @@ -6533,7 +6494,7 @@ static void rack_hpts_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, - int32_t ti_locked, int32_t nxt_pkt, struct timeval *tv) + int32_t nxt_pkt, struct timeval *tv) { int32_t thflags, retval, did_out = 0; int32_t way_out = 0; @@ -6558,19 +6519,7 @@ */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { - KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " - "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } else { -#ifdef INVARIANTS - if (ti_locked == TI_RLOCKED) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - } else { - KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " - "ti_locked: %d", __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); - } -#endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", @@ -6717,7 +6666,7 @@ if ((tp->t_flags & TF_SACK_PERMIT) == 0) { tcp_switch_back_to_default(tp); (*tp->t_fb->tfb_tcp_do_segment) (m, th, so, tp, drop_hdrlen, - tlen, iptos, ti_locked); + tlen, iptos); return; } /* Set the flag */ @@ -6740,7 +6689,7 @@ rack_clear_rate_sample(rack); retval = (*rack->r_substate) (m, th, so, tp, &to, drop_hdrlen, - tlen, &ti_locked, tiwin, thflags, nxt_pkt); + tlen, tiwin, thflags, nxt_pkt); #ifdef INVARIANTS if ((retval 
== 0) && (tp->t_inpcb == NULL)) { @@ -6748,11 +6697,6 @@ retval, tp, prev_state); } #endif - if (ti_locked != TI_UNLOCKED) { - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - INP_INFO_RUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; - } if (retval == 0) { /* * If retval is 1 the tcb is unlocked and most likely the tp @@ -6824,14 +6768,13 @@ void rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, - struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, - int32_t ti_locked) + struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos) { struct timeval tv; #ifdef RSS struct tcp_function_block *tfb; struct tcp_rack *rack; - struct inpcb *inp; + struct epoch_tracker et; rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->r_state == 0) { @@ -6839,21 +6782,19 @@ * Initial input (ACK to SYN-ACK etc)lets go ahead and get * it processed */ - INP_INFO_RLOCK(); - ti_locked = TI_RLOCKED; + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tcp_get_usecs(&tv); rack_hpts_do_segment(m, th, so, tp, drop_hdrlen, - tlen, iptos, ti_locked, 0, &tv); + tlen, iptos, 0, &tv); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return; } - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - tcp_queue_to_input(tp, m, th, tlen, drop_hdrlen, iptos, (uint8_t) ti_locked); + tcp_queue_to_input(tp, m, th, tlen, drop_hdrlen, iptos); INP_WUNLOCK(tp->t_inpcb); #else tcp_get_usecs(&tv); rack_hpts_do_segment(m, th, so, tp, drop_hdrlen, - tlen, iptos, ti_locked, 0, &tv); + tlen, iptos, 0, &tv); #endif } Index: sys/netinet/tcp_stacks/tcp_rack.h =================================================================== --- sys/netinet/tcp_stacks/tcp_rack.h +++ sys/netinet/tcp_stacks/tcp_rack.h @@ -281,7 +281,7 @@ TAILQ_ENTRY(tcp_rack) r_hpts; /* hptsi queue next Lock(b) */ int32_t(*r_substate) (struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, struct tcpopt *, - int32_t, int32_t, int32_t *, uint32_t, int, int); /* Lock(a) */ + int32_t, int32_t, uint32_t, int, int); /* Lock(a) */ struct 
tcpcb *rc_tp; /* The tcpcb Lock(a) */ struct inpcb *rc_inp; /* The inpcb Lock(a) */ uint32_t rc_free_cnt; /* Number of free entries on the rc_free list Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -1914,10 +1914,11 @@ { struct inpcb *inp; struct tcpcb *tp; + struct epoch_tracker et; tp = (struct tcpcb *)ptp; CURVNET_SET(tp->t_vnet); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); @@ -1937,13 +1938,13 @@ tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); return; } } INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); CURVNET_RESTORE(); } @@ -2107,6 +2108,7 @@ struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; + struct epoch_tracker et; /* * The process of preparing the TCB list is too time-consuming and @@ -2193,14 +2195,14 @@ } else INP_RUNLOCK(inp); } - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); if (!error) { /* @@ -2339,6 +2341,7 @@ struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct icmp *icp; struct in_conninfo inc; + struct epoch_tracker et; tcp_seq icmp_tcp_seq; int mtu; @@ -2370,7 +2373,7 @@ icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { @@ -2435,7 +2438,7 @@ out: if (inp 
!= NULL) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); } #endif /* INET */ @@ -2453,6 +2456,7 @@ struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; struct in_conninfo inc; + struct epoch_tracker et; struct tcp_ports { uint16_t th_sport; uint16_t th_dport; @@ -2514,7 +2518,7 @@ } bzero(&t_ports, sizeof(struct tcp_ports)); m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport, &ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { @@ -2586,7 +2590,7 @@ out: if (inp != NULL) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); } #endif /* INET6 */ @@ -2925,6 +2929,7 @@ struct tcpcb *tp; struct tcptw *tw; struct sockaddr_in *fin, *lin; + struct epoch_tracker et; #ifdef INET6 struct sockaddr_in6 *fin6, *lin6; #endif @@ -2984,7 +2989,7 @@ default: return (EINVAL); } - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: @@ -3023,7 +3028,7 @@ INP_WUNLOCK(inp); } else error = ESRCH; - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (error); } Index: sys/netinet/tcp_timer.h =================================================================== --- sys/netinet/tcp_timer.h +++ sys/netinet/tcp_timer.h @@ -214,7 +214,6 @@ VNET_DECLARE(int, tcp_v6pmtud_blackhole_mss); #define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss) -int tcp_inpinfo_lock_add(struct inpcb *inp); void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp); void tcp_timer_init(void); Index: sys/netinet/tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c +++ sys/netinet/tcp_timer.c @@ -274,43 +274,9 @@ CURVNET_RESTORE(); } -/* - * When a timer wants to 
remove a TCB it must - * hold the INP_INFO_RLOCK(). The timer function - * should only have grabbed the INP_WLOCK() when - * it entered. To safely switch to holding both the - * INP_INFO_RLOCK() and the INP_WLOCK() we must first - * grab a reference on the inp, which will hold the inp - * so that it can't be removed. We then unlock the INP_WLOCK(), - * and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK() - * we proceed again to get the INP_WLOCK() (this preserves proper - * lock order). After acquiring the INP_WLOCK we must check if someone - * else deleted the pcb i.e. the inp_flags check. - * If so we return 1 otherwise we return 0. - * - * No matter what the tcp_inpinfo_lock_add() function - * returns the caller must afterwards call tcp_inpinfo_lock_del() - * to drop the locks and reference properly. - */ - -int -tcp_inpinfo_lock_add(struct inpcb *inp) -{ - in_pcbref(inp); - INP_WUNLOCK(inp); - INP_INFO_RLOCK(&V_tcbinfo); - INP_WLOCK(inp); - if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { - return(1); - } - return(0); - -} - void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp) { - INP_INFO_RUNLOCK(&V_tcbinfo); if (inp && (tp == NULL)) { /* * If tcp_close/drop() gets called and tp @@ -331,6 +297,7 @@ { struct tcpcb *tp = xtp; struct inpcb *inp; + struct epoch_tracker et; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; @@ -377,11 +344,13 @@ tp->t_inpcb && tp->t_inpcb->inp_socket && (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { TCPSTAT_INC(tcps_finwait2_drops); - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_close(tp); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } else { @@ -389,15 +358,17 @@ callout_reset(&tp->t_timers->tt_2msl, TP_KEEPINTVL(tp), tcp_timer_2msl, tp); } else { - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT 
| INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_close(tp); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } - } + } #ifdef TCPDEBUG if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) @@ -418,6 +389,7 @@ struct tcpcb *tp = xtp; struct tcptemp *t_template; struct inpcb *inp; + struct epoch_tracker et; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; @@ -511,11 +483,11 @@ dropit: TCPSTAT_INC(tcps_keepdrops); - - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_drop(tp, ETIMEDOUT); #ifdef TCPDEBUG @@ -524,8 +496,9 @@ PRU_SLOWTIMO); #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); -out: + out: CURVNET_RESTORE(); } @@ -534,6 +507,7 @@ { struct tcpcb *tp = xtp; struct inpcb *inp; + struct epoch_tracker et; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; @@ -573,11 +547,13 @@ (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { TCPSTAT_INC(tcps_persistdrop); - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_drop(tp, ETIMEDOUT); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } @@ -588,11 +564,13 @@ if (tp->t_state > TCPS_CLOSE_WAIT && (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { TCPSTAT_INC(tcps_persistdrop); - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_drop(tp, ETIMEDOUT); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } @@ -618,6 +596,7 @@ CURVNET_SET(tp->t_vnet); int rexmt; struct 
inpcb *inp; + struct epoch_tracker et; #ifdef TCPDEBUG int ostate; @@ -654,11 +633,13 @@ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; TCPSTAT_INC(tcps_timeoutdrop); - if (tcp_inpinfo_lock_add(inp)) { + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { tcp_inpinfo_lock_del(inp, tp); goto out; } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); tp = tcp_drop(tp, ETIMEDOUT); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); tcp_inpinfo_lock_del(inp, tp); goto out; } Index: sys/netinet/tcp_timewait.c =================================================================== --- sys/netinet/tcp_timewait.c +++ sys/netinet/tcp_timewait.c @@ -206,11 +206,12 @@ tcp_tw_destroy(void) { struct tcptw *tw; + struct epoch_tracker et; - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL) tcp_twclose(tw, 0); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); TW_LOCK_DESTROY(V_tw_lock); uma_zdestroy(V_tcptw_zone); @@ -674,6 +675,7 @@ { struct tcptw *tw; struct inpcb *inp; + struct epoch_tracker et; #ifdef INVARIANTS if (reuse) { @@ -707,12 +709,12 @@ in_pcbref(inp); TW_RUNLOCK(V_tw_lock); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); tw = intotw(inp); if (in_pcbrele_wlocked(inp)) { if (__predict_true(tw == NULL)) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); continue; } else { /* This should not happen as in TIMEWAIT @@ -731,7 +733,7 @@ "|| inp last reference) && tw != " "NULL", __func__); #endif - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); break; } } @@ -739,12 +741,12 @@ if (tw == NULL) { /* tcp_twclose() has already been called */ INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); continue; } tcp_twclose(tw, reuse); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); if (reuse) return tw; } Index: sys/netinet/tcp_usrreq.c 
=================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -276,11 +276,12 @@ { struct inpcb *inp; int rlock = 0; + struct epoch_tracker et; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); if (!INP_INFO_WLOCKED(&V_tcbinfo)) { - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); rlock = 1; } INP_WLOCK(inp); @@ -288,7 +289,7 @@ ("tcp_usr_detach: inp_socket == NULL")); tcp_detach(so, inp); if (rlock) - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); } #ifdef INET @@ -668,10 +669,11 @@ { struct inpcb *inp; struct tcpcb *tp = NULL; + struct epoch_tracker et; int error = 0; TCPDEBUG0; - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); INP_WLOCK(inp); @@ -688,7 +690,7 @@ TCPDEBUG2(PRU_DISCONNECT); TCP_PROBE2(debug__user, tp, PRU_DISCONNECT); INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (error); } @@ -747,6 +749,7 @@ struct tcpcb *tp = NULL; struct in_addr addr; struct in6_addr addr6; + struct epoch_tracker et; in_port_t port = 0; int v4 = 0; TCPDEBUG0; @@ -756,7 +759,7 @@ inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNABORTED; @@ -783,7 +786,7 @@ TCPDEBUG2(PRU_ACCEPT); TCP_PROBE2(debug__user, tp, PRU_ACCEPT); INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); if (error == 0) { if (v4) *nam = in6_v4mapsin6_sockaddr(port, &addr); @@ -803,9 +806,10 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; + struct epoch_tracker et; TCPDEBUG0; - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); inp = sotoinpcb(so); KASSERT(inp != NULL, ("inp == NULL")); INP_WLOCK(inp); 
@@ -824,7 +828,7 @@ TCPDEBUG2(PRU_SHUTDOWN); TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN); INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (error); } @@ -887,6 +891,7 @@ int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; + struct epoch_tracker net_et; #ifdef INET6 int isipv6; #endif @@ -897,7 +902,7 @@ * this call. */ if (flags & PRUS_EOF) - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, net_et); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); INP_WLOCK(inp); @@ -1040,7 +1045,7 @@ ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); INP_WUNLOCK(inp); if (flags & PRUS_EOF) - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, net_et); return (error); } @@ -1079,12 +1084,13 @@ { struct inpcb *inp; struct tcpcb *tp = NULL; + struct epoch_tracker et; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_abort: inp_socket == NULL")); @@ -1110,7 +1116,7 @@ } INP_WUNLOCK(inp); dropped: - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); } /* @@ -1121,12 +1127,13 @@ { struct inpcb *inp; struct tcpcb *tp = NULL; + struct epoch_tracker et; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_close: inp_socket == NULL")); @@ -1150,7 +1157,7 @@ inp->inp_flags |= INP_SOCKREF; } INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); } /* @@ -2043,6 +2050,7 @@ { struct tcpcb *tp; struct inpcb *inp; + struct epoch_tracker et; int error; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { @@ -2052,10 +2060,10 @@ } so->so_rcv.sb_flags |= SB_AUTOSIZE; so->so_snd.sb_flags |= SB_AUTOSIZE; - 
INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_RLOCK_ET(&V_tcbinfo, et); error = in_pcballoc(so, &V_tcbinfo); if (error) { - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (error); } inp = sotoinpcb(so); @@ -2073,12 +2081,12 @@ if (tp == NULL) { in_pcbdetach(inp); in_pcbfree(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); return (ENOBUFS); } tp->t_state = TCPS_CLOSED; INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); TCPSTATES_INC(TCPS_CLOSED); return (0); } Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -264,12 +264,11 @@ int (*tfb_tcp_output_wtime)(struct tcpcb *, const struct timeval *); void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, - int, int, uint8_t, - int); + int, int, uint8_t); void (*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, - int, int, struct timeval *); + int, struct timeval *); int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp); /* Optional memory allocation/free routine */ @@ -862,8 +861,7 @@ int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int); void tcp_do_segment(struct mbuf *, struct tcphdr *, - struct socket *, struct tcpcb *, int, int, uint8_t, - int); + struct socket *, struct tcpcb *, int, int, uint8_t); int register_tcp_functions(struct tcp_function_block *blk, int wait); int register_tcp_functions_as_names(struct tcp_function_block *blk, Index: sys/netinet/udp_usrreq.c =================================================================== --- sys/netinet/udp_usrreq.c +++ sys/netinet/udp_usrreq.c @@ -399,6 +399,7 @@ struct sockaddr_in udp_in[2]; struct mbuf *m; struct m_tag *fwd_tag; + struct epoch_tracker et; int cscov_partial, iphlen; m = *mp; @@ -529,7 
+530,7 @@ struct inpcbhead *pcblist; struct ip_moptions *imo; - INP_INFO_RLOCK(pcbinfo); + INP_INFO_RLOCK_ET(pcbinfo, et); pcblist = udp_get_pcblist(proto); last = NULL; CK_LIST_FOREACH(inp, pcblist, inp_list) { @@ -625,14 +626,14 @@ UDPSTAT_INC(udps_noportbcast); if (inp) INP_RUNLOCK(inp); - INP_INFO_RUNLOCK(pcbinfo); + INP_INFO_RUNLOCK_ET(pcbinfo, et); goto badunlocked; } UDP_PROBE(receive, NULL, last, ip, last, uh); if (udp_append(last, ip, m, iphlen, udp_in) == 0) INP_RUNLOCK(last); inp_lost: - INP_INFO_RUNLOCK(pcbinfo); + INP_INFO_RUNLOCK_ET(pcbinfo, et); return (IPPROTO_DONE); } @@ -839,6 +840,7 @@ struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; + struct epoch_tracker et; /* * The process of preparing the PCB list is too time-consuming and @@ -857,10 +859,10 @@ /* * OK, now we're committed to doing something. */ - INP_INFO_RLOCK(&V_udbinfo); + INP_INFO_RLOCK_ET(&V_udbinfo, et); gencnt = V_udbinfo.ipi_gencnt; n = V_udbinfo.ipi_count; - INP_INFO_RUNLOCK(&V_udbinfo); + INP_INFO_RUNLOCK_ET(&V_udbinfo, et); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + n * sizeof(struct xinpcb)); @@ -879,7 +881,7 @@ if (inp_list == NULL) return (ENOMEM); - INP_INFO_RLOCK(&V_udbinfo); + INP_INFO_RLOCK_ET(&V_udbinfo, et); for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); @@ -890,7 +892,7 @@ } INP_WUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_udbinfo); + INP_INFO_RUNLOCK_ET(&V_udbinfo, et); n = i; error = 0; @@ -922,11 +924,11 @@ * that something happened while we were processing this * request, and it might be necessary to retry. 
*/ - INP_INFO_RLOCK(&V_udbinfo); + INP_INFO_RLOCK_ET(&V_udbinfo, et); xig.xig_gen = V_udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_udbinfo.ipi_count; - INP_INFO_RUNLOCK(&V_udbinfo); + INP_INFO_RUNLOCK_ET(&V_udbinfo, et); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); @@ -1108,6 +1110,7 @@ struct cmsghdr *cm; struct inpcbinfo *pcbinfo; struct sockaddr_in *sin, src; + struct epoch_tracker et; int cscov_partial = 0; int error = 0; int ipflags; @@ -1264,7 +1267,7 @@ (inp->inp_laddr.s_addr == INADDR_ANY) || (inp->inp_lport == 0))) || (src.sin_family == AF_INET)) { - INP_HASH_RLOCK(pcbinfo); + INP_HASH_RLOCK_ET(pcbinfo, et); unlock_udbinfo = UH_RLOCKED; } else unlock_udbinfo = UH_UNLOCKED; @@ -1520,7 +1523,7 @@ if (unlock_udbinfo == UH_WLOCKED) INP_HASH_WUNLOCK(pcbinfo); else if (unlock_udbinfo == UH_RLOCKED) - INP_HASH_RUNLOCK(pcbinfo); + INP_HASH_RUNLOCK_ET(pcbinfo, et); UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u); error = ip_output(m, inp->inp_options, (unlock_inp == UH_WLOCKED ? 
&inp->inp_route : NULL), ipflags, @@ -1540,7 +1543,7 @@ } else if (unlock_udbinfo == UH_RLOCKED) { KASSERT(unlock_inp == UH_RLOCKED, ("%s: shared udbinfo lock, excl inp lock", __func__)); - INP_HASH_RUNLOCK(pcbinfo); + INP_HASH_RUNLOCK_ET(pcbinfo, et); INP_RUNLOCK(inp); } else if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); Index: sys/netinet6/icmp6.c =================================================================== --- sys/netinet6/icmp6.c +++ sys/netinet6/icmp6.c @@ -1896,6 +1896,7 @@ struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; + struct epoch_tracker et; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST @@ -1922,7 +1923,7 @@ return (IPPROTO_DONE); } - INP_INFO_RLOCK(&V_ripcbinfo); + INP_INFO_RLOCK_ET(&V_ripcbinfo, et); CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; @@ -2000,7 +2001,7 @@ } last = in6p; } - INP_INFO_RUNLOCK(&V_ripcbinfo); + INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, m, &opts); Index: sys/netinet6/in6_gif.c =================================================================== --- sys/netinet6/in6_gif.c +++ sys/netinet6/in6_gif.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -241,7 +242,7 @@ int len; /* prepend new IP header */ - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); len = sizeof(struct ip6_hdr); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) @@ -283,7 +284,7 @@ struct ip6_hdr *ip6; uint8_t ecn; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); if (sc == NULL) { m_freem(m); IP6STAT_INC(ip6s_nogif); @@ -312,7 +313,7 @@ if (V_ipv6_hashtbl == NULL) return (0); - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); /* * NOTE: it is safe to iterate without any locking here, because softc * can be reclaimed only when we are not within net_epoch_preempt Index: sys/netinet6/ip6_gre.c 
=================================================================== --- sys/netinet6/ip6_gre.c +++ sys/netinet6/ip6_gre.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -110,7 +111,7 @@ if (V_ipv6_hashtbl == NULL) return (0); - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); ip6 = mtod(m, const struct ip6_hdr *); CK_LIST_FOREACH(sc, &GRE_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) { /* Index: sys/netinet6/raw_ip6.c =================================================================== --- sys/netinet6/raw_ip6.c +++ sys/netinet6/raw_ip6.c @@ -165,6 +165,7 @@ struct inpcb *last = NULL; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; + struct epoch_tracker et; RIP6STAT_INC(rip6s_ipackets); @@ -172,7 +173,7 @@ ifp = m->m_pkthdr.rcvif; - INP_INFO_RLOCK(&V_ripcbinfo); + INP_INFO_RLOCK_ET(&V_ripcbinfo, et); CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { /* XXX inp locking */ if ((in6p->inp_vflag & INP_IPV6) == 0) @@ -291,7 +292,7 @@ } last = in6p; } - INP_INFO_RUNLOCK(&V_ripcbinfo); + INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. 
Index: sys/netinet6/udp6_usrreq.c
===================================================================
--- sys/netinet6/udp6_usrreq.c
+++ sys/netinet6/udp6_usrreq.c
@@ -214,6 +214,7 @@
 	int off = *offp;
 	int cscov_partial;
 	int plen, ulen;
+	struct epoch_tracker et;
 	struct sockaddr_in6 fromsa[2];
 	struct m_tag *fwd_tag;
 	uint16_t uh_sum;
@@ -300,7 +301,7 @@
 		struct inpcbhead *pcblist;
 		struct ip6_moptions *imo;
 
-		INP_INFO_RLOCK(pcbinfo);
+		INP_INFO_RLOCK_ET(pcbinfo, et);
 		/*
 		 * In the event that laddr should be set to the link-local
 		 * address (this happens in RIPng), the multicast address
@@ -414,7 +415,7 @@
 			goto badheadlocked;
 		}
 		INP_RLOCK(last);
-		INP_INFO_RUNLOCK(pcbinfo);
+		INP_INFO_RUNLOCK_ET(pcbinfo, et);
 		UDP_PROBE(receive, NULL, last, ip6, last, uh);
 		if (udp6_append(last, m, off, fromsa) == 0)
 			INP_RUNLOCK(last);
@@ -499,7 +500,7 @@
 	return (IPPROTO_DONE);
 
 badheadlocked:
-	INP_INFO_RUNLOCK(pcbinfo);
+	INP_INFO_RUNLOCK_ET(pcbinfo, et);
 badunlocked:
 	if (m)
 		m_freem(m);
Index: sys/sys/assym.h
===================================================================
--- sys/sys/assym.h
+++ sys/sys/assym.h
@@ -43,4 +43,12 @@
 char name ## w2[((ASSYM_ABS(value) & 0xFFFF00000000ULL) >> 32) + ASSYM_BIAS]; \
 char name ## w3[((ASSYM_ABS(value) & 0xFFFF000000000000ULL) >> 48) + ASSYM_BIAS]
 
+/* ASSYM() plus two one-element char arrays whose names embed datatype and parenttype. */
+#define OFFSYM(name, value, datatype, parenttype) \
+ASSYM(name, value); \
+char name ## _datatype_ ## datatype [1]; \
+char name ## _parenttype_ ## parenttype [1]
+
+
+
 #endif /* !_SYS_ASSYM_H_ */
Index: sys/sys/epoch.h
===================================================================
--- sys/sys/epoch.h
+++ sys/sys/epoch.h
@@ -31,10 +31,9 @@
 #define _SYS_EPOCH_H_
 #ifdef _KERNEL
 #include
-#include
+#include
 #endif
 
-struct thread;
 struct epoch;
 typedef struct epoch *epoch_t;
 
@@ -46,48 +45,53 @@
 
 struct epoch_context {
 	void *data[2];
-} __aligned(sizeof(void *));
+} __aligned(sizeof(void *));
 
 typedef struct epoch_context *epoch_context_t;
 
+
+/*
+ * Opaque storage passed to epoch_enter_preempt() and epoch_exit_preempt();
+ * its integer tail is larger when INVARIANTS is defined.
+ */
+struct epoch_tracker {
+	void *datap[3];
+#ifdef INVARIANTS
+	int datai[5];
+#else
+	int datai[1];
+#endif
+} __aligned(sizeof(void *));
+
+typedef struct epoch_tracker *epoch_tracker_t;
+
 epoch_t epoch_alloc(int flags);
 void epoch_free(epoch_t epoch);
-void epoch_enter(epoch_t epoch);
-void epoch_enter_preempt_internal(epoch_t epoch, struct thread *td);
-void epoch_exit(epoch_t epoch);
-void epoch_exit_preempt_internal(epoch_t epoch, struct thread *td);
 void epoch_wait(epoch_t epoch);
 void epoch_wait_preempt(epoch_t epoch);
 void epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t));
-int in_epoch(void);
-
+int in_epoch(epoch_t epoch);
+int in_epoch_verbose(epoch_t epoch, int dump_onfail);
 #ifdef _KERNEL
 DPCPU_DECLARE(int, epoch_cb_count);
 DPCPU_DECLARE(struct grouptask, epoch_cb_task);
+#define EPOCH_MAGIC0 0xFADECAFEF00DD00D
+#define EPOCH_MAGIC1 0xBADDBABEDEEDFEED
 
-static __inline void
-epoch_enter_preempt(epoch_t epoch)
-{
-	struct thread *td;
-	int nesting __unused;
+void epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et);
+void epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et);
+void epoch_enter_KBI(epoch_t epoch);
+void epoch_exit_KBI(epoch_t epoch);
 
-	td = curthread;
-	nesting = td->td_epochnest++;
-#ifndef INVARIANTS
-	if (nesting == 0)
-#endif
-		epoch_enter_preempt_internal(epoch, td);
-}
 
-static __inline void
-epoch_exit_preempt(epoch_t epoch)
-{
-	struct thread *td;
+#if defined(KLD_MODULE) && !defined(KLD_BASE)
+#define epoch_enter_preempt(e, t) epoch_enter_preempt_KBI((e), (t))
+#define epoch_exit_preempt(e, t) epoch_exit_preempt_KBI((e), (t))
+#define epoch_enter(e) epoch_enter_KBI((e))
+#define epoch_exit(e) epoch_exit_KBI((e))
+#else
+#include
+#endif /* KLD_MODULE */
 
-	td = curthread;
-	MPASS(td->td_epochnest);
-	if (td->td_epochnest-- == 1)
-		epoch_exit_preempt_internal(epoch, td);
-}
-#endif /* _KERNEL */
+#endif /* _KERNEL */
 #endif
Index: sys/sys/epoch_private.h
===================================================================
--- /dev/null
+++ sys/sys/epoch_private.h
@@ -0,0 +1,249 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018, Matthew Macy
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_EPOCH_PRIVATE_H_
+#define _SYS_EPOCH_PRIVATE_H_
+#ifndef _KERNEL
+#error "no user serviceable parts"
+#else
+#include
+#include "offset.inc"
+
+#include
+
+extern void epoch_adjust_prio(struct thread *td, u_char prio);
+#ifndef _SYS_SYSTM_H_
+extern void critical_exit_preempt(void);
+#endif
+
+/*
+ * Alignment applied to the epoch structures below.  The expansion is
+ * parenthesized so it stays a single operand wherever it is used.
+ */
+#ifdef __amd64__
+#define EPOCH_ALIGN (CACHE_LINE_SIZE * 2)
+#else
+#define EPOCH_ALIGN CACHE_LINE_SIZE
+#endif
+
+/*
+ * Standalone (_sa) routines for thread state manipulation
+ */
+static __inline void
+critical_enter_sa(void *tdarg)
+{
+	struct thread_global *td;
+
+	td = tdarg;
+	td->td_critnest++;
+	__compiler_membar();
+}
+
+static __inline void
+critical_exit_sa(void *tdarg)
+{
+	struct thread_global *td;
+
+	td = tdarg;
+	MPASS(td->td_critnest > 0);
+	__compiler_membar();
+	td->td_critnest--;
+	__compiler_membar();
+	if (__predict_false(td->td_owepreempt != 0))
+		critical_exit_preempt();
+}
+
+static __inline void
+sched_pin_sa(void *tdarg)
+{
+	struct thread_global *td;
+
+	td = tdarg;
+	td->td_pinned++;
+	__compiler_membar();
+}
+
+static __inline void
+sched_unpin_sa(void *tdarg)
+{
+	struct thread_global *td;
+
+	td = tdarg;
+	MPASS(td->td_pinned > 0);
+	__compiler_membar();
+	td->td_pinned--;
+	__compiler_membar();
+}
+
+/*
+ * State for one preemptible section.  epoch_enter_preempt() and
+ * epoch_exit_preempt() reach it by casting the caller's epoch_tracker_t.
+ */
+typedef struct epoch_thread {
+#ifdef INVARIANTS
+	uint64_t et_magic_pre;
+#endif
+	TAILQ_ENTRY(epoch_thread) et_link;	/* Epoch queue. */
+	struct thread *et_td;		/* pointer to thread in section */
+	ck_epoch_section_t et_section;	/* epoch section object */
+#ifdef INVARIANTS
+	uint64_t et_magic_post;
+#endif
+} *epoch_thread_t;
+TAILQ_HEAD (epoch_tdlist, epoch_thread);
+
+typedef struct epoch_record {
+	ck_epoch_record_t er_record;
+	volatile struct epoch_tdlist er_tdlist;
+	volatile uint32_t er_gen;
+	uint32_t er_cpuid;
+} __aligned(EPOCH_ALIGN) *epoch_record_t;
+
+struct epoch {
+	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
+	struct epoch_record *e_pcpu_dom[MAXMEMDOM] __aligned(EPOCH_ALIGN);
+	int e_idx;
+	int e_flags;
+	struct epoch_record *e_pcpu[0];
+};
+
+/* Return from the enclosing void function when (epoch) is NULL. */
+#define INIT_CHECK(epoch)				\
+	do {						\
+		if (__predict_false((epoch) == NULL))	\
+			return;				\
+	} while (0)
+
+/*
+ * Enter a preemptible section: bump td_epochnest and td_pinned, save
+ * td_priority, queue the tracker on the current CPU's record and begin the
+ * ck_epoch section.  td_critnest is raised only for the duration of the call.
+ */
+static __inline void
+epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
+{
+	struct epoch_record *er;
+	struct epoch_thread *etd;
+	struct thread_global *td;
+	MPASS(cold || epoch != NULL);
+	INIT_CHECK(epoch);
+	etd = (void *)et;
+#ifdef INVARIANTS
+	MPASS(epoch->e_flags & EPOCH_PREEMPT);
+	etd->et_magic_pre = EPOCH_MAGIC0;
+	etd->et_magic_post = EPOCH_MAGIC1;
+#endif
+	td = (struct thread_global *)curthread;
+	etd->et_td = (void*)td;
+	td->td_epochnest++;
+	critical_enter_sa(td);
+	sched_pin_sa(td);
+
+	td->td_pre_epoch_prio = td->td_priority;
+	er = epoch->e_pcpu[curcpu];
+	TAILQ_INSERT_TAIL(&er->er_tdlist, etd, et_link);
+	ck_epoch_begin(&er->er_record, (ck_epoch_section_t *)&etd->et_section);
+	critical_exit_sa(td);
+}
+
+/*
+ * Enter a plain (non-_preempt) section.  td_critnest is raised here and is
+ * not lowered again until epoch_exit().
+ */
+static __inline void
+epoch_enter(epoch_t epoch)
+{
+	ck_epoch_record_t *record;
+	struct thread_global *td;
+	MPASS(cold || epoch != NULL);
+	INIT_CHECK(epoch);
+	td = (struct thread_global *)curthread;
+
+	td->td_epochnest++;
+	critical_enter_sa(td);
+	record = &epoch->e_pcpu[curcpu]->er_record;
+	ck_epoch_begin(record, NULL);
+}
+
+/*
+ * Leave a preemptible section: lower td_pinned and td_epochnest, end the
+ * ck_epoch section and unlink the tracker.  If td_priority differs from the
+ * value saved on entry, the saved value is passed to epoch_adjust_prio().
+ */
+static __inline void
+epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et)
+{
+	struct epoch_record *er;
+	struct epoch_thread *etd;
+	struct thread_global *td;
+
+	INIT_CHECK(epoch);
+	td = (struct thread_global *)curthread;
+	critical_enter_sa(td);
+	sched_unpin_sa(td);
+	MPASS(td->td_epochnest);
+	td->td_epochnest--;
+	er = epoch->e_pcpu[curcpu];
+	MPASS(epoch->e_flags & EPOCH_PREEMPT);
+	etd = (void *)et;
+#ifdef INVARIANTS
+	MPASS(etd != NULL);
+	MPASS(etd->et_td == (struct thread *)td);
+	MPASS(etd->et_magic_pre == EPOCH_MAGIC0);
+	MPASS(etd->et_magic_post == EPOCH_MAGIC1);
+	etd->et_magic_pre = 0;
+	etd->et_magic_post = 0;
+	etd->et_td = (void*)0xDEADBEEF;
+#endif
+	ck_epoch_end(&er->er_record,
+		(ck_epoch_section_t *)&etd->et_section);
+	TAILQ_REMOVE(&er->er_tdlist, etd, et_link);
+	er->er_gen++;
+	if (__predict_false(td->td_pre_epoch_prio != td->td_priority))
+		epoch_adjust_prio((struct thread *)td, td->td_pre_epoch_prio);
+	critical_exit_sa(td);
+}
+
+/* Leave a plain section and lower the td_critnest raised by epoch_enter(). */
+static __inline void
+epoch_exit(epoch_t epoch)
+{
+	ck_epoch_record_t *record;
+	struct thread_global *td;
+
+	INIT_CHECK(epoch);
+	td = (struct thread_global *)curthread;
+	MPASS(td->td_epochnest);
+	td->td_epochnest--;
+	record = &epoch->e_pcpu[curcpu]->er_record;
+	ck_epoch_end(record, NULL);
+	critical_exit_sa(td);
+}
+#endif /* _KERNEL */
+#endif /* _SYS_EPOCH_PRIVATE_H_ */
Index: sys/sys/lock.h
===================================================================
--- sys/sys/lock.h
+++ sys/sys/lock.h
@@ -127,7 +127,7 @@
  * calling conventions for this debugging code in modules so that modules can
  * work with both debug and non-debug kernels.
  */
-#if defined(KLD_MODULE) || defined(WITNESS) || defined(INVARIANTS) || \
+#if (defined(KLD_MODULE) && !defined(KLD_BASE)) || defined(WITNESS) || defined(INVARIANTS) || \
     defined(LOCK_PROFILING) || defined(KTR)
 #define LOCK_DEBUG 1
 #else
Index: sys/sys/mutex.h
===================================================================
--- sys/sys/mutex.h
+++ sys/sys/mutex.h
@@ -138,7 +138,7 @@
 void _thread_lock(struct thread *);
 #endif
 
-#if defined(LOCK_PROFILING) || defined(KLD_MODULE)
+#if defined(LOCK_PROFILING) || (defined(KLD_MODULE) && !defined(KLD_BASE))
 #define thread_lock(tdp) \
 	thread_lock_flags_((tdp), 0, __FILE__, __LINE__)
 #elif LOCK_DEBUG > 0
Index: sys/sys/pmckern.h
===================================================================
--- sys/sys/pmckern.h
+++ sys/sys/pmckern.h
@@ -201,11 +201,12 @@
 
 /* Hook invocation; for use within the kernel */
 #define PMC_CALL_HOOK(t, cmd, arg) \
-do { \
-	epoch_enter_preempt(global_epoch_preempt); \
+do { \
+	struct epoch_tracker _pmc_et; /* prefixed so it cannot shadow names used in (t), (cmd) or (arg) */ \
+	epoch_enter_preempt(global_epoch_preempt, &_pmc_et); \
 	if (pmc_hook != NULL) \
 		(pmc_hook)((t), (cmd), (arg)); \
-	epoch_exit_preempt(global_epoch_preempt); \
+	epoch_exit_preempt(global_epoch_preempt, &_pmc_et); \
 } while (0)
 
 /* Hook invocation that needs an exclusive lock */
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -74,19 +74,6 @@
 #include
 #endif
 
-
-/*
- * A section object may be passed to every begin-end pair to allow for
- * forward progress guarantees with-in prolonged active sections.
- *
- * We can't include ck_epoch.h so we define our own variant here and
- * then CTASSERT that it's the same size in subr_epoch.c
- */
-struct epoch_section {
-	unsigned int bucket;
-};
-typedef struct epoch_section epoch_section_t;
-
 /*
  * One structure allocated per session.
  *
@@ -373,8 +360,6 @@
 	int td_lastcpu;		/* (t) Last cpu we were on. */
 	int td_oncpu;		/* (t) Which cpu we are on. */
 	void *td_lkpi_task;	/* LinuxKPI task struct pointer */
-	TAILQ_ENTRY(thread) td_epochq;	/* (t) Epoch queue. */
-	epoch_section_t td_epoch_section;	/* (t) epoch section object */
 	int td_pmcpend;
 };
 
Index: sys/sys/systm.h
===================================================================
--- sys/sys/systm.h
+++ sys/sys/systm.h
@@ -216,6 +216,7 @@
 void cpu_rootconf(void);
 void critical_enter(void);
 void critical_exit(void);
+void critical_exit_preempt(void);
 void init_param1(void);
 void init_param2(long physpages);
 void init_static_kenv(char *, size_t);