diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h index 894e0444b710..a49c21576994 100644 --- a/sys/dev/cxgbe/common/common.h +++ b/sys/dev/cxgbe/common/common.h @@ -1,966 +1,1008 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #ifndef __CHELSIO_COMMON_H #define __CHELSIO_COMMON_H #include "t4_hw.h" enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ EC_LEN = 16, /* E/C length */ ID_LEN = 16, /* ID length */ PN_LEN = 16, /* Part Number length */ MD_LEN = 16, /* MFG diags version length */ MACADDR_LEN = 12, /* MAC Address length */ }; enum { T4_REGMAP_SIZE = (160 * 1024), T5_REGMAP_SIZE = (332 * 1024), }; enum { MEM_EDC0, MEM_EDC1, MEM_MC, MEM_MC0 = MEM_MC, MEM_MC1, MEM_HMA }; enum dev_master { MASTER_CANT, MASTER_MAY, MASTER_MUST }; enum dev_state { DEV_STATE_UNINIT, DEV_STATE_INIT, DEV_STATE_ERR }; enum { PAUSE_RX = 1 << 0, PAUSE_TX = 1 << 1, PAUSE_AUTONEG = 1 << 2 }; enum { /* * Real FECs. In the same order as the FEC portion of caps32 so that * the code can do (fec & M_FW_PORT_CAP32_FEC) to get all the real FECs. */ FEC_RS = 1 << 0, /* Reed-Solomon */ FEC_BASER_RS = 1 << 1, /* BASE-R, aka Firecode */ FEC_NONE = 1 << 2, /* no FEC */ /* * Pseudo FECs that translate to real FECs. The firmware knows nothing * about these and they start at M_FW_PORT_CAP32_FEC + 1. AUTO should * be set all by itself. */ FEC_AUTO = 1 << 5, FEC_MODULE = 1 << 6, /* FEC suggested by the cable/transceiver. 
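* Like FEC_AUTO, this is a pseudo FEC: the driver resolves it to one of the real FECs above before anything is sent to the firmware.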
*/ }; enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS }; struct port_stats { u64 tx_octets; /* total # of octets in good frames */ u64 tx_frames; /* all good frames */ u64 tx_bcast_frames; /* all broadcast frames */ u64 tx_mcast_frames; /* all multicast frames */ u64 tx_ucast_frames; /* all unicast frames */ u64 tx_error_frames; /* all error frames */ u64 tx_frames_64; /* # of Tx frames in a particular range */ u64 tx_frames_65_127; u64 tx_frames_128_255; u64 tx_frames_256_511; u64 tx_frames_512_1023; u64 tx_frames_1024_1518; u64 tx_frames_1519_max; u64 tx_drop; /* # of dropped Tx frames */ u64 tx_pause; /* # of transmitted pause frames */ u64 tx_ppp0; /* # of transmitted PPP prio 0 frames */ u64 tx_ppp1; /* # of transmitted PPP prio 1 frames */ u64 tx_ppp2; /* # of transmitted PPP prio 2 frames */ u64 tx_ppp3; /* # of transmitted PPP prio 3 frames */ u64 tx_ppp4; /* # of transmitted PPP prio 4 frames */ u64 tx_ppp5; /* # of transmitted PPP prio 5 frames */ u64 tx_ppp6; /* # of transmitted PPP prio 6 frames */ u64 tx_ppp7; /* # of transmitted PPP prio 7 frames */ u64 rx_octets; /* total # of octets in good frames */ u64 rx_frames; /* all good frames */ u64 rx_bcast_frames; /* all broadcast frames */ u64 rx_mcast_frames; /* all multicast frames */ u64 rx_ucast_frames; /* all unicast frames */ u64 rx_too_long; /* # of frames exceeding MTU */ u64 rx_jabber; /* # of jabber frames */ u64 rx_fcs_err; /* # of received frames with bad FCS */ u64 rx_len_err; /* # of received frames with length error */ u64 rx_symbol_err; /* symbol errors */ u64 rx_runt; /* # of short frames */ u64 rx_frames_64; /* # of Rx frames in a particular range */ u64 rx_frames_65_127; u64 rx_frames_128_255; u64 rx_frames_256_511; u64 rx_frames_512_1023; u64 rx_frames_1024_1518; u64 rx_frames_1519_max; u64 rx_pause; /* # of received pause frames */ u64 rx_ppp0; /* # of received PPP prio 0 frames */ u64 rx_ppp1; /* # of received PPP prio 1 frames */ u64 rx_ppp2; /* # of received PPP prio 2 frames */ u64 rx_ppp3; /* # of received PPP prio 3 frames */ u64 rx_ppp4; /* # of received PPP prio 4 frames */ u64 rx_ppp5; /* # of received PPP prio 5 frames */ u64 rx_ppp6; /* # of received PPP prio 6 frames */ u64 rx_ppp7; /* # of received PPP prio 7 frames */ u64 rx_ovflow0; /* drops due to buffer-group 0 overflows */ u64 rx_ovflow1; /* drops due to buffer-group 1 overflows */ u64 rx_ovflow2; /* drops due to buffer-group 2 overflows */ u64 rx_ovflow3; /* drops due to buffer-group 3 overflows */ u64 rx_trunc0; /* buffer-group 0 truncated packets */ u64 rx_trunc1; /* buffer-group 1 truncated packets */ u64 rx_trunc2; /* buffer-group 2 truncated packets */ u64 rx_trunc3; /* buffer-group 3 truncated packets */ }; struct lb_port_stats { u64 octets; u64 frames; u64 bcast_frames; u64 mcast_frames; u64 ucast_frames; u64 error_frames; u64 frames_64; u64 frames_65_127; u64 frames_128_255; u64 frames_256_511; u64 frames_512_1023; u64 frames_1024_1518; u64 frames_1519_max; u64 drop; u64 ovflow0; u64 ovflow1; u64 ovflow2; u64 ovflow3; u64 trunc0; u64 trunc1; u64 trunc2; u64 trunc3; }; struct tp_tcp_stats { u32 tcp_out_rsts; u64 tcp_in_segs; u64 tcp_out_segs; u64 tcp_retrans_segs; }; struct tp_usm_stats { u32 frames; u32 drops; u64 octets; }; struct tp_tid_stats { u32 del; u32 inv; u32 act; u32 pas; }; struct tp_fcoe_stats { u32 frames_ddp; u32 frames_drop; u64 octets_ddp; }; struct tp_err_stats { u32 mac_in_errs[MAX_NCHAN]; u32 hdr_in_errs[MAX_NCHAN]; u32 tcp_in_errs[MAX_NCHAN]; u32 tnl_cong_drops[MAX_NCHAN]; u32 
ofld_chan_drops[MAX_NCHAN]; u32 tnl_tx_drops[MAX_NCHAN]; u32 ofld_vlan_drops[MAX_NCHAN]; u32 tcp6_in_errs[MAX_NCHAN]; u32 ofld_no_neigh; u32 ofld_cong_defer; }; struct tp_tnl_stats { u32 out_pkt[MAX_NCHAN]; u32 in_pkt[MAX_NCHAN]; }; struct tp_proxy_stats { u32 proxy[MAX_NCHAN]; }; struct tp_cpl_stats { u32 req[MAX_NCHAN]; u32 rsp[MAX_NCHAN]; }; struct tp_rdma_stats { u32 rqe_dfr_pkt; u32 rqe_dfr_mod; }; struct sge_params { int timer_val[SGE_NTIMERS]; /* final, scaled values */ int counter_val[SGE_NCOUNTERS]; int fl_starve_threshold; int fl_starve_threshold2; int page_shift; int eq_s_qpp; int iq_s_qpp; int spg_len; int pad_boundary; int pack_boundary; int fl_pktshift; u32 sge_control; u32 sge_fl_buffer_size[SGE_FLBUF_SIZES]; }; struct tp_params { unsigned int tre; /* log2 of core clocks per TP tick */ unsigned int dack_re; /* DACK timer resolution */ unsigned int la_mask; /* what events are recorded by TP LA */ uint16_t filter_mode; uint16_t filter_mask; /* Used by TOE and hashfilters */ int vnic_mode; uint32_t max_rx_pdu; uint32_t max_tx_pdu; bool rx_pkt_encap; int8_t fcoe_shift; int8_t port_shift; int8_t vnic_shift; int8_t vlan_shift; int8_t tos_shift; int8_t protocol_shift; int8_t ethertype_shift; int8_t macmatch_shift; int8_t matchtype_shift; int8_t frag_shift; }; /* Use same modulation queue as the tx channel. */ #define TX_MODQ(tx_chan) (tx_chan) struct vpd_params { unsigned int cclk; u8 ec[EC_LEN + 1]; u8 sn[SERNUM_LEN + 1]; u8 id[ID_LEN + 1]; u8 pn[PN_LEN + 1]; u8 na[MACADDR_LEN + 1]; u8 md[MD_LEN + 1]; }; struct pci_params { unsigned int vpd_cap_addr; unsigned int mps; unsigned short speed; unsigned short width; }; /* * Firmware device log. */ struct devlog_params { u32 memtype; /* which memory (FW_MEMTYPE_* ) */ u32 start; /* start of log in firmware memory */ u32 size; /* size of log */ u32 addr; /* start address in flat addr space */ }; /* Stores chip specific parameters */ struct chip_params { u8 nchan; u8 pm_stats_cnt; u8 cng_ch_bits_log; /* congestion channel map bits width */ u8 nsched_cls; u8 cim_num_obq; u8 filter_opt_len; u16 mps_rplc_size; u16 vfcount; u32 sge_fl_db; u16 mps_tcam_size; u16 rss_nentries; u16 cim_la_size; }; /* VF-only parameters. */ /* * Global Receive Side Scaling (RSS) parameters in host-native format. */ struct rss_params { unsigned int mode; /* RSS mode */ union { struct { u_int synmapen:1; /* SYN Map Enable */ u_int syn4tupenipv6:1; /* enable hashing 4-tuple IPv6 SYNs */ u_int syn2tupenipv6:1; /* enable hashing 2-tuple IPv6 SYNs */ u_int syn4tupenipv4:1; /* enable hashing 4-tuple IPv4 SYNs */ u_int syn2tupenipv4:1; /* enable hashing 2-tuple IPv4 SYNs */ u_int ofdmapen:1; /* Offload Map Enable */ u_int tnlmapen:1; /* Tunnel Map Enable */ u_int tnlalllookup:1; /* Tunnel All Lookup */ u_int hashtoeplitz:1; /* use Toeplitz hash */ } basicvirtual; } u; }; /* * Maximum resources provisioned for a PCI VF. 
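* These limits are queried from the firmware by t4vf_get_vfres() and kept in adapter_params.vfres (VF-only).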
*/ struct vf_resources { unsigned int nvi; /* N virtual interfaces */ unsigned int neq; /* N egress Qs */ unsigned int nethctrl; /* N egress ETH or CTRL Qs */ unsigned int niqflint; /* N ingress Qs/w free list(s) & intr */ unsigned int niq; /* N ingress Qs */ unsigned int tc; /* PCI-E traffic class */ unsigned int pmask; /* port access rights mask */ unsigned int nexactf; /* N exact MPS filters */ unsigned int r_caps; /* read capabilities */ unsigned int wx_caps; /* write/execute capabilities */ }; struct adapter_params { struct sge_params sge; struct tp_params tp; /* PF-only */ struct vpd_params vpd; struct pci_params pci; struct devlog_params devlog; /* PF-only */ struct rss_params rss; /* VF-only */ struct vf_resources vfres; /* VF-only */ unsigned int core_vdd; unsigned int sf_size; /* serial flash size in bytes */ unsigned int sf_nsec; /* # of flash sectors */ unsigned int fw_vers; /* firmware version */ unsigned int bs_vers; /* bootstrap version */ unsigned int tp_vers; /* TP microcode version */ unsigned int er_vers; /* expansion ROM version */ unsigned int scfg_vers; /* Serial Configuration version */ unsigned int vpd_vers; /* VPD version */ unsigned short mtus[NMTUS]; unsigned short a_wnd[NCCTRL_WIN]; unsigned short b_wnd[NCCTRL_WIN]; unsigned int cim_la_size; uint8_t nports; /* # of ethernet ports */ uint8_t portvec; unsigned int chipid:4; /* chip ID. T4 = 4, T5 = 5, ... */ unsigned int rev:4; /* chip revision */ unsigned int fpga:1; /* this is an FPGA */ unsigned int offload:1; /* hw is TOE capable, fw has divvied up card resources for TOE operation. */ unsigned int bypass:1; /* this is a bypass card */ unsigned int ethoffload:1; unsigned int hash_filter:1; unsigned int filter2_wr_support:1; unsigned int port_caps32:1; unsigned int smac_add_support:1; unsigned int ofldq_wr_cred; unsigned int eo_wr_cred; unsigned int max_ordird_qp; unsigned int max_ird_adapter; /* These values are for all ports (8b/port, upto 4 ports) */ uint32_t mps_bg_map; /* MPS rx buffer group map */ uint32_t tp_ch_map; /* TPCHMAP from firmware */ bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */ bool dev_512sgl_mr; /* FW support for 512 SGL per FR MR */ bool viid_smt_extn_support; /* FW returns vin, vfvld & smt index? */ unsigned int max_pkts_per_eth_tx_pkts_wr; uint8_t nsched_cls; /* # of usable sched classes per port */ }; #define CHELSIO_T4 0x4 #define CHELSIO_T5 0x5 #define CHELSIO_T6 0x6 /* * State needed to monitor the forward progress of SGE Ingress DMA activities * and possible hangs. */ struct sge_idma_monitor_state { unsigned int idma_1s_thresh; /* 1s threshold in Core Clock ticks */ unsigned int idma_stalled[2]; /* synthesized stalled timers in HZ */ unsigned int idma_state[2]; /* IDMA Hang detect state */ unsigned int idma_qid[2]; /* IDMA Hung Ingress Queue ID */ unsigned int idma_warn[2]; /* time to warning in HZ */ }; struct trace_params { u32 data[TRACE_LEN / 4]; u32 mask[TRACE_LEN / 4]; unsigned short snap_len; unsigned short min_len; unsigned char skip_ofst; unsigned char skip_len; unsigned char invert; unsigned char port; }; struct link_config { /* OS-specific code owns all the requested_* fields. */ int8_t requested_aneg; /* link autonegotiation */ int8_t requested_fc; /* flow control */ int8_t requested_fec; /* FEC */ int8_t force_fec; /* FORCE_FEC in L1_CFG32 command. */ u_int requested_speed; /* speed (Mbps) */ uint32_t requested_caps;/* rcap in last l1cfg issued by the driver. 
*/ /* These are populated with information from the firmware. */ uint32_t pcaps; /* link capabilities */ uint32_t acaps; /* advertised capabilities */ uint32_t lpacaps; /* peer advertised capabilities */ u_int speed; /* actual link speed (Mbps) */ int8_t fc; /* actual link flow control */ int8_t fec_hint; /* cable/transceiver recommended fec */ int8_t fec; /* actual FEC */ bool link_ok; /* link up? */ uint8_t link_down_rc; /* link down reason */ }; #include "adapter.h" #ifndef PCI_VENDOR_ID_CHELSIO # define PCI_VENDOR_ID_CHELSIO 0x1425 #endif #define for_each_port(adapter, iter) \ for (iter = 0; iter < (adapter)->params.nports; ++iter) static inline int is_ftid(const struct adapter *sc, u_int tid) { return (sc->tids.nftids > 0 && tid >= sc->tids.ftid_base && tid <= sc->tids.ftid_end); } static inline int is_hpftid(const struct adapter *sc, u_int tid) { return (sc->tids.nhpftids > 0 && tid >= sc->tids.hpftid_base && tid <= sc->tids.hpftid_end); } static inline int is_etid(const struct adapter *sc, u_int tid) { return (sc->tids.netids > 0 && tid >= sc->tids.etid_base && tid <= sc->tids.etid_end); } static inline int is_offload(const struct adapter *adap) { return adap->params.offload; } static inline int is_ethoffload(const struct adapter *adap) { return adap->params.ethoffload; } static inline int is_hashfilter(const struct adapter *adap) { return adap->params.hash_filter; } static inline int is_ktls(const struct adapter *adap) { return adap->cryptocaps & FW_CAPS_CONFIG_TLS_HW; } static inline int chip_id(struct adapter *adap) { return adap->params.chipid; } static inline int chip_rev(struct adapter *adap) { return adap->params.rev; } static inline int is_t4(struct adapter *adap) { return adap->params.chipid == CHELSIO_T4; } static inline int is_t5(struct adapter *adap) { return adap->params.chipid == CHELSIO_T5; } static inline int is_t6(struct adapter *adap) { return adap->params.chipid == CHELSIO_T6; } static inline int is_fpga(struct adapter *adap) { return adap->params.fpga; } static inline unsigned int core_ticks_per_usec(const struct adapter *adap) { return adap->params.vpd.cclk / 1000; } static inline unsigned int us_to_core_ticks(const struct adapter *adap, unsigned int us) { return (us * adap->params.vpd.cclk) / 1000; } static inline unsigned int core_ticks_to_us(const struct adapter *adapter, unsigned int ticks) { /* add Core Clock / 2 to round ticks to nearest uS */ return ((ticks * 1000 + adapter->params.vpd.cclk/2) / adapter->params.vpd.cclk); } static inline unsigned int dack_ticks_to_usec(const struct adapter *adap, unsigned int ticks) { return (ticks << adap->params.tp.dack_re) / core_ticks_per_usec(adap); } static inline u_int us_to_tcp_ticks(const struct adapter *adap, u_long us) { return (us * adap->params.vpd.cclk / 1000 >> adap->params.tp.tre); } static inline u_int tcp_ticks_to_us(const struct adapter *adap, u_int ticks) { return ((uint64_t)ticks << adap->params.tp.tre) / core_ticks_per_usec(adap); } void t4_set_reg_field(struct adapter *adap, unsigned int addr, u32 mask, u32 val); int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl, bool sleep_ok, int timeout); int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl, bool sleep_ok); void t4_report_fw_error(struct adapter *adap); static inline int t4_wr_mbox_timeout(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl, int timeout) { return t4_wr_mbox_meat_timeout(adap, mbox, cmd, size, rpl, true, timeout); } static inline 
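/*
 * Convenience wrappers around t4_wr_mbox_meat(): t4_wr_mbox() is allowed to
 * sleep while it waits for the mailbox reply, t4_wr_mbox_ns() is not.
 */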
int t4_wr_mbox(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl) { return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, true); } static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl) { return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, false); } void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, u32 *vals, unsigned int nregs, unsigned int start_idx); void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); u32 t4_hw_pci_read_cfg4(adapter_t *adapter, int reg); struct fw_filter_wr; void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); bool t4_slow_intr_handler(struct adapter *adapter, bool verbose); int t4_hash_mac_addr(const u8 *addr); int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port, struct link_config *lc); int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port); int t4_seeprom_read(struct adapter *adapter, u32 addr, u32 *data); int t4_seeprom_write(struct adapter *adapter, u32 addr, u32 data); int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz); int t4_seeprom_wp(struct adapter *adapter, int enable); int t4_read_flash(struct adapter *adapter, unsigned int addr, unsigned int nwords, u32 *data, int byte_oriented); int t4_write_flash(struct adapter *adapter, unsigned int addr, unsigned int n, const u8 *data, int byte_oriented); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); int t4_fwcache(struct adapter *adap, enum fw_params_param_dev_fwcache op); int t5_fw_init_extern_mem(struct adapter *adap); int t4_load_bootcfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size); int t4_load_boot(struct adapter *adap, u8 *boot_data, unsigned int boot_addr, unsigned int size); int t4_flash_erase_sectors(struct adapter *adapter, int start, int end); int t4_flash_cfg_addr(struct adapter *adapter); int t4_load_cfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size); int t4_get_fw_version(struct adapter *adapter, u32 *vers); int t4_get_fw_hdr(struct adapter *adapter, struct fw_hdr *hdr); int t4_get_bs_version(struct adapter *adapter, u32 *vers); int t4_get_tp_version(struct adapter *adapter, u32 *vers); int t4_get_exprom_version(struct adapter *adapter, u32 *vers); int t4_get_scfg_version(struct adapter *adapter, u32 *vers); int t4_get_vpd_version(struct adapter *adapter, u32 *vers); int t4_get_version_info(struct adapter *adapter); int t4_init_hw(struct adapter *adapter, u32 fw_params); const struct chip_params *t4_get_chip_params(int chipid); int t4_prep_adapter(struct adapter *adapter, u32 *buf); int t4_shutdown_adapter(struct adapter *adapter); int t4_init_devlog_params(struct adapter *adapter, int fw_attach); int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id); void t4_fatal_err(struct adapter *adapter, bool fw_error); int t4_set_trace_filter(struct adapter *adapter, const struct trace_params *tp, int filter_index, int enable); void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp, int filter_index, int *enabled); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, int start, int n, const u16 *rspq, 
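/*
 * rspq[] supplies nrspq response-queue ids; slots [start, start + n) of the
 * VI's RSS indirection table are filled from it, repeating the array when
 * n > nrspq.  A hypothetical caller sketch (names are illustrative only):
 *
 *	for (i = 0; i < vi_rss_size; i++)
 *		rss[i] = rxq_abs_id[i % nrxq];
 *	t4_config_rss_range(sc, sc->mbox, viid, 0, vi_rss_size, rss, vi_rss_size);
 */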
unsigned int nrspq); int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, unsigned int flags); int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid, unsigned int flags, unsigned int defq, unsigned int skeyidx, unsigned int skey); int t4_read_rss(struct adapter *adapter, u16 *entries); void t4_read_rss_key(struct adapter *adapter, u32 *key, bool sleep_ok); void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx, bool sleep_ok); void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, u32 *valp, bool sleep_ok); void t4_write_rss_pf_config(struct adapter *adapter, unsigned int index, u32 val, bool sleep_ok); void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, u32 *vfl, u32 *vfh, bool sleep_ok); void t4_write_rss_vf_config(struct adapter *adapter, unsigned int index, u32 vfl, u32 vfh, bool sleep_ok); u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok); void t4_write_rss_pf_map(struct adapter *adapter, u32 pfmap, bool sleep_ok); u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok); void t4_write_rss_pf_mask(struct adapter *adapter, u32 pfmask, bool sleep_ok); int t4_mps_set_active_ports(struct adapter *adap, unsigned int port_mask); void t4_pmtx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); void t4_read_cimq_cfg(struct adapter *adap, u16 *base, u16 *size, u16 *thres); int t4_read_cim_ibq(struct adapter *adap, unsigned int qid, u32 *data, size_t n); int t4_read_cim_obq(struct adapter *adap, unsigned int qid, u32 *data, size_t n); int t4_cim_read(struct adapter *adap, unsigned int addr, unsigned int n, unsigned int *valp); int t4_cim_write(struct adapter *adap, unsigned int addr, unsigned int n, const unsigned int *valp); int t4_cim_ctl_read(struct adapter *adap, unsigned int addr, unsigned int n, unsigned int *valp); int t4_cim_read_la(struct adapter *adap, u32 *la_buf, unsigned int *wrptr); void t4_cim_read_pif_la(struct adapter *adap, u32 *pif_req, u32 *pif_rsp, unsigned int *pif_req_wrptr, unsigned int *pif_rsp_wrptr); void t4_cim_read_ma_la(struct adapter *adap, u32 *ma_req, u32 *ma_rsp); int t4_get_flash_params(struct adapter *adapter); u32 t4_read_pcie_cfg4(struct adapter *adap, int reg, int drv_fw_attach); int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); int t4_mem_read(struct adapter *adap, int mtype, u32 addr, u32 size, __be32 *data); void t4_idma_monitor_init(struct adapter *adapter, struct sge_idma_monitor_state *idma); void t4_idma_monitor(struct adapter *adapter, struct sge_idma_monitor_state *idma, int hz, int ticks); int t4_set_vf_mac(struct adapter *adapter, unsigned int pf, unsigned int vf, unsigned int naddr, u8 *addr); unsigned int t4_get_regs_len(struct adapter *adapter); void t4_get_regs(struct adapter *adap, u8 *buf, size_t buf_size); u32 t4_port_reg(struct adapter *adap, u8 port, u32 reg); const char *t4_get_port_type_description(enum fw_port_type port_type); void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p); void t4_get_port_stats_offset(struct adapter *adap, int idx, struct port_stats *stats, struct port_stats *offset); void t4_get_lb_stats(struct adapter *adap, int idx, struct lb_port_stats *p); void t4_clr_port_stats(struct adapter *adap, int idx); void t4_read_mtu_tbl(struct adapter *adap, u16 *mtus, u8 *mtu_log); void 
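/* reads the congestion control increment table, indexed by MTU and congestion control window */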
t4_read_cong_tbl(struct adapter *adap, u16 incr[NMTUS][NCCTRL_WIN]); void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED]); void t4_get_tx_sched(struct adapter *adap, unsigned int sched, unsigned int *kbps, unsigned int *ipg, bool sleep_ok); void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr, unsigned int mask, unsigned int val); void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr); void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st, bool sleep_ok); void t4_tp_get_tnl_stats(struct adapter *adap, struct tp_tnl_stats *st, bool sleep_ok); void t4_tp_get_proxy_stats(struct adapter *adap, struct tp_proxy_stats *st, bool sleep_ok); void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st, bool sleep_ok); void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st, bool sleep_ok); void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st, bool sleep_ok); void t4_tp_get_tid_stats(struct adapter *adap, struct tp_tid_stats *st, bool sleep_ok); void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, struct tp_tcp_stats *v6, bool sleep_ok); void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx, struct tp_fcoe_stats *st, bool sleep_ok); void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, const unsigned short *alpha, const unsigned short *beta); void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf); int t4_set_sched_bps(struct adapter *adap, int sched, unsigned int kbps); int t4_set_sched_ipg(struct adapter *adap, int sched, unsigned int ipg); int t4_set_pace_tbl(struct adapter *adap, const unsigned int *pace_vals, unsigned int start, unsigned int n); void t4_get_chan_txrate(struct adapter *adap, u64 *nic_rate, u64 *ofld_rate); int t4_set_filter_cfg(struct adapter *adap, int mode, int mask, int vnic_mode); void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid); void t4_wol_magic_enable(struct adapter *adap, unsigned int port, const u8 *addr); int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, u64 mask0, u64 mask1, unsigned int crc, bool enable); int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox, enum dev_master master, enum dev_state *state); int t4_fw_bye(struct adapter *adap, unsigned int mbox); int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset); int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force); int t4_fw_restart(struct adapter *adap, unsigned int mbox); int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, const u8 *fw_data, unsigned int size, int force); int t4_fw_initialize(struct adapter *adap, unsigned int mbox); int t4_query_params(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, u32 *val); int t4_query_params_rw(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, u32 *val, int rw); int t4_set_params_timeout(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, const u32 *val, int timeout); int t4_set_params(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, const u32 *val); int t4_cfg_pfvf(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int txq, unsigned int txq_eth_ctrl, unsigned int rxqi, unsigned int rxq, unsigned int tc, 
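/* provisions the given PF/VF with queue, interrupt, VI, traffic-class, port-access and capability limits; compare struct vf_resources above */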
unsigned int vi, unsigned int cmask, unsigned int pmask, unsigned int exactf, unsigned int rcaps, unsigned int wxcaps); int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox, unsigned int port, unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac, u16 *rss_size, uint8_t *vfvld, uint16_t *vin, unsigned int portfunc, unsigned int idstype); int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port, unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac, u16 *rss_size, uint8_t *vfvld, uint16_t *vin); int t4_free_vi(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int viid); int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid, int mtu, int promisc, int all_multi, int bcast, int vlanex, bool sleep_ok); int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, unsigned int viid, bool free, unsigned int naddr, const u8 **addr, u16 *idx, u64 *hash, bool sleep_ok); int t4_free_mac_filt(struct adapter *adap, unsigned int mbox, unsigned int viid, unsigned int naddr, const u8 **addr, bool sleep_ok); int t4_free_encap_mac_filt(struct adapter *adap, unsigned int viid, int idx, bool sleep_ok); int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid, const u8 *addr, const u8 *mask, unsigned int idx, u8 lookup_type, u8 port_id, bool sleep_ok); int t4_alloc_raw_mac_filt(struct adapter *adap, unsigned int viid, const u8 *addr, const u8 *mask, unsigned int idx, u8 lookup_type, u8 port_id, bool sleep_ok); int t4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid, const u8 *addr, const u8 *mask, unsigned int vni, unsigned int vni_mask, u8 dip_hit, u8 lookup_type, bool sleep_ok); int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, int idx, const u8 *addr, bool persist, uint16_t *smt_idx); int t4_del_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, const u8 *addr, bool smac); int t4_add_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, int idx, const u8 *addr, bool persist, u8 *smt_idx, bool smac); int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid, bool ucast, u64 vec, bool sleep_ok); int t4_enable_vi_params(struct adapter *adap, unsigned int mbox, unsigned int viid, bool rx_en, bool tx_en, bool dcb_en); int t4_enable_vi(struct adapter *adap, unsigned int mbox, unsigned int viid, bool rx_en, bool tx_en); int t4_identify_port(struct adapter *adap, unsigned int mbox, unsigned int viid, unsigned int nblinks); int t4_mdio_rd(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, unsigned int mmd, unsigned int reg, unsigned int *valp); int t4_mdio_wr(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, unsigned int mmd, unsigned int reg, unsigned int val); int t4_i2c_io(struct adapter *adap, unsigned int mbox, int port, unsigned int devid, unsigned int offset, unsigned int len, u8 *buf, bool write); int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port, unsigned int devid, unsigned int offset, unsigned int len, u8 *buf); int t4_i2c_wr(struct adapter *adap, unsigned int mbox, int port, unsigned int devid, unsigned int offset, unsigned int len, u8 *buf); int t4_iq_stop(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int iqtype, unsigned int iqid, unsigned int fl0id, unsigned int fl1id); int t4_iq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int iqtype, unsigned int iqid, unsigned int fl0id, unsigned int 
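/* fl0id and fl1id are the free lists bound to the ingress queue being freed */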
fl1id); int t4_eth_eq_stop(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_eth_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid, enum ctxt_type ctype, u32 *data); int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, enum ctxt_type ctype, u32 *data); int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type); const char *t4_link_down_rc_str(unsigned char link_down_rc); int t4_update_port_info(struct port_info *pi); int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl); int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); int t4_sched_config(struct adapter *adapter, int type, int minmaxen, int sleep_ok); int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int cl, int minrate, int maxrate, int weight, int pktsize, int burstsize, int sleep_ok); int t4_sched_params_ch_rl(struct adapter *adapter, int channel, int ratemode, unsigned int maxrate, int sleep_ok); int t4_sched_params_cl_wrr(struct adapter *adapter, int channel, int cl, int weight, int sleep_ok); int t4_sched_params_cl_rl_kbps(struct adapter *adapter, int channel, int cl, int mode, unsigned int maxrate, int pktsize, int sleep_ok); int t4_config_watchdog(struct adapter *adapter, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int timeout, unsigned int action); int t4_get_devlog_level(struct adapter *adapter, unsigned int *level); int t4_set_devlog_level(struct adapter *adapter, unsigned int level); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); void t4_tp_pio_write(struct adapter *adap, const u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); int t4_configure_ringbb(struct adapter *adap); int t4_configure_add_smac(struct adapter *adap); int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf, u16 vlan); static inline int t4vf_query_params(struct adapter *adapter, unsigned int nparams, const u32 *params, u32 *vals) { return t4_query_params(adapter, 0, 0, 0, nparams, params, vals); } static inline int t4vf_set_params(struct adapter *adapter, unsigned int nparams, const u32 *params, const u32 *vals) { return t4_set_params(adapter, 0, 0, 0, nparams, params, vals); } static inline int t4vf_wr_mbox(struct adapter *adap, const void *cmd, int size, void *rpl) { return t4_wr_mbox(adap, adap->mbox, cmd, size, rpl); } int t4vf_wait_dev_ready(struct adapter *adapter); int t4vf_fw_reset(struct adapter *adapter); int t4vf_get_sge_params(struct adapter *adapter); int t4vf_get_rss_glb_config(struct adapter *adapter); int t4vf_get_vfres(struct adapter *adapter); int t4vf_prep_adapter(struct adapter *adapter); int t4vf_get_vf_mac(struct adapter *adapter, unsigned int port, unsigned int *naddr, u8 *addr); int t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid, 
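/* computes the BAR2 offset (and BAR2 queue id, if any) that the queue's doorbell or GTS writes should use */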
enum t4_bar2_qtype qtype, int user, u64 *pbar2_qoffset, unsigned int *pbar2_qid); unsigned int fwcap_to_speed(uint32_t caps); uint32_t speed_to_fwcap(unsigned int speed); uint32_t fwcap_top_speed(uint32_t caps); static inline int port_top_speed(const struct port_info *pi) { /* Mbps -> Gbps */ return (fwcap_to_speed(pi->link_cfg.pcaps) / 1000); } +/* SET_TCB_FIELD sent as a ULP command looks like this */ +#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) + +static inline void * +mk_set_tcb_field_ulp(struct adapter *sc, void *cur, int tid, uint16_t word, + uint64_t mask, uint64_t val) +{ + struct ulp_txpkt *ulpmc; + struct ulptx_idata *ulpsc; + struct cpl_set_tcb_field_core *req; + + MPASS(((uintptr_t)cur & 7) == 0); + + ulpmc = cur; + ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | + V_ULP_TXPKT_DEST(ULP_TXPKT_DEST_TP)); + ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); + + ulpsc = (struct ulptx_idata *)(ulpmc + 1); + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + ulpsc->len = htobe32(sizeof(*req)); + + req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); + OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + req->reply_ctrl = htobe16(F_NO_REPLY); + req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); + req->mask = htobe64(mask); + req->val = htobe64(val); + + /* + * ULP_TX is an 8B processor but the firmware transfers WRs in 16B + * chunks. The master command for set_tcb_field does not end at a 16B + * boundary so it needs to be padded with a no-op. + */ + MPASS((LEN__SET_TCB_FIELD_ULP & 0xf) != 0); + ulpsc = (struct ulptx_idata *)(req + 1); + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + ulpsc->len = htobe32(0); + + return (ulpsc + 1); +} #endif /* __CHELSIO_COMMON_H */ diff --git a/sys/dev/cxgbe/t4_filter.c b/sys/dev/cxgbe/t4_filter.c index 18fa1093800f..359aae6df24e 100644 --- a/sys/dev/cxgbe/t4_filter.c +++ b/sys/dev/cxgbe/t4_filter.c @@ -1,2063 +1,2030 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_tcb.h" #include "t4_l2t.h" #include "t4_smt.h" struct filter_entry { LIST_ENTRY(filter_entry) link_4t; LIST_ENTRY(filter_entry) link_tid; uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter is administratively locked or busy */ uint32_t pending:1; /* filter action is pending firmware reply */ int tid; /* tid of the filter TCB */ struct l2t_entry *l2te; /* L2 table entry for DMAC rewrite */ struct smt_entry *smt; /* SMT entry for SMAC rewrite */ struct t4_filter_specification fs; }; static void free_filter_resources(struct filter_entry *); static int get_tcamfilter(struct adapter *, struct t4_filter *); static int get_hashfilter(struct adapter *, struct t4_filter *); static int set_hashfilter(struct adapter *, struct t4_filter *, uint64_t, struct l2t_entry *, struct smt_entry *); static int del_hashfilter(struct adapter *, struct t4_filter *); static int configure_hashfilter_tcb(struct adapter *, struct filter_entry *); static inline bool separate_hpfilter_region(struct adapter *sc) { return (chip_id(sc) >= CHELSIO_T6); } static inline uint32_t hf_hashfn_4t(struct t4_filter_specification *fs) { struct t4_filter_tuple *ft = &fs->val; uint32_t hash; if (fs->type) { /* IPv6 */ hash = fnv_32_buf(&ft->sip[0], 16, FNV1_32_INIT); hash = fnv_32_buf(&ft->dip[0], 16, hash); } else { hash = fnv_32_buf(&ft->sip[0], 4, FNV1_32_INIT); hash = fnv_32_buf(&ft->dip[0], 4, hash); } hash = fnv_32_buf(&ft->sport, sizeof(ft->sport), hash); hash = fnv_32_buf(&ft->dport, sizeof(ft->dport), hash); return (hash); } static inline uint32_t hf_hashfn_tid(int tid) { return (fnv_32_buf(&tid, sizeof(tid), FNV1_32_INIT)); } static int alloc_hftid_hash(struct tid_info *t, int flags) { int n; MPASS(t->ntids > 0); MPASS(t->hftid_hash_4t == NULL); MPASS(t->hftid_hash_tid == NULL); n = max(t->ntids / 1024, 16); t->hftid_hash_4t = hashinit_flags(n, M_CXGBE, &t->hftid_4t_mask, flags); if (t->hftid_hash_4t == NULL) return (ENOMEM); t->hftid_hash_tid = hashinit_flags(n, M_CXGBE, &t->hftid_tid_mask, flags); if (t->hftid_hash_tid == NULL) { hashdestroy(t->hftid_hash_4t, M_CXGBE, t->hftid_4t_mask); t->hftid_hash_4t = NULL; return (ENOMEM); } mtx_init(&t->hftid_lock, "T4 hashfilters", 0, MTX_DEF); cv_init(&t->hftid_cv, "t4hfcv"); return (0); } void free_hftid_hash(struct tid_info *t) { struct filter_entry *f, *ftmp; LIST_HEAD(, filter_entry) *head; int i; #ifdef INVARIANTS int n = 0; #endif if (t->tids_in_use > 0) { /* Remove everything from the tid hash. */ head = t->hftid_hash_tid; for (i = 0; i <= t->hftid_tid_mask; i++) { LIST_FOREACH_SAFE(f, &head[i], link_tid, ftmp) { LIST_REMOVE(f, link_tid); } } /* Remove and then free each filter in the 4t hash. */ head = t->hftid_hash_4t; for (i = 0; i <= t->hftid_4t_mask; i++) { LIST_FOREACH_SAFE(f, &head[i], link_4t, ftmp) { #ifdef INVARIANTS n += f->fs.type ? 
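/* an IPv6 (type != 0) hashfilter accounts for two tids, an IPv4 one for a single tid */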
2 : 1; #endif LIST_REMOVE(f, link_4t); free(f, M_CXGBE); } } MPASS(t->tids_in_use == n); t->tids_in_use = 0; } if (t->hftid_hash_4t) { hashdestroy(t->hftid_hash_4t, M_CXGBE, t->hftid_4t_mask); t->hftid_hash_4t = NULL; } if (t->hftid_hash_tid) { hashdestroy(t->hftid_hash_tid, M_CXGBE, t->hftid_tid_mask); t->hftid_hash_tid = NULL; } if (mtx_initialized(&t->hftid_lock)) { mtx_destroy(&t->hftid_lock); cv_destroy(&t->hftid_cv); } } static void insert_hf(struct adapter *sc, struct filter_entry *f, uint32_t hash) { struct tid_info *t = &sc->tids; LIST_HEAD(, filter_entry) *head = t->hftid_hash_4t; MPASS(head != NULL); if (hash == 0) hash = hf_hashfn_4t(&f->fs); LIST_INSERT_HEAD(&head[hash & t->hftid_4t_mask], f, link_4t); atomic_add_int(&t->tids_in_use, f->fs.type ? 2 : 1); } static void insert_hftid(struct adapter *sc, struct filter_entry *f) { struct tid_info *t = &sc->tids; LIST_HEAD(, filter_entry) *head = t->hftid_hash_tid; uint32_t hash; MPASS(f->tid >= t->tid_base); MPASS(f->tid - t->tid_base < t->ntids); mtx_assert(&t->hftid_lock, MA_OWNED); hash = hf_hashfn_tid(f->tid); LIST_INSERT_HEAD(&head[hash & t->hftid_tid_mask], f, link_tid); } static bool filter_eq(struct t4_filter_specification *fs1, struct t4_filter_specification *fs2) { int n; MPASS(fs1->hash && fs2->hash); if (fs1->type != fs2->type) return (false); n = fs1->type ? 16 : 4; if (bcmp(&fs1->val.sip[0], &fs2->val.sip[0], n) || bcmp(&fs1->val.dip[0], &fs2->val.dip[0], n) || fs1->val.sport != fs2->val.sport || fs1->val.dport != fs2->val.dport) return (false); /* * We know the masks are the same because all hashfilters conform to the * global tp->filter_mask and the driver has verified that already. */ if ((fs1->mask.pfvf_vld || fs1->mask.ovlan_vld) && fs1->val.vnic != fs2->val.vnic) return (false); if (fs1->mask.vlan_vld && fs1->val.vlan != fs2->val.vlan) return (false); if (fs1->mask.macidx && fs1->val.macidx != fs2->val.macidx) return (false); if (fs1->mask.frag && fs1->val.frag != fs2->val.frag) return (false); if (fs1->mask.matchtype && fs1->val.matchtype != fs2->val.matchtype) return (false); if (fs1->mask.iport && fs1->val.iport != fs2->val.iport) return (false); if (fs1->mask.fcoe && fs1->val.fcoe != fs2->val.fcoe) return (false); if (fs1->mask.proto && fs1->val.proto != fs2->val.proto) return (false); if (fs1->mask.tos && fs1->val.tos != fs2->val.tos) return (false); if (fs1->mask.ethtype && fs1->val.ethtype != fs2->val.ethtype) return (false); return (true); } static struct filter_entry * lookup_hf(struct adapter *sc, struct t4_filter_specification *fs, uint32_t hash) { struct tid_info *t = &sc->tids; LIST_HEAD(, filter_entry) *head = t->hftid_hash_4t; struct filter_entry *f; mtx_assert(&t->hftid_lock, MA_OWNED); MPASS(head != NULL); if (hash == 0) hash = hf_hashfn_4t(fs); LIST_FOREACH(f, &head[hash & t->hftid_4t_mask], link_4t) { if (filter_eq(&f->fs, fs)) return (f); } return (NULL); } static struct filter_entry * lookup_hftid(struct adapter *sc, int tid) { struct tid_info *t = &sc->tids; LIST_HEAD(, filter_entry) *head = t->hftid_hash_tid; struct filter_entry *f; uint32_t hash; mtx_assert(&t->hftid_lock, MA_OWNED); MPASS(head != NULL); hash = hf_hashfn_tid(tid); LIST_FOREACH(f, &head[hash & t->hftid_tid_mask], link_tid) { if (f->tid == tid) return (f); } return (NULL); } static void remove_hf(struct adapter *sc, struct filter_entry *f) { struct tid_info *t = &sc->tids; mtx_assert(&t->hftid_lock, MA_OWNED); LIST_REMOVE(f, link_4t); atomic_subtract_int(&t->tids_in_use, f->fs.type ? 
2 : 1); } static void remove_hftid(struct adapter *sc, struct filter_entry *f) { #ifdef INVARIANTS struct tid_info *t = &sc->tids; mtx_assert(&t->hftid_lock, MA_OWNED); #endif LIST_REMOVE(f, link_tid); } /* * Input: driver's 32b filter mode. * Returns: hardware filter mode (bits to set in vlan_pri_map) for the input. */ static uint16_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } /* * Input: driver's 32b filter mode. * Returns: hardware vnic mode (ingress config) matching the input. */ static int mode_to_iconf(uint32_t mode) { if ((mode & T4_FILTER_VNIC) == 0) return (-1); /* ingress config doesn't matter. */ if (mode & T4_FILTER_IC_VNIC) return (FW_VNIC_MODE_PF_VF); else if (mode & T4_FILTER_IC_ENCAP) return (FW_VNIC_MODE_ENCAP_EN); else return (FW_VNIC_MODE_OUTER_VLAN); } static int check_fspec_against_fconf_iconf(struct adapter *sc, struct t4_filter_specification *fs) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { if (tpp->vnic_mode != FW_VNIC_MODE_OUTER_VLAN) return (EINVAL); fconf |= F_VNIC_ID; } if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { if (tpp->vnic_mode != FW_VNIC_MODE_PF_VF) return (EINVAL); fconf |= F_VNIC_ID; } #ifdef notyet if (fs->val.encap_vld || fs->mask.encap_vld) { if (tpp->vnic_mode != FW_VNIC_MODE_ENCAP_EN); return (EINVAL); fconf |= F_VNIC_ID; } #endif if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; if ((tpp->filter_mode | fconf) != tpp->filter_mode) return (E2BIG); return (0); } /* * Input: hardware filter configuration (filter mode/mask, ingress config). * Input: driver's 32b filter mode matching the input. */ static uint32_t fconf_to_mode(uint16_t hwmode, int vnic_mode) { uint32_t mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (hwmode & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (hwmode & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (hwmode & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (hwmode & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (hwmode & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (hwmode & F_TOS) mode |= T4_FILTER_IP_TOS; if (hwmode & F_VLAN) mode |= T4_FILTER_VLAN; if (hwmode & F_VNIC_ID) mode |= T4_FILTER_VNIC; /* real meaning depends on vnic_mode. 
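* It stands for the outer VLAN, the PF/VF ids, or the encapsulation match, depending on the FW_VNIC_MODE_* switch just below.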
*/ if (hwmode & F_PORT) mode |= T4_FILTER_PORT; if (hwmode & F_FCOE) mode |= T4_FILTER_FCoE; switch (vnic_mode) { case FW_VNIC_MODE_PF_VF: mode |= T4_FILTER_IC_VNIC; break; case FW_VNIC_MODE_ENCAP_EN: mode |= T4_FILTER_IC_ENCAP; break; case FW_VNIC_MODE_OUTER_VLAN: default: break; } return (mode); } int get_filter_mode(struct adapter *sc, uint32_t *mode) { struct tp_params *tp = &sc->params.tp; uint16_t filter_mode; /* Filter mask must comply with the global filter mode. */ MPASS((tp->filter_mode | tp->filter_mask) == tp->filter_mode); /* Non-zero incoming value in mode means "hashfilter mode". */ filter_mode = *mode ? tp->filter_mask : tp->filter_mode; *mode = fconf_to_mode(filter_mode, tp->vnic_mode); return (0); } int set_filter_mode(struct adapter *sc, uint32_t mode) { struct tp_params *tp = &sc->params.tp; int rc, iconf; uint16_t fconf; iconf = mode_to_iconf(mode); fconf = mode_to_fconf(mode); if ((iconf == -1 || iconf == tp->vnic_mode) && fconf == tp->filter_mode) return (0); /* Nothing to do */ rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (hw_off_limits(sc)) { rc = ENXIO; goto done; } if (sc->tids.ftids_in_use > 0 || /* TCAM filters active */ sc->tids.hpftids_in_use > 0 || /* hi-pri TCAM filters active */ sc->tids.tids_in_use > 0) { /* TOE or hashfilters active */ rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif /* Note that filter mask will get clipped to the new filter mode. */ rc = -t4_set_filter_cfg(sc, fconf, -1, iconf); done: end_synchronized_op(sc, 0); return (rc); } int set_filter_mask(struct adapter *sc, uint32_t mode) { struct tp_params *tp = &sc->params.tp; int rc, iconf; uint16_t fmask; iconf = mode_to_iconf(mode); fmask = mode_to_fconf(mode); if ((iconf == -1 || iconf == tp->vnic_mode) && fmask == tp->filter_mask) return (0); /* Nothing to do */ /* * We aren't going to change the global filter mode or VNIC mode here. * The given filter mask must conform to them. */ if ((fmask | tp->filter_mode) != tp->filter_mode) return (EINVAL); if (iconf != -1 && iconf != tp->vnic_mode) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sethfm"); if (rc) return (rc); if (hw_off_limits(sc)) { rc = ENXIO; goto done; } if (sc->tids.tids_in_use > 0) { /* TOE or hashfilters active */ rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_cfg(sc, -1, fmask, -1); done: end_synchronized_op(sc, 0); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t tid) { uint32_t tcb_addr; uint64_t hits; tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE; mtx_lock(&sc->reg_lock); if (hw_off_limits(sc)) hits = 0; else if (is_t4(sc)) { uint64_t t; read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&t, 8); hits = be64toh(t); } else { uint32_t t; read_via_memwin(sc, 0, tcb_addr + 24, &t, 4); hits = be32toh(t); } mtx_unlock(&sc->reg_lock); return (hits); } int get_filter(struct adapter *sc, struct t4_filter *t) { if (t->fs.hash) return (get_hashfilter(sc, t)); else return (get_tcamfilter(sc, t)); } static int set_tcamfilter(struct adapter *sc, struct t4_filter *t, struct l2t_entry *l2te, struct smt_entry *smt) { struct filter_entry *f; struct fw_filter2_wr *fwr; u_int vnic_vld, vnic_vld_mask; struct wrq_cookie cookie; int i, rc, busy, locked; u_int tid; const int ntids = t->fs.type ? 
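/* an IPv6 TCAM filter occupies four consecutive filter slots, an IPv4 filter occupies one */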
4 : 1; MPASS(!t->fs.hash); /* Already validated against fconf, iconf */ MPASS((t->fs.val.pfvf_vld & t->fs.val.ovlan_vld) == 0); MPASS((t->fs.mask.pfvf_vld & t->fs.mask.ovlan_vld) == 0); if (separate_hpfilter_region(sc) && t->fs.prio) { MPASS(t->idx < sc->tids.nhpftids); f = &sc->tids.hpftid_tab[t->idx]; tid = sc->tids.hpftid_base + t->idx; } else { MPASS(t->idx < sc->tids.nftids); f = &sc->tids.ftid_tab[t->idx]; tid = sc->tids.ftid_base + t->idx; } rc = busy = locked = 0; mtx_lock(&sc->tids.ftid_lock); for (i = 0; i < ntids; i++) { busy += f[i].pending + f[i].valid; locked += f[i].locked; } if (locked > 0) rc = EPERM; else if (busy > 0) rc = EBUSY; else { int len16; if (sc->params.filter2_wr_support) len16 = howmany(sizeof(struct fw_filter2_wr), 16); else len16 = howmany(sizeof(struct fw_filter_wr), 16); fwr = start_wrq_wr(&sc->sge.ctrlq[0], len16, &cookie); if (__predict_false(fwr == NULL)) rc = ENOMEM; else { f->pending = 1; if (separate_hpfilter_region(sc) && t->fs.prio) sc->tids.hpftids_in_use++; else sc->tids.ftids_in_use++; } } mtx_unlock(&sc->tids.ftid_lock); if (rc != 0) return (rc); /* * Can't fail now. A set-filter WR will definitely be sent. */ f->tid = tid; f->fs = t->fs; f->l2te = l2te; f->smt = smt; if (t->fs.val.pfvf_vld || t->fs.val.ovlan_vld) vnic_vld = 1; else vnic_vld = 0; if (t->fs.mask.pfvf_vld || t->fs.mask.ovlan_vld) vnic_vld_mask = 1; else vnic_vld_mask = 0; bzero(fwr, sizeof(*fwr)); if (sc->params.filter2_wr_support) fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER2_WR)); else fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(f->tid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2te ? 
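/* L2T index 0 when the filter has no DMAC-rewrite L2T entry */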
f->l2te->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); /* sma = 0 tells the fw to use SMAC_SEL for source MAC address */ bzero(fwr->sma, sizeof (fwr->sma)); if (sc->params.filter2_wr_support) { fwr->filter_type_swapmac = V_FW_FILTER2_WR_SWAPMAC(f->fs.swapmac); fwr->natmode_to_ulp_type = V_FW_FILTER2_WR_ULP_TYPE(f->fs.nat_mode ? ULP_MODE_TCPDDP : ULP_MODE_NONE) | V_FW_FILTER2_WR_NATFLAGCHECK(f->fs.nat_flag_chk) | V_FW_FILTER2_WR_NATMODE(f->fs.nat_mode); memcpy(fwr->newlip, f->fs.nat_dip, sizeof(fwr->newlip)); memcpy(fwr->newfip, f->fs.nat_sip, sizeof(fwr->newfip)); fwr->newlport = htobe16(f->fs.nat_dport); fwr->newfport = htobe16(f->fs.nat_sport); fwr->natseqcheck = htobe32(f->fs.nat_seq_chk); } commit_wrq_wr(&sc->sge.ctrlq[0], fwr, &cookie); /* Wait for response. */ mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 0 : EIO; break; } if (cv_wait_sig(&sc->tids.ftid_cv, &sc->tids.ftid_lock) != 0) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); return (rc); } static int hashfilter_ntuple(struct adapter *sc, const struct t4_filter_specification *fs, uint64_t *ftuple) { struct tp_params *tp = &sc->params.tp; uint16_t fmask; *ftuple = fmask = 0; /* * Initialize each of the fields which we care about which are present * in the Compressed Filter Tuple. */ if (tp->vlan_shift >= 0 && fs->mask.vlan) { *ftuple |= (uint64_t)(F_FT_VLAN_VLD | fs->val.vlan) << tp->vlan_shift; fmask |= F_VLAN; } if (tp->port_shift >= 0 && fs->mask.iport) { *ftuple |= (uint64_t)fs->val.iport << tp->port_shift; fmask |= F_PORT; } if (tp->protocol_shift >= 0 && fs->mask.proto) { *ftuple |= (uint64_t)fs->val.proto << tp->protocol_shift; fmask |= F_PROTOCOL; } if (tp->tos_shift >= 0 && fs->mask.tos) { *ftuple |= (uint64_t)(fs->val.tos) << tp->tos_shift; fmask |= F_TOS; } if (tp->vnic_shift >= 0 && fs->mask.vnic) { /* vnic_mode was already validated. 
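* The MPASS checks below only assert that the mask bit matching the active vnic_mode is set.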
*/ if (tp->vnic_mode == FW_VNIC_MODE_PF_VF) MPASS(fs->mask.pfvf_vld); else if (tp->vnic_mode == FW_VNIC_MODE_OUTER_VLAN) MPASS(fs->mask.ovlan_vld); #ifdef notyet else if (tp->vnic_mode == FW_VNIC_MODE_ENCAP_EN) MPASS(fs->mask.encap_vld); #endif *ftuple |= ((1ULL << 16) | fs->val.vnic) << tp->vnic_shift; fmask |= F_VNIC_ID; } if (tp->macmatch_shift >= 0 && fs->mask.macidx) { *ftuple |= (uint64_t)(fs->val.macidx) << tp->macmatch_shift; fmask |= F_MACMATCH; } if (tp->ethertype_shift >= 0 && fs->mask.ethtype) { *ftuple |= (uint64_t)(fs->val.ethtype) << tp->ethertype_shift; fmask |= F_ETHERTYPE; } if (tp->matchtype_shift >= 0 && fs->mask.matchtype) { *ftuple |= (uint64_t)(fs->val.matchtype) << tp->matchtype_shift; fmask |= F_MPSHITTYPE; } if (tp->frag_shift >= 0 && fs->mask.frag) { *ftuple |= (uint64_t)(fs->val.frag) << tp->frag_shift; fmask |= F_FRAGMENTATION; } if (tp->fcoe_shift >= 0 && fs->mask.fcoe) { *ftuple |= (uint64_t)(fs->val.fcoe) << tp->fcoe_shift; fmask |= F_FCOE; } /* A hashfilter must conform to the hardware filter mask. */ if (fmask != tp->filter_mask) return (EINVAL); return (0); } static bool is_4tuple_specified(struct t4_filter_specification *fs) { int i; const int n = fs->type ? 16 : 4; if (fs->mask.sport != 0xffff || fs->mask.dport != 0xffff) return (false); for (i = 0; i < n; i++) { if (fs->mask.sip[i] != 0xff) return (false); if (fs->mask.dip[i] != 0xff) return (false); } return (true); } int set_filter(struct adapter *sc, struct t4_filter *t) { struct tid_info *ti = &sc->tids; struct l2t_entry *l2te = NULL; struct smt_entry *smt = NULL; uint64_t ftuple; int rc; /* * Basic filter checks first. */ if (t->fs.hash) { if (!is_hashfilter(sc) || ti->ntids == 0) return (ENOTSUP); /* Hardware, not user, selects a tid for hashfilters. */ if (t->idx != (uint32_t)-1) return (EINVAL); /* T5 can't count hashfilter hits. */ if (is_t5(sc) && t->fs.hitcnts) return (EINVAL); if (!is_4tuple_specified(&t->fs)) return (EINVAL); rc = hashfilter_ntuple(sc, &t->fs, &ftuple); if (rc != 0) return (rc); } else { if (separate_hpfilter_region(sc) && t->fs.prio) { if (ti->nhpftids == 0) return (ENOTSUP); if (t->idx >= ti->nhpftids) return (EINVAL); } else { if (ti->nftids == 0) return (ENOTSUP); if (t->idx >= ti->nftids) return (EINVAL); } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= ti->nftids)) return (EINVAL); } /* T4 doesn't support VLAN tag removal or rewrite, swapmac, and NAT. */ if (is_t4(sc) && t->fs.action == FILTER_SWITCH && (t->fs.newvlan == VLAN_REMOVE || t->fs.newvlan == VLAN_REWRITE || t->fs.swapmac || t->fs.nat_mode)) return (ENOTSUP); if (t->fs.action == FILTER_SWITCH && t->fs.eport >= sc->params.nports) return (EINVAL); if (t->fs.val.iport >= sc->params.nports) return (EINVAL); /* Can't specify an iqid/rss_info if not steering. */ if (!t->fs.dirsteer && !t->fs.dirsteerhash && !t->fs.maskhash && t->fs.iq) return (EINVAL); /* Validate against the global filter mode and ingress config */ rc = check_fspec_against_fconf_iconf(sc, &t->fs); if (rc != 0) return (rc); /* * Basic checks passed. Make sure the queues and tid tables are setup. 
*/ rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); if (hw_off_limits(sc)) { rc = ENXIO; goto done; } if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_init(sc)) != 0)) goto done; if (t->fs.hash) { if (__predict_false(ti->hftid_hash_4t == NULL)) { rc = alloc_hftid_hash(&sc->tids, HASH_NOWAIT); if (rc != 0) goto done; } } else if (separate_hpfilter_region(sc) && t->fs.prio && __predict_false(ti->hpftid_tab == NULL)) { MPASS(ti->nhpftids != 0); KASSERT(ti->hpftids_in_use == 0, ("%s: no memory allocated but hpftids_in_use is %u", __func__, ti->hpftids_in_use)); ti->hpftid_tab = malloc(sizeof(struct filter_entry) * ti->nhpftids, M_CXGBE, M_NOWAIT | M_ZERO); if (ti->hpftid_tab == NULL) { rc = ENOMEM; goto done; } if (!mtx_initialized(&sc->tids.ftid_lock)) { mtx_init(&ti->ftid_lock, "T4 filters", 0, MTX_DEF); cv_init(&ti->ftid_cv, "t4fcv"); } } else if (__predict_false(ti->ftid_tab == NULL)) { MPASS(ti->nftids != 0); KASSERT(ti->ftids_in_use == 0, ("%s: no memory allocated but ftids_in_use is %u", __func__, ti->ftids_in_use)); ti->ftid_tab = malloc(sizeof(struct filter_entry) * ti->nftids, M_CXGBE, M_NOWAIT | M_ZERO); if (ti->ftid_tab == NULL) { rc = ENOMEM; goto done; } if (!mtx_initialized(&sc->tids.ftid_lock)) { mtx_init(&ti->ftid_lock, "T4 filters", 0, MTX_DEF); cv_init(&ti->ftid_cv, "t4fcv"); } } done: end_synchronized_op(sc, 0); if (rc != 0) return (rc); /* * Allocate L2T entry, SMT entry, etc. */ if (t->fs.newdmac || t->fs.newvlan) { /* This filter needs an L2T entry; allocate one. */ l2te = t4_l2t_alloc_switching(sc, t->fs.vlan, t->fs.eport, t->fs.dmac); if (__predict_false(l2te == NULL)) { rc = EAGAIN; goto error; } } if (t->fs.newsmac) { /* This filter needs an SMT entry; allocate one. */ smt = t4_smt_alloc_switching(sc->smt, t->fs.smac); if (__predict_false(smt == NULL)) { rc = EAGAIN; goto error; } rc = t4_smt_set_switching(sc, smt, 0x0, t->fs.smac); if (rc) goto error; } if (t->fs.hash) rc = set_hashfilter(sc, t, ftuple, l2te, smt); else rc = set_tcamfilter(sc, t, l2te, smt); if (rc != 0 && rc != EINPROGRESS) { error: if (l2te) t4_l2t_release(l2te); if (smt) t4_smt_release(smt); } return (rc); } static int del_tcamfilter(struct adapter *sc, struct t4_filter *t) { struct filter_entry *f; struct fw_filter_wr *fwr; struct wrq_cookie cookie; int rc, nfilters; #ifdef INVARIANTS u_int tid_base; #endif mtx_lock(&sc->tids.ftid_lock); if (separate_hpfilter_region(sc) && t->fs.prio) { nfilters = sc->tids.nhpftids; f = sc->tids.hpftid_tab; #ifdef INVARIANTS tid_base = sc->tids.hpftid_base; #endif } else { nfilters = sc->tids.nftids; f = sc->tids.ftid_tab; #ifdef INVARIANTS tid_base = sc->tids.ftid_base; #endif } MPASS(f != NULL); /* Caller checked this. */ if (t->idx >= nfilters) { rc = EINVAL; goto done; } f += t->idx; if (f->locked) { rc = EPERM; goto done; } if (f->pending) { rc = EBUSY; goto done; } if (f->valid == 0) { rc = EINVAL; goto done; } MPASS(f->tid == tid_base + t->idx); fwr = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) { rc = ENOMEM; goto done; } bzero(fwr, sizeof (*fwr)); t4_mk_filtdelwr(f->tid, fwr, sc->sge.fwq.abs_id); f->pending = 1; commit_wrq_wr(&sc->sge.ctrlq[0], fwr, &cookie); t->fs = f->fs; /* extra info for the caller */ for (;;) { if (f->pending == 0) { rc = f->valid ? 
EIO : 0; break; } if (cv_wait_sig(&sc->tids.ftid_cv, &sc->tids.ftid_lock) != 0) { rc = EINPROGRESS; break; } } done: mtx_unlock(&sc->tids.ftid_lock); return (rc); } int del_filter(struct adapter *sc, struct t4_filter *t) { /* No filters possible if not initialized yet. */ if (!(sc->flags & FULL_INIT_DONE)) return (EINVAL); /* * The checks for tid tables ensure that the locks that del_* will reach * for are initialized. */ if (t->fs.hash) { if (sc->tids.hftid_hash_4t != NULL) return (del_hashfilter(sc, t)); } else if (separate_hpfilter_region(sc) && t->fs.prio) { if (sc->tids.hpftid_tab != NULL) return (del_tcamfilter(sc, t)); } else { if (sc->tids.ftid_tab != NULL) return (del_tcamfilter(sc, t)); } return (EINVAL); } /* * Release secondary resources associated with the filter. */ static void free_filter_resources(struct filter_entry *f) { if (f->l2te) { t4_l2t_release(f->l2te); f->l2te = NULL; } if (f->smt) { t4_smt_release(f->smt); f->smt = NULL; } } static int set_tcb_field(struct adapter *sc, u_int tid, uint16_t word, uint64_t mask, uint64_t val, int no_reply) { struct wrq_cookie cookie; struct cpl_set_tcb_field *req; req = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*req), 16), &cookie); if (req == NULL) return (ENOMEM); bzero(req, sizeof(*req)); INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, tid); if (no_reply == 0) { req->reply_ctrl = htobe16(V_QUEUENO(sc->sge.fwq.abs_id) | V_NO_REPLY(0)); } else req->reply_ctrl = htobe16(V_NO_REPLY(1)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(CPL_COOKIE_HASHFILTER)); req->mask = htobe64(mask); req->val = htobe64(val); commit_wrq_wr(&sc->sge.ctrlq[0], req, &cookie); return (0); } /* Set one of the t_flags bits in the TCB. */ static inline int set_tcb_tflag(struct adapter *sc, int tid, u_int bit_pos, u_int val, u_int no_reply) { return (set_tcb_field(sc, tid, W_TCB_T_FLAGS, 1ULL << bit_pos, (uint64_t)val << bit_pos, no_reply)); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); u_int tid = GET_TID(rpl); u_int rc, idx; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); if (is_hpftid(sc, tid)) { idx = tid - sc->tids.hpftid_base; f = &sc->tids.hpftid_tab[idx]; } else if (is_ftid(sc, tid)) { idx = tid - sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; } else panic("%s: FW reply for invalid TID %d.", __func__, tid); MPASS(f->tid == tid); rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); KASSERT(f->pending, ("%s: reply %d for filter[%u] that isn't pending.", __func__, rc, tid)); switch(rc) { case FW_FILTER_WR_FLT_ADDED: /* set-filter succeeded */ f->valid = 1; if (f->fs.newsmac) { MPASS(f->smt != NULL); set_tcb_tflag(sc, f->tid, S_TF_CCTRL_CWR, 1, 1); set_tcb_field(sc, f->tid, W_TCB_SMAC_SEL, V_TCB_SMAC_SEL(M_TCB_SMAC_SEL), V_TCB_SMAC_SEL(f->smt->idx), 1); /* XXX: wait for reply to TCB update before !pending */ } break; case FW_FILTER_WR_FLT_DELETED: /* del-filter succeeded */ MPASS(f->valid == 1); f->valid = 0; /* Fall through */ case FW_FILTER_WR_SMT_TBL_FULL: /* set-filter failed due to lack of SMT space. 
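 * The delete-success case above falls through to here on purpose:
 * in both cases the filter ends up invalid and the cleanup is
 * shared: release the L2T/SMT references and return the TCAM slot
 * by decrementing the in-use count.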
*/ MPASS(f->valid == 0); free_filter_resources(f); if (separate_hpfilter_region(sc) && f->fs.prio) sc->tids.hpftids_in_use--; else sc->tids.ftids_in_use--; break; case FW_FILTER_WR_SUCCESS: case FW_FILTER_WR_EINVAL: default: panic("%s: unexpected reply %d for filter[%d].", __func__, rc, idx); } f->pending = 0; cv_broadcast(&sc->tids.ftid_cv); mtx_unlock(&sc->tids.ftid_lock); return (0); } /* * This is the reply to the Active Open that created the filter. Additional TCB * updates may be required to complete the filter configuration. */ int t4_hashfilter_ao_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); struct filter_entry *f = lookup_atid(sc, atid); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); mtx_lock(&sc->tids.hftid_lock); KASSERT(f->pending, ("%s: hashfilter[%p] isn't pending.", __func__, f)); KASSERT(f->tid == -1, ("%s: hashfilter[%p] has tid %d already.", __func__, f, f->tid)); if (status == CPL_ERR_NONE) { f->tid = GET_TID(cpl); MPASS(lookup_hftid(sc, f->tid) == NULL); insert_hftid(sc, f); /* * Leave the filter pending until it is fully set up, which will * be indicated by the reply to the last TCB update. No need to * unblock the ioctl thread either. */ if (configure_hashfilter_tcb(sc, f) == EINPROGRESS) goto done; f->valid = 1; f->pending = 0; } else { /* provide errno instead of tid to ioctl */ f->tid = act_open_rpl_status_to_errno(status); f->valid = 0; f->pending = 0; if (act_open_has_tid(status)) release_tid(sc, GET_TID(cpl), &sc->sge.ctrlq[0]); free_filter_resources(f); remove_hf(sc, f); if (f->locked == 0) free(f, M_CXGBE); } cv_broadcast(&sc->tids.hftid_cv); done: mtx_unlock(&sc->tids.hftid_lock); free_atid(sc, atid); return (0); } int t4_hashfilter_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); u_int tid = GET_TID(rpl); struct filter_entry *f; mtx_lock(&sc->tids.hftid_lock); f = lookup_hftid(sc, tid); KASSERT(f->tid == tid, ("%s: filter tid mismatch", __func__)); KASSERT(f->pending, ("%s: hashfilter %p [%u] isn't pending.", __func__, f, tid)); KASSERT(f->valid == 0, ("%s: hashfilter %p [%u] is valid already.", __func__, f, tid)); f->pending = 0; if (rpl->status == 0) { f->valid = 1; } else { f->tid = EIO; f->valid = 0; free_filter_resources(f); remove_hftid(sc, f); remove_hf(sc, f); release_tid(sc, tid, &sc->sge.ctrlq[0]); if (f->locked == 0) free(f, M_CXGBE); } cv_broadcast(&sc->tids.hftid_cv); mtx_unlock(&sc->tids.hftid_lock); return (0); } int t4_del_hashfilter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct filter_entry *f; mtx_lock(&sc->tids.hftid_lock); f = lookup_hftid(sc, tid); KASSERT(f->tid == tid, ("%s: filter tid mismatch", __func__)); KASSERT(f->pending, ("%s: hashfilter %p [%u] isn't pending.", __func__, f, tid)); KASSERT(f->valid, ("%s: hashfilter %p [%u] isn't valid.", __func__, f, tid)); f->pending = 0; if (cpl->status == 0) { f->valid = 0; free_filter_resources(f); remove_hftid(sc, f); remove_hf(sc, f); release_tid(sc, tid, &sc->sge.ctrlq[0]); if (f->locked == 0) free(f, M_CXGBE); } cv_broadcast(&sc->tids.hftid_cv); 
mtx_unlock(&sc->tids.hftid_lock); return (0); } static int get_tcamfilter(struct adapter *sc, struct t4_filter *t) { int i, nfilters; struct filter_entry *f; u_int in_use; #ifdef INVARIANTS u_int tid_base; #endif MPASS(!t->fs.hash); if (separate_hpfilter_region(sc) && t->fs.prio) { nfilters = sc->tids.nhpftids; f = sc->tids.hpftid_tab; in_use = sc->tids.hpftids_in_use; #ifdef INVARIANTS tid_base = sc->tids.hpftid_base; #endif } else { nfilters = sc->tids.nftids; f = sc->tids.ftid_tab; in_use = sc->tids.ftids_in_use; #ifdef INVARIANTS tid_base = sc->tids.ftid_base; #endif } if (in_use == 0 || f == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; return (0); } f += t->idx; mtx_lock(&sc->tids.ftid_lock); for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { MPASS(f->tid == tid_base + i); t->idx = i; t->l2tidx = f->l2te ? f->l2te->idx : 0; t->smtidx = f->smt ? f->smt->idx : 0; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, f->tid); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: mtx_unlock(&sc->tids.ftid_lock); return (0); } static int get_hashfilter(struct adapter *sc, struct t4_filter *t) { struct tid_info *ti = &sc->tids; int tid; struct filter_entry *f; const int inv_tid = ti->ntids + ti->tid_base; MPASS(t->fs.hash); if (ti->tids_in_use == 0 || ti->hftid_hash_tid == NULL || t->idx >= inv_tid) { t->idx = 0xffffffff; return (0); } if (t->idx < ti->tid_base) t->idx = ti->tid_base; mtx_lock(&ti->hftid_lock); for (tid = t->idx; tid < inv_tid; tid++) { f = lookup_hftid(sc, tid); if (f != NULL && f->valid) { t->idx = tid; t->l2tidx = f->l2te ? f->l2te->idx : 0; t->smtidx = f->smt ? f->smt->idx : 0; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, tid); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: mtx_unlock(&ti->hftid_lock); return (0); } static void mk_act_open_req6(struct adapter *sc, struct filter_entry *f, int atid, uint64_t ftuple, struct cpl_act_open_req6 *cpl) { struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl; struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl; /* Review changes to CPL after cpl_t6_act_open_req if this goes off. */ MPASS(chip_id(sc) >= CHELSIO_T5 && chip_id(sc) <= CHELSIO_T6); MPASS(atid >= 0); if (chip_id(sc) == CHELSIO_T5) { INIT_TP_WR(cpl5, 0); } else { INIT_TP_WR(cpl6, 0); cpl6->rsvd2 = 0; cpl6->opt3 = 0; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | V_TID_COOKIE(CPL_COOKIE_HASHFILTER))); cpl->local_port = htobe16(f->fs.val.dport); cpl->peer_port = htobe16(f->fs.val.sport); cpl->local_ip_hi = *(uint64_t *)(&f->fs.val.dip); cpl->local_ip_lo = *(((uint64_t *)&f->fs.val.dip) + 1); cpl->peer_ip_hi = *(uint64_t *)(&f->fs.val.sip); cpl->peer_ip_lo = *(((uint64_t *)&f->fs.val.sip) + 1); cpl->opt0 = htobe64(V_NAGLE(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_DELACK(f->fs.hitcnts) | V_L2T_IDX(f->l2te ? f->l2te->idx : 0) | V_TX_CHAN(f->fs.eport) | V_NO_CONG(f->fs.rpttid) | V_ULP_MODE(f->fs.nat_mode ? ULP_MODE_TCPDDP : ULP_MODE_NONE) | F_TCAM_BYPASS | F_NON_OFFLOAD); cpl6->params = htobe64(V_FILTER_TUPLE(ftuple)); cpl6->opt2 = htobe32(F_RSS_QUEUE_VALID | V_RSS_QUEUE(f->fs.iq) | V_TX_QUEUE(f->fs.nat_mode) | V_WND_SCALE_EN(f->fs.nat_flag_chk) | V_RX_FC_DISABLE(f->fs.nat_seq_chk ? 
1 : 0) | F_T5_OPT_2_VALID | F_RX_CHANNEL | V_SACK_EN(f->fs.swapmac) | V_CONG_CNTRL((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | V_PACE(f->fs.maskhash | (f->fs.dirsteerhash << 1))); } static void mk_act_open_req(struct adapter *sc, struct filter_entry *f, int atid, uint64_t ftuple, struct cpl_act_open_req *cpl) { struct cpl_t5_act_open_req *cpl5 = (void *)cpl; struct cpl_t6_act_open_req *cpl6 = (void *)cpl; /* Review changes to CPL after cpl_t6_act_open_req if this goes off. */ MPASS(chip_id(sc) >= CHELSIO_T5 && chip_id(sc) <= CHELSIO_T6); MPASS(atid >= 0); if (chip_id(sc) == CHELSIO_T5) { INIT_TP_WR(cpl5, 0); } else { INIT_TP_WR(cpl6, 0); cpl6->rsvd2 = 0; cpl6->opt3 = 0; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | V_TID_COOKIE(CPL_COOKIE_HASHFILTER))); cpl->local_port = htobe16(f->fs.val.dport); cpl->peer_port = htobe16(f->fs.val.sport); cpl->local_ip = f->fs.val.dip[0] | f->fs.val.dip[1] << 8 | f->fs.val.dip[2] << 16 | f->fs.val.dip[3] << 24; cpl->peer_ip = f->fs.val.sip[0] | f->fs.val.sip[1] << 8 | f->fs.val.sip[2] << 16 | f->fs.val.sip[3] << 24; cpl->opt0 = htobe64(V_NAGLE(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_DELACK(f->fs.hitcnts) | V_L2T_IDX(f->l2te ? f->l2te->idx : 0) | V_TX_CHAN(f->fs.eport) | V_NO_CONG(f->fs.rpttid) | V_ULP_MODE(f->fs.nat_mode ? ULP_MODE_TCPDDP : ULP_MODE_NONE) | F_TCAM_BYPASS | F_NON_OFFLOAD); cpl6->params = htobe64(V_FILTER_TUPLE(ftuple)); cpl6->opt2 = htobe32(F_RSS_QUEUE_VALID | V_RSS_QUEUE(f->fs.iq) | V_TX_QUEUE(f->fs.nat_mode) | V_WND_SCALE_EN(f->fs.nat_flag_chk) | V_RX_FC_DISABLE(f->fs.nat_seq_chk ? 1 : 0) | F_T5_OPT_2_VALID | F_RX_CHANNEL | V_SACK_EN(f->fs.swapmac) | V_CONG_CNTRL((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | V_PACE(f->fs.maskhash | (f->fs.dirsteerhash << 1))); } static int act_open_cpl_len16(struct adapter *sc, int isipv6) { int idx; static const int sz_table[3][2] = { { howmany(sizeof (struct cpl_act_open_req), 16), howmany(sizeof (struct cpl_act_open_req6), 16) }, { howmany(sizeof (struct cpl_t5_act_open_req), 16), howmany(sizeof (struct cpl_t5_act_open_req6), 16) }, { howmany(sizeof (struct cpl_t6_act_open_req), 16), howmany(sizeof (struct cpl_t6_act_open_req6), 16) }, }; MPASS(chip_id(sc) >= CHELSIO_T4); idx = min(chip_id(sc) - CHELSIO_T4, 2); return (sz_table[idx][!!isipv6]); } static int set_hashfilter(struct adapter *sc, struct t4_filter *t, uint64_t ftuple, struct l2t_entry *l2te, struct smt_entry *smt) { void *wr; struct wrq_cookie cookie; struct filter_entry *f; int rc, atid = -1; uint32_t hash; MPASS(t->fs.hash); /* Already validated against fconf, iconf */ MPASS((t->fs.val.pfvf_vld & t->fs.val.ovlan_vld) == 0); MPASS((t->fs.mask.pfvf_vld & t->fs.mask.ovlan_vld) == 0); hash = hf_hashfn_4t(&t->fs); mtx_lock(&sc->tids.hftid_lock); if (lookup_hf(sc, &t->fs, hash) != NULL) { rc = EEXIST; goto done; } f = malloc(sizeof(*f), M_CXGBE, M_ZERO | M_NOWAIT); if (__predict_false(f == NULL)) { rc = ENOMEM; goto done; } f->fs = t->fs; f->l2te = l2te; f->smt = smt; atid = alloc_atid(sc, f); if (__predict_false(atid) == -1) { free(f, M_CXGBE); rc = EAGAIN; goto done; } MPASS(atid >= 0); wr = start_wrq_wr(&sc->sge.ctrlq[0], act_open_cpl_len16(sc, f->fs.type), &cookie); if (wr == NULL) { free_atid(sc, atid); free(f, M_CXGBE); rc = ENOMEM; goto done; } if (f->fs.type) mk_act_open_req6(sc, f, atid, ftuple, wr); else mk_act_open_req(sc, f, atid, ftuple, wr); f->locked = 1; /* ithread mustn't free f if ioctl is still around. 
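 * Ownership convention, as used by the reply handlers above:
 * 'pending' stays set while a firmware reply is outstanding and
 * 'locked' means the ioctl thread still owns the final free.  If
 * the wait below is interrupted, the ioctl clears 'locked' and
 * returns EINPROGRESS, and the ithread becomes responsible for the
 * entry (it frees it if the setup ultimately fails).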
*/ f->pending = 1; f->tid = -1; insert_hf(sc, f, hash); commit_wrq_wr(&sc->sge.ctrlq[0], wr, &cookie); for (;;) { MPASS(f->locked); if (f->pending == 0) { if (f->valid) { rc = 0; f->locked = 0; t->idx = f->tid; } else { rc = f->tid; free(f, M_CXGBE); } break; } if (cv_wait_sig(&sc->tids.hftid_cv, &sc->tids.hftid_lock) != 0) { f->locked = 0; rc = EINPROGRESS; break; } } done: mtx_unlock(&sc->tids.hftid_lock); return (rc); } -/* SET_TCB_FIELD sent as a ULP command looks like this */ -#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ - sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) - -static void * -mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, uint64_t word, uint64_t mask, - uint64_t val, uint32_t tid, uint32_t qid) -{ - struct ulptx_idata *ulpsc; - struct cpl_set_tcb_field_core *req; - - ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); - ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); - - ulpsc = (struct ulptx_idata *)(ulpmc + 1); - ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); - ulpsc->len = htobe32(sizeof(*req)); - - req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); - OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); - req->reply_ctrl = htobe16(V_NO_REPLY(1) | V_QUEUENO(qid)); - req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); - req->mask = htobe64(mask); - req->val = htobe64(val); - - ulpsc = (struct ulptx_idata *)(req + 1); - if (LEN__SET_TCB_FIELD_ULP % 16) { - ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); - ulpsc->len = htobe32(0); - return (ulpsc + 1); - } - return (ulpsc); -} - /* ABORT_REQ sent as a ULP command looks like this */ #define LEN__ABORT_REQ_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_abort_req_core)) static void * mk_abort_req_ulp(struct ulp_txpkt *ulpmc, uint32_t tid) { struct ulptx_idata *ulpsc; struct cpl_abort_req_core *req; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__ABORT_REQ_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*req)); req = (struct cpl_abort_req_core *)(ulpsc + 1); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); req->rsvd0 = htonl(0); req->rsvd1 = 0; req->cmd = CPL_ABORT_NO_RST; ulpsc = (struct ulptx_idata *)(req + 1); if (LEN__ABORT_REQ_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } /* ABORT_RPL sent as a ULP command looks like this */ #define LEN__ABORT_RPL_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_abort_rpl_core)) static void * mk_abort_rpl_ulp(struct ulp_txpkt *ulpmc, uint32_t tid) { struct ulptx_idata *ulpsc; struct cpl_abort_rpl_core *rpl; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__ABORT_RPL_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*rpl)); rpl = (struct cpl_abort_rpl_core *)(ulpsc + 1); OPCODE_TID(rpl) = htobe32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); rpl->rsvd0 = htonl(0); rpl->rsvd1 = 0; rpl->cmd = CPL_ABORT_NO_RST; ulpsc = (struct ulptx_idata *)(rpl + 1); if (LEN__ABORT_RPL_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } static inline int del_hashfilter_wrlen(void) { return 
(sizeof(struct work_request_hdr) + roundup2(LEN__SET_TCB_FIELD_ULP, 16) + roundup2(LEN__ABORT_REQ_ULP, 16) + roundup2(LEN__ABORT_RPL_ULP, 16)); } static void -mk_del_hashfilter_wr(int tid, struct work_request_hdr *wrh, int wrlen, int qid) +mk_del_hashfilter_wr(struct adapter *sc, int tid, struct work_request_hdr *wrh, + int wrlen, int qid) { struct ulp_txpkt *ulpmc; INIT_ULPTX_WRH(wrh, wrlen, 0, 0); ulpmc = (struct ulp_txpkt *)(wrh + 1); - ulpmc = mk_set_tcb_field_ulp(ulpmc, W_TCB_RSS_INFO, - V_TCB_RSS_INFO(M_TCB_RSS_INFO), V_TCB_RSS_INFO(qid), tid, 0); + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, tid, W_TCB_RSS_INFO, + V_TCB_RSS_INFO(M_TCB_RSS_INFO), V_TCB_RSS_INFO(qid)); ulpmc = mk_abort_req_ulp(ulpmc, tid); ulpmc = mk_abort_rpl_ulp(ulpmc, tid); } static int del_hashfilter(struct adapter *sc, struct t4_filter *t) { struct tid_info *ti = &sc->tids; void *wr; struct filter_entry *f; struct wrq_cookie cookie; int rc; const int wrlen = del_hashfilter_wrlen(); const int inv_tid = ti->ntids + ti->tid_base; MPASS(sc->tids.hftid_hash_4t != NULL); MPASS(sc->tids.ntids > 0); if (t->idx < sc->tids.tid_base || t->idx >= inv_tid) return (EINVAL); mtx_lock(&ti->hftid_lock); f = lookup_hftid(sc, t->idx); if (f == NULL || f->valid == 0) { rc = EINVAL; goto done; } MPASS(f->tid == t->idx); if (f->locked) { rc = EPERM; goto done; } if (f->pending) { rc = EBUSY; goto done; } wr = start_wrq_wr(&sc->sge.ctrlq[0], howmany(wrlen, 16), &cookie); if (wr == NULL) { rc = ENOMEM; goto done; } - mk_del_hashfilter_wr(t->idx, wr, wrlen, sc->sge.fwq.abs_id); + mk_del_hashfilter_wr(sc, t->idx, wr, wrlen, sc->sge.fwq.abs_id); f->locked = 1; f->pending = 1; commit_wrq_wr(&sc->sge.ctrlq[0], wr, &cookie); t->fs = f->fs; /* extra info for the caller */ for (;;) { MPASS(f->locked); if (f->pending == 0) { if (f->valid) { f->locked = 0; rc = EIO; } else { rc = 0; free(f, M_CXGBE); } break; } if (cv_wait_sig(&ti->hftid_cv, &ti->hftid_lock) != 0) { f->locked = 0; rc = EINPROGRESS; break; } } done: mtx_unlock(&ti->hftid_lock); return (rc); } #define WORD_MASK 0xffffffff static void set_nat_params(struct adapter *sc, struct filter_entry *f, const bool dip, const bool sip, const bool dp, const bool sp) { if (dip) { if (f->fs.type) { set_tcb_field(sc, f->tid, W_TCB_SND_UNA_RAW, WORD_MASK, f->fs.nat_dip[15] | f->fs.nat_dip[14] << 8 | f->fs.nat_dip[13] << 16 | f->fs.nat_dip[12] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_SND_UNA_RAW + 1, WORD_MASK, f->fs.nat_dip[11] | f->fs.nat_dip[10] << 8 | f->fs.nat_dip[9] << 16 | f->fs.nat_dip[8] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_SND_UNA_RAW + 2, WORD_MASK, f->fs.nat_dip[7] | f->fs.nat_dip[6] << 8 | f->fs.nat_dip[5] << 16 | f->fs.nat_dip[4] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_SND_UNA_RAW + 3, WORD_MASK, f->fs.nat_dip[3] | f->fs.nat_dip[2] << 8 | f->fs.nat_dip[1] << 16 | f->fs.nat_dip[0] << 24, 1); } else { set_tcb_field(sc, f->tid, W_TCB_RX_FRAG3_LEN_RAW, WORD_MASK, f->fs.nat_dip[3] | f->fs.nat_dip[2] << 8 | f->fs.nat_dip[1] << 16 | f->fs.nat_dip[0] << 24, 1); } } if (sip) { if (f->fs.type) { set_tcb_field(sc, f->tid, W_TCB_RX_FRAG2_PTR_RAW, WORD_MASK, f->fs.nat_sip[15] | f->fs.nat_sip[14] << 8 | f->fs.nat_sip[13] << 16 | f->fs.nat_sip[12] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_RX_FRAG2_PTR_RAW + 1, WORD_MASK, f->fs.nat_sip[11] | f->fs.nat_sip[10] << 8 | f->fs.nat_sip[9] << 16 | f->fs.nat_sip[8] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_RX_FRAG2_PTR_RAW + 2, WORD_MASK, f->fs.nat_sip[7] | f->fs.nat_sip[6] << 8 | f->fs.nat_sip[5] << 16 | f->fs.nat_sip[4] << 24, 1); set_tcb_field(sc, 
f->tid, W_TCB_RX_FRAG2_PTR_RAW + 3, WORD_MASK, f->fs.nat_sip[3] | f->fs.nat_sip[2] << 8 | f->fs.nat_sip[1] << 16 | f->fs.nat_sip[0] << 24, 1); } else { set_tcb_field(sc, f->tid, W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW, WORD_MASK, f->fs.nat_sip[3] | f->fs.nat_sip[2] << 8 | f->fs.nat_sip[1] << 16 | f->fs.nat_sip[0] << 24, 1); } } set_tcb_field(sc, f->tid, W_TCB_PDU_HDR_LEN, WORD_MASK, (dp ? f->fs.nat_dport : 0) | (sp ? f->fs.nat_sport << 16 : 0), 1); } /* * Returns EINPROGRESS to indicate that at least one TCB update was sent and the * last of the series of updates requested a reply. The reply informs the * driver that the filter is fully setup. */ static int configure_hashfilter_tcb(struct adapter *sc, struct filter_entry *f) { int updated = 0; MPASS(f->tid < sc->tids.ntids); MPASS(f->fs.hash); MPASS(f->pending); MPASS(f->valid == 0); if (f->fs.newdmac) { set_tcb_tflag(sc, f->tid, S_TF_CCTRL_ECE, 1, 1); updated++; } if (f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) { set_tcb_tflag(sc, f->tid, S_TF_CCTRL_RFR, 1, 1); updated++; } if (f->fs.newsmac) { MPASS(f->smt != NULL); set_tcb_tflag(sc, f->tid, S_TF_CCTRL_CWR, 1, 1); set_tcb_field(sc, f->tid, W_TCB_SMAC_SEL, V_TCB_SMAC_SEL(M_TCB_SMAC_SEL), V_TCB_SMAC_SEL(f->smt->idx), 1); updated++; } switch(f->fs.nat_mode) { case NAT_MODE_NONE: break; case NAT_MODE_DIP: set_nat_params(sc, f, true, false, false, false); updated++; break; case NAT_MODE_DIP_DP: set_nat_params(sc, f, true, false, true, false); updated++; break; case NAT_MODE_DIP_DP_SIP: set_nat_params(sc, f, true, true, true, false); updated++; break; case NAT_MODE_DIP_DP_SP: set_nat_params(sc, f, true, false, true, true); updated++; break; case NAT_MODE_SIP_SP: set_nat_params(sc, f, false, true, false, true); updated++; break; case NAT_MODE_DIP_SIP_SP: set_nat_params(sc, f, true, true, false, true); updated++; break; case NAT_MODE_ALL: set_nat_params(sc, f, true, true, true, true); updated++; break; default: MPASS(0); /* should have been validated earlier */ break; } if (f->fs.nat_seq_chk) { set_tcb_field(sc, f->tid, W_TCB_RCV_NXT, V_TCB_RCV_NXT(M_TCB_RCV_NXT), V_TCB_RCV_NXT(f->fs.nat_seq_chk), 1); updated++; } if (is_t5(sc) && f->fs.action == FILTER_DROP) { /* * Migrating = 1, Non-offload = 0 to get a T5 hashfilter to drop. */ set_tcb_field(sc, f->tid, W_TCB_T_FLAGS, V_TF_NON_OFFLOAD(1) | V_TF_MIGRATING(1), V_TF_MIGRATING(1), 1); updated++; } /* * Enable switching after all secondary resources (L2T entry, SMT entry, * etc.) are setup so that any switched packet will use correct * values. */ if (f->fs.action == FILTER_SWITCH) { set_tcb_tflag(sc, f->tid, S_TF_CCTRL_ECN, 1, 1); updated++; } if (f->fs.hitcnts || updated > 0) { set_tcb_field(sc, f->tid, W_TCB_TIMESTAMP, V_TCB_TIMESTAMP(M_TCB_TIMESTAMP) | V_TCB_T_RTT_TS_RECENT_AGE(M_TCB_T_RTT_TS_RECENT_AGE), V_TCB_TIMESTAMP(0ULL) | V_TCB_T_RTT_TS_RECENT_AGE(0ULL), 0); return (EINPROGRESS); } return (0); } diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index c1d4af45fd70..a08ddea00d05 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -1,3051 +1,3019 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "tom/t4_tom.h" /* * Use the 'backend3' field in AIO jobs to store the amount of data * received by the AIO job so far. */ #define aio_received backend3 static void aio_ddp_requeue_task(void *context, int pending); static void ddp_complete_all(struct toepcb *toep, int error); static void t4_aio_cancel_active(struct kaiocb *job); static void t4_aio_cancel_queued(struct kaiocb *job); static int t4_alloc_page_pods_for_rcvbuf(struct ppod_region *pr, struct ddp_rcv_buffer *drb); static int t4_write_page_pods_for_rcvbuf(struct adapter *sc, struct sge_wrq *wrq, int tid, struct ddp_rcv_buffer *drb); static TAILQ_HEAD(, pageset) ddp_orphan_pagesets; static struct mtx ddp_orphan_pagesets_lock; static struct task ddp_orphan_task; #define MAX_DDP_BUFFER_SIZE (M_TCB_RX_DDP_BUF0_LEN) /* * A page set holds information about a user buffer used for AIO DDP. * The page set holds resources such as the VM pages backing the * buffer (either held or wired) and the page pods associated with the * buffer. Recently used page sets are cached to allow for efficient * reuse of buffers (avoiding the need to re-fault in pages, hold * them, etc.). Note that cached page sets keep the backing pages * wired. The number of wired pages is capped by only allowing for * two wired pagesets per connection. This is not a perfect cap, but * is a trade-off for performance. * * If an application ping-pongs two buffers for a connection via * aio_read(2) then those buffers should remain wired and expensive VM * fault lookups should be avoided after each buffer has been used * once. If an application uses more than two buffers then this will * fall back to doing expensive VM fault lookups for each operation. 
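 *
 * For example, an application that queues two aio_read(2) requests
 * up front and, as each one completes, consumes the data and
 * immediately resubmits the same buffer, keeps both pagesets
 * cached and wired and avoids the repeated VM fault lookups
 * described above.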
*/ static void free_pageset(struct tom_data *td, struct pageset *ps) { vm_page_t p; int i; if (ps->prsv.prsv_nppods > 0) t4_free_page_pods(&ps->prsv); for (i = 0; i < ps->npages; i++) { p = ps->pages[i]; vm_page_unwire(p, PQ_INACTIVE); } mtx_lock(&ddp_orphan_pagesets_lock); TAILQ_INSERT_TAIL(&ddp_orphan_pagesets, ps, link); taskqueue_enqueue(taskqueue_thread, &ddp_orphan_task); mtx_unlock(&ddp_orphan_pagesets_lock); } static void ddp_free_orphan_pagesets(void *context, int pending) { struct pageset *ps; mtx_lock(&ddp_orphan_pagesets_lock); while (!TAILQ_EMPTY(&ddp_orphan_pagesets)) { ps = TAILQ_FIRST(&ddp_orphan_pagesets); TAILQ_REMOVE(&ddp_orphan_pagesets, ps, link); mtx_unlock(&ddp_orphan_pagesets_lock); if (ps->vm) vmspace_free(ps->vm); free(ps, M_CXGBE); mtx_lock(&ddp_orphan_pagesets_lock); } mtx_unlock(&ddp_orphan_pagesets_lock); } static void recycle_pageset(struct toepcb *toep, struct pageset *ps) { DDP_ASSERT_LOCKED(toep); if (!(toep->ddp.flags & DDP_DEAD)) { KASSERT(toep->ddp.cached_count + toep->ddp.active_count < nitems(toep->ddp.db), ("too many wired pagesets")); TAILQ_INSERT_HEAD(&toep->ddp.cached_pagesets, ps, link); toep->ddp.cached_count++; } else free_pageset(toep->td, ps); } static void ddp_complete_one(struct kaiocb *job, int error) { long copied; /* * If this job had copied data out of the socket buffer before * it was cancelled, report it as a short read rather than an * error. */ copied = job->aio_received; if (copied != 0 || error == 0) aio_complete(job, copied, 0); else aio_complete(job, -1, error); } static void free_ddp_rcv_buffer(struct toepcb *toep, struct ddp_rcv_buffer *drb) { t4_free_page_pods(&drb->prsv); contigfree(drb->buf, drb->len, M_CXGBE); free(drb, M_CXGBE); counter_u64_add(toep->ofld_rxq->ddp_buffer_free, 1); free_toepcb(toep); } static void recycle_ddp_rcv_buffer(struct toepcb *toep, struct ddp_rcv_buffer *drb) { DDP_CACHE_LOCK(toep); if (!(toep->ddp.flags & DDP_DEAD) && toep->ddp.cached_count < t4_ddp_rcvbuf_cache) { TAILQ_INSERT_HEAD(&toep->ddp.cached_buffers, drb, link); toep->ddp.cached_count++; DDP_CACHE_UNLOCK(toep); } else { DDP_CACHE_UNLOCK(toep); free_ddp_rcv_buffer(toep, drb); } } static struct ddp_rcv_buffer * alloc_cached_ddp_rcv_buffer(struct toepcb *toep) { struct ddp_rcv_buffer *drb; DDP_CACHE_LOCK(toep); if (!TAILQ_EMPTY(&toep->ddp.cached_buffers)) { drb = TAILQ_FIRST(&toep->ddp.cached_buffers); TAILQ_REMOVE(&toep->ddp.cached_buffers, drb, link); toep->ddp.cached_count--; counter_u64_add(toep->ofld_rxq->ddp_buffer_reuse, 1); } else drb = NULL; DDP_CACHE_UNLOCK(toep); return (drb); } static struct ddp_rcv_buffer * alloc_ddp_rcv_buffer(struct toepcb *toep, int how) { struct tom_data *td = toep->td; struct adapter *sc = td_adapter(td); struct ddp_rcv_buffer *drb; int error; drb = malloc(sizeof(*drb), M_CXGBE, how | M_ZERO); if (drb == NULL) return (NULL); drb->buf = contigmalloc(t4_ddp_rcvbuf_len, M_CXGBE, how, 0, ~0, t4_ddp_rcvbuf_len, 0); if (drb->buf == NULL) { free(drb, M_CXGBE); return (NULL); } drb->len = t4_ddp_rcvbuf_len; drb->refs = 1; error = t4_alloc_page_pods_for_rcvbuf(&td->pr, drb); if (error != 0) { contigfree(drb->buf, drb->len, M_CXGBE); free(drb, M_CXGBE); return (NULL); } error = t4_write_page_pods_for_rcvbuf(sc, toep->ctrlq, toep->tid, drb); if (error != 0) { t4_free_page_pods(&drb->prsv); contigfree(drb->buf, drb->len, M_CXGBE); free(drb, M_CXGBE); return (NULL); } hold_toepcb(toep); counter_u64_add(toep->ofld_rxq->ddp_buffer_alloc, 1); return (drb); } static void free_ddp_buffer(struct toepcb *toep, struct 
ddp_buffer *db) { if ((toep->ddp.flags & DDP_RCVBUF) != 0) { if (db->drb != NULL) free_ddp_rcv_buffer(toep, db->drb); #ifdef INVARIANTS db->drb = NULL; #endif return; } if (db->job) { /* * XXX: If we are un-offloading the socket then we * should requeue these on the socket somehow. If we * got a FIN from the remote end, then this completes * any remaining requests with an EOF read. */ if (!aio_clear_cancel_function(db->job)) ddp_complete_one(db->job, 0); #ifdef INVARIANTS db->job = NULL; #endif } if (db->ps) { free_pageset(toep->td, db->ps); #ifdef INVARIANTS db->ps = NULL; #endif } } static void ddp_init_toep(struct toepcb *toep) { toep->ddp.flags = DDP_OK; toep->ddp.active_id = -1; mtx_init(&toep->ddp.lock, "t4 ddp", NULL, MTX_DEF); mtx_init(&toep->ddp.cache_lock, "t4 ddp cache", NULL, MTX_DEF); } void ddp_uninit_toep(struct toepcb *toep) { mtx_destroy(&toep->ddp.lock); mtx_destroy(&toep->ddp.cache_lock); } void release_ddp_resources(struct toepcb *toep) { struct ddp_rcv_buffer *drb; struct pageset *ps; int i; DDP_LOCK(toep); DDP_CACHE_LOCK(toep); toep->ddp.flags |= DDP_DEAD; DDP_CACHE_UNLOCK(toep); for (i = 0; i < nitems(toep->ddp.db); i++) { free_ddp_buffer(toep, &toep->ddp.db[i]); } if ((toep->ddp.flags & DDP_AIO) != 0) { while ((ps = TAILQ_FIRST(&toep->ddp.cached_pagesets)) != NULL) { TAILQ_REMOVE(&toep->ddp.cached_pagesets, ps, link); free_pageset(toep->td, ps); } ddp_complete_all(toep, 0); } if ((toep->ddp.flags & DDP_RCVBUF) != 0) { DDP_CACHE_LOCK(toep); while ((drb = TAILQ_FIRST(&toep->ddp.cached_buffers)) != NULL) { TAILQ_REMOVE(&toep->ddp.cached_buffers, drb, link); free_ddp_rcv_buffer(toep, drb); } DDP_CACHE_UNLOCK(toep); } DDP_UNLOCK(toep); } #ifdef INVARIANTS void ddp_assert_empty(struct toepcb *toep) { int i; MPASS((toep->ddp.flags & (DDP_TASK_ACTIVE | DDP_DEAD)) != DDP_TASK_ACTIVE); for (i = 0; i < nitems(toep->ddp.db); i++) { if ((toep->ddp.flags & DDP_AIO) != 0) { MPASS(toep->ddp.db[i].job == NULL); MPASS(toep->ddp.db[i].ps == NULL); } else MPASS(toep->ddp.db[i].drb == NULL); } if ((toep->ddp.flags & DDP_AIO) != 0) { MPASS(TAILQ_EMPTY(&toep->ddp.cached_pagesets)); MPASS(TAILQ_EMPTY(&toep->ddp.aiojobq)); } if ((toep->ddp.flags & DDP_RCVBUF) != 0) MPASS(TAILQ_EMPTY(&toep->ddp.cached_buffers)); } #endif static void complete_ddp_buffer(struct toepcb *toep, struct ddp_buffer *db, unsigned int db_idx) { struct ddp_rcv_buffer *drb; unsigned int db_flag; toep->ddp.active_count--; if (toep->ddp.active_id == db_idx) { if (toep->ddp.active_count == 0) { if ((toep->ddp.flags & DDP_AIO) != 0) KASSERT(toep->ddp.db[db_idx ^ 1].job == NULL, ("%s: active_count mismatch", __func__)); else KASSERT(toep->ddp.db[db_idx ^ 1].drb == NULL, ("%s: active_count mismatch", __func__)); toep->ddp.active_id = -1; } else toep->ddp.active_id ^= 1; #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %u, ddp_active_id = %d", __func__, toep->tid, toep->ddp.active_id); #endif } else { KASSERT(toep->ddp.active_count != 0 && toep->ddp.active_id != -1, ("%s: active count mismatch", __func__)); } if ((toep->ddp.flags & DDP_AIO) != 0) { db->cancel_pending = 0; db->job = NULL; recycle_pageset(toep, db->ps); db->ps = NULL; } else { drb = db->drb; if (atomic_fetchadd_int(&drb->refs, -1) == 1) recycle_ddp_rcv_buffer(toep, drb); db->drb = NULL; db->placed = 0; } db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE; KASSERT(toep->ddp.flags & db_flag, ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x", __func__, toep, toep->ddp.flags)); toep->ddp.flags &= ~db_flag; } /* Called when m_free drops the last reference. 
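 * The buffer's lifetime is driven by drb->refs: queue_ddp_rcvbuf()
 * sets it to 1 for the hardware's use of the buffer (dropped in
 * complete_ddp_buffer()) and queue_ddp_rcvbuf_mbuf() takes an
 * additional reference per mbuf via m_extaddref().  Once the socket
 * buffer frees the last such mbuf this callback runs and the buffer
 * is recycled into the per-connection cache or freed.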
*/ static void ddp_rcv_mbuf_done(struct mbuf *m) { struct toepcb *toep = m->m_ext.ext_arg1; struct ddp_rcv_buffer *drb = m->m_ext.ext_arg2; recycle_ddp_rcv_buffer(toep, drb); } static void queue_ddp_rcvbuf_mbuf(struct toepcb *toep, u_int db_idx, u_int len) { struct inpcb *inp = toep->inp; struct sockbuf *sb; struct ddp_buffer *db; struct ddp_rcv_buffer *drb; struct mbuf *m; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("%s: failed to allocate mbuf", __func__); return; } m->m_pkthdr.rcvif = toep->vi->ifp; db = &toep->ddp.db[db_idx]; drb = db->drb; m_extaddref(m, (char *)drb->buf + db->placed, len, &drb->refs, ddp_rcv_mbuf_done, toep, drb); m->m_pkthdr.len = len; m->m_len = len; sb = &inp->inp_socket->so_rcv; SOCKBUF_LOCK_ASSERT(sb); sbappendstream_locked(sb, m, 0); db->placed += len; toep->ofld_rxq->rx_toe_ddp_octets += len; } /* XXX: handle_ddp_data code duplication */ void insert_ddp_data(struct toepcb *toep, uint32_t n) { struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct ddp_buffer *db; struct kaiocb *job; size_t placed; long copied; unsigned int db_idx; #ifdef INVARIANTS unsigned int db_flag; #endif bool ddp_rcvbuf; INP_WLOCK_ASSERT(inp); DDP_ASSERT_LOCKED(toep); ddp_rcvbuf = (toep->ddp.flags & DDP_RCVBUF) != 0; tp->rcv_nxt += n; #ifndef USE_DDP_RX_FLOW_CONTROL KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__)); tp->rcv_wnd -= n; #endif CTR2(KTR_CXGBE, "%s: placed %u bytes before falling out of DDP", __func__, n); while (toep->ddp.active_count > 0) { MPASS(toep->ddp.active_id != -1); db_idx = toep->ddp.active_id; #ifdef INVARIANTS db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE; #endif MPASS((toep->ddp.flags & db_flag) != 0); db = &toep->ddp.db[db_idx]; if (ddp_rcvbuf) { placed = n; if (placed > db->drb->len - db->placed) placed = db->drb->len - db->placed; if (placed != 0) queue_ddp_rcvbuf_mbuf(toep, db_idx, placed); complete_ddp_buffer(toep, db, db_idx); n -= placed; continue; } job = db->job; copied = job->aio_received; placed = n; if (placed > job->uaiocb.aio_nbytes - copied) placed = job->uaiocb.aio_nbytes - copied; if (placed > 0) { job->msgrcv = 1; toep->ofld_rxq->rx_aio_ddp_jobs++; } toep->ofld_rxq->rx_aio_ddp_octets += placed; if (!aio_clear_cancel_function(job)) { /* * Update the copied length for when * t4_aio_cancel_active() completes this * request. */ job->aio_received += placed; } else if (copied + placed != 0) { CTR4(KTR_CXGBE, "%s: completing %p (copied %ld, placed %lu)", __func__, job, copied, placed); /* XXX: This always completes if there is some data. 
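 * In other words, a job that has already received some data is
 * finished off as a short read when the connection falls out of
 * DDP, while a job with no data yet is put back on the AIO queue
 * below.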
*/ aio_complete(job, copied + placed, 0); } else if (aio_set_cancel_function(job, t4_aio_cancel_queued)) { TAILQ_INSERT_HEAD(&toep->ddp.aiojobq, job, list); toep->ddp.waiting_count++; } else aio_cancel(job); n -= placed; complete_ddp_buffer(toep, db, db_idx); } MPASS(n == 0); } /* SET_TCB_FIELD sent as a ULP command looks like this */ #define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) /* RX_DATA_ACK sent as a ULP command looks like this */ #define LEN__RX_DATA_ACK_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_rx_data_ack_core)) -static inline void * -mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep, - uint64_t word, uint64_t mask, uint64_t val) -{ - struct ulptx_idata *ulpsc; - struct cpl_set_tcb_field_core *req; - - ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); - ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); - - ulpsc = (struct ulptx_idata *)(ulpmc + 1); - ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); - ulpsc->len = htobe32(sizeof(*req)); - - req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); - OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tid)); - req->reply_ctrl = htobe16(V_NO_REPLY(1) | - V_QUEUENO(toep->ofld_rxq->iq.abs_id)); - req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); - req->mask = htobe64(mask); - req->val = htobe64(val); - - ulpsc = (struct ulptx_idata *)(req + 1); - if (LEN__SET_TCB_FIELD_ULP % 16) { - ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); - ulpsc->len = htobe32(0); - return (ulpsc + 1); - } - return (ulpsc); -} - static inline void * mk_rx_data_ack_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep) { struct ulptx_idata *ulpsc; struct cpl_rx_data_ack_core *req; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__RX_DATA_ACK_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*req)); req = (struct cpl_rx_data_ack_core *)(ulpsc + 1); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tid)); req->credit_dack = htobe32(F_RX_MODULATE_RX); ulpsc = (struct ulptx_idata *)(req + 1); if (LEN__RX_DATA_ACK_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } static struct wrqe * mk_update_tcb_for_ddp(struct adapter *sc, struct toepcb *toep, int db_idx, struct ppod_reservation *prsv, int offset, uint32_t len, uint64_t ddp_flags, uint64_t ddp_flags_mask) { struct wrqe *wr; struct work_request_hdr *wrh; struct ulp_txpkt *ulpmc; int wrlen; KASSERT(db_idx == 0 || db_idx == 1, ("%s: bad DDP buffer index %d", __func__, db_idx)); /* * We'll send a compound work request that has 3 SET_TCB_FIELDs and an * RX_DATA_ACK (with RX_MODULATE to speed up delivery). * * The work request header is 16B and always ends at a 16B boundary. * The ULPTX master commands that follow must all end at 16B boundaries * too so we round up the size to 16. 
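 *
 * The resulting work request, with each ULP_TX_PKT padded to a 16B
 * boundary by a trailing NOOP sub-command where needed, looks like:
 *
 *	work_request_hdr (atomic)
 *	ULP_TX_PKT: SET_TCB_FIELD RX_DDP_BUF[01]_TAG
 *	ULP_TX_PKT: SET_TCB_FIELD RX_DDP_BUF[01]_OFFSET / _LEN
 *	ULP_TX_PKT: SET_TCB_FIELD RX_DDP_FLAGS
 *	ULP_TX_PKT: RX_DATA_ACK (RX_MODULATE)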
*/ wrlen = sizeof(*wrh) + 3 * roundup2(LEN__SET_TCB_FIELD_ULP, 16) + roundup2(LEN__RX_DATA_ACK_ULP, 16); wr = alloc_wrqe(wrlen, toep->ctrlq); if (wr == NULL) return (NULL); wrh = wrtod(wr); INIT_ULPTX_WRH(wrh, wrlen, 1, 0); /* atomic */ ulpmc = (struct ulp_txpkt *)(wrh + 1); /* Write the buffer's tag */ - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_RX_DDP_BUF0_TAG + db_idx, V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), V_TCB_RX_DDP_BUF0_TAG(prsv->prsv_tag)); /* Update the current offset in the DDP buffer and its total length */ if (db_idx == 0) - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_RX_DDP_BUF0_OFFSET, V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), V_TCB_RX_DDP_BUF0_OFFSET(offset) | V_TCB_RX_DDP_BUF0_LEN(len)); else - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_RX_DDP_BUF1_OFFSET, V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | V_TCB_RX_DDP_BUF1_LEN((u64)M_TCB_RX_DDP_BUF1_LEN << 32), V_TCB_RX_DDP_BUF1_OFFSET(offset) | V_TCB_RX_DDP_BUF1_LEN((u64)len << 32)); /* Update DDP flags */ - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_FLAGS, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_RX_DDP_FLAGS, ddp_flags_mask, ddp_flags); /* Gratuitous RX_DATA_ACK with RX_MODULATE set to speed up delivery. */ ulpmc = mk_rx_data_ack_ulp(ulpmc, toep); return (wr); } static int handle_ddp_data_aio(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len) { uint32_t report = be32toh(ddp_report); unsigned int db_idx; struct inpcb *inp = toep->inp; struct ddp_buffer *db; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct kaiocb *job; long copied; db_idx = report & F_DDP_BUF_IDX ? 1 : 0; if (__predict_false(!(report & F_DDP_INV))) CXGBE_UNIMPLEMENTED("DDP buffer still valid"); INP_WLOCK(inp); so = inp_inpcbtosocket(inp); sb = &so->so_rcv; DDP_LOCK(toep); KASSERT(toep->ddp.active_id == db_idx, ("completed DDP buffer (%d) != active_id (%d) for tid %d", db_idx, toep->ddp.active_id, toep->tid)); db = &toep->ddp.db[db_idx]; job = db->job; if (__predict_false(inp->inp_flags & INP_DROPPED)) { /* * This can happen due to an administrative tcpdrop(8). * Just fail the request with ECONNRESET. */ CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x", __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags); if (aio_clear_cancel_function(job)) ddp_complete_one(job, ECONNRESET); goto completed; } tp = intotcpcb(inp); /* * For RX_DDP_COMPLETE, len will be zero and rcv_nxt is the * sequence number of the next byte to receive. The length of * the data received for this message must be computed by * comparing the new and old values of rcv_nxt. * * For RX_DATA_DDP, len might be non-zero, but it is only the * length of the most recent DMA. It does not include the * total length of the data received since the previous update * for this DDP buffer. rcv_nxt is the sequence number of the * first received byte from the most recent DMA. 
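 *
 * Worked example with made-up sequence numbers: if tp->rcv_nxt is
 * 0x1000 and an RX_DATA_DDP arrives with rcv_nxt 0x1800 and len
 * 0x200, then 0x800 bytes were placed by earlier DMAs and 0x200 by
 * the latest one, so the adjustment below leaves len = 0xa00.  The
 * same expression covers RX_DDP_COMPLETE because len starts out as
 * 0 and rcv_nxt already points one past the last placed byte.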
*/ len += be32toh(rcv_nxt) - tp->rcv_nxt; tp->rcv_nxt += len; tp->t_rcvtime = ticks; #ifndef USE_DDP_RX_FLOW_CONTROL KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; #endif #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %u, DDP[%d] placed %d bytes (%#x)", __func__, toep->tid, db_idx, len, report); #endif /* receive buffer autosize */ MPASS(toep->vnet == so->so_vnet); CURVNET_SET(toep->vnet); SOCKBUF_LOCK(sb); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && len > (sbspace(sb) / 8 * 7)) { struct adapter *sc = td_adapter(toep->td); unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(so, SO_RCV, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; } SOCKBUF_UNLOCK(sb); CURVNET_RESTORE(); job->msgrcv = 1; toep->ofld_rxq->rx_aio_ddp_jobs++; toep->ofld_rxq->rx_aio_ddp_octets += len; if (db->cancel_pending) { /* * Update the job's length but defer completion to the * TCB_RPL callback. */ job->aio_received += len; goto out; } else if (!aio_clear_cancel_function(job)) { /* * Update the copied length for when * t4_aio_cancel_active() completes this request. */ job->aio_received += len; } else { copied = job->aio_received; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %u, completing %p (copied %ld, placed %d)", __func__, toep->tid, job, copied, len); #endif aio_complete(job, copied + len, 0); t4_rcvd(&toep->td->tod, tp); } completed: complete_ddp_buffer(toep, db, db_idx); if (toep->ddp.waiting_count > 0) ddp_queue_toep(toep); out: DDP_UNLOCK(toep); INP_WUNLOCK(inp); return (0); } static bool queue_ddp_rcvbuf(struct toepcb *toep, struct ddp_rcv_buffer *drb) { struct adapter *sc = td_adapter(toep->td); struct ddp_buffer *db; struct wrqe *wr; uint64_t ddp_flags, ddp_flags_mask; int buf_flag, db_idx; DDP_ASSERT_LOCKED(toep); KASSERT((toep->ddp.flags & DDP_DEAD) == 0, ("%s: DDP_DEAD", __func__)); KASSERT(toep->ddp.active_count < nitems(toep->ddp.db), ("%s: no empty DDP buffer slot", __func__)); /* Determine which DDP buffer to use. */ if (toep->ddp.db[0].drb == NULL) { db_idx = 0; } else { MPASS(toep->ddp.db[1].drb == NULL); db_idx = 1; } /* * Permit PSH to trigger a partial completion without * invalidating the rest of the buffer, but disable the PUSH * timer. */ ddp_flags = 0; ddp_flags_mask = 0; if (db_idx == 0) { ddp_flags |= V_TF_DDP_PSH_NO_INVALIDATE0(1) | V_TF_DDP_PUSH_DISABLE_0(0) | V_TF_DDP_PSHF_ENABLE_0(1) | V_TF_DDP_BUF0_VALID(1); ddp_flags_mask |= V_TF_DDP_PSH_NO_INVALIDATE0(1) | V_TF_DDP_PUSH_DISABLE_0(1) | V_TF_DDP_PSHF_ENABLE_0(1) | V_TF_DDP_BUF0_FLUSH(1) | V_TF_DDP_BUF0_VALID(1); buf_flag = DDP_BUF0_ACTIVE; } else { ddp_flags |= V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_PUSH_DISABLE_1(0) | V_TF_DDP_PSHF_ENABLE_1(1) | V_TF_DDP_BUF1_VALID(1); ddp_flags_mask |= V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_PUSH_DISABLE_1(1) | V_TF_DDP_PSHF_ENABLE_1(1) | V_TF_DDP_BUF1_FLUSH(1) | V_TF_DDP_BUF1_VALID(1); buf_flag = DDP_BUF1_ACTIVE; } MPASS((toep->ddp.flags & buf_flag) == 0); if ((toep->ddp.flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)) == 0) { MPASS(db_idx == 0); MPASS(toep->ddp.active_id == -1); MPASS(toep->ddp.active_count == 0); ddp_flags_mask |= V_TF_DDP_ACTIVE_BUF(1); } /* * The TID for this connection should still be valid. If * DDP_DEAD is set, SBS_CANTRCVMORE should be set, so we * shouldn't be this far anyway. 
*/ wr = mk_update_tcb_for_ddp(sc, toep, db_idx, &drb->prsv, 0, drb->len, ddp_flags, ddp_flags_mask); if (wr == NULL) { recycle_ddp_rcv_buffer(toep, drb); printf("%s: mk_update_tcb_for_ddp failed\n", __func__); return (false); } #ifdef VERBOSE_TRACES CTR(KTR_CXGBE, "%s: tid %u, scheduling DDP[%d] (flags %#lx/%#lx)", __func__, toep->tid, db_idx, ddp_flags, ddp_flags_mask); #endif /* * Hold a reference on scheduled buffers that is dropped in * complete_ddp_buffer. */ drb->refs = 1; /* Give the chip the go-ahead. */ t4_wrq_tx(sc, wr); db = &toep->ddp.db[db_idx]; db->drb = drb; toep->ddp.flags |= buf_flag; toep->ddp.active_count++; if (toep->ddp.active_count == 1) { MPASS(toep->ddp.active_id == -1); toep->ddp.active_id = db_idx; CTR2(KTR_CXGBE, "%s: ddp_active_id = %d", __func__, toep->ddp.active_id); } return (true); } static int handle_ddp_data_rcvbuf(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len) { uint32_t report = be32toh(ddp_report); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct ddp_buffer *db; struct ddp_rcv_buffer *drb; unsigned int db_idx; bool invalidated; db_idx = report & F_DDP_BUF_IDX ? 1 : 0; invalidated = (report & F_DDP_INV) != 0; INP_WLOCK(inp); so = inp_inpcbtosocket(inp); sb = &so->so_rcv; DDP_LOCK(toep); KASSERT(toep->ddp.active_id == db_idx, ("completed DDP buffer (%d) != active_id (%d) for tid %d", db_idx, toep->ddp.active_id, toep->tid)); db = &toep->ddp.db[db_idx]; if (__predict_false(inp->inp_flags & INP_DROPPED)) { /* * This can happen due to an administrative tcpdrop(8). * Just ignore the received data. */ CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x", __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags); if (invalidated) complete_ddp_buffer(toep, db, db_idx); goto out; } tp = intotcpcb(inp); /* * For RX_DDP_COMPLETE, len will be zero and rcv_nxt is the * sequence number of the next byte to receive. The length of * the data received for this message must be computed by * comparing the new and old values of rcv_nxt. * * For RX_DATA_DDP, len might be non-zero, but it is only the * length of the most recent DMA. It does not include the * total length of the data received since the previous update * for this DDP buffer. rcv_nxt is the sequence number of the * first received byte from the most recent DMA. 
*/ len += be32toh(rcv_nxt) - tp->rcv_nxt; tp->rcv_nxt += len; tp->t_rcvtime = ticks; #ifndef USE_DDP_RX_FLOW_CONTROL KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; #endif #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %u, DDP[%d] placed %d bytes (%#x)", __func__, toep->tid, db_idx, len, report); #endif /* receive buffer autosize */ MPASS(toep->vnet == so->so_vnet); CURVNET_SET(toep->vnet); SOCKBUF_LOCK(sb); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && len > (sbspace(sb) / 8 * 7)) { struct adapter *sc = td_adapter(toep->td); unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(so, SO_RCV, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; } if (len > 0) { queue_ddp_rcvbuf_mbuf(toep, db_idx, len); t4_rcvd_locked(&toep->td->tod, tp); } sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); CURVNET_RESTORE(); if (invalidated) complete_ddp_buffer(toep, db, db_idx); else KASSERT(db->placed < db->drb->len, ("%s: full DDP buffer not invalidated", __func__)); if (toep->ddp.active_count != nitems(toep->ddp.db)) { drb = alloc_cached_ddp_rcv_buffer(toep); if (drb == NULL) drb = alloc_ddp_rcv_buffer(toep, M_NOWAIT); if (drb == NULL) ddp_queue_toep(toep); else { if (!queue_ddp_rcvbuf(toep, drb)) { ddp_queue_toep(toep); } } } out: DDP_UNLOCK(toep); INP_WUNLOCK(inp); return (0); } static int handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len) { if ((toep->ddp.flags & DDP_RCVBUF) != 0) return (handle_ddp_data_rcvbuf(toep, ddp_report, rcv_nxt, len)); else return (handle_ddp_data_aio(toep, ddp_report, rcv_nxt, len)); } void handle_ddp_indicate(struct toepcb *toep) { DDP_ASSERT_LOCKED(toep); if ((toep->ddp.flags & DDP_RCVBUF) != 0) { /* * Indicates are not meaningful for RCVBUF since * buffers are activated when the socket option is * set. */ return; } MPASS(toep->ddp.active_count == 0); MPASS((toep->ddp.flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)) == 0); if (toep->ddp.waiting_count == 0) { /* * The pending requests that triggered the request for an * an indicate were cancelled. Those cancels should have * already disabled DDP. Just ignore this as the data is * going into the socket buffer anyway. */ return; } CTR3(KTR_CXGBE, "%s: tid %d indicated (%d waiting)", __func__, toep->tid, toep->ddp.waiting_count); ddp_queue_toep(toep); } CTASSERT(CPL_COOKIE_DDP0 + 1 == CPL_COOKIE_DDP1); static int do_ddp_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); unsigned int db_idx; struct toepcb *toep; struct inpcb *inp; struct ddp_buffer *db; struct kaiocb *job; long copied; if (cpl->status != CPL_ERR_NONE) panic("XXX: tcp_rpl failed: %d", cpl->status); toep = lookup_tid(sc, tid); inp = toep->inp; switch (cpl->cookie) { case V_WORD(W_TCB_RX_DDP_FLAGS) | V_COOKIE(CPL_COOKIE_DDP0): case V_WORD(W_TCB_RX_DDP_FLAGS) | V_COOKIE(CPL_COOKIE_DDP1): /* * XXX: This duplicates a lot of code with handle_ddp_data(). */ KASSERT((toep->ddp.flags & DDP_AIO) != 0, ("%s: DDP_RCVBUF", __func__)); db_idx = G_COOKIE(cpl->cookie) - CPL_COOKIE_DDP0; MPASS(db_idx < nitems(toep->ddp.db)); INP_WLOCK(inp); DDP_LOCK(toep); db = &toep->ddp.db[db_idx]; /* * handle_ddp_data() should leave the job around until * this callback runs once a cancel is pending. 
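 * (t4_aio_cancel_active() sets db->cancel_pending and asks the
 * hardware to give the buffer back; handle_ddp_data_aio() then only
 * accumulates the placed bytes in job->aio_received and leaves the
 * final aio_complete()/aio_cancel() to this handler.)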
*/ MPASS(db != NULL); MPASS(db->job != NULL); MPASS(db->cancel_pending); /* * XXX: It's not clear what happens if there is data * placed when the buffer is invalidated. I suspect we * need to read the TCB to see how much data was placed. * * For now this just pretends like nothing was placed. * * XXX: Note that if we did check the PCB we would need to * also take care of updating the tp, etc. */ job = db->job; copied = job->aio_received; if (copied == 0) { CTR2(KTR_CXGBE, "%s: cancelling %p", __func__, job); aio_cancel(job); } else { CTR3(KTR_CXGBE, "%s: completing %p (copied %ld)", __func__, job, copied); aio_complete(job, copied, 0); t4_rcvd(&toep->td->tod, intotcpcb(inp)); } complete_ddp_buffer(toep, db, db_idx); if (toep->ddp.waiting_count > 0) ddp_queue_toep(toep); DDP_UNLOCK(toep); INP_WUNLOCK(inp); break; default: panic("XXX: unknown tcb_rpl offset %#x, cookie %#x", G_WORD(cpl->cookie), G_COOKIE(cpl->cookie)); } return (0); } void handle_ddp_close(struct toepcb *toep, struct tcpcb *tp, __be32 rcv_nxt) { struct socket *so = toep->inp->inp_socket; struct sockbuf *sb = &so->so_rcv; struct ddp_buffer *db; struct kaiocb *job; long copied; unsigned int db_idx; #ifdef INVARIANTS unsigned int db_flag; #endif int len, placed; bool ddp_rcvbuf; INP_WLOCK_ASSERT(toep->inp); DDP_ASSERT_LOCKED(toep); ddp_rcvbuf = (toep->ddp.flags & DDP_RCVBUF) != 0; /* - 1 is to ignore the byte for FIN */ len = be32toh(rcv_nxt) - tp->rcv_nxt - 1; tp->rcv_nxt += len; CTR(KTR_CXGBE, "%s: tid %d placed %u bytes before FIN", __func__, toep->tid, len); while (toep->ddp.active_count > 0) { MPASS(toep->ddp.active_id != -1); db_idx = toep->ddp.active_id; #ifdef INVARIANTS db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE; #endif MPASS((toep->ddp.flags & db_flag) != 0); db = &toep->ddp.db[db_idx]; if (ddp_rcvbuf) { placed = len; if (placed > db->drb->len - db->placed) placed = db->drb->len - db->placed; if (placed != 0) { SOCKBUF_LOCK(sb); queue_ddp_rcvbuf_mbuf(toep, db_idx, placed); sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); } complete_ddp_buffer(toep, db, db_idx); len -= placed; continue; } job = db->job; copied = job->aio_received; placed = len; if (placed > job->uaiocb.aio_nbytes - copied) placed = job->uaiocb.aio_nbytes - copied; if (placed > 0) { job->msgrcv = 1; toep->ofld_rxq->rx_aio_ddp_jobs++; } toep->ofld_rxq->rx_aio_ddp_octets += placed; if (!aio_clear_cancel_function(job)) { /* * Update the copied length for when * t4_aio_cancel_active() completes this * request. 
*/ job->aio_received += placed; } else { CTR4(KTR_CXGBE, "%s: tid %d completed buf %d len %d", __func__, toep->tid, db_idx, placed); aio_complete(job, copied + placed, 0); } len -= placed; complete_ddp_buffer(toep, db, db_idx); } MPASS(len == 0); if ((toep->ddp.flags & DDP_AIO) != 0) ddp_complete_all(toep, 0); } #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\ F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\ F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\ F_DDP_INVALID_PPOD | F_DDP_HDRCRC_ERR | F_DDP_DATACRC_ERR) extern cpl_handler_t t4_cpl_handler[]; static int do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); uint32_t vld; struct toepcb *toep = lookup_tid(sc, tid); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); vld = be32toh(cpl->ddpvld); if (__predict_false(vld & DDP_ERR)) { panic("%s: DDP error 0x%x (tid %d, toep %p)", __func__, vld, tid, toep); } if (ulp_mode(toep) == ULP_MODE_ISCSI) { t4_cpl_handler[CPL_RX_ISCSI_DDP](iq, rss, m); return (0); } handle_ddp_data(toep, cpl->u.ddp_report, cpl->seq, be16toh(cpl->len)); return (0); } static int do_rx_ddp_complete(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_ddp_complete *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); handle_ddp_data(toep, cpl->ddp_report, cpl->rcv_nxt, 0); return (0); } static bool set_ddp_ulp_mode(struct toepcb *toep) { struct adapter *sc = toep->vi->adapter; struct wrqe *wr; struct work_request_hdr *wrh; struct ulp_txpkt *ulpmc; int fields, len; if (!sc->tt.ddp) return (false); fields = 0; /* Overlay region including W_TCB_RX_DDP_FLAGS */ fields += 3; /* W_TCB_ULP_TYPE */ fields++; #ifdef USE_DDP_RX_FLOW_CONTROL /* W_TCB_T_FLAGS */ fields++; #endif len = sizeof(*wrh) + fields * roundup2(LEN__SET_TCB_FIELD_ULP, 16); KASSERT(len <= SGE_MAX_WR_LEN, ("%s: WR with %d TCB field updates too large", __func__, fields)); wr = alloc_wrqe(len, toep->ctrlq); if (wr == NULL) return (false); CTR(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); wrh = wrtod(wr); INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */ ulpmc = (struct ulp_txpkt *)(wrh + 1); /* * Words 26/27 are zero except for the DDP_OFF flag in * W_TCB_RX_DDP_FLAGS (27). */ - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, 26, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 26, 0xffffffffffffffff, (uint64_t)V_TF_DDP_OFF(1) << 32); /* Words 28/29 are zero. */ - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, 28, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 28, 0xffffffffffffffff, 0); /* Words 30/31 are zero. */ - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, 30, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 30, 0xffffffffffffffff, 0); /* Set the ULP mode to ULP_MODE_TCPDDP. 
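set_ddp_ulp_mode() sizes its atomic ULP_TX work request as a header plus one 16-byte-aligned SET_TCB_FIELD sub-command per TCB field it touches. The sketch below only illustrates that arithmetic; the header and LEN__SET_TCB_FIELD_ULP sizes are assumed placeholders, not the real structure sizes.

#include <stdio.h>

/* Round x up to a power-of-two boundary, as the roundup2() macro does. */
#define ROUNDUP2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

/*
 * Illustrative sketch (not driver code): one atomic work request holds
 * a header plus a 16-byte-aligned sub-command for each TCB field.
 */
int
main(void)
{
	int hdr_len = 16;		/* assumed work_request_hdr size */
	int field_cmd_len = 24;		/* assumed LEN__SET_TCB_FIELD_ULP */
	int fields, len;

	fields = 3;	/* overlay region covering W_TCB_RX_DDP_FLAGS */
	fields++;	/* W_TCB_ULP_TYPE */
	fields++;	/* W_TCB_T_FLAGS (flow-control build option) */

	len = hdr_len + fields * ROUNDUP2(field_cmd_len, 16);
	printf("WR length: %d bytes (%d fields)\n", len, fields);
	return (0);
}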
*/ toep->params.ulp_mode = ULP_MODE_TCPDDP; - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_ULP_TYPE, - V_TCB_ULP_TYPE(M_TCB_ULP_TYPE), - V_TCB_ULP_TYPE(ULP_MODE_TCPDDP)); + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_ULP_TYPE, + V_TCB_ULP_TYPE(M_TCB_ULP_TYPE), V_TCB_ULP_TYPE(ULP_MODE_TCPDDP)); #ifdef USE_DDP_RX_FLOW_CONTROL /* Set TF_RX_FLOW_CONTROL_DDP. */ - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_T_FLAGS, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_T_FLAGS, V_TF_RX_FLOW_CONTROL_DDP(1), V_TF_RX_FLOW_CONTROL_DDP(1)); #endif ddp_init_toep(toep); t4_wrq_tx(sc, wr); return (true); } static void enable_ddp(struct adapter *sc, struct toepcb *toep) { uint64_t ddp_flags; KASSERT((toep->ddp.flags & (DDP_ON | DDP_OK | DDP_SC_REQ)) == DDP_OK, ("%s: toep %p has bad ddp_flags 0x%x", __func__, toep, toep->ddp.flags)); CTR3(KTR_CXGBE, "%s: tid %u (time %u)", __func__, toep->tid, time_uptime); ddp_flags = 0; if ((toep->ddp.flags & DDP_AIO) != 0) ddp_flags |= V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1); DDP_ASSERT_LOCKED(toep); toep->ddp.flags |= DDP_SC_REQ; t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1) | V_TF_DDP_INDICATE_OUT(1) | V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1) | V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1), ddp_flags, 0, 0); t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, V_TF_RCV_COALESCE_ENABLE(1), 0, 0, 0); } static int calculate_hcf(int n1, int n2) { int a, b, t; if (n1 <= n2) { a = n1; b = n2; } else { a = n2; b = n1; } while (a != 0) { t = a; a = b % a; b = t; } return (b); } static inline int pages_to_nppods(int npages, int ddp_page_shift) { MPASS(ddp_page_shift >= PAGE_SHIFT); return (howmany(npages >> (ddp_page_shift - PAGE_SHIFT), PPOD_PAGES)); } static int alloc_page_pods(struct ppod_region *pr, u_int nppods, u_int pgsz_idx, struct ppod_reservation *prsv) { vmem_addr_t addr; /* relative to start of region */ if (vmem_alloc(pr->pr_arena, PPOD_SZ(nppods), M_NOWAIT | M_FIRSTFIT, &addr) != 0) return (ENOMEM); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%-17s arena %p, addr 0x%08x, nppods %d, pgsz %d", __func__, pr->pr_arena, (uint32_t)addr & pr->pr_tag_mask, nppods, 1 << pr->pr_page_shift[pgsz_idx]); #endif /* * The hardware tagmask includes an extra invalid bit but the arena was * seeded with valid values only. An allocation out of this arena will * fit inside the tagmask but won't have the invalid bit set. */ MPASS((addr & pr->pr_tag_mask) == addr); MPASS((addr & pr->pr_invalid_bit) == 0); prsv->prsv_pr = pr; prsv->prsv_tag = V_PPOD_PGSZ(pgsz_idx) | addr; prsv->prsv_nppods = nppods; return (0); } static int t4_alloc_page_pods_for_vmpages(struct ppod_region *pr, vm_page_t *pages, int npages, struct ppod_reservation *prsv) { int i, hcf, seglen, idx, nppods; /* * The DDP page size is unrelated to the VM page size. We combine * contiguous physical pages into larger segments to get the best DDP * page size possible. This is the largest of the four sizes in * A_ULP_RX_TDDP_PSZ that evenly divides the HCF of the segment sizes in * the page list. 
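calculate_hcf() above is plain Euclid, and pages_to_nppods() converts a VM page count into a page-pod count once the DDP page size is chosen. The standalone sketch below mirrors both; the 4 KB VM page and the PPOD_PAGES value of 4 are assumptions for the example.

#include <stdio.h>

/*
 * Illustrative sketch (not driver code): Euclid's algorithm for the
 * highest common factor of segment lengths, and the conversion from
 * VM pages to page pods at a given DDP page size.
 */
#define PAGE_SHIFT	12		/* assumed 4 KB VM pages */
#define PPOD_PAGES	4		/* assumed DDP pages per pod */

static int
hcf(int n1, int n2)
{
	int a = n1 < n2 ? n1 : n2, b = n1 < n2 ? n2 : n1, t;

	while (a != 0) {
		t = a;
		a = b % a;
		b = t;
	}
	return (b);
}

static int
pages_to_nppods(int npages, int ddp_page_shift)
{
	int ddp_pages = npages >> (ddp_page_shift - PAGE_SHIFT);

	return ((ddp_pages + PPOD_PAGES - 1) / PPOD_PAGES);	/* howmany() */
}

int
main(void)
{
	/* Two contiguous runs of 64 KB and 24 KB share an HCF of 8 KB. */
	printf("hcf = %d\n", hcf(65536, 24576));
	/* 256 x 4 KB pages at a 16 KB DDP page size -> 64 DDP pages -> 16 pods. */
	printf("nppods = %d\n", pages_to_nppods(256, 14));
	return (0);
}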
*/ hcf = 0; for (i = 0; i < npages; i++) { seglen = PAGE_SIZE; while (i < npages - 1 && VM_PAGE_TO_PHYS(pages[i]) + PAGE_SIZE == VM_PAGE_TO_PHYS(pages[i + 1])) { seglen += PAGE_SIZE; i++; } hcf = calculate_hcf(hcf, seglen); if (hcf < (1 << pr->pr_page_shift[1])) { idx = 0; goto have_pgsz; /* give up, short circuit */ } } #define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1) MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */ for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) { if ((hcf & PR_PAGE_MASK(idx)) == 0) break; } #undef PR_PAGE_MASK have_pgsz: MPASS(idx <= M_PPOD_PGSZ); nppods = pages_to_nppods(npages, pr->pr_page_shift[idx]); if (alloc_page_pods(pr, nppods, idx, prsv) != 0) return (ENOMEM); MPASS(prsv->prsv_nppods > 0); return (0); } int t4_alloc_page_pods_for_ps(struct ppod_region *pr, struct pageset *ps) { struct ppod_reservation *prsv = &ps->prsv; KASSERT(prsv->prsv_nppods == 0, ("%s: page pods already allocated", __func__)); return (t4_alloc_page_pods_for_vmpages(pr, ps->pages, ps->npages, prsv)); } int t4_alloc_page_pods_for_bio(struct ppod_region *pr, struct bio *bp, struct ppod_reservation *prsv) { MPASS(bp->bio_flags & BIO_UNMAPPED); return (t4_alloc_page_pods_for_vmpages(pr, bp->bio_ma, bp->bio_ma_n, prsv)); } int t4_alloc_page_pods_for_buf(struct ppod_region *pr, vm_offset_t buf, int len, struct ppod_reservation *prsv) { int hcf, seglen, idx, npages, nppods; uintptr_t start_pva, end_pva, pva, p1; MPASS(buf > 0); MPASS(len > 0); /* * The DDP page size is unrelated to the VM page size. We combine * contiguous physical pages into larger segments to get the best DDP * page size possible. This is the largest of the four sizes in * A_ULP_RX_ISCSI_PSZ that evenly divides the HCF of the segment sizes * in the page list. */ hcf = 0; start_pva = trunc_page(buf); end_pva = trunc_page(buf + len - 1); pva = start_pva; while (pva <= end_pva) { seglen = PAGE_SIZE; p1 = pmap_kextract(pva); pva += PAGE_SIZE; while (pva <= end_pva && p1 + seglen == pmap_kextract(pva)) { seglen += PAGE_SIZE; pva += PAGE_SIZE; } hcf = calculate_hcf(hcf, seglen); if (hcf < (1 << pr->pr_page_shift[1])) { idx = 0; goto have_pgsz; /* give up, short circuit */ } } #define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1) MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */ for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) { if ((hcf & PR_PAGE_MASK(idx)) == 0) break; } #undef PR_PAGE_MASK have_pgsz: MPASS(idx <= M_PPOD_PGSZ); npages = 1; npages += (end_pva - start_pva) >> pr->pr_page_shift[idx]; nppods = howmany(npages, PPOD_PAGES); if (alloc_page_pods(pr, nppods, idx, prsv) != 0) return (ENOMEM); MPASS(prsv->prsv_nppods > 0); return (0); } static int t4_alloc_page_pods_for_rcvbuf(struct ppod_region *pr, struct ddp_rcv_buffer *drb) { struct ppod_reservation *prsv = &drb->prsv; KASSERT(prsv->prsv_nppods == 0, ("%s: page pods already allocated", __func__)); return (t4_alloc_page_pods_for_buf(pr, (vm_offset_t)drb->buf, drb->len, prsv)); } int t4_alloc_page_pods_for_sgl(struct ppod_region *pr, struct ctl_sg_entry *sgl, int entries, struct ppod_reservation *prsv) { int hcf, seglen, idx = 0, npages, nppods, i, len; uintptr_t start_pva, end_pva, pva, p1 ; vm_offset_t buf; struct ctl_sg_entry *sge; MPASS(entries > 0); MPASS(sgl); /* * The DDP page size is unrelated to the VM page size. We combine * contiguous physical pages into larger segments to get the best DDP * page size possible. 
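Once the HCF of all physically contiguous segment lengths is known, the code above picks the largest programmed DDP page size that still divides it evenly. The sketch below walks the same loop with example page shifts; the real shifts are programmed via A_ULP_RX_TDDP_PSZ / A_ULP_RX_ISCSI_PSZ and need not match these.

#include <stdio.h>

/*
 * Illustrative sketch (not driver code): choose the largest of four
 * DDP page sizes that evenly divides the HCF of the segment lengths.
 * The shifts below (4 KB .. 16 MB) are example values only.
 */
int
main(void)
{
	int page_shift[4] = { 12, 16, 20, 24 };	/* assumed HPZ0..HPZ3 */
	int hcf = 1 << 20;			/* e.g. every segment is 1 MB-aligned */
	int idx;

	for (idx = 3; idx > 0; idx--) {
		if ((hcf & ((1 << page_shift[idx]) - 1)) == 0)
			break;
	}
	printf("use DDP page size %d KB (idx %d)\n",
	    (1 << page_shift[idx]) / 1024, idx);
	return (0);
}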
This is the largest of the four sizes in * A_ULP_RX_ISCSI_PSZ that evenly divides the HCF of the segment sizes * in the page list. */ hcf = 0; for (i = entries - 1; i >= 0; i--) { sge = sgl + i; buf = (vm_offset_t)sge->addr; len = sge->len; start_pva = trunc_page(buf); end_pva = trunc_page(buf + len - 1); pva = start_pva; while (pva <= end_pva) { seglen = PAGE_SIZE; p1 = pmap_kextract(pva); pva += PAGE_SIZE; while (pva <= end_pva && p1 + seglen == pmap_kextract(pva)) { seglen += PAGE_SIZE; pva += PAGE_SIZE; } hcf = calculate_hcf(hcf, seglen); if (hcf < (1 << pr->pr_page_shift[1])) { idx = 0; goto have_pgsz; /* give up, short circuit */ } } } #define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1) MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */ for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) { if ((hcf & PR_PAGE_MASK(idx)) == 0) break; } #undef PR_PAGE_MASK have_pgsz: MPASS(idx <= M_PPOD_PGSZ); npages = 0; while (entries--) { npages++; start_pva = trunc_page((vm_offset_t)sgl->addr); end_pva = trunc_page((vm_offset_t)sgl->addr + sgl->len - 1); npages += (end_pva - start_pva) >> pr->pr_page_shift[idx]; sgl = sgl + 1; } nppods = howmany(npages, PPOD_PAGES); if (alloc_page_pods(pr, nppods, idx, prsv) != 0) return (ENOMEM); MPASS(prsv->prsv_nppods > 0); return (0); } void t4_free_page_pods(struct ppod_reservation *prsv) { struct ppod_region *pr = prsv->prsv_pr; vmem_addr_t addr; MPASS(prsv != NULL); MPASS(prsv->prsv_nppods != 0); addr = prsv->prsv_tag & pr->pr_tag_mask; MPASS((addr & pr->pr_invalid_bit) == 0); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%-17s arena %p, addr 0x%08x, nppods %d", __func__, pr->pr_arena, addr, prsv->prsv_nppods); #endif vmem_free(pr->pr_arena, addr, PPOD_SZ(prsv->prsv_nppods)); prsv->prsv_nppods = 0; } #define NUM_ULP_TX_SC_IMM_PPODS (256 / PPOD_SIZE) int t4_write_page_pods_for_ps(struct adapter *sc, struct sge_wrq *wrq, int tid, struct pageset *ps) { struct wrqe *wr; struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; int i, j, k, n, chunk, len, ddp_pgsz, idx; u_int ppod_addr; uint32_t cmd; struct ppod_reservation *prsv = &ps->prsv; struct ppod_region *pr = prsv->prsv_pr; vm_paddr_t pa; KASSERT(!(ps->flags & PS_PPODS_WRITTEN), ("%s: page pods already written", __func__)); MPASS(prsv->prsv_nppods > 0); cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) cmd |= htobe32(F_ULP_MEMIO_ORDER); else cmd |= htobe32(F_T5_ULP_MEMIO_IMM); ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)]; ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask); for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS); chunk = PPOD_SZ(n); len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); wr = alloc_wrqe(len, wrq); if (wr == NULL) return (ENOMEM); /* ok to just bail out */ ulpmc = wrtod(wr); INIT_ULPTX_WR(ulpmc, len, 0, 0); ulpmc->cmd = cmd; ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(chunk); ppod = (struct pagepod *)(ulpsc + 1); for (j = 0; j < n; i++, j++, ppod++) { ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | V_PPOD_TID(tid) | prsv->prsv_tag); ppod->len_offset = htobe64(V_PPOD_LEN(ps->len) | V_PPOD_OFST(ps->offset)); ppod->rsvd = 
0; idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE); for (k = 0; k < nitems(ppod->addr); k++) { if (idx < ps->npages) { pa = VM_PAGE_TO_PHYS(ps->pages[idx]); ppod->addr[k] = htobe64(pa); idx += ddp_pgsz / PAGE_SIZE; } else ppod->addr[k] = 0; #if 0 CTR5(KTR_CXGBE, "%s: tid %d ppod[%d]->addr[%d] = %p", __func__, tid, i, k, be64toh(ppod->addr[k])); #endif } } t4_wrq_tx(sc, wr); } ps->flags |= PS_PPODS_WRITTEN; return (0); } static int t4_write_page_pods_for_rcvbuf(struct adapter *sc, struct sge_wrq *wrq, int tid, struct ddp_rcv_buffer *drb) { struct wrqe *wr; struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; int i, j, k, n, chunk, len, ddp_pgsz; u_int ppod_addr, offset; uint32_t cmd; struct ppod_reservation *prsv = &drb->prsv; struct ppod_region *pr = prsv->prsv_pr; uintptr_t end_pva, pva; vm_paddr_t pa; MPASS(prsv->prsv_nppods > 0); cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) cmd |= htobe32(F_ULP_MEMIO_ORDER); else cmd |= htobe32(F_T5_ULP_MEMIO_IMM); ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)]; offset = (uintptr_t)drb->buf & PAGE_MASK; ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask); pva = trunc_page((uintptr_t)drb->buf); end_pva = trunc_page((uintptr_t)drb->buf + drb->len - 1); for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS); MPASS(n > 0); chunk = PPOD_SZ(n); len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); wr = alloc_wrqe(len, wrq); if (wr == NULL) return (ENOMEM); /* ok to just bail out */ ulpmc = wrtod(wr); INIT_ULPTX_WR(ulpmc, len, 0, 0); ulpmc->cmd = cmd; ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(chunk); ppod = (struct pagepod *)(ulpsc + 1); for (j = 0; j < n; i++, j++, ppod++) { ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | V_PPOD_TID(tid) | prsv->prsv_tag); ppod->len_offset = htobe64(V_PPOD_LEN(drb->len) | V_PPOD_OFST(offset)); ppod->rsvd = 0; for (k = 0; k < nitems(ppod->addr); k++) { if (pva > end_pva) ppod->addr[k] = 0; else { pa = pmap_kextract(pva); ppod->addr[k] = htobe64(pa); pva += ddp_pgsz; } #if 0 CTR5(KTR_CXGBE, "%s: tid %d ppod[%d]->addr[%d] = %p", __func__, tid, i, k, be64toh(ppod->addr[k])); #endif } /* * Walk back 1 segment so that the first address in the * next pod is the same as the last one in the current * pod. 
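Each page pod carries PPOD_PAGES addresses plus one extra slot, and the writer steps back one DDP page after finishing a pod so its last address repeats as the first address of the next pod (the "walk back 1 segment" comment above). The sketch below shows that overlap for a physically contiguous buffer; the 5-slot pod and 4 KB DDP page size are assumptions.

#include <stdio.h>

/*
 * Illustrative sketch (not driver code): fill successive pods from a
 * contiguous buffer, repeating the last address of each pod as the
 * first address of the next.
 */
#define PPOD_ADDRS	5		/* assumed PPOD_PAGES + 1 overlap slot */

int
main(void)
{
	unsigned long pa = 0x100000;	/* start of a contiguous buffer */
	unsigned long end = 0x100000 + 8 * 4096;
	int ddp_pgsz = 4096;
	int pod, k;

	for (pod = 0; pod < 2; pod++) {
		printf("pod %d:", pod);
		for (k = 0; k < PPOD_ADDRS; k++) {
			if (pa < end) {
				printf(" %#lx", pa);
				pa += ddp_pgsz;
			} else
				printf(" 0");
		}
		pa -= ddp_pgsz;		/* overlap with the next pod */
		printf("\n");
	}
	return (0);
}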
*/ pva -= ddp_pgsz; } t4_wrq_tx(sc, wr); } MPASS(pva <= end_pva); return (0); } static struct mbuf * alloc_raw_wr_mbuf(int len) { struct mbuf *m; if (len <= MHLEN) m = m_gethdr(M_NOWAIT, MT_DATA); else if (len <= MCLBYTES) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = NULL; if (m == NULL) return (NULL); m->m_pkthdr.len = len; m->m_len = len; set_mbuf_raw_wr(m, true); return (m); } int t4_write_page_pods_for_bio(struct adapter *sc, struct toepcb *toep, struct ppod_reservation *prsv, struct bio *bp, struct mbufq *wrq) { struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; int i, j, k, n, chunk, len, ddp_pgsz, idx; u_int ppod_addr; uint32_t cmd; struct ppod_region *pr = prsv->prsv_pr; vm_paddr_t pa; struct mbuf *m; MPASS(bp->bio_flags & BIO_UNMAPPED); cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) cmd |= htobe32(F_ULP_MEMIO_ORDER); else cmd |= htobe32(F_T5_ULP_MEMIO_IMM); ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)]; ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask); for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS); MPASS(n > 0); chunk = PPOD_SZ(n); len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); m = alloc_raw_wr_mbuf(len); if (m == NULL) return (ENOMEM); ulpmc = mtod(m, struct ulp_mem_io *); INIT_ULPTX_WR(ulpmc, len, 0, toep->tid); ulpmc->cmd = cmd; ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(chunk); ppod = (struct pagepod *)(ulpsc + 1); for (j = 0; j < n; i++, j++, ppod++) { ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | V_PPOD_TID(toep->tid) | (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ))); ppod->len_offset = htobe64(V_PPOD_LEN(bp->bio_bcount) | V_PPOD_OFST(bp->bio_ma_offset)); ppod->rsvd = 0; idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE); for (k = 0; k < nitems(ppod->addr); k++) { if (idx < bp->bio_ma_n) { pa = VM_PAGE_TO_PHYS(bp->bio_ma[idx]); ppod->addr[k] = htobe64(pa); idx += ddp_pgsz / PAGE_SIZE; } else ppod->addr[k] = 0; #if 0 CTR5(KTR_CXGBE, "%s: tid %d ppod[%d]->addr[%d] = %p", __func__, toep->tid, i, k, be64toh(ppod->addr[k])); #endif } } mbufq_enqueue(wrq, m); } return (0); } int t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep, struct ppod_reservation *prsv, vm_offset_t buf, int buflen, struct mbufq *wrq) { struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; int i, j, k, n, chunk, len, ddp_pgsz; u_int ppod_addr, offset; uint32_t cmd; struct ppod_region *pr = prsv->prsv_pr; uintptr_t end_pva, pva; vm_paddr_t pa; struct mbuf *m; cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) cmd |= htobe32(F_ULP_MEMIO_ORDER); else cmd |= htobe32(F_T5_ULP_MEMIO_IMM); ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)]; offset = buf & PAGE_MASK; ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask); pva = trunc_page(buf); end_pva = trunc_page(buf + buflen - 1); for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS); MPASS(n > 0); chunk = PPOD_SZ(n); len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); m = alloc_raw_wr_mbuf(len); if (m == NULL) return 
(ENOMEM); ulpmc = mtod(m, struct ulp_mem_io *); INIT_ULPTX_WR(ulpmc, len, 0, toep->tid); ulpmc->cmd = cmd; ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(chunk); ppod = (struct pagepod *)(ulpsc + 1); for (j = 0; j < n; i++, j++, ppod++) { ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | V_PPOD_TID(toep->tid) | (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ))); ppod->len_offset = htobe64(V_PPOD_LEN(buflen) | V_PPOD_OFST(offset)); ppod->rsvd = 0; for (k = 0; k < nitems(ppod->addr); k++) { if (pva > end_pva) ppod->addr[k] = 0; else { pa = pmap_kextract(pva); ppod->addr[k] = htobe64(pa); pva += ddp_pgsz; } #if 0 CTR5(KTR_CXGBE, "%s: tid %d ppod[%d]->addr[%d] = %p", __func__, toep->tid, i, k, be64toh(ppod->addr[k])); #endif } /* * Walk back 1 segment so that the first address in the * next pod is the same as the last one in the current * pod. */ pva -= ddp_pgsz; } mbufq_enqueue(wrq, m); } MPASS(pva <= end_pva); return (0); } int t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep, struct ppod_reservation *prsv, struct ctl_sg_entry *sgl, int entries, int xferlen, struct mbufq *wrq) { struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; int i, j, k, n, chunk, len, ddp_pgsz; u_int ppod_addr, offset, sg_offset = 0; uint32_t cmd; struct ppod_region *pr = prsv->prsv_pr; uintptr_t pva; vm_paddr_t pa; struct mbuf *m; MPASS(sgl != NULL); MPASS(entries > 0); cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) cmd |= htobe32(F_ULP_MEMIO_ORDER); else cmd |= htobe32(F_T5_ULP_MEMIO_IMM); ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)]; offset = (vm_offset_t)sgl->addr & PAGE_MASK; ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask); pva = trunc_page((vm_offset_t)sgl->addr); for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS); MPASS(n > 0); chunk = PPOD_SZ(n); len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); m = alloc_raw_wr_mbuf(len); if (m == NULL) return (ENOMEM); ulpmc = mtod(m, struct ulp_mem_io *); INIT_ULPTX_WR(ulpmc, len, 0, toep->tid); ulpmc->cmd = cmd; ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(chunk); ppod = (struct pagepod *)(ulpsc + 1); for (j = 0; j < n; i++, j++, ppod++) { ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | V_PPOD_TID(toep->tid) | (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ))); ppod->len_offset = htobe64(V_PPOD_LEN(xferlen) | V_PPOD_OFST(offset)); ppod->rsvd = 0; for (k = 0; k < nitems(ppod->addr); k++) { if (entries != 0) { pa = pmap_kextract(pva + sg_offset); ppod->addr[k] = htobe64(pa); } else ppod->addr[k] = 0; #if 0 CTR5(KTR_CXGBE, "%s: tid %d ppod[%d]->addr[%d] = %p", __func__, toep->tid, i, k, be64toh(ppod->addr[k])); #endif /* * If this is the last entry in a pod, * reuse the same entry for first address * in the next pod. */ if (k + 1 == nitems(ppod->addr)) break; /* * Don't move to the next DDP page if the * sgl is already finished. 
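Page pods are flushed to adapter memory in chunks small enough to ride as immediate data in one ULP_TX_MEM_WRITE, and the command encodes both length and address in 32-byte units (the chunk / 32 and ppod_addr >> 5 expressions above). A sketch of that chunking follows; the 64-byte pod size, and therefore the four-pods-per-WR figure, is an assumption.

#include <stdio.h>

/*
 * Illustrative sketch (not driver code): write pods in chunks of up to
 * 256 bytes of immediate data per work request, expressing length and
 * address in 32-byte units.
 */
#define PPOD_SIZE	64		/* assumed pod size in bytes */
#define IMM_PPODS	(256 / PPOD_SIZE)

int
main(void)
{
	unsigned int ppod_addr = 0x20000;	/* placeholder region offset */
	int nppods = 10, i, n, chunk;

	for (i = 0; i < nppods; i += n, ppod_addr += chunk) {
		n = nppods - i < IMM_PPODS ? nppods - i : IMM_PPODS;
		chunk = n * PPOD_SIZE;
		printf("WR: %d pods, dlen=%d (32B units), addr=%#x (32B units)\n",
		    n, chunk / 32, ppod_addr >> 5);
	}
	return (0);
}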
*/ if (entries == 0) continue; sg_offset += ddp_pgsz; if (sg_offset == sgl->len) { /* * This sgl entry is done. Go * to the next. */ entries--; sgl++; sg_offset = 0; if (entries != 0) pva = trunc_page( (vm_offset_t)sgl->addr); } } } mbufq_enqueue(wrq, m); } return (0); } /* * Prepare a pageset for DDP. This sets up page pods. */ static int prep_pageset(struct adapter *sc, struct toepcb *toep, struct pageset *ps) { struct tom_data *td = sc->tom_softc; if (ps->prsv.prsv_nppods == 0 && t4_alloc_page_pods_for_ps(&td->pr, ps) != 0) { return (0); } if (!(ps->flags & PS_PPODS_WRITTEN) && t4_write_page_pods_for_ps(sc, toep->ctrlq, toep->tid, ps) != 0) { return (0); } return (1); } int t4_init_ppod_region(struct ppod_region *pr, struct t4_range *r, u_int psz, const char *name) { int i; MPASS(pr != NULL); MPASS(r->size > 0); pr->pr_start = r->start; pr->pr_len = r->size; pr->pr_page_shift[0] = 12 + G_HPZ0(psz); pr->pr_page_shift[1] = 12 + G_HPZ1(psz); pr->pr_page_shift[2] = 12 + G_HPZ2(psz); pr->pr_page_shift[3] = 12 + G_HPZ3(psz); /* The SGL -> page pod algorithm requires the sizes to be in order. */ for (i = 1; i < nitems(pr->pr_page_shift); i++) { if (pr->pr_page_shift[i] <= pr->pr_page_shift[i - 1]) return (ENXIO); } pr->pr_tag_mask = ((1 << fls(r->size)) - 1) & V_PPOD_TAG(M_PPOD_TAG); pr->pr_alias_mask = V_PPOD_TAG(M_PPOD_TAG) & ~pr->pr_tag_mask; if (pr->pr_tag_mask == 0 || pr->pr_alias_mask == 0) return (ENXIO); pr->pr_alias_shift = fls(pr->pr_tag_mask); pr->pr_invalid_bit = 1 << (pr->pr_alias_shift - 1); pr->pr_arena = vmem_create(name, 0, pr->pr_len, PPOD_SIZE, 0, M_FIRSTFIT | M_NOWAIT); if (pr->pr_arena == NULL) return (ENOMEM); return (0); } void t4_free_ppod_region(struct ppod_region *pr) { MPASS(pr != NULL); if (pr->pr_arena) vmem_destroy(pr->pr_arena); bzero(pr, sizeof(*pr)); } static int pscmp(struct pageset *ps, struct vmspace *vm, vm_offset_t start, int npages, int pgoff, int len) { if (ps->start != start || ps->npages != npages || ps->offset != pgoff || ps->len != len) return (1); return (ps->vm != vm || ps->vm_timestamp != vm->vm_map.timestamp); } static int hold_aio(struct toepcb *toep, struct kaiocb *job, struct pageset **pps) { struct vmspace *vm; vm_map_t map; vm_offset_t start, end, pgoff; struct pageset *ps; int n; DDP_ASSERT_LOCKED(toep); /* * The AIO subsystem will cancel and drain all requests before * permitting a process to exit or exec, so p_vmspace should * be stable here. */ vm = job->userproc->p_vmspace; map = &vm->vm_map; start = (uintptr_t)job->uaiocb.aio_buf; pgoff = start & PAGE_MASK; end = round_page(start + job->uaiocb.aio_nbytes); start = trunc_page(start); if (end - start > MAX_DDP_BUFFER_SIZE) { /* * Truncate the request to a short read. * Alternatively, we could DDP in chunks to the larger * buffer, but that would be quite a bit more work. * * When truncating, round the request down to avoid * crossing a cache line on the final transaction. */ end = rounddown2(start + MAX_DDP_BUFFER_SIZE, CACHE_LINE_SIZE); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d, truncating size from %lu to %lu", __func__, toep->tid, (unsigned long)job->uaiocb.aio_nbytes, (unsigned long)(end - (start + pgoff))); job->uaiocb.aio_nbytes = end - (start + pgoff); #endif end = round_page(end); } n = atop(end - start); /* * Try to reuse a cached pageset. 
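hold_aio() page-aligns the user buffer and, when it is larger than DDP can describe, truncates the request to a short read ending on a cache-line boundary before wiring the pages. The arithmetic is sketched below with assumed 4 KB pages, 64-byte cache lines, and a 1 MB stand-in for MAX_DDP_BUFFER_SIZE.

#include <stdio.h>

/*
 * Illustrative sketch (not driver code): align the user buffer to page
 * boundaries and truncate oversized requests to a short read that ends
 * on a cache-line boundary.
 */
#define PAGE_SZ		4096UL
#define CACHE_LINE	64UL
#define MAX_DDP		(1UL << 20)	/* assumed MAX_DDP_BUFFER_SIZE */

#define TRUNC(x, a)	((x) & ~((a) - 1))
#define ROUND(x, a)	TRUNC((x) + (a) - 1, (a))

int
main(void)
{
	unsigned long buf = 0x800123;		/* unaligned user address */
	unsigned long nbytes = 3UL << 20;	/* 3 MB request */
	unsigned long start, end, pgoff, npages;

	pgoff = buf & (PAGE_SZ - 1);
	start = TRUNC(buf, PAGE_SZ);
	end = ROUND(buf + nbytes, PAGE_SZ);
	if (end - start > MAX_DDP) {
		end = TRUNC(start + MAX_DDP, CACHE_LINE);
		nbytes = end - (start + pgoff);	/* short read */
		end = ROUND(end, PAGE_SZ);
	}
	npages = (end - start) / PAGE_SZ;
	printf("hold %lu pages, short read of %lu bytes\n", npages, nbytes);
	return (0);
}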
*/ TAILQ_FOREACH(ps, &toep->ddp.cached_pagesets, link) { if (pscmp(ps, vm, start, n, pgoff, job->uaiocb.aio_nbytes) == 0) { TAILQ_REMOVE(&toep->ddp.cached_pagesets, ps, link); toep->ddp.cached_count--; *pps = ps; return (0); } } /* * If there are too many cached pagesets to create a new one, * free a pageset before creating a new one. */ KASSERT(toep->ddp.active_count + toep->ddp.cached_count <= nitems(toep->ddp.db), ("%s: too many wired pagesets", __func__)); if (toep->ddp.active_count + toep->ddp.cached_count == nitems(toep->ddp.db)) { KASSERT(toep->ddp.cached_count > 0, ("no cached pageset to free")); ps = TAILQ_LAST(&toep->ddp.cached_pagesets, pagesetq); TAILQ_REMOVE(&toep->ddp.cached_pagesets, ps, link); toep->ddp.cached_count--; free_pageset(toep->td, ps); } DDP_UNLOCK(toep); /* Create a new pageset. */ ps = malloc(sizeof(*ps) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK | M_ZERO); ps->pages = (vm_page_t *)(ps + 1); ps->vm_timestamp = map->timestamp; ps->npages = vm_fault_quick_hold_pages(map, start, end - start, VM_PROT_WRITE, ps->pages, n); DDP_LOCK(toep); if (ps->npages < 0) { free(ps, M_CXGBE); return (EFAULT); } KASSERT(ps->npages == n, ("hold_aio: page count mismatch: %d vs %d", ps->npages, n)); ps->offset = pgoff; ps->len = job->uaiocb.aio_nbytes; refcount_acquire(&vm->vm_refcnt); ps->vm = vm; ps->start = start; CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d", __func__, toep->tid, ps, job, ps->npages); *pps = ps; return (0); } static void ddp_complete_all(struct toepcb *toep, int error) { struct kaiocb *job; DDP_ASSERT_LOCKED(toep); KASSERT((toep->ddp.flags & DDP_AIO) != 0, ("%s: DDP_RCVBUF", __func__)); while (!TAILQ_EMPTY(&toep->ddp.aiojobq)) { job = TAILQ_FIRST(&toep->ddp.aiojobq); TAILQ_REMOVE(&toep->ddp.aiojobq, job, list); toep->ddp.waiting_count--; if (aio_clear_cancel_function(job)) ddp_complete_one(job, error); } } static void aio_ddp_cancel_one(struct kaiocb *job) { long copied; /* * If this job had copied data out of the socket buffer before * it was cancelled, report it as a short read rather than an * error. */ copied = job->aio_received; if (copied != 0) aio_complete(job, copied, 0); else aio_cancel(job); } /* * Called when the main loop wants to requeue a job to retry it later. * Deals with the race of the job being cancelled while it was being * examined. */ static void aio_ddp_requeue_one(struct toepcb *toep, struct kaiocb *job) { DDP_ASSERT_LOCKED(toep); if (!(toep->ddp.flags & DDP_DEAD) && aio_set_cancel_function(job, t4_aio_cancel_queued)) { TAILQ_INSERT_HEAD(&toep->ddp.aiojobq, job, list); toep->ddp.waiting_count++; } else aio_ddp_cancel_one(job); } static void aio_ddp_requeue(struct toepcb *toep) { struct adapter *sc = td_adapter(toep->td); struct socket *so; struct sockbuf *sb; struct inpcb *inp; struct kaiocb *job; struct ddp_buffer *db; size_t copied, offset, resid; struct pageset *ps; struct mbuf *m; uint64_t ddp_flags, ddp_flags_mask; struct wrqe *wr; int buf_flag, db_idx, error; DDP_ASSERT_LOCKED(toep); restart: if (toep->ddp.flags & DDP_DEAD) { MPASS(toep->ddp.waiting_count == 0); MPASS(toep->ddp.active_count == 0); return; } if (toep->ddp.waiting_count == 0 || toep->ddp.active_count == nitems(toep->ddp.db)) { return; } job = TAILQ_FIRST(&toep->ddp.aiojobq); so = job->fd_file->f_data; sb = &so->so_rcv; SOCKBUF_LOCK(sb); /* We will never get anything unless we are or were connected. 
*/ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { SOCKBUF_UNLOCK(sb); ddp_complete_all(toep, ENOTCONN); return; } KASSERT(toep->ddp.active_count == 0 || sbavail(sb) == 0, ("%s: pending sockbuf data and DDP is active", __func__)); /* Abort if socket has reported problems. */ /* XXX: Wait for any queued DDP's to finish and/or flush them? */ if (so->so_error && sbavail(sb) == 0) { toep->ddp.waiting_count--; TAILQ_REMOVE(&toep->ddp.aiojobq, job, list); if (!aio_clear_cancel_function(job)) { SOCKBUF_UNLOCK(sb); goto restart; } /* * If this job has previously copied some data, report * a short read and leave the error to be reported by * a future request. */ copied = job->aio_received; if (copied != 0) { SOCKBUF_UNLOCK(sb); aio_complete(job, copied, 0); goto restart; } error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(sb); aio_complete(job, -1, error); goto restart; } /* * Door is closed. If there is pending data in the socket buffer, * deliver it. If there are pending DDP requests, wait for those * to complete. Once they have completed, return EOF reads. */ if (sb->sb_state & SBS_CANTRCVMORE && sbavail(sb) == 0) { SOCKBUF_UNLOCK(sb); if (toep->ddp.active_count != 0) return; ddp_complete_all(toep, 0); return; } /* * If DDP is not enabled and there is no pending socket buffer * data, try to enable DDP. */ if (sbavail(sb) == 0 && (toep->ddp.flags & DDP_ON) == 0) { SOCKBUF_UNLOCK(sb); /* * Wait for the card to ACK that DDP is enabled before * queueing any buffers. Currently this waits for an * indicate to arrive. This could use a TCB_SET_FIELD_RPL * message to know that DDP was enabled instead of waiting * for the indicate which would avoid copying the indicate * if no data is pending. * * XXX: Might want to limit the indicate size to the size * of the first queued request. */ if ((toep->ddp.flags & DDP_SC_REQ) == 0) enable_ddp(sc, toep); return; } SOCKBUF_UNLOCK(sb); /* * If another thread is queueing a buffer for DDP, let it * drain any work and return. */ if (toep->ddp.queueing != NULL) return; /* Take the next job to prep it for DDP. */ toep->ddp.waiting_count--; TAILQ_REMOVE(&toep->ddp.aiojobq, job, list); if (!aio_clear_cancel_function(job)) goto restart; toep->ddp.queueing = job; /* NB: This drops DDP_LOCK while it holds the backing VM pages. */ error = hold_aio(toep, job, &ps); if (error != 0) { ddp_complete_one(job, error); toep->ddp.queueing = NULL; goto restart; } SOCKBUF_LOCK(sb); if (so->so_error && sbavail(sb) == 0) { copied = job->aio_received; if (copied != 0) { SOCKBUF_UNLOCK(sb); recycle_pageset(toep, ps); aio_complete(job, copied, 0); toep->ddp.queueing = NULL; goto restart; } error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(sb); recycle_pageset(toep, ps); aio_complete(job, -1, error); toep->ddp.queueing = NULL; goto restart; } if (sb->sb_state & SBS_CANTRCVMORE && sbavail(sb) == 0) { SOCKBUF_UNLOCK(sb); recycle_pageset(toep, ps); if (toep->ddp.active_count != 0) { /* * The door is closed, but there are still pending * DDP buffers. Requeue. These jobs will all be * completed once those buffers drain. */ aio_ddp_requeue_one(toep, job); toep->ddp.queueing = NULL; return; } ddp_complete_one(job, 0); ddp_complete_all(toep, 0); toep->ddp.queueing = NULL; return; } sbcopy: /* * If the toep is dead, there shouldn't be any data in the socket * buffer, so the above case should have handled this. 
*/ MPASS(!(toep->ddp.flags & DDP_DEAD)); /* * If there is pending data in the socket buffer (either * from before the requests were queued or a DDP indicate), * copy those mbufs out directly. */ copied = 0; offset = ps->offset + job->aio_received; MPASS(job->aio_received <= job->uaiocb.aio_nbytes); resid = job->uaiocb.aio_nbytes - job->aio_received; m = sb->sb_mb; KASSERT(m == NULL || toep->ddp.active_count == 0, ("%s: sockbuf data with active DDP", __func__)); while (m != NULL && resid > 0) { struct iovec iov[1]; struct uio uio; #ifdef INVARIANTS int error; #endif iov[0].iov_base = mtod(m, void *); iov[0].iov_len = m->m_len; if (iov[0].iov_len > resid) iov[0].iov_len = resid; uio.uio_iov = iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = iov[0].iov_len; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; #ifdef INVARIANTS error = uiomove_fromphys(ps->pages, offset + copied, uio.uio_resid, &uio); #else uiomove_fromphys(ps->pages, offset + copied, uio.uio_resid, &uio); #endif MPASS(error == 0 && uio.uio_resid == 0); copied += uio.uio_offset; resid -= uio.uio_offset; m = m->m_next; } if (copied != 0) { sbdrop_locked(sb, copied); job->aio_received += copied; job->msgrcv = 1; copied = job->aio_received; inp = sotoinpcb(so); if (!INP_TRY_WLOCK(inp)) { /* * The reference on the socket file descriptor in * the AIO job should keep 'sb' and 'inp' stable. * Our caller has a reference on the 'toep' that * keeps it stable. */ SOCKBUF_UNLOCK(sb); DDP_UNLOCK(toep); INP_WLOCK(inp); DDP_LOCK(toep); SOCKBUF_LOCK(sb); /* * If the socket has been closed, we should detect * that and complete this request if needed on * the next trip around the loop. */ } t4_rcvd_locked(&toep->td->tod, intotcpcb(inp)); INP_WUNLOCK(inp); if (resid == 0 || toep->ddp.flags & DDP_DEAD) { /* * We filled the entire buffer with socket * data, DDP is not being used, or the socket * is being shut down, so complete the * request. */ SOCKBUF_UNLOCK(sb); recycle_pageset(toep, ps); aio_complete(job, copied, 0); toep->ddp.queueing = NULL; goto restart; } /* * If DDP is not enabled, requeue this request and restart. * This will either enable DDP or wait for more data to * arrive on the socket buffer. */ if ((toep->ddp.flags & (DDP_ON | DDP_SC_REQ)) != DDP_ON) { SOCKBUF_UNLOCK(sb); recycle_pageset(toep, ps); aio_ddp_requeue_one(toep, job); toep->ddp.queueing = NULL; goto restart; } /* * An indicate might have arrived and been added to * the socket buffer while it was unlocked after the * copy to lock the INP. If so, restart the copy. */ if (sbavail(sb) != 0) goto sbcopy; } SOCKBUF_UNLOCK(sb); if (prep_pageset(sc, toep, ps) == 0) { recycle_pageset(toep, ps); aio_ddp_requeue_one(toep, job); toep->ddp.queueing = NULL; /* * XXX: Need to retry this later. Mostly need a trigger * when page pods are freed up. */ printf("%s: prep_pageset failed\n", __func__); return; } /* Determine which DDP buffer to use. 
*/ if (toep->ddp.db[0].job == NULL) { db_idx = 0; } else { MPASS(toep->ddp.db[1].job == NULL); db_idx = 1; } ddp_flags = 0; ddp_flags_mask = 0; if (db_idx == 0) { ddp_flags |= V_TF_DDP_BUF0_VALID(1); if (so->so_state & SS_NBIO) ddp_flags |= V_TF_DDP_BUF0_FLUSH(1); ddp_flags_mask |= V_TF_DDP_PSH_NO_INVALIDATE0(1) | V_TF_DDP_PUSH_DISABLE_0(1) | V_TF_DDP_PSHF_ENABLE_0(1) | V_TF_DDP_BUF0_FLUSH(1) | V_TF_DDP_BUF0_VALID(1); buf_flag = DDP_BUF0_ACTIVE; } else { ddp_flags |= V_TF_DDP_BUF1_VALID(1); if (so->so_state & SS_NBIO) ddp_flags |= V_TF_DDP_BUF1_FLUSH(1); ddp_flags_mask |= V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_PUSH_DISABLE_1(1) | V_TF_DDP_PSHF_ENABLE_1(1) | V_TF_DDP_BUF1_FLUSH(1) | V_TF_DDP_BUF1_VALID(1); buf_flag = DDP_BUF1_ACTIVE; } MPASS((toep->ddp.flags & buf_flag) == 0); if ((toep->ddp.flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)) == 0) { MPASS(db_idx == 0); MPASS(toep->ddp.active_id == -1); MPASS(toep->ddp.active_count == 0); ddp_flags_mask |= V_TF_DDP_ACTIVE_BUF(1); } /* * The TID for this connection should still be valid. If DDP_DEAD * is set, SBS_CANTRCVMORE should be set, so we shouldn't be * this far anyway. Even if the socket is closing on the other * end, the AIO job holds a reference on this end of the socket * which will keep it open and keep the TCP PCB attached until * after the job is completed. */ wr = mk_update_tcb_for_ddp(sc, toep, db_idx, &ps->prsv, ps->len, job->aio_received, ddp_flags, ddp_flags_mask); if (wr == NULL) { recycle_pageset(toep, ps); aio_ddp_requeue_one(toep, job); toep->ddp.queueing = NULL; /* * XXX: Need a way to kick a retry here. * * XXX: We know the fixed size needed and could * preallocate this using a blocking request at the * start of the task to avoid having to handle this * edge case. */ printf("%s: mk_update_tcb_for_ddp failed\n", __func__); return; } if (!aio_set_cancel_function(job, t4_aio_cancel_active)) { free_wrqe(wr); recycle_pageset(toep, ps); aio_ddp_cancel_one(job); toep->ddp.queueing = NULL; goto restart; } #ifdef VERBOSE_TRACES CTR6(KTR_CXGBE, "%s: tid %u, scheduling %p for DDP[%d] (flags %#lx/%#lx)", __func__, toep->tid, job, db_idx, ddp_flags, ddp_flags_mask); #endif /* Give the chip the go-ahead. */ t4_wrq_tx(sc, wr); db = &toep->ddp.db[db_idx]; db->cancel_pending = 0; db->job = job; db->ps = ps; toep->ddp.queueing = NULL; toep->ddp.flags |= buf_flag; toep->ddp.active_count++; if (toep->ddp.active_count == 1) { MPASS(toep->ddp.active_id == -1); toep->ddp.active_id = db_idx; CTR2(KTR_CXGBE, "%s: ddp_active_id = %d", __func__, toep->ddp.active_id); } goto restart; } void ddp_queue_toep(struct toepcb *toep) { DDP_ASSERT_LOCKED(toep); if (toep->ddp.flags & DDP_TASK_ACTIVE) return; toep->ddp.flags |= DDP_TASK_ACTIVE; hold_toepcb(toep); soaio_enqueue(&toep->ddp.requeue_task); } static void aio_ddp_requeue_task(void *context, int pending) { struct toepcb *toep = context; DDP_LOCK(toep); aio_ddp_requeue(toep); toep->ddp.flags &= ~DDP_TASK_ACTIVE; DDP_UNLOCK(toep); free_toepcb(toep); } static void t4_aio_cancel_active(struct kaiocb *job) { struct socket *so = job->fd_file->f_data; struct tcpcb *tp = sototcpcb(so); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); uint64_t valid_flag; int i; DDP_LOCK(toep); if (aio_cancel_cleared(job)) { DDP_UNLOCK(toep); aio_ddp_cancel_one(job); return; } for (i = 0; i < nitems(toep->ddp.db); i++) { if (toep->ddp.db[i].job == job) { /* Should only ever get one cancel request for a job. */ MPASS(toep->ddp.db[i].cancel_pending == 0); /* * Invalidate this buffer. 
It will be * cancelled or partially completed once the * card ACKs the invalidate. */ valid_flag = i == 0 ? V_TF_DDP_BUF0_VALID(1) : V_TF_DDP_BUF1_VALID(1); t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_RX_DDP_FLAGS, valid_flag, 0, 1, CPL_COOKIE_DDP0 + i); toep->ddp.db[i].cancel_pending = 1; CTR2(KTR_CXGBE, "%s: request %p marked pending", __func__, job); break; } } DDP_UNLOCK(toep); } static void t4_aio_cancel_queued(struct kaiocb *job) { struct socket *so = job->fd_file->f_data; struct tcpcb *tp = sototcpcb(so); struct toepcb *toep = tp->t_toe; DDP_LOCK(toep); if (!aio_cancel_cleared(job)) { TAILQ_REMOVE(&toep->ddp.aiojobq, job, list); toep->ddp.waiting_count--; if (toep->ddp.waiting_count == 0) ddp_queue_toep(toep); } CTR2(KTR_CXGBE, "%s: request %p cancelled", __func__, job); DDP_UNLOCK(toep); aio_ddp_cancel_one(job); } int t4_aio_queue_ddp(struct socket *so, struct kaiocb *job) { struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; /* Ignore writes. */ if (job->uaiocb.aio_lio_opcode != LIO_READ) return (EOPNOTSUPP); INP_WLOCK(inp); if (__predict_false(ulp_mode(toep) == ULP_MODE_NONE)) { if (!set_ddp_ulp_mode(toep)) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } } INP_WUNLOCK(inp); DDP_LOCK(toep); /* * If DDP is being used for all normal receive, don't use it * for AIO. */ if ((toep->ddp.flags & DDP_RCVBUF) != 0) { DDP_UNLOCK(toep); return (EOPNOTSUPP); } /* * XXX: Think about possibly returning errors for ENOTCONN, * etc. Perhaps the caller would only queue the request * if it failed with EOPNOTSUPP? */ #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: queueing %p for tid %u", __func__, job, toep->tid); #endif if (!aio_set_cancel_function(job, t4_aio_cancel_queued)) panic("new job was cancelled"); TAILQ_INSERT_TAIL(&toep->ddp.aiojobq, job, list); toep->ddp.waiting_count++; if ((toep->ddp.flags & DDP_AIO) == 0) { toep->ddp.flags |= DDP_AIO; TAILQ_INIT(&toep->ddp.cached_pagesets); TAILQ_INIT(&toep->ddp.aiojobq); TASK_INIT(&toep->ddp.requeue_task, 0, aio_ddp_requeue_task, toep); } /* * Try to handle this request synchronously. If this has * to block because the task is running, it will just bail * and let the task handle it instead. */ aio_ddp_requeue(toep); DDP_UNLOCK(toep); return (0); } static void ddp_rcvbuf_requeue(struct toepcb *toep) { struct socket *so; struct sockbuf *sb; struct inpcb *inp; struct ddp_rcv_buffer *drb; DDP_ASSERT_LOCKED(toep); restart: if ((toep->ddp.flags & DDP_DEAD) != 0) { MPASS(toep->ddp.active_count == 0); return; } /* If both buffers are active, nothing to do. */ if (toep->ddp.active_count == nitems(toep->ddp.db)) { return; } inp = toep->inp; so = inp->inp_socket; sb = &so->so_rcv; drb = alloc_cached_ddp_rcv_buffer(toep); DDP_UNLOCK(toep); if (drb == NULL) { drb = alloc_ddp_rcv_buffer(toep, M_WAITOK); if (drb == NULL) { printf("%s: failed to allocate buffer\n", __func__); DDP_LOCK(toep); return; } } DDP_LOCK(toep); if ((toep->ddp.flags & DDP_DEAD) != 0 || toep->ddp.active_count == nitems(toep->ddp.db)) { recycle_ddp_rcv_buffer(toep, drb); return; } /* We will never get anything unless we are or were connected. */ SOCKBUF_LOCK(sb); if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { SOCKBUF_UNLOCK(sb); recycle_ddp_rcv_buffer(toep, drb); return; } /* Abort if socket has reported problems or is closed. 
*/ if (so->so_error != 0 || (sb->sb_state & SBS_CANTRCVMORE) != 0) { SOCKBUF_UNLOCK(sb); recycle_ddp_rcv_buffer(toep, drb); return; } SOCKBUF_UNLOCK(sb); if (!queue_ddp_rcvbuf(toep, drb)) { /* * XXX: Need a way to kick a retry here. * * XXX: We know the fixed size needed and could * preallocate the work request using a blocking * request at the start of the task to avoid having to * handle this edge case. */ return; } goto restart; } static void ddp_rcvbuf_requeue_task(void *context, int pending) { struct toepcb *toep = context; DDP_LOCK(toep); ddp_rcvbuf_requeue(toep); toep->ddp.flags &= ~DDP_TASK_ACTIVE; DDP_UNLOCK(toep); free_toepcb(toep); } int t4_enable_ddp_rcv(struct socket *so, struct toepcb *toep) { struct inpcb *inp = sotoinpcb(so); struct adapter *sc = td_adapter(toep->td); INP_WLOCK(inp); switch (ulp_mode(toep)) { case ULP_MODE_TCPDDP: break; case ULP_MODE_NONE: if (set_ddp_ulp_mode(toep)) break; /* FALLTHROUGH */ default: INP_WUNLOCK(inp); return (EOPNOTSUPP); } INP_WUNLOCK(inp); DDP_LOCK(toep); /* * If DDP is being used for AIO already, don't use it for * normal receive. */ if ((toep->ddp.flags & DDP_AIO) != 0) { DDP_UNLOCK(toep); return (EOPNOTSUPP); } if ((toep->ddp.flags & DDP_RCVBUF) != 0) { DDP_UNLOCK(toep); return (EBUSY); } toep->ddp.flags |= DDP_RCVBUF; TAILQ_INIT(&toep->ddp.cached_buffers); enable_ddp(sc, toep); TASK_INIT(&toep->ddp.requeue_task, 0, ddp_rcvbuf_requeue_task, toep); ddp_queue_toep(toep); DDP_UNLOCK(toep); return (0); } void t4_ddp_mod_load(void) { if (t4_ddp_rcvbuf_len < PAGE_SIZE) t4_ddp_rcvbuf_len = PAGE_SIZE; if (t4_ddp_rcvbuf_len > MAX_DDP_BUFFER_SIZE) t4_ddp_rcvbuf_len = MAX_DDP_BUFFER_SIZE; if (!powerof2(t4_ddp_rcvbuf_len)) t4_ddp_rcvbuf_len = 1 << fls(t4_ddp_rcvbuf_len); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, do_ddp_tcb_rpl, CPL_COOKIE_DDP0); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, do_ddp_tcb_rpl, CPL_COOKIE_DDP1); t4_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp); t4_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); TAILQ_INIT(&ddp_orphan_pagesets); mtx_init(&ddp_orphan_pagesets_lock, "ddp orphans", NULL, MTX_DEF); TASK_INIT(&ddp_orphan_task, 0, ddp_free_orphan_pagesets, NULL); } void t4_ddp_mod_unload(void) { taskqueue_drain(taskqueue_thread, &ddp_orphan_task); MPASS(TAILQ_EMPTY(&ddp_orphan_pagesets)); mtx_destroy(&ddp_orphan_pagesets_lock); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_DDP0); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_DDP1); t4_register_cpl_handler(CPL_RX_DATA_DDP, NULL); t4_register_cpl_handler(CPL_RX_DDP_COMPLETE, NULL); } #endif diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c index bdd03edd3a6f..c6377980fca9 100644 --- a/sys/dev/cxgbe/tom/t4_tls.c +++ b/sys/dev/cxgbe/tom/t4_tls.c @@ -1,1333 +1,1297 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2017-2018 Chelsio Communications, Inc. * All rights reserved. * Written by: John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_kern_tls.h" #include #ifdef KERN_TLS #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_tcb.h" #include "crypto/t4_crypto.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* * The TCP sequence number of a CPL_TLS_DATA mbuf is saved here while * the mbuf is in the ulp_pdu_reclaimq. */ #define tls_tcp_seq PH_loc.thirtytwo[0] static void t4_set_tls_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) { struct adapter *sc = td_adapter(toep->td); t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, word, mask, val, 0, 0); } /* TLS and DTLS common routines */ bool can_tls_offload(struct adapter *sc) { return (sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS); } int tls_tx_key(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; return (tls_ofld->tx_key_addr >= 0); } /* Set TF_RX_QUIESCE to pause receive. */ static void t4_set_rx_quiesce(struct toepcb *toep) { struct adapter *sc = td_adapter(toep->td); t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), V_TF_RX_QUIESCE(1), 1, CPL_COOKIE_TOM); } /* Clear TF_RX_QUIESCE to re-enable receive. */ static void t4_clear_rx_quiesce(struct toepcb *toep) { t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0); } /* TLS/DTLS content type for CPL SFO */ static inline unsigned char tls_content_type(unsigned char content_type) { switch (content_type) { case CONTENT_TYPE_CCS: return CPL_TX_TLS_SFO_TYPE_CCS; case CONTENT_TYPE_ALERT: return CPL_TX_TLS_SFO_TYPE_ALERT; case CONTENT_TYPE_HANDSHAKE: return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; case CONTENT_TYPE_APP_DATA: return CPL_TX_TLS_SFO_TYPE_DATA; default: return CPL_TX_TLS_SFO_TYPE_CUSTOM; } } /* TLS Key memory management */ static void clear_tls_keyid(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; struct adapter *sc = td_adapter(toep->td); if (tls_ofld->rx_key_addr >= 0) { t4_free_tls_keyid(sc, tls_ofld->rx_key_addr); tls_ofld->rx_key_addr = -1; } if (tls_ofld->tx_key_addr >= 0) { t4_free_tls_keyid(sc, tls_ofld->tx_key_addr); tls_ofld->tx_key_addr = -1; } } static int get_tp_plen_max(struct ktls_session *tls) { int plen = ((min(3*4096, TP_TX_PG_SZ))/1448) * 1448; return (tls->params.max_frame_len <= 8192 ? 
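get_tp_plen_max() rounds the per-PDU payload limit down to a whole number of 1448-byte segments and falls back to FC_TP_PLEN_MAX for large records. The sketch below reproduces only that arithmetic; the TP_TX_PG_SZ and FC_TP_PLEN_MAX values used are assumptions, not the driver's constants.

#include <stdio.h>

/*
 * Illustrative sketch (not driver code): round the per-PDU payload
 * limit down to a multiple of 1448 bytes.
 */
int
main(void)
{
	int tp_tx_pg_sz = 16384;		/* assumed TP_TX_PG_SZ */
	int fc_tp_plen_max = 17408;		/* assumed FC_TP_PLEN_MAX */
	int cap = 3 * 4096 < tp_tx_pg_sz ? 3 * 4096 : tp_tx_pg_sz;
	int plen = cap / 1448 * 1448;		/* 12288 -> 11584 */
	int max_frame_len = 4096;		/* example TLS record payload cap */

	printf("plen max = %d\n", max_frame_len <= 8192 ? plen : fc_tp_plen_max);
	return (0);
}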
plen : FC_TP_PLEN_MAX); } /* Send request to get the key-id */ static int tls_program_key_id(struct toepcb *toep, struct ktls_session *tls, int direction) { struct tls_ofld_info *tls_ofld = &toep->tls; struct adapter *sc = td_adapter(toep->td); struct ofld_tx_sdesc *txsd; int keyid; struct wrqe *wr; struct tls_key_req *kwr; struct tls_keyctx *kctx; #ifdef INVARIANTS int kwrlen, kctxlen, len; kwrlen = sizeof(*kwr); kctxlen = roundup2(sizeof(*kctx), 32); len = roundup2(kwrlen + kctxlen, 16); MPASS(TLS_KEY_WR_SZ == len); #endif if (toep->txsd_avail == 0) return (EAGAIN); if ((keyid = t4_alloc_tls_keyid(sc)) < 0) { return (ENOSPC); } wr = alloc_wrqe(TLS_KEY_WR_SZ, &toep->ofld_txq->wrq); if (wr == NULL) { t4_free_tls_keyid(sc, keyid); return (ENOMEM); } kwr = wrtod(wr); memset(kwr, 0, TLS_KEY_WR_SZ); t4_write_tlskey_wr(tls, direction, toep->tid, F_FW_WR_COMPL, keyid, kwr); kctx = (struct tls_keyctx *)(kwr + 1); if (direction == KTLS_TX) tls_ofld->tx_key_addr = keyid; else tls_ofld->rx_key_addr = keyid; t4_tls_key_ctx(tls, direction, kctx); txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = DIV_ROUND_UP(TLS_KEY_WR_SZ, 16); txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); return (0); } int tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction) { struct adapter *sc = td_adapter(toep->td); int error, explicit_iv_size, mac_first; if (!can_tls_offload(sc)) return (EINVAL); if (direction == KTLS_RX) { if (ulp_mode(toep) != ULP_MODE_NONE) return (EINVAL); if ((toep->flags & TPF_TLS_STARTING) != 0) return (EINVAL); } else { switch (ulp_mode(toep)) { case ULP_MODE_NONE: case ULP_MODE_TLS: case ULP_MODE_TCPDDP: break; default: return (EINVAL); } } switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: /* XXX: Explicitly ignore any provided IV. */ switch (tls->params.cipher_key_len) { case 128 / 8: case 192 / 8: case 256 / 8: break; default: return (EINVAL); } switch (tls->params.auth_algorithm) { case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: break; default: return (EPROTONOSUPPORT); } explicit_iv_size = AES_BLOCK_LEN; mac_first = 1; break; case CRYPTO_AES_NIST_GCM_16: if (tls->params.iv_len != SALT_SIZE) { return (EINVAL); } switch (tls->params.cipher_key_len) { case 128 / 8: case 192 / 8: case 256 / 8: break; default: return (EINVAL); } explicit_iv_size = 8; mac_first = 0; break; default: return (EPROTONOSUPPORT); } /* Only TLS 1.1 and TLS 1.2 are currently supported. */ if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE || tls->params.tls_vminor < TLS_MINOR_VER_ONE || tls->params.tls_vminor > TLS_MINOR_VER_TWO) { return (EPROTONOSUPPORT); } /* Bail if we already have a key. 
*/ if (direction == KTLS_TX) { if (toep->tls.tx_key_addr != -1) return (EOPNOTSUPP); } else { if (toep->tls.rx_key_addr != -1) return (EOPNOTSUPP); } error = tls_program_key_id(toep, tls, direction); if (error) return (error); if (direction == KTLS_TX) { toep->tls.scmd0.seqno_numivs = (V_SCMD_SEQ_NO_CTRL(3) | V_SCMD_PROTO_VERSION(t4_tls_proto_ver(tls)) | V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) | V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) | V_SCMD_CIPH_MODE(t4_tls_cipher_mode(tls)) | V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) | V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) | V_SCMD_IV_SIZE(explicit_iv_size / 2)); toep->tls.scmd0.ivgen_hdrlen = (V_SCMD_IV_GEN_CTRL(1) | V_SCMD_KEY_CTX_INLINE(0) | V_SCMD_TLS_FRAG_ENABLE(1)); toep->tls.iv_len = explicit_iv_size; toep->tls.frag_size = tls->params.max_frame_len; toep->tls.fcplenmax = get_tp_plen_max(tls); toep->tls.expn_per_ulp = tls->params.tls_hlen + tls->params.tls_tlen; toep->tls.pdus_per_ulp = 1; toep->tls.adjusted_plen = toep->tls.expn_per_ulp + tls->params.max_frame_len; toep->tls.tx_key_info_size = t4_tls_key_info_size(tls); } else { toep->flags |= TPF_TLS_STARTING | TPF_TLS_RX_QUIESCING; toep->tls.rx_version = tls->params.tls_vmajor << 8 | tls->params.tls_vminor; CTR2(KTR_CXGBE, "%s: tid %d setting RX_QUIESCE", __func__, toep->tid); t4_set_rx_quiesce(toep); } return (0); } void tls_init_toep(struct toepcb *toep) { struct tls_ofld_info *tls_ofld = &toep->tls; tls_ofld->rx_key_addr = -1; tls_ofld->tx_key_addr = -1; } void tls_uninit_toep(struct toepcb *toep) { clear_tls_keyid(toep); } #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) #define MIN_OFLD_TLSTX_CREDITS(toep) \ (howmany(sizeof(struct fw_tlstx_data_wr) + \ sizeof(struct cpl_tx_tls_sfo) + sizeof(struct ulptx_idata) + \ sizeof(struct ulptx_sc_memrd) + \ AES_BLOCK_LEN + 1, 16)) static void write_tlstx_wr(struct fw_tlstx_data_wr *txwr, struct toepcb *toep, unsigned int plen, unsigned int expn, uint8_t credits, int shove) { struct tls_ofld_info *tls_ofld = &toep->tls; unsigned int len = plen + expn; txwr->op_to_immdlen = htobe32(V_WR_OP(FW_TLSTX_DATA_WR) | V_FW_TLSTX_DATA_WR_COMPL(1) | V_FW_TLSTX_DATA_WR_IMMDLEN(0)); txwr->flowid_len16 = htobe32(V_FW_TLSTX_DATA_WR_FLOWID(toep->tid) | V_FW_TLSTX_DATA_WR_LEN16(credits)); txwr->plen = htobe32(len); txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(ULP_MODE_TLS) | V_TX_URG(0) | /* F_T6_TX_FORCE | */ V_TX_SHOVE(shove)); txwr->ctxloc_to_exp = htobe32(V_FW_TLSTX_DATA_WR_NUMIVS(1) | V_FW_TLSTX_DATA_WR_EXP(expn) | V_FW_TLSTX_DATA_WR_CTXLOC(TLS_SFO_WR_CONTEXTLOC_DDR) | V_FW_TLSTX_DATA_WR_IVDSGL(0) | V_FW_TLSTX_DATA_WR_KEYSIZE(tls_ofld->tx_key_info_size >> 4)); txwr->mfs = htobe16(tls_ofld->frag_size); txwr->adjustedplen_pkd = htobe16( V_FW_TLSTX_DATA_WR_ADJUSTEDPLEN(tls_ofld->adjusted_plen)); txwr->expinplenmax_pkd = htobe16( V_FW_TLSTX_DATA_WR_EXPINPLENMAX(tls_ofld->expn_per_ulp)); txwr->pdusinplenmax_pkd = V_FW_TLSTX_DATA_WR_PDUSINPLENMAX(tls_ofld->pdus_per_ulp); } static void write_tlstx_cpl(struct cpl_tx_tls_sfo *cpl, struct toepcb *toep, struct tls_hdr *tls_hdr, unsigned int plen, uint64_t seqno) { struct tls_ofld_info *tls_ofld = &toep->tls; int data_type, seglen; seglen = plen; data_type = tls_content_type(tls_hdr->type); cpl->op_to_seg_len = htobe32(V_CPL_TX_TLS_SFO_OPCODE(CPL_TX_TLS_SFO) | V_CPL_TX_TLS_SFO_DATA_TYPE(data_type) | V_CPL_TX_TLS_SFO_CPL_LEN(2) | V_CPL_TX_TLS_SFO_SEG_LEN(seglen)); cpl->pld_len = htobe32(plen); if (data_type == CPL_TX_TLS_SFO_TYPE_CUSTOM) cpl->type_protover = htobe32( V_CPL_TX_TLS_SFO_TYPE(tls_hdr->type)); 
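The transmit side accounts for per-record expansion (header plus trailer) separately from payload: expn_per_ulp and adjusted_plen above, and the plen/expn split in write_tlstx_wr(). The sketch below runs those sums for one record, assuming TLS 1.2 AES-GCM framing (13-byte header including the explicit nonce, 16-byte tag); the real values come from the ktls session parameters.

#include <stdio.h>

/*
 * Illustrative sketch (not driver code): per-record expansion is the
 * header plus trailer, and adjusted_plen is that expansion plus the
 * negotiated maximum record payload.  The framing sizes are assumed.
 */
int
main(void)
{
	int tls_hlen = 13, tls_tlen = 16;	/* assumed AES-GCM framing */
	int max_frame_len = 4096;		/* negotiated record payload cap */
	int expn_per_ulp = tls_hlen + tls_tlen;
	int adjusted_plen = expn_per_ulp + max_frame_len;
	int payload = 3000;			/* one record's plaintext */

	printf("expansion %d, adjusted plen %d, wire bytes for record %d\n",
	    expn_per_ulp, adjusted_plen, payload + expn_per_ulp);
	return (0);
}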
cpl->seqno_numivs = htobe32(tls_ofld->scmd0.seqno_numivs | V_SCMD_NUM_IVS(1)); cpl->ivgen_hdrlen = htobe32(tls_ofld->scmd0.ivgen_hdrlen); cpl->scmd1 = htobe64(seqno); } static int count_ext_pgs_segs(struct mbuf *m) { vm_paddr_t nextpa; u_int i, nsegs; MPASS(m->m_epg_npgs > 0); nsegs = 1; nextpa = m->m_epg_pa[0] + PAGE_SIZE; for (i = 1; i < m->m_epg_npgs; i++) { if (nextpa != m->m_epg_pa[i]) nsegs++; nextpa = m->m_epg_pa[i] + PAGE_SIZE; } return (nsegs); } static void write_ktlstx_sgl(void *dst, struct mbuf *m, int nsegs) { struct ulptx_sgl *usgl = dst; vm_paddr_t pa; uint32_t len; int i, j; KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); /* Figure out the first S/G length. */ pa = m->m_epg_pa[0] + m->m_epg_1st_off; usgl->addr0 = htobe64(pa); len = m_epg_pagelen(m, 0, m->m_epg_1st_off); pa += len; for (i = 1; i < m->m_epg_npgs; i++) { if (m->m_epg_pa[i] != pa) break; len += m_epg_pagelen(m, i, 0); pa += m_epg_pagelen(m, i, 0); } usgl->len0 = htobe32(len); #ifdef INVARIANTS nsegs--; #endif j = -1; for (; i < m->m_epg_npgs; i++) { if (j == -1 || m->m_epg_pa[i] != pa) { if (j >= 0) usgl->sge[j / 2].len[j & 1] = htobe32(len); j++; #ifdef INVARIANTS nsegs--; #endif pa = m->m_epg_pa[i]; usgl->sge[j / 2].addr[j & 1] = htobe64(pa); len = m_epg_pagelen(m, i, 0); pa += len; } else { len += m_epg_pagelen(m, i, 0); pa += m_epg_pagelen(m, i, 0); } } if (j >= 0) { usgl->sge[j / 2].len[j & 1] = htobe32(len); if ((j & 1) == 0) usgl->sge[j / 2].len[1] = htobe32(0); } KASSERT(nsegs == 0, ("%s: nsegs %d, m %p", __func__, nsegs, m)); } /* * Similar to t4_push_frames() but handles sockets that contain TLS * record mbufs. */ void t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop) { struct tls_hdr *thdr; struct fw_tlstx_data_wr *txwr; struct cpl_tx_tls_sfo *cpl; struct ulptx_idata *idata; struct ulptx_sc_memrd *memrd; struct wrqe *wr; struct mbuf *m; u_int nsegs, credits, wr_len; u_int expn_size; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int tls_size, tx_credits, shove, sowwakeup; struct ofld_tx_sdesc *txsd; char *buf; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(ulp_mode(toep) == ULP_MODE_NONE || ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_TLS, ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep)); KASSERT(tls_tx_key(toep), ("%s: TX key not set for toep %p", __func__, toep)); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", __func__, toep->tid, toep->flags, tp->t_flags); #endif if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; #ifdef RATELIMIT if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; } #endif /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } txsd = &toep->txsd[toep->txsd_pidx]; for (;;) { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { sbdrop_locked(sb, drop); drop = 0; } m = sb->sb_sndptr != NULL ? 
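count_ext_pgs_segs() counts how many physically contiguous runs the pages of an unmapped (M_EXTPG) mbuf form, which is the number of DSGL entries the work request will need. A standalone version of the same scan follows, assuming 4 KB pages.

#include <stdio.h>

/*
 * Illustrative sketch (not driver code): count contiguous physical
 * runs in an array of page addresses; each run becomes one SGL entry.
 */
static int
count_segs(const unsigned long *pa, int npgs)
{
	unsigned long nextpa;
	int i, nsegs = 1;

	nextpa = pa[0] + 4096;
	for (i = 1; i < npgs; i++) {
		if (pa[i] != nextpa)
			nsegs++;		/* discontiguous: new segment */
		nextpa = pa[i] + 4096;
	}
	return (nsegs);
}

int
main(void)
{
	/* Pages 0-1 contiguous, page 2 elsewhere, pages 3-4 contiguous. */
	unsigned long pa[] = { 0x10000, 0x11000, 0x40000, 0x80000, 0x81000 };

	printf("nsegs = %d\n", count_segs(pa, 5));	/* 3 */
	return (0);
}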
sb->sb_sndptr->m_next : sb->sb_mb; /* * Send a FIN if requested, but only if there's no * more data to send. */ if (m == NULL && toep->flags & TPF_SEND_FIN) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); t4_close_conn(sc, toep); return; } /* * If there is no ready data to send, wait until more * data arrives. */ if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d no ready data to send", __func__, toep->tid); #endif return; } KASSERT(m->m_flags & M_EXTPG, ("%s: mbuf %p is not NOMAP", __func__, m)); KASSERT(m->m_epg_tls != NULL, ("%s: mbuf %p doesn't have TLS session", __func__, m)); /* Calculate WR length. */ wr_len = sizeof(struct fw_tlstx_data_wr) + sizeof(struct cpl_tx_tls_sfo) + sizeof(struct ulptx_idata) + sizeof(struct ulptx_sc_memrd); /* Explicit IVs for AES-CBC and AES-GCM are <= 16. */ MPASS(toep->tls.iv_len <= AES_BLOCK_LEN); wr_len += AES_BLOCK_LEN; /* Account for SGL in work request length. */ nsegs = count_ext_pgs_segs(m); wr_len += sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; /* Not enough credits for this work request. */ if (howmany(wr_len, 16) > tx_credits) { if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d mbuf %p requires %d credits, but only %d available", __func__, toep->tid, m, howmany(wr_len, 16), tx_credits); #endif toep->flags |= TPF_TX_SUSPENDED; return; } /* Shove if there is no additional data pending. */ shove = ((m->m_next == NULL || (m->m_next->m_flags & M_NOTAVAIL) != 0)) && (tp->t_flags & TF_MORETOCOME) == 0; if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf && sb->sb_hiwat < V_tcp_autosndbuf_max && sbused(sb) >= sb->sb_hiwat * 7 / 8) { int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); if (!sbreserve_locked(so, SO_SND, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else sowwakeup = 1; /* room available */ } if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } thdr = (struct tls_hdr *)&m->m_epg_hdr; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d TLS record %ju type %d len %#x", __func__, toep->tid, m->m_epg_seqno, thdr->type, m->m_len); #endif txwr = wrtod(wr); cpl = (struct cpl_tx_tls_sfo *)(txwr + 1); memset(txwr, 0, roundup2(wr_len, 16)); credits = howmany(wr_len, 16); expn_size = m->m_epg_hdrlen + m->m_epg_trllen; tls_size = m->m_len - expn_size; write_tlstx_wr(txwr, toep, tls_size, expn_size, credits, shove); write_tlstx_cpl(cpl, toep, thdr, tls_size, m->m_epg_seqno); idata = (struct ulptx_idata *)(cpl + 1); idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); idata->len = htobe32(0); memrd = (struct ulptx_sc_memrd *)(idata + 1); memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) | V_ULP_TX_SC_MORE(1) | V_ULPTX_LEN16(toep->tls.tx_key_info_size >> 4)); memrd->addr = htobe32(toep->tls.tx_key_addr >> 5); /* Copy IV. 
*/ buf = (char *)(memrd + 1); memcpy(buf, thdr + 1, toep->tls.iv_len); buf += AES_BLOCK_LEN; write_ktlstx_sgl(buf, m, nsegs); KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; tp->snd_nxt += m->m_len; tp->snd_max += m->m_len; SOCKBUF_LOCK(sb); sb->sb_sndptr = m; SOCKBUF_UNLOCK(sb); toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TLSTX_CREDITS(toep)) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = m->m_len; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; counter_u64_add(toep->ofld_txq->tx_toe_tls_records, 1); counter_u64_add(toep->ofld_txq->tx_toe_tls_octets, m->m_len); t4_l2t_send(sc, wr, toep->l2te); } } /* * For TLS data we place received mbufs received via CPL_TLS_DATA into * an mbufq in the TLS offload state. When CPL_RX_TLS_CMP is * received, the completed PDUs are placed into the socket receive * buffer. * * The TLS code reuses the ulp_pdu_reclaimq to hold the pending mbufs. */ static int do_tls_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_tls_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; int len; /* XXX: Should this match do_rx_data instead? */ KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; toep->ofld_rxq->rx_toe_tls_octets += len; KASSERT(len == G_CPL_TLS_DATA_LENGTH(be32toh(cpl->length_pkd)), ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } /* Save TCP sequence number. 
*/ m->m_pkthdr.tls_tcp_seq = be32toh(cpl->seq); if (mbufq_enqueue(&toep->ulp_pdu_reclaimq, m)) { #ifdef INVARIANTS panic("Failed to queue TLS data packet"); #else printf("%s: Failed to queue TLS data packet\n", __func__); INP_WUNLOCK(inp); m_freem(m); return (0); #endif } tp = intotcpcb(inp); tp->t_rcvtime = ticks; #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %u len %d seq %u", __func__, tid, len, be32toh(cpl->seq)); #endif INP_WUNLOCK(inp); return (0); } static int do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_tls_cmp *cpl = mtod(m, const void *); struct tlsrx_hdr_pkt *tls_hdr_pkt; unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct mbuf *tls_data; struct tls_get_record *tgr; struct mbuf *control; int pdu_length, trailer_len; #if defined(KTR) || defined(INVARIANTS) int len; #endif KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); #if defined(KTR) || defined(INVARIANTS) len = m->m_pkthdr.len; #endif toep->ofld_rxq->rx_toe_tls_records++; KASSERT(len == G_CPL_RX_TLS_CMP_LENGTH(be32toh(cpl->pdulength_length)), ("%s: payload length mismatch", __func__)); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } pdu_length = G_CPL_RX_TLS_CMP_PDULENGTH(be32toh(cpl->pdulength_length)); so = inp_inpcbtosocket(inp); tp = intotcpcb(inp); #ifdef VERBOSE_TRACES CTR6(KTR_CXGBE, "%s: tid %u PDU len %d len %d seq %u, rcv_nxt %u", __func__, tid, pdu_length, len, be32toh(cpl->seq), tp->rcv_nxt); #endif tp->rcv_nxt += pdu_length; KASSERT(tp->rcv_wnd >= pdu_length, ("%s: negative window size", __func__)); tp->rcv_wnd -= pdu_length; /* XXX: Not sure what to do about urgent data. */ /* * The payload of this CPL is the TLS header followed by * additional fields. */ KASSERT(m->m_len >= sizeof(*tls_hdr_pkt), ("%s: payload too small", __func__)); tls_hdr_pkt = mtod(m, void *); tls_data = mbufq_dequeue(&toep->ulp_pdu_reclaimq); if (tls_data != NULL) { KASSERT(be32toh(cpl->seq) == tls_data->m_pkthdr.tls_tcp_seq, ("%s: sequence mismatch", __func__)); } /* Report decryption errors as EBADMSG. */ if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) != 0) { CTR4(KTR_CXGBE, "%s: tid %u TLS error %#x ddp_vld %#x", __func__, toep->tid, tls_hdr_pkt->res_to_mac_error, be32toh(cpl->ddp_valid)); m_freem(m); m_freem(tls_data); CURVNET_SET(toep->vnet); so->so_error = EBADMSG; sorwakeup(so); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } /* Handle data received after the socket is closed. 
*/ sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { struct epoch_tracker et; CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, pdu_length); m_freem(m); m_freem(tls_data); SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp != NULL) INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (0); } /* * If there is any data in the 'sb_mtls' chain of the socket * or we aren't able to allocate the control mbuf, append the * record as a CSUM_TLS_DECRYPTED packet to 'sb_mtls' rather * than as a decrypted record to 'sb_m'. */ if (sb->sb_mtls != NULL) control = NULL; else control = sbcreatecontrol(NULL, sizeof(*tgr), TLS_GET_RECORD, IPPROTO_TCP, M_NOWAIT); if (control != NULL) { tgr = (struct tls_get_record *) CMSG_DATA(mtod(control, struct cmsghdr *)); memset(tgr, 0, sizeof(*tgr)); tgr->tls_type = tls_hdr_pkt->type; tgr->tls_vmajor = be16toh(tls_hdr_pkt->version) >> 8; tgr->tls_vminor = be16toh(tls_hdr_pkt->version) & 0xff; if (tls_data != NULL) { m_last(tls_data)->m_flags |= M_EOR; tgr->tls_length = htobe16(tls_data->m_pkthdr.len); } else tgr->tls_length = 0; m_freem(m); m = tls_data; } else { M_ASSERTPKTHDR(m); /* It's ok that any explicit IV is missing. */ m->m_len = sb->sb_tls_info->params.tls_hlen; m->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; m->m_pkthdr.len = m->m_len; if (tls_data != NULL) { m->m_pkthdr.len += tls_data->m_pkthdr.len; m_demote_pkthdr(tls_data); m->m_next = tls_data; } /* * Grow the chain by the trailer, but without * contents. The trailer will be thrown away by * ktls_decrypt. Note that ktls_decrypt assumes the * trailer is tls_tlen bytes long, so append that many * bytes not the actual trailer size computed from * pdu_length. */ trailer_len = sb->sb_tls_info->params.tls_tlen; if (tls_data != NULL) { m_last(tls_data)->m_len += trailer_len; tls_data = NULL; } else m->m_len += trailer_len; m->m_pkthdr.len += trailer_len; tls_hdr_pkt->length = htobe16(m->m_pkthdr.len - sizeof(struct tls_record_layer)); } /* receive buffer autosize */ MPASS(toep->vnet == so->so_vnet); CURVNET_SET(toep->vnet); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && m->m_pkthdr.len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(so, SO_RCV, newsize, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; } if (control != NULL) sbappendcontrol_locked(sb, m, control, 0); else sbappendstream_locked(sb, m, 0); t4_rcvd_locked(&toep->td->tod, tp); sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } void do_rx_data_tls(const struct cpl_rx_data *cpl, struct toepcb *toep, struct mbuf *m) { struct inpcb *inp = toep->inp; struct tls_ofld_info *tls_ofld = &toep->tls; struct tls_hdr *hdr; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; int len; len = m->m_pkthdr.len; INP_WLOCK_ASSERT(inp); so = inp_inpcbtosocket(inp); tp = intotcpcb(inp); sb = &so->so_rcv; SOCKBUF_LOCK(sb); CURVNET_SET(toep->vnet); tp->rcv_nxt += len; KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; /* Do we have a full TLS header? */ if (len < sizeof(*hdr)) { CTR3(KTR_CXGBE, "%s: tid %u len %d: too short for a TLS header", __func__, toep->tid, len); so->so_error = EMSGSIZE; goto out; } hdr = mtod(m, struct tls_hdr *); /* Is the header valid? 
*/ if (be16toh(hdr->version) != tls_ofld->rx_version) { CTR3(KTR_CXGBE, "%s: tid %u invalid version %04x", __func__, toep->tid, be16toh(hdr->version)); so->so_error = EINVAL; goto out; } if (be16toh(hdr->length) < sizeof(*hdr)) { CTR3(KTR_CXGBE, "%s: tid %u invalid length %u", __func__, toep->tid, be16toh(hdr->length)); so->so_error = EBADMSG; goto out; } /* Did we get a truncated record? */ if (len < be16toh(hdr->length)) { CTR4(KTR_CXGBE, "%s: tid %u truncated TLS record (%d vs %u)", __func__, toep->tid, len, be16toh(hdr->length)); so->so_error = EMSGSIZE; goto out; } /* Is the header type unknown? */ switch (hdr->type) { case CONTENT_TYPE_CCS: case CONTENT_TYPE_ALERT: case CONTENT_TYPE_APP_DATA: case CONTENT_TYPE_HANDSHAKE: break; default: CTR3(KTR_CXGBE, "%s: tid %u invalid TLS record type %u", __func__, toep->tid, hdr->type); so->so_error = EBADMSG; goto out; } /* * Just punt. Although this could fall back to software * decryption, this case should never really happen. */ CTR4(KTR_CXGBE, "%s: tid %u dropping TLS record type %u, length %u", __func__, toep->tid, hdr->type, be16toh(hdr->length)); so->so_error = EBADMSG; out: sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); CURVNET_RESTORE(); m_freem(m); } -/* SET_TCB_FIELD sent as a ULP command looks like this */ -#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ - sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) - -static inline void * -mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep, - uint64_t word, uint64_t mask, uint64_t val) -{ - struct ulptx_idata *ulpsc; - struct cpl_set_tcb_field_core *req; - - ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); - ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); - - ulpsc = (struct ulptx_idata *)(ulpmc + 1); - ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); - ulpsc->len = htobe32(sizeof(*req)); - - req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); - OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tid)); - req->reply_ctrl = htobe16(V_NO_REPLY(1) | - V_QUEUENO(toep->ofld_rxq->iq.abs_id)); - req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); - req->mask = htobe64(mask); - req->val = htobe64(val); - - ulpsc = (struct ulptx_idata *)(req + 1); - if (LEN__SET_TCB_FIELD_ULP % 16) { - ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); - ulpsc->len = htobe32(0); - return (ulpsc + 1); - } - return (ulpsc); -} - /* * Send a work request setting multiple TCB fields to enable * ULP_MODE_TLS. */ static void tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno) { struct wrqe *wr; struct work_request_hdr *wrh; struct ulp_txpkt *ulpmc; int fields, key_offset, len; KASSERT(ulp_mode(toep) == ULP_MODE_NONE, ("%s: tid %d already ULP_MODE_TLS", __func__, toep->tid)); fields = 0; /* 2 writes for the overlay region */ fields += 2; /* W_TCB_TLS_SEQ */ fields++; /* W_TCB_ULP_RAW */ fields++; /* W_TCB_ULP_TYPE */ fields ++; /* W_TCB_T_FLAGS */ fields++; len = sizeof(*wrh) + fields * roundup2(LEN__SET_TCB_FIELD_ULP, 16); KASSERT(len <= SGE_MAX_WR_LEN, ("%s: WR with %d TCB field updates too large", __func__, fields)); wr = alloc_wrqe(len, toep->ctrlq); if (wr == NULL) { /* XXX */ panic("%s: out of memory", __func__); } wrh = wrtod(wr); INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */ ulpmc = (struct ulp_txpkt *)(wrh + 1); /* * Clear the TLS overlay region: 1023:832. * * Words 26/27 are always set to zero. Words 28/29 * contain seqno and are set when enabling TLS * decryption. 
Word 30 is zero and Word 31 contains * the keyid. */ - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, 26, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 26, 0xffffffffffffffff, 0); /* * RX key tags are an index into the key portion of MA * memory stored as an offset from the base address in * units of 64 bytes. */ key_offset = toep->tls.rx_key_addr - sc->vres.key.start; - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, 30, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 30, 0xffffffffffffffff, (uint64_t)V_TCB_RX_TLS_KEY_TAG(key_offset / 64) << 32); CTR3(KTR_CXGBE, "%s: tid %d enable TLS seqno %lu", __func__, toep->tid, seqno); - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_TLS_SEQ, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_TLS_SEQ, V_TCB_TLS_SEQ(M_TCB_TLS_SEQ), V_TCB_TLS_SEQ(seqno)); - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_ULP_RAW, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW), V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) | V_TF_TLS_CONTROL(1) | V_TF_TLS_ACTIVE(1) | V_TF_TLS_ENABLE(1)))); toep->flags &= ~TPF_TLS_STARTING; toep->flags |= TPF_TLS_RECEIVE; /* Set the ULP mode to ULP_MODE_TLS. */ toep->params.ulp_mode = ULP_MODE_TLS; - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_ULP_TYPE, - V_TCB_ULP_TYPE(M_TCB_ULP_TYPE), - V_TCB_ULP_TYPE(ULP_MODE_TLS)); + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_ULP_TYPE, + V_TCB_ULP_TYPE(M_TCB_ULP_TYPE), V_TCB_ULP_TYPE(ULP_MODE_TLS)); /* Clear TF_RX_QUIESCE. */ - ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_T_FLAGS, + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0); t4_wrq_tx(sc, wr); } /* * Examine the pending data in the socket buffer and either enable TLS * RX or request more encrypted data. */ static void tls_check_rx_sockbuf(struct adapter *sc, struct toepcb *toep, struct sockbuf *sb) { uint64_t seqno; size_t resid; bool have_header; SOCKBUF_LOCK_ASSERT(sb); MPASS(toep->tls.rx_resid == 0); have_header = ktls_pending_rx_info(sb, &seqno, &resid); CTR5(KTR_CXGBE, "%s: tid %d have_header %d seqno %lu resid %zu", __func__, toep->tid, have_header, seqno, resid); /* * If we have a partial header or we need fewer bytes than the * size of a TLS record, re-enable receive and pause again once * we get more data to try again. */ if (!have_header || resid != 0) { CTR(KTR_CXGBE, "%s: tid %d waiting for more data", __func__, toep->tid); toep->flags &= ~TPF_TLS_RX_QUIESCED; t4_clear_rx_quiesce(toep); return; } tls_update_tcb(sc, toep, seqno); } void tls_received_starting_data(struct adapter *sc, struct toepcb *toep, struct sockbuf *sb, int len) { MPASS(toep->flags & TPF_TLS_STARTING); /* Data was received before quiescing took effect. */ if ((toep->flags & TPF_TLS_RX_QUIESCING) != 0) return; /* * A previous call to tls_check_rx_sockbuf needed more data. * Now that more data has arrived, quiesce receive again and * check the state once the quiesce has completed. 
*/ if ((toep->flags & TPF_TLS_RX_QUIESCED) == 0) { CTR(KTR_CXGBE, "%s: tid %d quiescing", __func__, toep->tid); toep->flags |= TPF_TLS_RX_QUIESCING; t4_set_rx_quiesce(toep); return; } KASSERT(len <= toep->tls.rx_resid, ("%s: received excess bytes %d (waiting for %zu)", __func__, len, toep->tls.rx_resid)); toep->tls.rx_resid -= len; if (toep->tls.rx_resid != 0) return; tls_check_rx_sockbuf(sc, toep, sb); } static int do_tls_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep; struct inpcb *inp; struct socket *so; struct sockbuf *sb; if (cpl->status != CPL_ERR_NONE) panic("XXX: tcp_rpl failed: %d", cpl->status); toep = lookup_tid(sc, tid); inp = toep->inp; switch (cpl->cookie) { case V_WORD(W_TCB_T_FLAGS) | V_COOKIE(CPL_COOKIE_TOM): INP_WLOCK(inp); if ((toep->flags & TPF_TLS_STARTING) == 0) panic("%s: connection is not starting TLS RX\n", __func__); MPASS((toep->flags & TPF_TLS_RX_QUIESCING) != 0); toep->flags &= ~TPF_TLS_RX_QUIESCING; toep->flags |= TPF_TLS_RX_QUIESCED; so = inp->inp_socket; sb = &so->so_rcv; SOCKBUF_LOCK(sb); tls_check_rx_sockbuf(sc, toep, sb); SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); break; default: panic("XXX: unknown tcb_rpl offset %#x, cookie %#x", G_WORD(cpl->cookie), G_COOKIE(cpl->cookie)); } return (0); } void t4_tls_mod_load(void) { t4_register_cpl_handler(CPL_TLS_DATA, do_tls_data); t4_register_cpl_handler(CPL_RX_TLS_CMP, do_rx_tls_cmp); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, do_tls_tcb_rpl, CPL_COOKIE_TOM); } void t4_tls_mod_unload(void) { t4_register_cpl_handler(CPL_TLS_DATA, NULL); t4_register_cpl_handler(CPL_RX_TLS_CMP, NULL); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_TOM); } #endif /* TCP_OFFLOAD */ #endif /* KERN_TLS */ diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index ac5bba75f904..3fe34c7c01a3 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -1,2101 +1,2067 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_tcb.h" #include "t4_clip.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" #include "tom/t4_tls.h" static struct protosw toe_protosw; static struct protosw toe6_protosw; /* Module ops */ static int t4_tom_mod_load(void); static int t4_tom_mod_unload(void); static int t4_tom_modevent(module_t, int, void *); /* ULD ops and helpers */ static int t4_tom_activate(struct adapter *); static int t4_tom_deactivate(struct adapter *); static struct uld_info tom_uld_info = { .uld_id = ULD_TOM, .activate = t4_tom_activate, .deactivate = t4_tom_deactivate, }; static void release_offload_resources(struct toepcb *); static int alloc_tid_tabs(struct tid_info *); static void free_tid_tabs(struct tid_info *); static void free_tom_data(struct adapter *, struct tom_data *); static void reclaim_wr_resources(void *, int); struct toepcb * alloc_toepcb(struct vi_info *vi, int flags) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct toepcb *toep; int tx_credits, txsd_total, len; /* * The firmware counts tx work request credits in units of 16 bytes * each. Reserve room for an ABORT_REQ so the driver never has to worry * about tx credits if it wants to abort a connection. */ tx_credits = sc->params.ofldq_wr_cred; tx_credits -= howmany(sizeof(struct cpl_abort_req), 16); /* * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte * immediate payload, and firmware counts tx work request credits in * units of 16 byte. Calculate the maximum work requests possible. */ txsd_total = tx_credits / howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16); len = offsetof(struct toepcb, txsd) + txsd_total * sizeof(struct ofld_tx_sdesc); toep = malloc(len, M_CXGBE, M_ZERO | flags); if (toep == NULL) return (NULL); refcount_init(&toep->refcount, 1); toep->td = sc->tom_softc; toep->vi = vi; toep->tid = -1; toep->tx_total = tx_credits; toep->tx_credits = tx_credits; mbufq_init(&toep->ulp_pduq, INT_MAX); mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX); toep->txsd_total = txsd_total; toep->txsd_avail = txsd_total; toep->txsd_pidx = 0; toep->txsd_cidx = 0; aiotx_init_toep(toep); return (toep); } /* * Initialize a toepcb after its params have been filled out. 
*/ int init_toepcb(struct vi_info *vi, struct toepcb *toep) { struct conn_params *cp = &toep->params; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tx_cl_rl_params *tc; if (cp->tc_idx >= 0 && cp->tc_idx < sc->params.nsched_cls) { tc = &pi->sched_params->cl_rl[cp->tc_idx]; mtx_lock(&sc->tc_lock); if (tc->state != CS_HW_CONFIGURED) { CH_ERR(vi, "tid %d cannot be bound to traffic class %d " "because it is not configured (its state is %d)\n", toep->tid, cp->tc_idx, tc->state); cp->tc_idx = -1; } else { tc->refcount++; } mtx_unlock(&sc->tc_lock); } toep->ofld_txq = &sc->sge.ofld_txq[cp->txq_idx]; toep->ofld_rxq = &sc->sge.ofld_rxq[cp->rxq_idx]; toep->ctrlq = &sc->sge.ctrlq[pi->port_id]; tls_init_toep(toep); MPASS(ulp_mode(toep) != ULP_MODE_TCPDDP); toep->flags |= TPF_INITIALIZED; return (0); } struct toepcb * hold_toepcb(struct toepcb *toep) { refcount_acquire(&toep->refcount); return (toep); } void free_toepcb(struct toepcb *toep) { if (refcount_release(&toep->refcount) == 0) return; KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: attached to an inpcb", __func__)); KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: CPL pending", __func__)); if (toep->flags & TPF_INITIALIZED) { if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_uninit_toep(toep); tls_uninit_toep(toep); } free(toep, M_CXGBE); } /* * Set up the socket for TCP offload. */ void offload_socket(struct socket *so, struct toepcb *toep) { struct tom_data *td = toep->td; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct sockbuf *sb; INP_WLOCK_ASSERT(inp); /* Update socket */ sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; if (inp->inp_vflag & INP_IPV6) so->so_proto = &toe6_protosw; else so->so_proto = &toe_protosw; SOCKBUF_UNLOCK(sb); /* Update TCP PCB */ tp->tod = &td->tod; tp->t_toe = toep; tp->t_flags |= TF_TOE; /* Install an extra hold on inp */ toep->inp = inp; toep->flags |= TPF_ATTACHED; in_pcbref(inp); /* Add the TOE PCB to the active list */ mtx_lock(&td->toep_list_lock); TAILQ_INSERT_HEAD(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); } void restore_so_proto(struct socket *so, bool v6) { if (v6) so->so_proto = &tcp6_protosw; else so->so_proto = &tcp_protosw; } /* This is _not_ the normal way to "unoffload" a socket. 
*/ void undo_offload_socket(struct socket *so) { struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; struct tom_data *td = toep->td; struct sockbuf *sb; INP_WLOCK_ASSERT(inp); sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; restore_so_proto(so, inp->inp_vflag & INP_IPV6); SOCKBUF_UNLOCK(sb); tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->inp = NULL; toep->flags &= ~TPF_ATTACHED; if (in_pcbrele_wlocked(inp)) panic("%s: inp freed.", __func__); mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); } static void release_offload_resources(struct toepcb *toep) { struct tom_data *td = toep->td; struct adapter *sc = td_adapter(td); int tid = toep->tid; KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: %p has CPL pending.", __func__, toep)); KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: %p is still attached.", __func__, toep)); CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", __func__, toep, tid, toep->l2te, toep->ce); /* * These queues should have been emptied at approximately the same time * that a normal connection's socket's so_snd would have been purged or * drained. Do _not_ clean up here. */ MPASS(mbufq_empty(&toep->ulp_pduq)); MPASS(mbufq_empty(&toep->ulp_pdu_reclaimq)); #ifdef INVARIANTS if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_assert_empty(toep); #endif MPASS(TAILQ_EMPTY(&toep->aiotx_jobq)); if (toep->l2te) t4_l2t_release(toep->l2te); if (tid >= 0) { remove_tid(sc, tid, toep->ce ? 2 : 1); release_tid(sc, tid, toep->ctrlq); } if (toep->ce) t4_release_clip_entry(sc, toep->ce); if (toep->params.tc_idx != -1) t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->params.tc_idx); mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); free_toepcb(toep); } /* * The kernel is done with the TCP PCB and this is our opportunity to unhook the * toepcb hanging off of it. If the TOE driver is also done with the toepcb (no * pending CPL) then it is time to release all resources tied to the toepcb. * * Also gets called when an offloaded active open fails and the TOM wants the * kernel to take the TCP PCB back. */ static void t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) { #if defined(KTR) || defined(INVARIANTS) struct inpcb *inp = tptoinpcb(tp); #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); KASSERT(toep->flags & TPF_ATTACHED, ("%s: not attached", __func__)); #ifdef KTR if (tp->t_state == TCPS_SYN_SENT) { CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); } else { CTR6(KTR_CXGBE, "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)", toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp, inp->inp_flags); } #endif tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->flags &= ~TPF_ATTACHED; if (!(toep->flags & TPF_CPL_PENDING)) release_offload_resources(toep); } /* * setsockopt handler. 
*/ static void t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = tp->t_toe; if (dir == SOPT_GET) return; CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name); switch (name) { case TCP_NODELAY: if (tp->t_state != TCPS_ESTABLISHED) break; toep->params.nagle = tp->t_flags & TF_NODELAY ? 0 : 1; t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, V_TF_NAGLE(1), V_TF_NAGLE(toep->params.nagle), 0, 0); break; default: break; } } static inline uint64_t get_tcb_tflags(const uint64_t *tcb) { return ((be64toh(tcb[14]) << 32) | (be64toh(tcb[15]) >> 32)); } static inline uint32_t get_tcb_field(const uint64_t *tcb, u_int word, uint32_t mask, u_int shift) { #define LAST_WORD ((TCB_SIZE / 4) - 1) uint64_t t1, t2; int flit_idx; MPASS(mask != 0); MPASS(word <= LAST_WORD); MPASS(shift < 32); flit_idx = (LAST_WORD - word) / 2; if (word & 0x1) shift += 32; t1 = be64toh(tcb[flit_idx]) >> shift; t2 = 0; if (fls(mask) > 64 - shift) { /* * Will spill over into the next logical flit, which is the flit * before this one. The flit_idx before this one must be valid. */ MPASS(flit_idx > 0); t2 = be64toh(tcb[flit_idx - 1]) << (64 - shift); } return ((t2 | t1) & mask); #undef LAST_WORD } #define GET_TCB_FIELD(tcb, F) \ get_tcb_field(tcb, W_TCB_##F, M_TCB_##F, S_TCB_##F) /* * Issues a CPL_GET_TCB to read the entire TCB for the tid. */ static int send_get_tcb(struct adapter *sc, u_int tid) { struct cpl_get_tcb *cpl; struct wrq_cookie cookie; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); cpl = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*cpl), 16), &cookie); if (__predict_false(cpl == NULL)) return (ENOMEM); bzero(cpl, sizeof(*cpl)); INIT_TP_WR(cpl, tid); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_GET_TCB, tid)); cpl->reply_ctrl = htobe16(V_REPLY_CHAN(0) | V_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id)); cpl->cookie = 0xff; commit_wrq_wr(&sc->sge.ctrlq[0], cpl, &cookie); return (0); } static struct tcb_histent * alloc_tcb_histent(struct adapter *sc, u_int tid, int flags) { struct tcb_histent *te; MPASS(flags == M_NOWAIT || flags == M_WAITOK); te = malloc(sizeof(*te), M_CXGBE, M_ZERO | flags); if (te == NULL) return (NULL); mtx_init(&te->te_lock, "TCB entry", NULL, MTX_DEF); callout_init_mtx(&te->te_callout, &te->te_lock, 0); te->te_adapter = sc; te->te_tid = tid; return (te); } static void free_tcb_histent(struct tcb_histent *te) { mtx_destroy(&te->te_lock); free(te, M_CXGBE); } /* * Start tracking the tid in the TCB history. 
*/ int add_tid_to_history(struct adapter *sc, u_int tid) { struct tcb_histent *te = NULL; struct tom_data *td = sc->tom_softc; int rc; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); if (td->tcb_history == NULL) return (ENXIO); rw_wlock(&td->tcb_history_lock); if (td->tcb_history[tid] != NULL) { rc = EEXIST; goto done; } te = alloc_tcb_histent(sc, tid, M_NOWAIT); if (te == NULL) { rc = ENOMEM; goto done; } mtx_lock(&te->te_lock); rc = send_get_tcb(sc, tid); if (rc == 0) { te->te_flags |= TE_RPL_PENDING; td->tcb_history[tid] = te; } else { free(te, M_CXGBE); } mtx_unlock(&te->te_lock); done: rw_wunlock(&td->tcb_history_lock); return (rc); } static void remove_tcb_histent(struct tcb_histent *te) { struct adapter *sc = te->te_adapter; struct tom_data *td = sc->tom_softc; rw_assert(&td->tcb_history_lock, RA_WLOCKED); mtx_assert(&te->te_lock, MA_OWNED); MPASS(td->tcb_history[te->te_tid] == te); td->tcb_history[te->te_tid] = NULL; free_tcb_histent(te); rw_wunlock(&td->tcb_history_lock); } static inline struct tcb_histent * lookup_tcb_histent(struct adapter *sc, u_int tid, bool addrem) { struct tcb_histent *te; struct tom_data *td = sc->tom_softc; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); if (td->tcb_history == NULL) return (NULL); if (addrem) rw_wlock(&td->tcb_history_lock); else rw_rlock(&td->tcb_history_lock); te = td->tcb_history[tid]; if (te != NULL) { mtx_lock(&te->te_lock); return (te); /* with both locks held */ } if (addrem) rw_wunlock(&td->tcb_history_lock); else rw_runlock(&td->tcb_history_lock); return (te); } static inline void release_tcb_histent(struct tcb_histent *te) { struct adapter *sc = te->te_adapter; struct tom_data *td = sc->tom_softc; mtx_assert(&te->te_lock, MA_OWNED); mtx_unlock(&te->te_lock); rw_assert(&td->tcb_history_lock, RA_RLOCKED); rw_runlock(&td->tcb_history_lock); } static void request_tcb(void *arg) { struct tcb_histent *te = arg; mtx_assert(&te->te_lock, MA_OWNED); /* Noone else is supposed to update the histent. */ MPASS(!(te->te_flags & TE_RPL_PENDING)); if (send_get_tcb(te->te_adapter, te->te_tid) == 0) te->te_flags |= TE_RPL_PENDING; else callout_schedule(&te->te_callout, hz / 100); } static void update_tcb_histent(struct tcb_histent *te, const uint64_t *tcb) { struct tom_data *td = te->te_adapter->tom_softc; uint64_t tflags = get_tcb_tflags(tcb); uint8_t sample = 0; if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != GET_TCB_FIELD(tcb, SND_UNA_RAW)) { if (GET_TCB_FIELD(tcb, T_RXTSHIFT) != 0) sample |= TS_RTO; if (GET_TCB_FIELD(tcb, T_DUPACKS) != 0) sample |= TS_DUPACKS; if (GET_TCB_FIELD(tcb, T_DUPACKS) >= td->dupack_threshold) sample |= TS_FASTREXMT; } if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != 0) { uint32_t snd_wnd; sample |= TS_SND_BACKLOGGED; /* for whatever reason. */ snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV); if (tflags & V_TF_RECV_SCALE(1)) snd_wnd <<= GET_TCB_FIELD(tcb, RCV_SCALE); if (GET_TCB_FIELD(tcb, SND_CWND) < snd_wnd) sample |= TS_CWND_LIMITED; /* maybe due to CWND */ } if (tflags & V_TF_CCTRL_ECN(1)) { /* * CE marker on incoming IP hdr, echoing ECE back in the TCP * hdr. Indicates congestion somewhere on the way from the peer * to this node. */ if (tflags & V_TF_CCTRL_ECE(1)) sample |= TS_ECN_ECE; /* * ECE seen and CWR sent (or about to be sent). Might indicate * congestion on the way to the peer. This node is reducing its * congestion window in response. 
*/ if (tflags & (V_TF_CCTRL_CWR(1) | V_TF_CCTRL_RFR(1))) sample |= TS_ECN_CWR; } te->te_sample[te->te_pidx] = sample; if (++te->te_pidx == nitems(te->te_sample)) te->te_pidx = 0; memcpy(te->te_tcb, tcb, TCB_SIZE); te->te_flags |= TE_ACTIVE; } static int do_get_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_get_tcb_rpl *cpl = mtod(m, const void *); const uint64_t *tcb = (const uint64_t *)(const void *)(cpl + 1); struct tcb_histent *te; const u_int tid = GET_TID(cpl); bool remove; remove = GET_TCB_FIELD(tcb, T_STATE) == TCPS_CLOSED; te = lookup_tcb_histent(sc, tid, remove); if (te == NULL) { /* Not in the history. Who issued the GET_TCB for this? */ device_printf(sc->dev, "tcb %u: flags 0x%016jx, state %u, " "srtt %u, sscale %u, rscale %u, cookie 0x%x\n", tid, (uintmax_t)get_tcb_tflags(tcb), GET_TCB_FIELD(tcb, T_STATE), GET_TCB_FIELD(tcb, T_SRTT), GET_TCB_FIELD(tcb, SND_SCALE), GET_TCB_FIELD(tcb, RCV_SCALE), cpl->cookie); goto done; } MPASS(te->te_flags & TE_RPL_PENDING); te->te_flags &= ~TE_RPL_PENDING; if (remove) { remove_tcb_histent(te); } else { update_tcb_histent(te, tcb); callout_reset(&te->te_callout, hz / 10, request_tcb, te); release_tcb_histent(te); } done: m_freem(m); return (0); } static void fill_tcp_info_from_tcb(struct adapter *sc, uint64_t *tcb, struct tcp_info *ti) { uint32_t v; ti->tcpi_state = GET_TCB_FIELD(tcb, T_STATE); v = GET_TCB_FIELD(tcb, T_SRTT); ti->tcpi_rtt = tcp_ticks_to_us(sc, v); v = GET_TCB_FIELD(tcb, T_RTTVAR); ti->tcpi_rttvar = tcp_ticks_to_us(sc, v); ti->tcpi_snd_ssthresh = GET_TCB_FIELD(tcb, SND_SSTHRESH); ti->tcpi_snd_cwnd = GET_TCB_FIELD(tcb, SND_CWND); ti->tcpi_rcv_nxt = GET_TCB_FIELD(tcb, RCV_NXT); ti->tcpi_rcv_adv = GET_TCB_FIELD(tcb, RCV_ADV); ti->tcpi_dupacks = GET_TCB_FIELD(tcb, T_DUPACKS); v = GET_TCB_FIELD(tcb, TX_MAX); ti->tcpi_snd_nxt = v - GET_TCB_FIELD(tcb, SND_NXT_RAW); ti->tcpi_snd_una = v - GET_TCB_FIELD(tcb, SND_UNA_RAW); ti->tcpi_snd_max = v - GET_TCB_FIELD(tcb, SND_MAX_RAW); /* Receive window being advertised by us. */ ti->tcpi_rcv_wscale = GET_TCB_FIELD(tcb, SND_SCALE); /* Yes, SND. */ ti->tcpi_rcv_space = GET_TCB_FIELD(tcb, RCV_WND); /* Send window */ ti->tcpi_snd_wscale = GET_TCB_FIELD(tcb, RCV_SCALE); /* Yes, RCV. */ ti->tcpi_snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV); if (get_tcb_tflags(tcb) & V_TF_RECV_SCALE(1)) ti->tcpi_snd_wnd <<= ti->tcpi_snd_wscale; else ti->tcpi_snd_wscale = 0; } static void fill_tcp_info_from_history(struct adapter *sc, struct tcb_histent *te, struct tcp_info *ti) { fill_tcp_info_from_tcb(sc, te->te_tcb, ti); } /* * Reads the TCB for the given tid using a memory window and copies it to 'buf' * in the same format as CPL_GET_TCB_RPL. 
*/ static void read_tcb_using_memwin(struct adapter *sc, u_int tid, uint64_t *buf) { int i, j, k, rc; uint32_t addr; u_char *tcb, tmp; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE; rc = read_via_memwin(sc, 2, addr, (uint32_t *)buf, TCB_SIZE); if (rc != 0) return; tcb = (u_char *)buf; for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) { for (k = 0; k < 16; k++) { tmp = tcb[i + k]; tcb[i + k] = tcb[j + k]; tcb[j + k] = tmp; } } } static void fill_tcp_info(struct adapter *sc, u_int tid, struct tcp_info *ti) { uint64_t tcb[TCB_SIZE / sizeof(uint64_t)]; struct tcb_histent *te; ti->tcpi_toe_tid = tid; te = lookup_tcb_histent(sc, tid, false); if (te != NULL) { fill_tcp_info_from_history(sc, te, ti); release_tcb_histent(te); } else { if (!(sc->debug_flags & DF_DISABLE_TCB_CACHE)) { /* XXX: tell firmware to flush TCB cache. */ } read_tcb_using_memwin(sc, tid, tcb); fill_tcp_info_from_tcb(sc, tcb, ti); } } /* * Called by the kernel to allow the TOE driver to "refine" values filled up in * the tcp_info for an offloaded connection. */ static void t4_tcp_info(struct toedev *tod, const struct tcpcb *tp, struct tcp_info *ti) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = tp->t_toe; INP_LOCK_ASSERT(tptoinpcb(tp)); MPASS(ti != NULL); fill_tcp_info(sc, toep->tid, ti); } #ifdef KERN_TLS static int t4_alloc_tls_session(struct toedev *tod, struct tcpcb *tp, struct ktls_session *tls, int direction) { struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(tptoinpcb(tp)); MPASS(tls != NULL); return (tls_alloc_ktls(toep, tls, direction)); } #endif -/* SET_TCB_FIELD sent as a ULP command looks like this */ -#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ - sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) - -static void * -mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, uint64_t word, uint64_t mask, - uint64_t val, uint32_t tid) -{ - struct ulptx_idata *ulpsc; - struct cpl_set_tcb_field_core *req; - - ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); - ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); - - ulpsc = (struct ulptx_idata *)(ulpmc + 1); - ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); - ulpsc->len = htobe32(sizeof(*req)); - - req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); - OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); - req->reply_ctrl = htobe16(V_NO_REPLY(1)); - req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); - req->mask = htobe64(mask); - req->val = htobe64(val); - - ulpsc = (struct ulptx_idata *)(req + 1); - if (LEN__SET_TCB_FIELD_ULP % 16) { - ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); - ulpsc->len = htobe32(0); - return (ulpsc + 1); - } - return (ulpsc); -} - static void send_mss_flowc_wr(struct adapter *sc, struct toepcb *toep) { struct wrq_cookie cookie; struct fw_flowc_wr *flowc; struct ofld_tx_sdesc *txsd; const int flowclen = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval); const int flowclen16 = howmany(flowclen, 16); if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0) { CH_ERR(sc, "%s: tid %u out of tx credits (%d, %d).\n", __func__, toep->tid, toep->tx_credits, toep->txsd_avail); return; } flowc = start_wrq_wr(&toep->ofld_txq->wrq, flowclen16, &cookie); if (__predict_false(flowc == NULL)) { CH_ERR(sc, "ENOMEM in %s for tid %u.\n", __func__, toep->tid); return; } flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(1)); 
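	/*
	 * Single-parameter FLOWC: only the MSS mnemonic is refreshed here so
	 * that the firmware starts segmenting with the updated emss after a
	 * path MTU change.
	 */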
flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[0].val = htobe32(toep->params.emss); txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = flowclen16; txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; commit_wrq_wr(&toep->ofld_txq->wrq, flowc, &cookie); } static void t4_pmtu_update(struct toedev *tod, struct tcpcb *tp, tcp_seq seq, int mtu) { struct work_request_hdr *wrh; struct ulp_txpkt *ulpmc; int idx, len; struct wrq_cookie cookie; struct inpcb *inp = tptoinpcb(tp); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); unsigned short *mtus = &sc->params.mtus[0]; INP_WLOCK_ASSERT(inp); MPASS(mtu > 0); /* kernel is supposed to provide something usable. */ /* tp->snd_una and snd_max are in host byte order too. */ seq = be32toh(seq); CTR6(KTR_CXGBE, "%s: tid %d, seq 0x%08x, mtu %u, mtu_idx %u (%d)", __func__, toep->tid, seq, mtu, toep->params.mtu_idx, mtus[toep->params.mtu_idx]); if (ulp_mode(toep) == ULP_MODE_NONE && /* XXX: Read TCB otherwise? */ (SEQ_LT(seq, tp->snd_una) || SEQ_GEQ(seq, tp->snd_max))) { CTR5(KTR_CXGBE, "%s: tid %d, seq 0x%08x not in range [0x%08x, 0x%08x).", __func__, toep->tid, seq, tp->snd_una, tp->snd_max); return; } /* Find the best mtu_idx for the suggested MTU. */ for (idx = 0; idx < NMTUS - 1 && mtus[idx + 1] <= mtu; idx++) continue; if (idx >= toep->params.mtu_idx) return; /* Never increase the PMTU (just like the kernel). */ /* * We'll send a compound work request with 2 SET_TCB_FIELDs -- the first * one updates the mtu_idx and the second one triggers a retransmit. */ len = sizeof(*wrh) + 2 * roundup2(LEN__SET_TCB_FIELD_ULP, 16); wrh = start_wrq_wr(toep->ctrlq, howmany(len, 16), &cookie); if (wrh == NULL) { CH_ERR(sc, "failed to change mtu_idx of tid %d (%u -> %u).\n", toep->tid, toep->params.mtu_idx, idx); return; } INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */ ulpmc = (struct ulp_txpkt *)(wrh + 1); - ulpmc = mk_set_tcb_field_ulp(ulpmc, W_TCB_T_MAXSEG, - V_TCB_T_MAXSEG(M_TCB_T_MAXSEG), V_TCB_T_MAXSEG(idx), toep->tid); - ulpmc = mk_set_tcb_field_ulp(ulpmc, W_TCB_TIMESTAMP, - V_TCB_TIMESTAMP(0x7FFFFULL << 11), 0, toep->tid); + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_T_MAXSEG, + V_TCB_T_MAXSEG(M_TCB_T_MAXSEG), V_TCB_T_MAXSEG(idx)); + ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_TIMESTAMP, + V_TCB_TIMESTAMP(0x7FFFFULL << 11), 0); commit_wrq_wr(toep->ctrlq, wrh, &cookie); /* Update the software toepcb and tcpcb. */ toep->params.mtu_idx = idx; tp->t_maxseg = mtus[toep->params.mtu_idx]; if (inp->inp_inc.inc_flags & INC_ISIPV6) tp->t_maxseg -= sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else tp->t_maxseg -= sizeof(struct ip) + sizeof(struct tcphdr); toep->params.emss = tp->t_maxseg; if (tp->t_flags & TF_RCVD_TSTMP) toep->params.emss -= TCPOLEN_TSTAMP_APPA; /* Update the firmware flowc. */ send_mss_flowc_wr(sc, toep); /* Update the MTU in the kernel's hostcache. 
*/ if (sc->tt.update_hc_on_pmtu_change != 0) { struct in_conninfo inc = {0}; inc.inc_fibnum = inp->inp_inc.inc_fibnum; if (inp->inp_inc.inc_flags & INC_ISIPV6) { inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = inp->inp_inc.inc6_faddr; } else { inc.inc_faddr = inp->inp_inc.inc_faddr; } tcp_hc_updatemtu(&inc, mtu); } CTR6(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u), t_maxseg %u, emss %u", __func__, toep->tid, toep->params.mtu_idx, mtus[toep->params.mtu_idx], tp->t_maxseg, toep->params.emss); } /* * The TOE driver will not receive any more CPLs for the tid associated with the * toepcb; release the hold on the inpcb. */ void final_cpl_received(struct toepcb *toep) { struct inpcb *inp = toep->inp; bool need_wakeup; KASSERT(inp != NULL, ("%s: inp is NULL", __func__)); INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_CPL_PENDING, ("%s: CPL not pending already?", __func__)); CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); if (ulp_mode(toep) == ULP_MODE_TCPDDP) release_ddp_resources(toep); toep->inp = NULL; need_wakeup = (toep->flags & TPF_WAITING_FOR_FINAL) != 0; toep->flags &= ~(TPF_CPL_PENDING | TPF_WAITING_FOR_FINAL); mbufq_drain(&toep->ulp_pduq); mbufq_drain(&toep->ulp_pdu_reclaimq); if (!(toep->flags & TPF_ATTACHED)) release_offload_resources(toep); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); if (need_wakeup) { struct mtx *lock = mtx_pool_find(mtxpool_sleep, toep); mtx_lock(lock); wakeup(toep); mtx_unlock(lock); } } void insert_tid(struct adapter *sc, int tid, void *ctx, int ntids) { struct tid_info *t = &sc->tids; MPASS(tid >= t->tid_base); MPASS(tid - t->tid_base < t->ntids); t->tid_tab[tid - t->tid_base] = ctx; atomic_add_int(&t->tids_in_use, ntids); } void * lookup_tid(struct adapter *sc, int tid) { struct tid_info *t = &sc->tids; return (t->tid_tab[tid - t->tid_base]); } void update_tid(struct adapter *sc, int tid, void *ctx) { struct tid_info *t = &sc->tids; t->tid_tab[tid - t->tid_base] = ctx; } void remove_tid(struct adapter *sc, int tid, int ntids) { struct tid_info *t = &sc->tids; t->tid_tab[tid - t->tid_base] = NULL; atomic_subtract_int(&t->tids_in_use, ntids); } /* * What mtu_idx to use, given a 4-tuple. Note that both s->mss and tcp_mssopt * have the MSS that we should advertise in our SYN. Advertised MSS doesn't * account for any TCP options so the effective MSS (only payload, no headers or * options) could be different. */ static int find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, struct offload_settings *s) { unsigned short *mtus = &sc->params.mtus[0]; int i, mss, mtu; MPASS(inc != NULL); mss = s->mss > 0 ? s->mss : tcp_mssopt(inc); if (inc->inc_flags & INC_ISIPV6) mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr); for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++) continue; return (i); } /* * Determine the receive window size for a socket. 
*/ u_long select_rcv_wnd(struct socket *so) { unsigned long wnd; SOCKBUF_LOCK_ASSERT(&so->so_rcv); wnd = sbspace(&so->so_rcv); if (wnd < MIN_RCV_WND) wnd = MIN_RCV_WND; return min(wnd, MAX_RCV_WND); } int select_rcv_wscale(void) { int wscale = 0; unsigned long space = sb_max; if (space > MAX_RCV_WND) space = MAX_RCV_WND; while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) wscale++; return (wscale); } __be64 calc_options0(struct vi_info *vi, struct conn_params *cp) { uint64_t opt0 = 0; opt0 |= F_TCAM_BYPASS; MPASS(cp->wscale >= 0 && cp->wscale <= M_WND_SCALE); opt0 |= V_WND_SCALE(cp->wscale); MPASS(cp->mtu_idx >= 0 && cp->mtu_idx < NMTUS); opt0 |= V_MSS_IDX(cp->mtu_idx); MPASS(cp->ulp_mode >= 0 && cp->ulp_mode <= M_ULP_MODE); opt0 |= V_ULP_MODE(cp->ulp_mode); MPASS(cp->opt0_bufsize >= 0 && cp->opt0_bufsize <= M_RCV_BUFSIZ); opt0 |= V_RCV_BUFSIZ(cp->opt0_bufsize); MPASS(cp->l2t_idx >= 0 && cp->l2t_idx < vi->adapter->vres.l2t.size); opt0 |= V_L2T_IDX(cp->l2t_idx); opt0 |= V_SMAC_SEL(vi->smt_idx); opt0 |= V_TX_CHAN(vi->pi->tx_chan); MPASS(cp->keepalive == 0 || cp->keepalive == 1); opt0 |= V_KEEP_ALIVE(cp->keepalive); MPASS(cp->nagle == 0 || cp->nagle == 1); opt0 |= V_NAGLE(cp->nagle); return (htobe64(opt0)); } __be32 calc_options2(struct vi_info *vi, struct conn_params *cp) { uint32_t opt2 = 0; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; /* * rx flow control, rx coalesce, congestion control, and tx pace are all * explicitly set by the driver. On T5+ the ISS is also set by the * driver to the value picked by the kernel. */ if (is_t4(sc)) { opt2 |= F_RX_FC_VALID | F_RX_COALESCE_VALID; opt2 |= F_CONG_CNTRL_VALID | F_PACE_VALID; } else { opt2 |= F_T5_OPT_2_VALID; /* all 4 valid */ opt2 |= F_T5_ISS; /* ISS provided in CPL */ } MPASS(cp->sack == 0 || cp->sack == 1); opt2 |= V_SACK_EN(cp->sack); MPASS(cp->tstamp == 0 || cp->tstamp == 1); opt2 |= V_TSTAMPS_EN(cp->tstamp); if (cp->wscale > 0) opt2 |= F_WND_SCALE_EN; MPASS(cp->ecn == 0 || cp->ecn == 1); opt2 |= V_CCTRL_ECN(cp->ecn); opt2 |= V_TX_QUEUE(TX_MODQ(pi->tx_chan)); opt2 |= V_PACE(0); opt2 |= F_RSS_QUEUE_VALID; opt2 |= V_RSS_QUEUE(sc->sge.ofld_rxq[cp->rxq_idx].iq.abs_id); if (chip_id(sc) <= CHELSIO_T6) { MPASS(pi->rx_chan == 0 || pi->rx_chan == 1); opt2 |= V_RX_CHANNEL(pi->rx_chan); } MPASS(cp->cong_algo >= 0 && cp->cong_algo <= M_CONG_CNTRL); opt2 |= V_CONG_CNTRL(cp->cong_algo); MPASS(cp->rx_coalesce == 0 || cp->rx_coalesce == 1); if (cp->rx_coalesce == 1) opt2 |= V_RX_COALESCE(M_RX_COALESCE); opt2 |= V_RX_FC_DDP(0) | V_RX_FC_DISABLE(0); MPASS(cp->ulp_mode != ULP_MODE_TCPDDP); return (htobe32(opt2)); } uint64_t select_ntuple(struct vi_info *vi, struct l2t_entry *e) { struct adapter *sc = vi->adapter; struct tp_params *tp = &sc->params.tp; uint64_t ntuple = 0; /* * Initialize each of the fields which we care about which are present * in the Compressed Filter Tuple. */ if (tp->vlan_shift >= 0 && EVL_VLANOFTAG(e->vlan) != CPL_L2T_VLAN_NONE) ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift; if (tp->port_shift >= 0) ntuple |= (uint64_t)e->lport << tp->port_shift; if (tp->protocol_shift >= 0) ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift; if (tp->vnic_shift >= 0 && tp->vnic_mode == FW_VNIC_MODE_PF_VF) { ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vi->vin) | V_FT_VNID_ID_PF(sc->pf) | V_FT_VNID_ID_VLD(vi->vfvld)) << tp->vnic_shift; } if (is_t4(sc)) return (htobe32((uint32_t)ntuple)); else return (htobe64(V_FILTER_TUPLE(ntuple))); } /* * Initialize various connection parameters. 
*/ void init_conn_params(struct vi_info *vi , struct offload_settings *s, struct in_conninfo *inc, struct socket *so, const struct tcp_options *tcpopt, int16_t l2t_idx, struct conn_params *cp) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tom_tunables *tt = &sc->tt; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); u_long wnd; u_int q_idx; MPASS(s->offload != 0); /* Congestion control algorithm */ if (s->cong_algo >= 0) cp->cong_algo = s->cong_algo & M_CONG_CNTRL; else if (sc->tt.cong_algorithm >= 0) cp->cong_algo = tt->cong_algorithm & M_CONG_CNTRL; else { struct cc_algo *cc = CC_ALGO(tp); if (strcasecmp(cc->name, "reno") == 0) cp->cong_algo = CONG_ALG_RENO; else if (strcasecmp(cc->name, "tahoe") == 0) cp->cong_algo = CONG_ALG_TAHOE; if (strcasecmp(cc->name, "newreno") == 0) cp->cong_algo = CONG_ALG_NEWRENO; if (strcasecmp(cc->name, "highspeed") == 0) cp->cong_algo = CONG_ALG_HIGHSPEED; else { /* * Use newreno in case the algorithm selected by the * host stack is not supported by the hardware. */ cp->cong_algo = CONG_ALG_NEWRENO; } } /* Tx traffic scheduling class. */ if (s->sched_class >= 0 && s->sched_class < sc->params.nsched_cls) cp->tc_idx = s->sched_class; else cp->tc_idx = -1; /* Nagle's algorithm. */ if (s->nagle >= 0) cp->nagle = s->nagle > 0 ? 1 : 0; else cp->nagle = tp->t_flags & TF_NODELAY ? 0 : 1; /* TCP Keepalive. */ if (V_tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE) cp->keepalive = 1; else cp->keepalive = 0; /* Optimization that's specific to T5 @ 40G. */ if (tt->tx_align >= 0) cp->tx_align = tt->tx_align > 0 ? 1 : 0; else if (chip_id(sc) == CHELSIO_T5 && (port_top_speed(pi) > 10 || sc->params.nports > 2)) cp->tx_align = 1; else cp->tx_align = 0; /* ULP mode. */ cp->ulp_mode = ULP_MODE_NONE; /* Rx coalescing. */ if (s->rx_coalesce >= 0) cp->rx_coalesce = s->rx_coalesce > 0 ? 1 : 0; else if (tt->rx_coalesce >= 0) cp->rx_coalesce = tt->rx_coalesce > 0 ? 1 : 0; else cp->rx_coalesce = 1; /* default */ /* * Index in the PMTU table. This controls the MSS that we announce in * our SYN initially, but after ESTABLISHED it controls the MSS that we * use to send data. */ cp->mtu_idx = find_best_mtu_idx(sc, inc, s); /* Tx queue for this connection. */ if (s->txq == QUEUE_RANDOM) q_idx = arc4random(); else if (s->txq == QUEUE_ROUNDROBIN) q_idx = atomic_fetchadd_int(&vi->txq_rr, 1); else q_idx = s->txq; cp->txq_idx = vi->first_ofld_txq + q_idx % vi->nofldtxq; /* Rx queue for this connection. */ if (s->rxq == QUEUE_RANDOM) q_idx = arc4random(); else if (s->rxq == QUEUE_ROUNDROBIN) q_idx = atomic_fetchadd_int(&vi->rxq_rr, 1); else q_idx = s->rxq; cp->rxq_idx = vi->first_ofld_rxq + q_idx % vi->nofldrxq; if (SOLISTENING(so)) { /* Passive open */ MPASS(tcpopt != NULL); /* TCP timestamp option */ if (tcpopt->tstamp && (s->tstamp > 0 || (s->tstamp < 0 && V_tcp_do_rfc1323))) cp->tstamp = 1; else cp->tstamp = 0; /* SACK */ if (tcpopt->sack && (s->sack > 0 || (s->sack < 0 && V_tcp_do_sack))) cp->sack = 1; else cp->sack = 0; /* Receive window scaling. */ if (tcpopt->wsf > 0 && tcpopt->wsf < 15 && V_tcp_do_rfc1323) cp->wscale = select_rcv_wscale(); else cp->wscale = 0; /* ECN */ if (tcpopt->ecn && /* XXX: review. 
*/ (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn))) cp->ecn = 1; else cp->ecn = 0; wnd = max(so->sol_sbrcv_hiwat, MIN_RCV_WND); cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ); if (tt->sndbuf > 0) cp->sndbuf = tt->sndbuf; else if (so->sol_sbsnd_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) cp->sndbuf = 256 * 1024; else cp->sndbuf = so->sol_sbsnd_hiwat; } else { /* Active open */ /* TCP timestamp option */ if (s->tstamp > 0 || (s->tstamp < 0 && (tp->t_flags & TF_REQ_TSTMP))) cp->tstamp = 1; else cp->tstamp = 0; /* SACK */ if (s->sack > 0 || (s->sack < 0 && (tp->t_flags & TF_SACK_PERMIT))) cp->sack = 1; else cp->sack = 0; /* Receive window scaling */ if (tp->t_flags & TF_REQ_SCALE) cp->wscale = select_rcv_wscale(); else cp->wscale = 0; /* ECN */ if (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn == 1)) cp->ecn = 1; else cp->ecn = 0; SOCKBUF_LOCK(&so->so_rcv); wnd = max(select_rcv_wnd(so), MIN_RCV_WND); SOCKBUF_UNLOCK(&so->so_rcv); cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ); if (tt->sndbuf > 0) cp->sndbuf = tt->sndbuf; else { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) cp->sndbuf = 256 * 1024; else cp->sndbuf = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); } } cp->l2t_idx = l2t_idx; /* This will be initialized on ESTABLISHED. */ cp->emss = 0; } int negative_advice(int status) { return (status == CPL_ERR_RTX_NEG_ADVICE || status == CPL_ERR_PERSIST_NEG_ADVICE || status == CPL_ERR_KEEPALV_NEG_ADVICE); } static int alloc_tid_tab(struct tid_info *t, int flags) { MPASS(t->ntids > 0); MPASS(t->tid_tab == NULL); t->tid_tab = malloc(t->ntids * sizeof(*t->tid_tab), M_CXGBE, M_ZERO | flags); if (t->tid_tab == NULL) return (ENOMEM); atomic_store_rel_int(&t->tids_in_use, 0); return (0); } static void free_tid_tab(struct tid_info *t) { KASSERT(t->tids_in_use == 0, ("%s: %d tids still in use.", __func__, t->tids_in_use)); free(t->tid_tab, M_CXGBE); t->tid_tab = NULL; } static int alloc_stid_tab(struct tid_info *t, int flags) { MPASS(t->nstids > 0); MPASS(t->stid_tab == NULL); t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE, M_ZERO | flags); if (t->stid_tab == NULL) return (ENOMEM); mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); t->stids_in_use = 0; TAILQ_INIT(&t->stids); t->nstids_free_head = t->nstids; return (0); } static void free_stid_tab(struct tid_info *t) { KASSERT(t->stids_in_use == 0, ("%s: %d tids still in use.", __func__, t->stids_in_use)); if (mtx_initialized(&t->stid_lock)) mtx_destroy(&t->stid_lock); free(t->stid_tab, M_CXGBE); t->stid_tab = NULL; } static void free_tid_tabs(struct tid_info *t) { free_tid_tab(t); free_stid_tab(t); } static int alloc_tid_tabs(struct tid_info *t) { int rc; rc = alloc_tid_tab(t, M_NOWAIT); if (rc != 0) goto failed; rc = alloc_stid_tab(t, M_NOWAIT); if (rc != 0) goto failed; return (0); failed: free_tid_tabs(t); return (rc); } static inline void alloc_tcb_history(struct adapter *sc, struct tom_data *td) { if (sc->tids.ntids == 0 || sc->tids.ntids > 1024) return; rw_init(&td->tcb_history_lock, "TCB history"); td->tcb_history = malloc(sc->tids.ntids * sizeof(*td->tcb_history), M_CXGBE, M_ZERO | M_NOWAIT); td->dupack_threshold = G_DUPACKTHRESH(t4_read_reg(sc, A_TP_PARA_REG0)); } static inline void free_tcb_history(struct adapter *sc, struct tom_data *td) { #ifdef INVARIANTS int i; if (td->tcb_history != NULL) { for (i = 0; i < sc->tids.ntids; i++) { MPASS(td->tcb_history[i] == NULL); } } #endif free(td->tcb_history, M_CXGBE); if (rw_initialized(&td->tcb_history_lock)) rw_destroy(&td->tcb_history_lock); 
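	/*
	 * Note: the rw_destroy() above is conditional because
	 * alloc_tcb_history() skips rw_init() (and leaves tcb_history NULL)
	 * when the adapter has no tids or more than 1024 of them.
	 */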

static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{
        ASSERT_SYNCHRONIZED_OP(sc);

        KASSERT(TAILQ_EMPTY(&td->toep_list),
            ("%s: TOE PCB list is not empty.", __func__));
        KASSERT(td->lctx_count == 0,
            ("%s: lctx hash table is not empty.", __func__));

        t4_free_ppod_region(&td->pr);

        if (td->listen_mask != 0)
                hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);

        if (mtx_initialized(&td->unsent_wr_lock))
                mtx_destroy(&td->unsent_wr_lock);
        if (mtx_initialized(&td->lctx_hash_lock))
                mtx_destroy(&td->lctx_hash_lock);
        if (mtx_initialized(&td->toep_list_lock))
                mtx_destroy(&td->toep_list_lock);

        free_tcb_history(sc, td);
        free_tid_tabs(&sc->tids);
        free(td, M_CXGBE);
}

static char *
prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen,
    int *buflen)
{
        char *pkt;
        struct tcphdr *th;
        int ipv6, len;
        const int maxlen =
            max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) +
            max(sizeof(struct ip), sizeof(struct ip6_hdr)) +
            sizeof(struct tcphdr);

        MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN);

        pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT);
        if (pkt == NULL)
                return (NULL);

        ipv6 = inp->inp_vflag & INP_IPV6;
        len = 0;

        if (EVL_VLANOFTAG(vtag) == 0xfff) {
                struct ether_header *eh = (void *)pkt;

                if (ipv6)
                        eh->ether_type = htons(ETHERTYPE_IPV6);
                else
                        eh->ether_type = htons(ETHERTYPE_IP);
                len += sizeof(*eh);
        } else {
                struct ether_vlan_header *evh = (void *)pkt;

                evh->evl_encap_proto = htons(ETHERTYPE_VLAN);
                evh->evl_tag = htons(vtag);
                if (ipv6)
                        evh->evl_proto = htons(ETHERTYPE_IPV6);
                else
                        evh->evl_proto = htons(ETHERTYPE_IP);
                len += sizeof(*evh);
        }

        if (ipv6) {
                struct ip6_hdr *ip6 = (void *)&pkt[len];

                ip6->ip6_vfc = IPV6_VERSION;
                ip6->ip6_plen = htons(sizeof(struct tcphdr));
                ip6->ip6_nxt = IPPROTO_TCP;
                if (open_type == OPEN_TYPE_ACTIVE) {
                        ip6->ip6_src = inp->in6p_laddr;
                        ip6->ip6_dst = inp->in6p_faddr;
                } else if (open_type == OPEN_TYPE_LISTEN) {
                        ip6->ip6_src = inp->in6p_laddr;
                        ip6->ip6_dst = ip6->ip6_src;
                }
                len += sizeof(*ip6);
        } else {
                struct ip *ip = (void *)&pkt[len];

                ip->ip_v = IPVERSION;
                ip->ip_hl = sizeof(*ip) >> 2;
                ip->ip_tos = inp->inp_ip_tos;
                ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr));
                ip->ip_ttl = inp->inp_ip_ttl;
                ip->ip_p = IPPROTO_TCP;
                if (open_type == OPEN_TYPE_ACTIVE) {
                        ip->ip_src = inp->inp_laddr;
                        ip->ip_dst = inp->inp_faddr;
                } else if (open_type == OPEN_TYPE_LISTEN) {
                        ip->ip_src = inp->inp_laddr;
                        ip->ip_dst = ip->ip_src;
                }
                len += sizeof(*ip);
        }

        th = (void *)&pkt[len];
        if (open_type == OPEN_TYPE_ACTIVE) {
                th->th_sport = inp->inp_lport;  /* network byte order already */
                th->th_dport = inp->inp_fport;  /* ditto */
        } else if (open_type == OPEN_TYPE_LISTEN) {
                th->th_sport = inp->inp_lport;  /* network byte order already */
                th->th_dport = th->th_sport;
        }
        len += sizeof(th);

        *pktlen = *buflen = len;
        return (pkt);
}
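
/*
 * Illustrative note (not part of the driver): for the common case of an
 * untagged IPv4 connection, the synthetic packet built above has a fixed
 * layout, so a COP rule's BPF program sees the classic libpcap offsets:
 *
 *      bytes  0-13    struct ether_header  (only ether_type is filled in)
 *      bytes 14-33    struct ip            (20 bytes, no options)
 *      bytes 34-...   struct tcphdr        (only the ports are filled in)
 *
 * i.e. the IP protocol byte is at offset 23, the source/destination
 * addresses at offsets 26/30, and the TCP source/destination ports at
 * 34/36.  With an 802.1Q tag everything after the Ethernet header shifts
 * by 4 bytes.
 */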

const struct offload_settings *
lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m,
    uint16_t vtag, struct inpcb *inp)
{
        const struct t4_offload_policy *op;
        char *pkt;
        struct offload_rule *r;
        int i, matched, pktlen, buflen;
        static const struct offload_settings allow_offloading_settings = {
                .offload = 1,
                .rx_coalesce = -1,
                .cong_algo = -1,
                .sched_class = -1,
                .tstamp = -1,
                .sack = -1,
                .nagle = -1,
                .ecn = -1,
                .ddp = -1,
                .tls = -1,
                .txq = QUEUE_RANDOM,
                .rxq = QUEUE_RANDOM,
                .mss = -1,
        };
        static const struct offload_settings disallow_offloading_settings = {
                .offload = 0,
                /* rest is irrelevant when offload is off. */
        };

        rw_assert(&sc->policy_lock, RA_LOCKED);

        /*
         * If there's no Connection Offloading Policy attached to the device
         * then we need to return a default static policy.  If
         * "cop_managed_offloading" is true, then we need to disallow
         * offloading until a COP is attached to the device.  Otherwise we
         * allow offloading ...
         */
        op = sc->policy;
        if (op == NULL) {
                if (sc->tt.cop_managed_offloading)
                        return (&disallow_offloading_settings);
                else
                        return (&allow_offloading_settings);
        }

        switch (open_type) {
        case OPEN_TYPE_ACTIVE:
        case OPEN_TYPE_LISTEN:
                pkt = prepare_pkt(open_type, vtag, inp, &pktlen, &buflen);
                break;
        case OPEN_TYPE_PASSIVE:
                MPASS(m != NULL);
                pkt = mtod(m, char *);
                MPASS(*pkt == CPL_PASS_ACCEPT_REQ);
                pkt += sizeof(struct cpl_pass_accept_req);
                pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req);
                buflen = m->m_len - sizeof(struct cpl_pass_accept_req);
                break;
        default:
                MPASS(0);
                return (&disallow_offloading_settings);
        }

        if (pkt == NULL || pktlen == 0 || buflen == 0)
                return (&disallow_offloading_settings);

        matched = 0;
        r = &op->rule[0];
        for (i = 0; i < op->nrules; i++, r++) {
                if (r->open_type != open_type &&
                    r->open_type != OPEN_TYPE_DONTCARE) {
                        continue;
                }
                matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen);
                if (matched)
                        break;
        }

        if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN)
                free(pkt, M_CXGBE);

        return (matched ? &r->settings : &disallow_offloading_settings);
}
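
/*
 * Illustrative sketch (not part of the driver): each COP rule carries a
 * classic BPF program that bpf_filter() above runs against the synthetic
 * (or received) packet; a nonzero return value means the rule matched.
 * The programs are normally generated from a pcap-style expression by the
 * userspace tooling, but a hand-rolled "tcp dst port 3260" over the
 * untagged IPv4 layout described earlier would look roughly like this:
 */
#if 0
static const struct bpf_insn example_iscsi_match[] = {
        BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),                  /* ether_type */
        BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 5),
        BPF_STMT(BPF_LD + BPF_B + BPF_ABS, 23),                  /* IP proto */
        BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 3),
        BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 36),                  /* TCP dport */
        BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 3260, 0, 1),
        BPF_STMT(BPF_RET + BPF_K, 0xffffffff),                   /* match */
        BPF_STMT(BPF_RET + BPF_K, 0),                            /* no match */
};
#endif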

static void
reclaim_wr_resources(void *arg, int count)
{
        struct tom_data *td = arg;
        STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list);
        struct cpl_act_open_req *cpl;
        u_int opcode, atid, tid;
        struct wrqe *wr;
        struct adapter *sc = td_adapter(td);

        mtx_lock(&td->unsent_wr_lock);
        STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe);
        mtx_unlock(&td->unsent_wr_lock);

        while ((wr = STAILQ_FIRST(&twr_list)) != NULL) {
                STAILQ_REMOVE_HEAD(&twr_list, link);

                cpl = wrtod(wr);
                opcode = GET_OPCODE(cpl);

                switch (opcode) {
                case CPL_ACT_OPEN_REQ:
                case CPL_ACT_OPEN_REQ6:
                        atid = G_TID_TID(be32toh(OPCODE_TID(cpl)));
                        CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid);
                        act_open_failure_cleanup(sc, atid, EHOSTUNREACH);
                        free(wr, M_CXGBE);
                        break;
                case CPL_PASS_ACCEPT_RPL:
                        tid = GET_TID(cpl);
                        CTR2(KTR_CXGBE, "%s: tid %u ", __func__, tid);
                        synack_failure_cleanup(sc, tid);
                        free(wr, M_CXGBE);
                        break;
                default:
                        log(LOG_ERR, "%s: leaked work request %p, wr_len %d, "
                            "opcode %x\n", __func__, wr, wr->wr_len, opcode);
                        /* WR not freed here; go look at it with a debugger. */
                }
        }
}

/*
 * Ground control to Major TOM
 * Commencing countdown, engines on
 */
static int
t4_tom_activate(struct adapter *sc)
{
        struct tom_data *td;
        struct toedev *tod;
        struct vi_info *vi;
        int i, rc, v;

        ASSERT_SYNCHRONIZED_OP(sc);

        /* per-adapter softc for TOM */
        td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
        if (td == NULL)
                return (ENOMEM);

        /* List of TOE PCBs and associated lock */
        mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
        TAILQ_INIT(&td->toep_list);

        /* Listen context */
        mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
        td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE,
            &td->listen_mask, HASH_NOWAIT);

        /* List of WRs for which L2 resolution failed */
        mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF);
        STAILQ_INIT(&td->unsent_wr_list);
        TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td);

        /* TID tables */
        rc = alloc_tid_tabs(&sc->tids);
        if (rc != 0)
                goto done;

        rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp,
            t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods");
        if (rc != 0)
                goto done;
        t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK,
            V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask);

        alloc_tcb_history(sc, td);

        /* toedev ops */
        tod = &td->tod;
        init_toedev(tod);
        tod->tod_softc = sc;
        tod->tod_connect = t4_connect;
        tod->tod_listen_start = t4_listen_start;
        tod->tod_listen_stop = t4_listen_stop;
        tod->tod_rcvd = t4_rcvd;
        tod->tod_output = t4_tod_output;
        tod->tod_send_rst = t4_send_rst;
        tod->tod_send_fin = t4_send_fin;
        tod->tod_pcb_detach = t4_pcb_detach;
        tod->tod_l2_update = t4_l2_update;
        tod->tod_syncache_added = t4_syncache_added;
        tod->tod_syncache_removed = t4_syncache_removed;
        tod->tod_syncache_respond = t4_syncache_respond;
        tod->tod_offload_socket = t4_offload_socket;
        tod->tod_ctloutput = t4_ctloutput;
        tod->tod_tcp_info = t4_tcp_info;
#ifdef KERN_TLS
        tod->tod_alloc_tls_session = t4_alloc_tls_session;
#endif
        tod->tod_pmtu_update = t4_pmtu_update;

        for_each_port(sc, i) {
                for_each_vi(sc->port[i], v, vi) {
                        SETTOEDEV(vi->ifp, &td->tod);
                }
        }

        sc->tom_softc = td;
        register_toedev(sc->tom_softc);

done:
        if (rc != 0)
                free_tom_data(sc, td);
        return (rc);
}

static int
t4_tom_deactivate(struct adapter *sc)
{
        int rc = 0;
        struct tom_data *td = sc->tom_softc;

        ASSERT_SYNCHRONIZED_OP(sc);

        if (td == NULL)
                return (0);     /* XXX. KASSERT? */

        if (sc->offload_map != 0)
                return (EBUSY); /* at least one port has IFCAP_TOE enabled */

        if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI))
                return (EBUSY); /* both iWARP and iSCSI rely on the TOE. */

        mtx_lock(&td->toep_list_lock);
        if (!TAILQ_EMPTY(&td->toep_list))
                rc = EBUSY;
        mtx_unlock(&td->toep_list_lock);

        mtx_lock(&td->lctx_hash_lock);
        if (td->lctx_count > 0)
                rc = EBUSY;
        mtx_unlock(&td->lctx_hash_lock);

        taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources);
        mtx_lock(&td->unsent_wr_lock);
        if (!STAILQ_EMPTY(&td->unsent_wr_list))
                rc = EBUSY;
        mtx_unlock(&td->unsent_wr_lock);

        if (rc == 0) {
                unregister_toedev(sc->tom_softc);
                free_tom_data(sc, td);
                sc->tom_softc = NULL;
        }

        return (rc);
}

static int
t4_ctloutput_tom(struct socket *so, struct sockopt *sopt)
{
        struct tcpcb *tp = sototcpcb(so);
        struct toepcb *toep = tp->t_toe;
        int error, optval;

        if (sopt->sopt_level == IPPROTO_TCP && sopt->sopt_name == TCP_USE_DDP) {
                if (sopt->sopt_dir != SOPT_SET)
                        return (EOPNOTSUPP);

                if (sopt->sopt_td != NULL) {
                        /* Only settable by the kernel. */
                        return (EPERM);
                }

                error = sooptcopyin(sopt, &optval, sizeof(optval),
                    sizeof(optval));
                if (error != 0)
                        return (error);

                if (optval != 0)
                        return (t4_enable_ddp_rcv(so, toep));
                else
                        return (EOPNOTSUPP);
        }
        return (tcp_ctloutput(so, sopt));
}

static int
t4_aio_queue_tom(struct socket *so, struct kaiocb *job)
{
        struct tcpcb *tp = sototcpcb(so);
        struct toepcb *toep = tp->t_toe;
        int error;

        /*
         * No lock is needed as TOE sockets never change between
         * active and passive.
         */
        if (SOLISTENING(so))
                return (EINVAL);

        if (ulp_mode(toep) == ULP_MODE_TCPDDP ||
            ulp_mode(toep) == ULP_MODE_NONE) {
                error = t4_aio_queue_ddp(so, job);
                if (error != EOPNOTSUPP)
                        return (error);
        }

        return (t4_aio_queue_aiotx(so, job));
}

static int
t4_tom_mod_load(void)
{
        /* CPL handlers */
        t4_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
        t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl2,
            CPL_COOKIE_TOM);
        t4_init_connect_cpl_handlers();
        t4_init_listen_cpl_handlers();
        t4_init_cpl_io_handlers();

        t4_ddp_mod_load();
        t4_tls_mod_load();

        bcopy(&tcp_protosw, &toe_protosw, sizeof(toe_protosw));
        toe_protosw.pr_ctloutput = t4_ctloutput_tom;
        toe_protosw.pr_aio_queue = t4_aio_queue_tom;

        bcopy(&tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw));
        toe6_protosw.pr_ctloutput = t4_ctloutput_tom;
        toe6_protosw.pr_aio_queue = t4_aio_queue_tom;

        return (t4_register_uld(&tom_uld_info));
}

static void
tom_uninit(struct adapter *sc, void *arg __unused)
{
        if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun"))
                return;

        /* Try to free resources (works only if no port has IFCAP_TOE) */
        if (uld_active(sc, ULD_TOM))
                t4_deactivate_uld(sc, ULD_TOM);

        end_synchronized_op(sc, 0);
}

static int
t4_tom_mod_unload(void)
{
        t4_iterate(tom_uninit, NULL);

        if (t4_unregister_uld(&tom_uld_info) == EBUSY)
                return (EBUSY);

        t4_tls_mod_unload();
        t4_ddp_mod_unload();

        t4_uninit_connect_cpl_handlers();
        t4_uninit_listen_cpl_handlers();
        t4_uninit_cpl_io_handlers();

        t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, NULL, CPL_COOKIE_TOM);
        t4_register_cpl_handler(CPL_GET_TCB_RPL, NULL);

        return (0);
}
#endif  /* TCP_OFFLOAD */

static int
t4_tom_modevent(module_t mod, int cmd, void *arg)
{
        int rc = 0;

#ifdef TCP_OFFLOAD
        switch (cmd) {
        case MOD_LOAD:
                rc = t4_tom_mod_load();
                break;
        case MOD_UNLOAD:
                rc = t4_tom_mod_unload();
                break;
        default:
                rc = EINVAL;
        }
#else
        printf("t4_tom: compiled without TCP_OFFLOAD support.\n");
        rc = EOPNOTSUPP;
#endif

        return (rc);
}

static moduledata_t t4_tom_moddata = {
        "t4_tom",
        t4_tom_modevent,
        0
};

MODULE_VERSION(t4_tom, 1);
MODULE_DEPEND(t4_tom, toecore, 1, 1, 1);
MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1);
DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);
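
/*
 * Illustrative sketch (not part of the driver): a Connection Offload Policy
 * arrives from userspace as a struct t4_offload_policy, i.e. an ordered
 * array of rules that lookup_offload_policy() walks.  Reusing the example
 * BPF program from earlier, a minimal hand-built policy that offloads only
 * matching iSCSI connections (connections that match no rule are left
 * un-offloaded) might look roughly like this; the exact structure layout is
 * assumed from the field accesses above and from t4_ioctl.h.
 */
#if 0
static struct offload_rule example_rules[] = {
        {
                .open_type = OPEN_TYPE_DONTCARE, /* active, passive, or listen */
                .bpf_prog = {
                        .bf_len = nitems(example_iscsi_match),
                        .bf_insns = __DECONST(struct bpf_insn *,
                            example_iscsi_match),
                },
                .settings = {
                        .offload = 1,           /* offload matching connections */
                        .rx_coalesce = -1, .cong_algo = -1, .sched_class = -1,
                        .tstamp = -1, .sack = -1, .nagle = -1, .ecn = -1,
                        .ddp = -1, .tls = -1, .mss = -1,
                        .txq = QUEUE_RANDOM, .rxq = QUEUE_RANDOM,
                },
        },
};
#endif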