Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/in_pcb.h
Show First 20 Lines • Show All 150 Lines • ▼ Show 20 Lines | |||||
* stable snapshot of connections while allowing more common list modifications | * stable snapshot of connections while allowing more common list modifications | ||||
* to safely grab the pcblist lock just while adding or removing a connection | * to safely grab the pcblist lock just while adding or removing a connection | ||||
* from the global list. | * from the global list. | ||||
* | * | ||||
* Key: | * Key: | ||||
* (b) - Protected by the hpts lock. | * (b) - Protected by the hpts lock. | ||||
* (c) - Constant after initialization | * (c) - Constant after initialization | ||||
* (e) - Protected by the net_epoch_preempt epoch | * (e) - Protected by the net_epoch_preempt epoch | ||||
* (g) - Protected by the pcbgroup lock | |||||
* (i) - Protected by the inpcb lock | * (i) - Protected by the inpcb lock | ||||
* (p) - Protected by the pcbinfo lock for the inpcb | * (p) - Protected by the pcbinfo lock for the inpcb | ||||
* (l) - Protected by the pcblist lock for the inpcb | * (l) - Protected by the pcblist lock for the inpcb | ||||
* (h) - Protected by the pcbhash lock for the inpcb | * (h) - Protected by the pcbhash lock for the inpcb | ||||
* (s) - Protected by another subsystem's locks | * (s) - Protected by another subsystem's locks | ||||
* (x) - Undefined locking | * (x) - Undefined locking | ||||
* | * | ||||
* Notes on the tcp_hpts: | * Notes on the tcp_hpts: | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | |||||
* protocols (especially SCTP). | * protocols (especially SCTP). | ||||
*/ | */ | ||||
struct icmp6_filter; | struct icmp6_filter; | ||||
struct inpcbpolicy; | struct inpcbpolicy; | ||||
struct m_snd_tag; | struct m_snd_tag; | ||||
struct inpcb { | struct inpcb { | ||||
/* Cache line #1 (amd64) */ | /* Cache line #1 (amd64) */ | ||||
CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */ | CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */ | ||||
CK_LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ | |||||
struct rwlock inp_lock; | struct rwlock inp_lock; | ||||
/* Cache line #2 (amd64) */ | /* Cache line #2 (amd64) */ | ||||
#define inp_start_zero inp_hpts | #define inp_start_zero inp_hpts | ||||
#define inp_zero_size (sizeof(struct inpcb) - \ | #define inp_zero_size (sizeof(struct inpcb) - \ | ||||
offsetof(struct inpcb, inp_start_zero)) | offsetof(struct inpcb, inp_start_zero)) | ||||
TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */ | TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */ | ||||
uint32_t inp_hpts_request; /* Current hpts request, zero if | uint32_t inp_hpts_request; /* Current hpts request, zero if | ||||
Show All 28 Lines | volatile uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */ | ||||
inp_spare_bits2 : 3; | inp_spare_bits2 : 3; | ||||
uint8_t inp_numa_domain; /* numa domain */ | uint8_t inp_numa_domain; /* numa domain */ | ||||
void *inp_ppcb; /* (i) pointer to per-protocol pcb */ | void *inp_ppcb; /* (i) pointer to per-protocol pcb */ | ||||
struct socket *inp_socket; /* (i) back pointer to socket */ | struct socket *inp_socket; /* (i) back pointer to socket */ | ||||
uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */ | uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */ | ||||
uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */ | uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */ | ||||
TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */ | TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */ | ||||
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ | struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ | ||||
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */ | |||||
CK_LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */ | |||||
struct ucred *inp_cred; /* (c) cache of socket cred */ | struct ucred *inp_cred; /* (c) cache of socket cred */ | ||||
u_int32_t inp_flow; /* (i) IPv6 flow information */ | u_int32_t inp_flow; /* (i) IPv6 flow information */ | ||||
u_char inp_vflag; /* (i) IP version flag (v4/v6) */ | u_char inp_vflag; /* (i) IP version flag (v4/v6) */ | ||||
u_char inp_ip_ttl; /* (i) time to live proto */ | u_char inp_ip_ttl; /* (i) time to live proto */ | ||||
u_char inp_ip_p; /* (c) protocol proto */ | u_char inp_ip_p; /* (c) protocol proto */ | ||||
u_char inp_ip_minttl; /* (i) minimum TTL or drop */ | u_char inp_ip_minttl; /* (i) minimum TTL or drop */ | ||||
uint32_t inp_flowid; /* (x) flow id / queue id */ | uint32_t inp_flowid; /* (x) flow id / queue id */ | ||||
struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */ | struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */ | ||||
▲ Show 20 Lines • Show All 129 Lines • ▼ Show 20 Lines | |||||
* - ipi_list_lock covering mutable global fields (such as the global | * - ipi_list_lock covering mutable global fields (such as the global | ||||
* pcb list) | * pcb list) | ||||
* | * | ||||
* The lock order is: | * The lock order is: | ||||
* | * | ||||
* ipi_lock (before) | * ipi_lock (before) | ||||
* inpcb locks (before) | * inpcb locks (before) | ||||
* ipi_list locks (before) | * ipi_list locks (before) | ||||
* {ipi_hash_lock, pcbgroup locks} | |||||
* | * | ||||
* Locking key: | * Locking key: | ||||
* | * | ||||
* (c) Constant or nearly constant after initialisation | * (c) Constant or nearly constant after initialisation | ||||
* (e) - Protected by the net_epoch_preempt epoch | * (e) - Protected by the net_epoch_preempt epoch | ||||
* (g) Locked by ipi_lock | * (g) Locked by ipi_lock | ||||
* (l) Locked by ipi_list_lock | * (l) Locked by ipi_list_lock | ||||
* (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock | * (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock | ||||
* (p) Protected by one or more pcbgroup locks | |||||
* (x) Synchronisation properties poorly defined | * (x) Synchronisation properties poorly defined | ||||
*/ | */ | ||||
struct inpcbinfo { | struct inpcbinfo { | ||||
/* | /* | ||||
* Global lock protecting inpcb list modification | * Global lock protecting inpcb list modification | ||||
*/ | */ | ||||
struct mtx ipi_lock; | struct mtx ipi_lock; | ||||
Show All 17 Lines | struct inpcbinfo { | ||||
u_short ipi_lasthi; /* (x) */ | u_short ipi_lasthi; /* (x) */ | ||||
/* | /* | ||||
* UMA zone from which inpcbs are allocated for this protocol. | * UMA zone from which inpcbs are allocated for this protocol. | ||||
*/ | */ | ||||
struct uma_zone *ipi_zone; /* (c) */ | struct uma_zone *ipi_zone; /* (c) */ | ||||
/* | /* | ||||
* Connection groups associated with this protocol. These fields are | * Global lock protecting modification of hash lookup tables. | ||||
* constant, but pcbgroup structures themselves are protected by | |||||
* per-pcbgroup locks. | |||||
*/ | */ | ||||
struct inpcbgroup *ipi_pcbgroups; /* (c) */ | |||||
u_int ipi_npcbgroups; /* (c) */ | |||||
u_int ipi_hashfields; /* (c) */ | |||||
/* | |||||
* Global lock protecting modification non-pcbgroup hash lookup tables. | |||||
*/ | |||||
struct mtx ipi_hash_lock; | struct mtx ipi_hash_lock; | ||||
/* | /* | ||||
* Global hash of inpcbs, hashed by local and foreign addresses and | * Global hash of inpcbs, hashed by local and foreign addresses and | ||||
* port numbers. | * port numbers. | ||||
*/ | */ | ||||
struct inpcbhead *ipi_hashbase; /* (h) */ | struct inpcbhead *ipi_hashbase; /* (h) */ | ||||
u_long ipi_hashmask; /* (h) */ | u_long ipi_hashmask; /* (h) */ | ||||
/* | /* | ||||
* Global hash of inpcbs, hashed by only local port number. | * Global hash of inpcbs, hashed by only local port number. | ||||
*/ | */ | ||||
struct inpcbporthead *ipi_porthashbase; /* (h) */ | struct inpcbporthead *ipi_porthashbase; /* (h) */ | ||||
u_long ipi_porthashmask; /* (h) */ | u_long ipi_porthashmask; /* (h) */ | ||||
/* | /* | ||||
* List of wildcard inpcbs for use with pcbgroups. In the past, was | |||||
* per-pcbgroup but is now global. All pcbgroup locks must be held | |||||
* to modify the list, so any is sufficient to read it. | |||||
*/ | |||||
struct inpcbhead *ipi_wildbase; /* (p) */ | |||||
u_long ipi_wildmask; /* (p) */ | |||||
/* | |||||
* Load balance groups used for the SO_REUSEPORT_LB option, | * Load balance groups used for the SO_REUSEPORT_LB option, | ||||
* hashed by local port. | * hashed by local port. | ||||
*/ | */ | ||||
struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (h) */ | struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (h) */ | ||||
u_long ipi_lbgrouphashmask; /* (h) */ | u_long ipi_lbgrouphashmask; /* (h) */ | ||||
/* | /* | ||||
* Pointer to network stack instance | * Pointer to network stack instance | ||||
*/ | */ | ||||
struct vnet *ipi_vnet; /* (c) */ | struct vnet *ipi_vnet; /* (c) */ | ||||
/* | /* | ||||
* general use 2 | * general use 2 | ||||
*/ | */ | ||||
void *ipi_pspare[2]; | void *ipi_pspare[2]; | ||||
/* | /* | ||||
* Global lock protecting global inpcb list, inpcb count, etc. | * Global lock protecting global inpcb list, inpcb count, etc. | ||||
*/ | */ | ||||
struct rwlock ipi_list_lock; | struct rwlock ipi_list_lock; | ||||
}; | }; | ||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
/* | /* | ||||
* Connection groups hold sets of connections that have similar CPU/thread | |||||
* affinity. Each connection belongs to exactly one connection group. | |||||
*/ | |||||
struct inpcbgroup { | |||||
/* | |||||
* Per-connection group hash of inpcbs, hashed by local and foreign | |||||
* addresses and port numbers. | |||||
*/ | |||||
struct inpcbhead *ipg_hashbase; /* (c) */ | |||||
u_long ipg_hashmask; /* (c) */ | |||||
/* | |||||
* Notional affinity of this pcbgroup. | |||||
*/ | |||||
u_int ipg_cpu; /* (p) */ | |||||
/* | |||||
* Per-connection group lock, not to be confused with ipi_lock. | |||||
* Protects the hash table hung off the group, but also the global | |||||
* wildcard list in inpcbinfo. | |||||
*/ | |||||
struct mtx ipg_lock; | |||||
} __aligned(CACHE_LINE_SIZE); | |||||
/* | |||||
* Load balance groups used for the SO_REUSEPORT_LB socket option. Each group | * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group | ||||
* (or unique address:port combination) can be re-used at most | * (or unique address:port combination) can be re-used at most | ||||
* INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which | * INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which | ||||
* is dynamically resized as processes bind/unbind to that specific group. | * is dynamically resized as processes bind/unbind to that specific group. | ||||
*/ | */ | ||||
struct inpcblbgroup { | struct inpcblbgroup { | ||||
CK_LIST_ENTRY(inpcblbgroup) il_list; | CK_LIST_ENTRY(inpcblbgroup) il_list; | ||||
struct epoch_context il_epoch_ctx; | struct epoch_context il_epoch_ctx; | ||||
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines | #define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ | ||||
IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ | IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ | ||||
IN6P_MTU) | IN6P_MTU) | ||||
/* | /* | ||||
* Flags for inp_flags2. | * Flags for inp_flags2. | ||||
*/ | */ | ||||
#define INP_MBUF_L_ACKS 0x00000001 /* We need large mbufs for ack compression */ | #define INP_MBUF_L_ACKS 0x00000001 /* We need large mbufs for ack compression */ | ||||
#define INP_MBUF_ACKCMP 0x00000002 /* TCP mbuf ack compression ok */ | #define INP_MBUF_ACKCMP 0x00000002 /* TCP mbuf ack compression ok */ | ||||
#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */ | /* 0x00000004 */ | ||||
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */ | #define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */ | ||||
#define INP_FREED 0x00000010 /* inp itself is not valid */ | #define INP_FREED 0x00000010 /* inp itself is not valid */ | ||||
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */ | #define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */ | ||||
#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */ | #define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */ | ||||
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */ | #define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */ | ||||
#define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */ | #define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */ | ||||
#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */ | #define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */ | ||||
#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ | #define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | |||||
#define V_ipport_tcpallocs VNET(ipport_tcpallocs) | #define V_ipport_tcpallocs VNET(ipport_tcpallocs) | ||||
void in_pcbinfo_destroy(struct inpcbinfo *); | void in_pcbinfo_destroy(struct inpcbinfo *); | ||||
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *, | void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *, | ||||
int, int, char *, uma_init, u_int); | int, int, char *, uma_init, u_int); | ||||
int in_pcbbind_check_bindmulti(const struct inpcb *ni, | int in_pcbbind_check_bindmulti(const struct inpcb *ni, | ||||
const struct inpcb *oi); | const struct inpcb *oi); | ||||
struct inpcbgroup * | |||||
in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t); | |||||
struct inpcbgroup * | |||||
in_pcbgroup_byinpcb(struct inpcb *); | |||||
struct inpcbgroup * | |||||
in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short, | |||||
struct in_addr, u_short); | |||||
void in_pcbgroup_destroy(struct inpcbinfo *); | |||||
int in_pcbgroup_enabled(struct inpcbinfo *); | |||||
void in_pcbgroup_init(struct inpcbinfo *, u_int, int); | |||||
void in_pcbgroup_remove(struct inpcb *); | |||||
void in_pcbgroup_update(struct inpcb *); | |||||
void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *); | |||||
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); | void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); | ||||
int in_pcballoc(struct socket *, struct inpcbinfo *); | int in_pcballoc(struct socket *, struct inpcbinfo *); | ||||
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); | int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); | ||||
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, | int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, | ||||
u_short *, struct ucred *); | u_short *, struct ucred *); | ||||
int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); | int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); | ||||
int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *, | int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *, | ||||
▲ Show 20 Lines • Show All 50 Lines • Show Last 20 Lines |