Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/in_pcb.h
Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | |||||
#include <sys/epoch.h> | #include <sys/epoch.h> | ||||
#include <sys/_lock.h> | #include <sys/_lock.h> | ||||
#include <sys/_mutex.h> | #include <sys/_mutex.h> | ||||
#include <sys/_rwlock.h> | #include <sys/_rwlock.h> | ||||
#include <net/route.h> | #include <net/route.h> | ||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/proc.h> | |||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/smr.h> | |||||
#include <net/vnet.h> | #include <net/vnet.h> | ||||
#include <vm/uma.h> | #include <vm/uma.h> | ||||
#endif | #endif | ||||
#include <sys/ck.h> | #include <sys/ck.h> | ||||
/* | /* | ||||
* struct inpcb is the common protocol control block structure used in most | * struct inpcb is the common protocol control block structure used in most | ||||
* IP transport protocols. | * IP transport protocols. | ||||
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | |||||
#define inc6_laddr inc_ie.ie6_laddr | #define inc6_laddr inc_ie.ie6_laddr | ||||
#define inc6_zoneid inc_ie.ie6_zoneid | #define inc6_zoneid inc_ie.ie6_zoneid | ||||
#if defined(_KERNEL) || defined(_WANT_INPCB) | #if defined(_KERNEL) || defined(_WANT_INPCB) | ||||
/* | /* | ||||
* struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and | * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and | ||||
* IPv6 sockets. In the case of TCP and UDP, further per-connection state is | * IPv6 sockets. In the case of TCP and UDP, further per-connection state is | ||||
* hung off of inp_ppcb most of the time. Almost all fields of struct inpcb | * hung off of inp_ppcb most of the time. Almost all fields of struct inpcb | ||||
* are static after creation or protected by a per-inpcb rwlock, inp_lock. A | * are static after creation or protected by a per-inpcb rwlock, inp_lock. | ||||
* few fields are protected by multiple locks as indicated in the locking notes | |||||
* below. For these fields, all of the listed locks must be write-locked for | |||||
* any modifications. However, these fields can be safely read while any one of | |||||
* the listed locks are read-locked. This model can permit greater concurrency | |||||
* for read operations. For example, connections can be looked up while only | |||||
* holding a read lock on the global pcblist lock. This is important for | |||||
* performance when attempting to find the connection for a packet given its IP | |||||
* and port tuple. | |||||
* | * | ||||
* One noteworthy exception is that the global pcbinfo lock follows a different | * An inpcb database is indexed by addresses/ports hash as well as list of | ||||
* set of rules in relation to the inp_list field. Rather than being | * all pcbs that belong to a certain proto. Database lookups or list traversals | ||||
* write-locked for modifications and read-locked for list iterations, it must | * are performed inside an SMR section. Once the desired PCB is found, its own | ||||
* be read-locked during modifications and write-locked during list iterations. | * lock is to be obtained and the SMR section exited. | ||||
* This ensures that the relatively rare global list iterations safely walk a | |||||
* stable snapshot of connections while allowing more common list modifications | |||||
* to safely grab the pcblist lock just while adding or removing a connection | |||||
* from the global list. | |||||
* | * | ||||
* Key: | * Key: | ||||
* (b) - Protected by the hpts lock. | * (b) - Protected by the hpts lock. | ||||
* (c) - Constant after initialization | * (c) - Constant after initialization | ||||
* (e) - Protected by the net_epoch_prempt epoch | * (e) - Protected by the SMR section | ||||
* (i) - Protected by the inpcb lock | * (i) - Protected by the inpcb lock | ||||
* (p) - Protected by the pcbinfo lock for the inpcb | * (p) - Protected by the pcbinfo lock for the inpcb | ||||
* (l) - Protected by the pcblist lock for the inpcb | |||||
* (h) - Protected by the pcbhash lock for the inpcb | * (h) - Protected by the pcbhash lock for the inpcb | ||||
* (s) - Protected by another subsystem's locks | * (s) - Protected by another subsystem's locks | ||||
* (x) - Undefined locking | * (x) - Undefined locking | ||||
* | * | ||||
* Notes on the tcp_hpts: | * Notes on the tcp_hpts: | ||||
* | * | ||||
* First Hpts lock order is | * First Hpts lock order is | ||||
* 1) INP_WLOCK() | * 1) INP_WLOCK() | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | |||||
* to a field, a write lock must generally be held. | * to a field, a write lock must generally be held. | ||||
* | * | ||||
* netinet/netinet6-layer code should not assume that the inp_socket pointer | * netinet/netinet6-layer code should not assume that the inp_socket pointer | ||||
* is safe to dereference without inp_lock being held, even for protocols | * is safe to dereference without inp_lock being held, even for protocols | ||||
* other than TCP (where the inpcb persists during TIMEWAIT even after the | * other than TCP (where the inpcb persists during TIMEWAIT even after the | ||||
* socket has been freed), or there may be close(2)-related races. | * socket has been freed), or there may be close(2)-related races. | ||||
* | * | ||||
* The inp_vflag field is overloaded, and would otherwise ideally be (c). | * The inp_vflag field is overloaded, and would otherwise ideally be (c). | ||||
* | |||||
* TODO: Currently only the TCP stack is leveraging the global pcbinfo lock | |||||
* read-lock usage during modification, this model can be applied to other | |||||
* protocols (especially SCTP). | |||||
*/ | */ | ||||
struct icmp6_filter; | struct icmp6_filter; | ||||
struct inpcbpolicy; | struct inpcbpolicy; | ||||
struct m_snd_tag; | struct m_snd_tag; | ||||
struct inpcb { | struct inpcb { | ||||
/* Cache line #1 (amd64) */ | /* Cache line #1 (amd64) */ | ||||
CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */ | CK_LIST_ENTRY(inpcb) inp_hash; /* (w:h/r:e) hash list */ | ||||
struct rwlock inp_lock; | struct rwlock inp_lock; | ||||
/* Cache line #2 (amd64) */ | /* Cache line #2 (amd64) */ | ||||
#define inp_start_zero inp_hpts | #define inp_start_zero inp_hpts | ||||
#define inp_zero_size (sizeof(struct inpcb) - \ | #define inp_zero_size (sizeof(struct inpcb) - \ | ||||
offsetof(struct inpcb, inp_start_zero)) | offsetof(struct inpcb, inp_start_zero)) | ||||
TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */ | TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */ | ||||
uint32_t inp_hpts_request; /* Current hpts request, zero if | uint32_t inp_hpts_request; /* Current hpts request, zero if | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | struct { | ||||
/* (i) IP multicast options */ | /* (i) IP multicast options */ | ||||
struct ip6_moptions *in6p_moptions; | struct ip6_moptions *in6p_moptions; | ||||
/* (i) ICMPv6 code type filter */ | /* (i) ICMPv6 code type filter */ | ||||
struct icmp6_filter *in6p_icmp6filt; | struct icmp6_filter *in6p_icmp6filt; | ||||
/* (i) IPV6_CHECKSUM setsockopt */ | /* (i) IPV6_CHECKSUM setsockopt */ | ||||
int in6p_cksum; | int in6p_cksum; | ||||
short in6p_hops; | short in6p_hops; | ||||
}; | }; | ||||
CK_LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */ | CK_LIST_ENTRY(inpcb) inp_portlist; /* (r:e/w:h) port list */ | ||||
struct inpcbport *inp_phd; /* (i/h) head of this list */ | struct inpcbport *inp_phd; /* (r:e/w:h) head of this list */ | ||||
inp_gen_t inp_gencnt; /* (c) generation count */ | inp_gen_t inp_gencnt; /* (c) generation count */ | ||||
void *spare_ptr; /* Spare pointer. */ | void *spare_ptr; /* Spare pointer. */ | ||||
rt_gen_t inp_rt_cookie; /* generation for route entry */ | rt_gen_t inp_rt_cookie; /* generation for route entry */ | ||||
union { /* cached L3 information */ | union { /* cached L3 information */ | ||||
struct route inp_route; | struct route inp_route; | ||||
struct route_in6 inp_route6; | struct route_in6 inp_route6; | ||||
}; | }; | ||||
CK_LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */ | CK_LIST_ENTRY(inpcb) inp_list; /* (r:e/w:p) all PCBs for proto */ | ||||
/* (e[r]) for list iteration */ | |||||
/* (p[w]/l) for addition/removal */ | |||||
struct epoch_context inp_epoch_ctx; | |||||
}; | }; | ||||
#endif /* _KERNEL */ | #endif /* _KERNEL */ | ||||
#define inp_fport inp_inc.inc_fport | #define inp_fport inp_inc.inc_fport | ||||
#define inp_lport inp_inc.inc_lport | #define inp_lport inp_inc.inc_lport | ||||
#define inp_faddr inp_inc.inc_faddr | #define inp_faddr inp_inc.inc_faddr | ||||
#define inp_laddr inp_inc.inc_laddr | #define inp_laddr inp_inc.inc_laddr | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | struct xinpgen { | ||||
so_gen_t xig_sogen; /* socket generation count this time */ | so_gen_t xig_sogen; /* socket generation count this time */ | ||||
uint64_t _xig_spare64[4]; | uint64_t _xig_spare64[4]; | ||||
} __aligned(8); | } __aligned(8); | ||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *); | void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *); | ||||
#endif | #endif | ||||
#endif /* _SYS_SOCKETVAR_H_ */ | #endif /* _SYS_SOCKETVAR_H_ */ | ||||
struct inpcbport { | #ifdef _KERNEL | ||||
struct epoch_context phd_epoch_ctx; | /* | ||||
CK_LIST_ENTRY(inpcbport) phd_hash; | |||||
struct inpcbhead phd_pcblist; | |||||
u_short phd_port; | |||||
}; | |||||
/*- | |||||
* Global data structure for each high-level protocol (UDP, TCP, ...) in both | * Global data structure for each high-level protocol (UDP, TCP, ...) in both | ||||
* IPv4 and IPv6. Holds inpcb lists and information for managing them. | * IPv4 and IPv6. Holds inpcb lists and information for managing them. | ||||
* | * | ||||
* Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and | * The pcbs are protected with an SMR section and thus all lists in inpcbinfo | ||||
* ipi_list_lock: | * are CK-lists. Locking is required to insert a pcb into the database. Two | ||||
* - ipi_lock covering the global pcb list stability during loop iteration, | * locks are provided: one for the hash and one for the global list of pcbs, | ||||
* - ipi_hash_lock covering the hashed lookup tables, | * as well as overall count and generation count. | ||||
* - ipi_list_lock covering mutable global fields (such as the global | |||||
* pcb list) | |||||
* | * | ||||
* The lock order is: | |||||
* | |||||
* ipi_lock (before) | |||||
* inpcb locks (before) | |||||
* ipi_list locks (before) | |||||
* | |||||
* Locking key: | * Locking key: | ||||
* | * | ||||
* (c) Constant or nearly constant after initialisation | * (c) Constant or nearly constant after initialisation | ||||
* (e) - Protected by the net_epoch_prempt epoch | * (e) Protected by SMR section | ||||
* (g) Locked by ipi_lock | * (g) Locked by ipi_lock | ||||
* (l) Locked by ipi_list_lock | * (h) Locked by ipi_hash_lock | ||||
* (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock | |||||
* (x) Synchronisation properties poorly defined | |||||
*/ | */ | ||||
struct inpcbinfo { | struct inpcbinfo { | ||||
/* | /* | ||||
* Global lock protecting inpcb list modification | * Global lock protecting inpcb list modification | ||||
*/ | */ | ||||
struct mtx ipi_lock; | struct mtx ipi_lock; | ||||
struct inpcbhead ipi_listhead; /* (r:e/w:g) */ | |||||
u_int ipi_count; /* (g) */ | |||||
/* | /* | ||||
* Global list of inpcbs on the protocol. | |||||
*/ | |||||
struct inpcbhead *ipi_listhead; /* [r](e) [w](g/l) */ | |||||
u_int ipi_count; /* (l) */ | |||||
/* | |||||
* Generation count -- incremented each time a connection is allocated | * Generation count -- incremented each time a connection is allocated | ||||
* or freed. | * or freed. | ||||
*/ | */ | ||||
u_quad_t ipi_gencnt; /* (l) */ | u_quad_t ipi_gencnt; /* (g) */ | ||||
/* | /* | ||||
* Fields associated with port lookup and allocation. | * Fields associated with port lookup and allocation. | ||||
*/ | */ | ||||
u_short ipi_lastport; /* (x) */ | u_short ipi_lastport; /* (h) */ | ||||
u_short ipi_lastlow; /* (x) */ | u_short ipi_lastlow; /* (h) */ | ||||
u_short ipi_lasthi; /* (x) */ | u_short ipi_lasthi; /* (h) */ | ||||
/* | /* | ||||
* UMA zone from which inpcbs are allocated for this protocol. | * UMA zone from which inpcbs are allocated for this protocol. | ||||
*/ | */ | ||||
struct uma_zone *ipi_zone; /* (c) */ | uma_zone_t ipi_zone; /* (c) */ | ||||
uma_zone_t ipi_portzone; /* (c) */ | |||||
smr_t ipi_smr; /* (c) */ | |||||
/* | /* | ||||
* Global lock protecting modification hash lookup tables. | |||||
*/ | |||||
struct mtx ipi_hash_lock; | |||||
/* | |||||
* Global hash of inpcbs, hashed by local and foreign addresses and | * Global hash of inpcbs, hashed by local and foreign addresses and | ||||
* port numbers. | * port numbers. | ||||
*/ | */ | ||||
struct inpcbhead *ipi_hashbase; /* (h) */ | struct mtx ipi_hash_lock; | ||||
u_long ipi_hashmask; /* (h) */ | struct inpcbhead *ipi_hashbase; /* (r:e/w:h) */ | ||||
u_long ipi_hashmask; /* (c) */ | |||||
/* | /* | ||||
* Global hash of inpcbs, hashed by only local port number. | * Global hash of inpcbs, hashed by only local port number. | ||||
*/ | */ | ||||
struct inpcbporthead *ipi_porthashbase; /* (h) */ | struct inpcbporthead *ipi_porthashbase; /* (h) */ | ||||
u_long ipi_porthashmask; /* (h) */ | u_long ipi_porthashmask; /* (h) */ | ||||
/* | /* | ||||
* Load balance groups used for the SO_REUSEPORT_LB option, | * Load balance groups used for the SO_REUSEPORT_LB option, | ||||
* hashed by local port. | * hashed by local port. | ||||
*/ | */ | ||||
struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (h) */ | struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (r:e/w:h) */ | ||||
u_long ipi_lbgrouphashmask; /* (h) */ | u_long ipi_lbgrouphashmask; /* (h) */ | ||||
/* | /* | ||||
* Pointer to network stack instance | * Pointer to network stack instance | ||||
*/ | */ | ||||
struct vnet *ipi_vnet; /* (c) */ | struct vnet *ipi_vnet; /* (c) */ | ||||
/* | |||||
* general use 2 | |||||
*/ | |||||
void *ipi_pspare[2]; | |||||
/* | |||||
* Global lock protecting global inpcb list, inpcb count, etc. | |||||
*/ | |||||
struct rwlock ipi_list_lock; | |||||
}; | }; | ||||
#ifdef _KERNEL | |||||
/* | /* | ||||
* Load balance groups used for the SO_REUSEPORT_LB socket option. Each group | * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group | ||||
* (or unique address:port combination) can be re-used at most | * (or unique address:port combination) can be re-used at most | ||||
* INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which | * INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which | ||||
* is dynamically resized as processes bind/unbind to that specific group. | * is dynamically resized as processes bind/unbind to that specific group. | ||||
*/ | */ | ||||
struct inpcblbgroup { | struct inpcblbgroup { | ||||
CK_LIST_ENTRY(inpcblbgroup) il_list; | CK_LIST_ENTRY(inpcblbgroup) il_list; | ||||
struct epoch_context il_epoch_ctx; | struct epoch_context il_epoch_ctx; | ||||
uint16_t il_lport; /* (c) */ | uint16_t il_lport; /* (c) */ | ||||
u_char il_vflag; /* (c) */ | u_char il_vflag; /* (c) */ | ||||
u_int8_t il_numa_domain; | u_int8_t il_numa_domain; | ||||
uint32_t il_pad2; | uint32_t il_pad2; | ||||
union in_dependaddr il_dependladdr; /* (c) */ | union in_dependaddr il_dependladdr; /* (c) */ | ||||
#define il_laddr il_dependladdr.id46_addr.ia46_addr4 | #define il_laddr il_dependladdr.id46_addr.ia46_addr4 | ||||
#define il6_laddr il_dependladdr.id6_addr | #define il6_laddr il_dependladdr.id6_addr | ||||
uint32_t il_inpsiz; /* max count in il_inp[] (h) */ | uint32_t il_inpsiz; /* max count in il_inp[] (h) */ | ||||
uint32_t il_inpcnt; /* cur count in il_inp[] (h) */ | uint32_t il_inpcnt; /* cur count in il_inp[] (h) */ | ||||
struct inpcb *il_inp[]; /* (h) */ | struct inpcb *il_inp[]; /* (h) */ | ||||
}; | }; | ||||
#define INP_LOCK_INIT(inp, d, t) \ | #define INP_LOCK_INIT(inp, d, t) \ | ||||
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK) | rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK) | ||||
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock) | #define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock) | ||||
#define INP_RLOCK(inp) rw_rlock(&(inp)->inp_lock) | #define INP_RLOCK(inp) rw_rlock(&(inp)->inp_lock) | ||||
#define INP_WLOCK(inp) rw_wlock(&(inp)->inp_lock) | #define INP_WLOCK(inp) rw_wlock(&(inp)->inp_lock) | ||||
#define INP_TRY_RLOCK(inp) rw_try_rlock(&(inp)->inp_lock) | #define INP_TRY_RLOCK(inp) rw_try_rlock(&(inp)->inp_lock) | ||||
#define INP_TRY_WLOCK(inp) rw_try_wlock(&(inp)->inp_lock) | #define INP_TRY_WLOCK(inp) rw_try_wlock(&(inp)->inp_lock) | ||||
#define INP_RUNLOCK(inp) rw_runlock(&(inp)->inp_lock) | #define INP_RUNLOCK(inp) rw_runlock(&(inp)->inp_lock) | ||||
#define INP_WUNLOCK(inp) rw_wunlock(&(inp)->inp_lock) | #define INP_WUNLOCK(inp) rw_wunlock(&(inp)->inp_lock) | ||||
#define INP_UNLOCK(inp) rw_unlock(&(inp)->inp_lock) | #define INP_UNLOCK(inp) rw_unlock(&(inp)->inp_lock) | ||||
Show All 31 Lines | |||||
struct tcpcb * | struct tcpcb * | ||||
inp_inpcbtotcpcb(struct inpcb *inp); | inp_inpcbtotcpcb(struct inpcb *inp); | ||||
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, | void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, | ||||
uint32_t *faddr, uint16_t *fp); | uint32_t *faddr, uint16_t *fp); | ||||
int inp_so_options(const struct inpcb *inp); | int inp_so_options(const struct inpcb *inp); | ||||
#endif /* _KERNEL */ | #endif /* _KERNEL */ | ||||
#define INP_INFO_LOCK_INIT(ipi, d) \ | |||||
mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE) | |||||
#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock) | |||||
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock) | #define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock) | ||||
#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock) | |||||
#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock) | #define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock) | ||||
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock) | #define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock) | ||||
#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock)) | #define INP_INFO_LOCK_ASSERT(ipi) MPASS(SMR_ENTERED((ipi)->ipi_smr) || \ | ||||
mtx_owned(&(ipi)->ipi_lock)) | |||||
#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED) | #define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED) | ||||
#define INP_INFO_WUNLOCK_ASSERT(ipi) \ | #define INP_INFO_WUNLOCK_ASSERT(ipi) \ | ||||
mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED) | mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED) | ||||
#define INP_LIST_LOCK_INIT(ipi, d) \ | |||||
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0) | |||||
#define INP_LIST_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_list_lock) | |||||
#define INP_LIST_RLOCK(ipi) rw_rlock(&(ipi)->ipi_list_lock) | |||||
#define INP_LIST_WLOCK(ipi) rw_wlock(&(ipi)->ipi_list_lock) | |||||
#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock) | |||||
#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock) | |||||
#define INP_LIST_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_list_lock) | |||||
#define INP_LIST_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_list_lock) | |||||
#define INP_LIST_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_list_lock) | |||||
#define INP_LIST_LOCK_ASSERT(ipi) \ | |||||
rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED) | |||||
#define INP_LIST_RLOCK_ASSERT(ipi) \ | |||||
rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED) | |||||
#define INP_LIST_WLOCK_ASSERT(ipi) \ | |||||
rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED) | |||||
#define INP_LIST_UNLOCK_ASSERT(ipi) \ | |||||
rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED) | |||||
#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF) | |||||
#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock) | |||||
#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock) | #define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock) | ||||
#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock) | #define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock) | ||||
#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock)) | #define INP_HASH_LOCK_ASSERT(ipi) MPASS(SMR_ENTERED((ipi)->ipi_smr) || \ | ||||
#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED); | mtx_owned(&(ipi)->ipi_hash_lock)) | ||||
#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, \ | |||||
MA_OWNED) | |||||
#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \ | |||||
MTX_DEF | MTX_DUPOK) | |||||
#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock) | |||||
#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock) | |||||
#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED) | |||||
#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock) | |||||
#define INP_PCBHASH(faddr, lport, fport, mask) \ | #define INP_PCBHASH(faddr, lport, fport, mask) \ | ||||
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) | (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) | ||||
#define INP_PCBPORTHASH(lport, mask) \ | #define INP_PCBPORTHASH(lport, mask) \ | ||||
(ntohs((lport)) & (mask)) | (ntohs((lport)) & (mask)) | ||||
#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \ | #define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \ | ||||
((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) | ((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) | ||||
#define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3]) | #define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3]) | ||||
Show All 11 Lines | |||||
#define INP_RECVRETOPTS 0x00000002 /* receive IP options for reply */ | #define INP_RECVRETOPTS 0x00000002 /* receive IP options for reply */ | ||||
#define INP_RECVDSTADDR 0x00000004 /* receive IP dst address */ | #define INP_RECVDSTADDR 0x00000004 /* receive IP dst address */ | ||||
#define INP_HDRINCL 0x00000008 /* user supplies entire IP header */ | #define INP_HDRINCL 0x00000008 /* user supplies entire IP header */ | ||||
#define INP_HIGHPORT 0x00000010 /* user wants "high" port binding */ | #define INP_HIGHPORT 0x00000010 /* user wants "high" port binding */ | ||||
#define INP_LOWPORT 0x00000020 /* user wants "low" port binding */ | #define INP_LOWPORT 0x00000020 /* user wants "low" port binding */ | ||||
#define INP_ANONPORT 0x00000040 /* port chosen for user */ | #define INP_ANONPORT 0x00000040 /* port chosen for user */ | ||||
#define INP_RECVIF 0x00000080 /* receive incoming interface */ | #define INP_RECVIF 0x00000080 /* receive incoming interface */ | ||||
#define INP_MTUDISC 0x00000100 /* user can do MTU discovery */ | #define INP_MTUDISC 0x00000100 /* user can do MTU discovery */ | ||||
/* 0x000200 unused: was INP_FAITH */ | /* INP_FREED 0x00000200 private to in_pcb.c */ | ||||
#define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */ | #define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */ | ||||
#define INP_DONTFRAG 0x00000800 /* don't fragment packet */ | #define INP_DONTFRAG 0x00000800 /* don't fragment packet */ | ||||
#define INP_BINDANY 0x00001000 /* allow bind to any address */ | #define INP_BINDANY 0x00001000 /* allow bind to any address */ | ||||
#define INP_INHASHLIST 0x00002000 /* in_pcbinshash() has been called */ | #define INP_INHASHLIST 0x00002000 /* in_pcbinshash() has been called */ | ||||
#define INP_RECVTOS 0x00004000 /* receive incoming IP TOS */ | #define INP_RECVTOS 0x00004000 /* receive incoming IP TOS */ | ||||
#define IN6P_IPV6_V6ONLY 0x00008000 /* restrict AF_INET6 socket for v6 */ | #define IN6P_IPV6_V6ONLY 0x00008000 /* restrict AF_INET6 socket for v6 */ | ||||
#define IN6P_PKTINFO 0x00010000 /* receive IP6 dst and I/F */ | #define IN6P_PKTINFO 0x00010000 /* receive IP6 dst and I/F */ | ||||
#define IN6P_HOPLIMIT 0x00020000 /* receive hoplimit */ | #define IN6P_HOPLIMIT 0x00020000 /* receive hoplimit */ | ||||
Show All 21 Lines | |||||
/* | /* | ||||
* Flags for inp_flags2. | * Flags for inp_flags2. | ||||
*/ | */ | ||||
#define INP_MBUF_L_ACKS 0x00000001 /* We need large mbufs for ack compression */ | #define INP_MBUF_L_ACKS 0x00000001 /* We need large mbufs for ack compression */ | ||||
#define INP_MBUF_ACKCMP 0x00000002 /* TCP mbuf ack compression ok */ | #define INP_MBUF_ACKCMP 0x00000002 /* TCP mbuf ack compression ok */ | ||||
/* 0x00000004 */ | /* 0x00000004 */ | ||||
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */ | #define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */ | ||||
#define INP_FREED 0x00000010 /* inp itself is not valid */ | /* 0x00000010 */ | ||||
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */ | #define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */ | ||||
#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */ | #define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */ | ||||
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */ | #define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */ | ||||
#define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */ | #define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */ | ||||
#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */ | #define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */ | ||||
#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ | #define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ | ||||
#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */ | #define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */ | ||||
#define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */ | #define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */ | ||||
#define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */ | #define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */ | ||||
#define INP_SUPPORTS_MBUFQ 0x00004000 /* Supports the mbuf queue method of LRO */ | #define INP_SUPPORTS_MBUFQ 0x00004000 /* Supports the mbuf queue method of LRO */ | ||||
#define INP_MBUF_QUEUE_READY 0x00008000 /* The transport is pacing, inputs can be queued */ | #define INP_MBUF_QUEUE_READY 0x00008000 /* The transport is pacing, inputs can be queued */ | ||||
#define INP_DONT_SACK_QUEUE 0x00010000 /* If a sack arrives do not wake me */ | #define INP_DONT_SACK_QUEUE 0x00010000 /* If a sack arrives do not wake me */ | ||||
#define INP_2PCP_SET 0x00020000 /* If the Eth PCP should be set explicitly */ | #define INP_2PCP_SET 0x00020000 /* If the Eth PCP should be set explicitly */ | ||||
#define INP_2PCP_BIT0 0x00040000 /* Eth PCP Bit 0 */ | #define INP_2PCP_BIT0 0x00040000 /* Eth PCP Bit 0 */ | ||||
#define INP_2PCP_BIT1 0x00080000 /* Eth PCP Bit 1 */ | #define INP_2PCP_BIT1 0x00080000 /* Eth PCP Bit 1 */ | ||||
#define INP_2PCP_BIT2 0x00100000 /* Eth PCP Bit 2 */ | #define INP_2PCP_BIT2 0x00100000 /* Eth PCP Bit 2 */ | ||||
#define INP_2PCP_BASE INP_2PCP_BIT0 | #define INP_2PCP_BASE INP_2PCP_BIT0 | ||||
#define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2) | #define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2) | ||||
#define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */ | #define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */ | ||||
/* | /* | ||||
* Flags passed to in_pcblookup*() functions. | * Flags passed to in_pcblookup*(), inp_smr_lock() and inp_next(). | ||||
*/ | */ | ||||
#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */ | typedef enum { | ||||
#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */ | INPLOOKUP_WILDCARD = 0x00000001, /* Allow wildcard sockets. */ | ||||
#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */ | INPLOOKUP_RLOCKPCB = 0x00000002, /* Return inpcb read-locked. */ | ||||
INPLOOKUP_WLOCKPCB = 0x00000004, /* Return inpcb write-locked. */ | |||||
} inp_lookup_t; | |||||
#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \ | #define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \ | ||||
INPLOOKUP_WLOCKPCB) | INPLOOKUP_WLOCKPCB) | ||||
#define INPLOOKUP_LOCKMASK (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB) | |||||
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) | #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) | ||||
#define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family | #define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family | ||||
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) | #define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) | ||||
/* | |||||
* Constants for pcbinfo.ipi_hashfields. | |||||
*/ | |||||
#define IPI_HASHFIELDS_NONE 0 | |||||
#define IPI_HASHFIELDS_2TUPLE 1 | |||||
#define IPI_HASHFIELDS_4TUPLE 2 | |||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
VNET_DECLARE(int, ipport_reservedhigh); | VNET_DECLARE(int, ipport_reservedhigh); | ||||
VNET_DECLARE(int, ipport_reservedlow); | VNET_DECLARE(int, ipport_reservedlow); | ||||
VNET_DECLARE(int, ipport_lowfirstauto); | VNET_DECLARE(int, ipport_lowfirstauto); | ||||
VNET_DECLARE(int, ipport_lowlastauto); | VNET_DECLARE(int, ipport_lowlastauto); | ||||
VNET_DECLARE(int, ipport_firstauto); | VNET_DECLARE(int, ipport_firstauto); | ||||
VNET_DECLARE(int, ipport_lastauto); | VNET_DECLARE(int, ipport_lastauto); | ||||
VNET_DECLARE(int, ipport_hifirstauto); | VNET_DECLARE(int, ipport_hifirstauto); | ||||
Show All 14 Lines | |||||
#define V_ipport_hilastauto VNET(ipport_hilastauto) | #define V_ipport_hilastauto VNET(ipport_hilastauto) | ||||
#define V_ipport_randomized VNET(ipport_randomized) | #define V_ipport_randomized VNET(ipport_randomized) | ||||
#define V_ipport_randomcps VNET(ipport_randomcps) | #define V_ipport_randomcps VNET(ipport_randomcps) | ||||
#define V_ipport_randomtime VNET(ipport_randomtime) | #define V_ipport_randomtime VNET(ipport_randomtime) | ||||
#define V_ipport_stoprandom VNET(ipport_stoprandom) | #define V_ipport_stoprandom VNET(ipport_stoprandom) | ||||
#define V_ipport_tcpallocs VNET(ipport_tcpallocs) | #define V_ipport_tcpallocs VNET(ipport_tcpallocs) | ||||
void in_pcbinfo_destroy(struct inpcbinfo *); | void in_pcbinfo_destroy(struct inpcbinfo *); | ||||
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *, | void in_pcbinfo_init(struct inpcbinfo *, const char *, u_int, int, char *, | ||||
int, int, char *, uma_init, u_int); | uma_init); | ||||
int in_pcbbind_check_bindmulti(const struct inpcb *ni, | int in_pcbbind_check_bindmulti(const struct inpcb *ni, | ||||
const struct inpcb *oi); | const struct inpcb *oi); | ||||
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); | void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); | ||||
int in_pcballoc(struct socket *, struct inpcbinfo *); | int in_pcballoc(struct socket *, struct inpcbinfo *); | ||||
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); | int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); | ||||
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, | int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, | ||||
Show All 15 Lines | in_pcblookup(struct inpcbinfo *, struct in_addr, u_int, | ||||
struct in_addr, u_int, int, struct ifnet *); | struct in_addr, u_int, int, struct ifnet *); | ||||
struct inpcb * | struct inpcb * | ||||
in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int, | in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int, | ||||
struct in_addr, u_int, int, struct ifnet *, struct mbuf *); | struct in_addr, u_int, int, struct ifnet *, struct mbuf *); | ||||
void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, | void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, | ||||
int, struct inpcb *(*)(struct inpcb *, int)); | int, struct inpcb *(*)(struct inpcb *, int)); | ||||
void in_pcbref(struct inpcb *); | void in_pcbref(struct inpcb *); | ||||
void in_pcbrehash(struct inpcb *); | void in_pcbrehash(struct inpcb *); | ||||
int in_pcbrele_rlocked(struct inpcb *); | bool in_pcbrele_rlocked(struct inpcb *); | ||||
int in_pcbrele_wlocked(struct inpcb *); | bool in_pcbrele_wlocked(struct inpcb *); | ||||
typedef bool inp_match_t(const struct inpcb *, void *); | |||||
struct inpcb_iterator { | |||||
const struct inpcbinfo *ipi; | |||||
struct inpcb *inp; | |||||
inp_match_t *match; | |||||
void *ctx; | |||||
int hash; | |||||
#define INP_ALL_LIST -1 | |||||
const inp_lookup_t lock; | |||||
}; | |||||
/* Note: sparse initializers guarantee .inp = NULL. */ | |||||
#define INP_ITERATOR(_ipi, _lock, _match, _ctx) \ | |||||
{ \ | |||||
.ipi = (_ipi), \ | |||||
.lock = (_lock), \ | |||||
.hash = INP_ALL_LIST, \ | |||||
.match = (_match), \ | |||||
.ctx = (_ctx), \ | |||||
} | |||||
#define INP_ALL_ITERATOR(_ipi, _lock) \ | |||||
{ \ | |||||
.ipi = (_ipi), \ | |||||
.lock = (_lock), \ | |||||
.hash = INP_ALL_LIST, \ | |||||
} | |||||
struct inpcb *inp_next(struct inpcb_iterator *); | |||||
void in_losing(struct inpcb *); | void in_losing(struct inpcb *); | ||||
void in_pcbsetsolabel(struct socket *so); | void in_pcbsetsolabel(struct socket *so); | ||||
int in_getpeeraddr(struct socket *so, struct sockaddr **nam); | int in_getpeeraddr(struct socket *so, struct sockaddr **nam); | ||||
int in_getsockaddr(struct socket *so, struct sockaddr **nam); | int in_getsockaddr(struct socket *so, struct sockaddr **nam); | ||||
struct sockaddr * | struct sockaddr * | ||||
in_sockaddr(in_port_t port, struct in_addr *addr); | in_sockaddr(in_port_t port, struct in_addr *addr); | ||||
void in_pcbsosetlabel(struct socket *so); | void in_pcbsosetlabel(struct socket *so); | ||||
#ifdef RATELIMIT | #ifdef RATELIMIT | ||||
Show All 16 Lines |