Changeset View
Changeset View
Standalone View
Standalone View
head/sys/netinet/in_pcb.c
Show First 20 Lines • Show All 204 Lines • ▼ Show 20 Lines | SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, | ||||
CTLFLAG_VNET | CTLFLAG_RW, | CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(ipport_randomcps), 0, "Maximum number of random port " | &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port " | ||||
"allocations before switching to a sequental one"); | "allocations before switching to a sequental one"); | ||||
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, | SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, | ||||
CTLFLAG_VNET | CTLFLAG_RW, | CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(ipport_randomtime), 0, | &VNET_NAME(ipport_randomtime), 0, | ||||
"Minimum time to keep sequental port " | "Minimum time to keep sequental port " | ||||
"allocation before switching to a random one"); | "allocation before switching to a random one"); | ||||
#ifdef RATELIMIT | |||||
counter_u64_t rate_limit_active; | |||||
counter_u64_t rate_limit_alloc_fail; | |||||
counter_u64_t rate_limit_set_ok; | |||||
static SYSCTL_NODE(_net_inet_ip, OID_AUTO, rl, CTLFLAG_RD, 0, | |||||
"IP Rate Limiting"); | |||||
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, active, CTLFLAG_RD, | |||||
&rate_limit_active, "Active rate limited connections"); | |||||
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, alloc_fail, CTLFLAG_RD, | |||||
&rate_limit_alloc_fail, "Rate limited connection failures"); | |||||
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, set_ok, CTLFLAG_RD, | |||||
&rate_limit_set_ok, "Rate limited setting succeeded"); | |||||
#endif /* RATELIMIT */ | |||||
#endif /* INET */ | #endif /* INET */ | ||||
/* | /* | ||||
* in_pcb.c: manage the Protocol Control Blocks. | * in_pcb.c: manage the Protocol Control Blocks. | ||||
* | * | ||||
* NOTE: It is assumed that most of these functions will be called with | * NOTE: It is assumed that most of these functions will be called with | ||||
* the pcbinfo lock held, and often, the inpcb lock held, as these utility | * the pcbinfo lock held, and often, the inpcb lock held, as these utility | ||||
* functions often modify hash chains or addresses in pcbs. | * functions often modify hash chains or addresses in pcbs. | ||||
▲ Show 20 Lines • Show All 2,944 Lines • ▼ Show 20 Lines | |||||
* Modify TX rate limit based on the existing "inp->inp_snd_tag", | * Modify TX rate limit based on the existing "inp->inp_snd_tag", | ||||
* if any. | * if any. | ||||
*/ | */ | ||||
int | int | ||||
in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate) | in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate) | ||||
{ | { | ||||
union if_snd_tag_modify_params params = { | union if_snd_tag_modify_params params = { | ||||
.rate_limit.max_rate = max_pacing_rate, | .rate_limit.max_rate = max_pacing_rate, | ||||
.rate_limit.flags = M_NOWAIT, | |||||
}; | }; | ||||
struct m_snd_tag *mst; | struct m_snd_tag *mst; | ||||
struct ifnet *ifp; | struct ifnet *ifp; | ||||
int error; | int error; | ||||
mst = inp->inp_snd_tag; | mst = inp->inp_snd_tag; | ||||
if (mst == NULL) | if (mst == NULL) | ||||
return (EINVAL); | return (EINVAL); | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* Allocate a new TX rate limit send tag from the network interface | * Allocate a new TX rate limit send tag from the network interface | ||||
* given by the "ifp" argument and save it in "inp->inp_snd_tag": | * given by the "ifp" argument and save it in "inp->inp_snd_tag": | ||||
*/ | */ | ||||
int | int | ||||
in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, | in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, | ||||
uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate) | uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate, struct m_snd_tag **st) | ||||
{ | { | ||||
union if_snd_tag_alloc_params params = { | union if_snd_tag_alloc_params params = { | ||||
.rate_limit.hdr.type = (max_pacing_rate == -1U) ? | .rate_limit.hdr.type = (max_pacing_rate == -1U) ? | ||||
IF_SND_TAG_TYPE_UNLIMITED : IF_SND_TAG_TYPE_RATE_LIMIT, | IF_SND_TAG_TYPE_UNLIMITED : IF_SND_TAG_TYPE_RATE_LIMIT, | ||||
.rate_limit.hdr.flowid = flowid, | .rate_limit.hdr.flowid = flowid, | ||||
.rate_limit.hdr.flowtype = flowtype, | .rate_limit.hdr.flowtype = flowtype, | ||||
.rate_limit.max_rate = max_pacing_rate, | .rate_limit.max_rate = max_pacing_rate, | ||||
.rate_limit.flags = M_NOWAIT, | |||||
}; | }; | ||||
int error; | int error; | ||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
if (inp->inp_snd_tag != NULL) | if (*st != NULL) | ||||
return (EINVAL); | return (EINVAL); | ||||
if (ifp->if_snd_tag_alloc == NULL) { | if (ifp->if_snd_tag_alloc == NULL) { | ||||
error = EOPNOTSUPP; | error = EOPNOTSUPP; | ||||
} else { | } else { | ||||
error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag); | error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag); | ||||
if (error == 0) { | |||||
counter_u64_add(rate_limit_set_ok, 1); | |||||
counter_u64_add(rate_limit_active, 1); | |||||
} else | |||||
counter_u64_add(rate_limit_alloc_fail, 1); | |||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
void | |||||
in_pcbdetach_tag(struct ifnet *ifp, struct m_snd_tag *mst) | |||||
{ | |||||
if (ifp == NULL) | |||||
return; | |||||
/* | /* | ||||
* If the device was detached while we still had reference(s) | |||||
* on the ifp, we assume if_snd_tag_free() was replaced with | |||||
* stubs. | |||||
*/ | |||||
ifp->if_snd_tag_free(mst); | |||||
/* release reference count on network interface */ | |||||
if_rele(ifp); | |||||
counter_u64_add(rate_limit_active, -1); | |||||
} | |||||
/* | |||||
* Free an existing TX rate limit tag based on the "inp->inp_snd_tag", | * Free an existing TX rate limit tag based on the "inp->inp_snd_tag", | ||||
* if any: | * if any: | ||||
*/ | */ | ||||
void | void | ||||
in_pcbdetach_txrtlmt(struct inpcb *inp) | in_pcbdetach_txrtlmt(struct inpcb *inp) | ||||
{ | { | ||||
struct m_snd_tag *mst; | struct m_snd_tag *mst; | ||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
mst = inp->inp_snd_tag; | mst = inp->inp_snd_tag; | ||||
inp->inp_snd_tag = NULL; | inp->inp_snd_tag = NULL; | ||||
if (mst == NULL) | if (mst == NULL) | ||||
return; | return; | ||||
m_snd_tag_rele(mst); | m_snd_tag_rele(mst); | ||||
} | } | ||||
/* | int | ||||
* This function should be called when the INP_RATE_LIMIT_CHANGED flag | in_pcboutput_txrtlmt_locked(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb, uint32_t max_pacing_rate) | ||||
* is set in the fast path and will attach/detach/modify the TX rate | |||||
* limit send tag based on the socket's so_max_pacing_rate value. | |||||
*/ | |||||
void | |||||
in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb) | |||||
{ | { | ||||
struct socket *socket; | |||||
uint32_t max_pacing_rate; | |||||
bool did_upgrade; | |||||
int error; | int error; | ||||
if (inp == NULL) | |||||
return; | |||||
socket = inp->inp_socket; | |||||
if (socket == NULL) | |||||
return; | |||||
if (!INP_WLOCKED(inp)) { | |||||
/* | /* | ||||
* NOTE: If the write locking fails, we need to bail | |||||
* out and use the non-ratelimited ring for the | |||||
* transmit until there is a new chance to get the | |||||
* write lock. | |||||
*/ | |||||
if (!INP_TRY_UPGRADE(inp)) | |||||
return; | |||||
did_upgrade = 1; | |||||
} else { | |||||
did_upgrade = 0; | |||||
} | |||||
/* | |||||
* NOTE: The so_max_pacing_rate value is read unlocked, | |||||
* because atomic updates are not required since the variable | |||||
* is checked at every mbuf we send. It is assumed that the | |||||
* variable read itself will be atomic. | |||||
*/ | |||||
max_pacing_rate = socket->so_max_pacing_rate; | |||||
/* | |||||
* If the existing send tag is for the wrong interface due to | * If the existing send tag is for the wrong interface due to | ||||
* a route change, first drop the existing tag. Set the | * a route change, first drop the existing tag. Set the | ||||
* CHANGED flag so that we will keep trying to allocate a new | * CHANGED flag so that we will keep trying to allocate a new | ||||
* tag if we fail to allocate one this time. | * tag if we fail to allocate one this time. | ||||
*/ | */ | ||||
if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) { | if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) { | ||||
in_pcbdetach_txrtlmt(inp); | in_pcbdetach_txrtlmt(inp); | ||||
inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; | inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; | ||||
Show All 17 Lines | if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) { | ||||
* In order to utilize packet pacing with RSS, we need | * In order to utilize packet pacing with RSS, we need | ||||
* to wait until there is a valid RSS hash before we | * to wait until there is a valid RSS hash before we | ||||
* can proceed: | * can proceed: | ||||
*/ | */ | ||||
if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) { | if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) { | ||||
error = EAGAIN; | error = EAGAIN; | ||||
} else { | } else { | ||||
error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb), | error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb), | ||||
mb->m_pkthdr.flowid, max_pacing_rate); | mb->m_pkthdr.flowid, max_pacing_rate, &inp->inp_snd_tag); | ||||
} | } | ||||
} else { | } else { | ||||
error = in_pcbmodify_txrtlmt(inp, max_pacing_rate); | error = in_pcbmodify_txrtlmt(inp, max_pacing_rate); | ||||
} | } | ||||
if (error == 0 || error == EOPNOTSUPP) | if (error == 0 || error == EOPNOTSUPP) | ||||
inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; | inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; | ||||
return (error); | |||||
} | |||||
/* | |||||
* This function should be called when the INP_RATE_LIMIT_CHANGED flag | |||||
* is set in the fast path and will attach/detach/modify the TX rate | |||||
* limit send tag based on the socket's so_max_pacing_rate value. | |||||
*/ | |||||
void | |||||
in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb) | |||||
{ | |||||
struct socket *socket; | |||||
uint32_t max_pacing_rate; | |||||
bool did_upgrade; | |||||
int error; | |||||
if (inp == NULL) | |||||
return; | |||||
socket = inp->inp_socket; | |||||
if (socket == NULL) | |||||
return; | |||||
if (!INP_WLOCKED(inp)) { | |||||
/* | |||||
* NOTE: If the write locking fails, we need to bail | |||||
* out and use the non-ratelimited ring for the | |||||
* transmit until there is a new chance to get the | |||||
* write lock. | |||||
*/ | |||||
if (!INP_TRY_UPGRADE(inp)) | |||||
return; | |||||
did_upgrade = 1; | |||||
} else { | |||||
did_upgrade = 0; | |||||
} | |||||
/* | |||||
* NOTE: The so_max_pacing_rate value is read unlocked, | |||||
* because atomic updates are not required since the variable | |||||
* is checked at every mbuf we send. It is assumed that the | |||||
* variable read itself will be atomic. | |||||
*/ | |||||
max_pacing_rate = socket->so_max_pacing_rate; | |||||
error = in_pcboutput_txrtlmt_locked(inp, ifp, mb, max_pacing_rate); | |||||
if (did_upgrade) | if (did_upgrade) | ||||
INP_DOWNGRADE(inp); | INP_DOWNGRADE(inp); | ||||
} | } | ||||
/* | /* | ||||
* Track route changes for TX rate limiting. | * Track route changes for TX rate limiting. | ||||
*/ | */ | ||||
void | void | ||||
Show All 25 Lines | in_pcboutput_eagain(struct inpcb *inp) | ||||
in_pcbdetach_txrtlmt(inp); | in_pcbdetach_txrtlmt(inp); | ||||
/* make sure new mbuf send tag allocation is made */ | /* make sure new mbuf send tag allocation is made */ | ||||
inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; | inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; | ||||
if (did_upgrade) | if (did_upgrade) | ||||
INP_DOWNGRADE(inp); | INP_DOWNGRADE(inp); | ||||
} | } | ||||
static void | |||||
rl_init(void *st) | |||||
{ | |||||
rate_limit_active = counter_u64_alloc(M_WAITOK); | |||||
rate_limit_alloc_fail = counter_u64_alloc(M_WAITOK); | |||||
rate_limit_set_ok = counter_u64_alloc(M_WAITOK); | |||||
} | |||||
SYSINIT(rl, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, rl_init, NULL); | |||||
#endif /* RATELIMIT */ | #endif /* RATELIMIT */ |