Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_hpts.c
Show All 27 Lines | |||||
#include "opt_inet.h" | #include "opt_inet.h" | ||||
#include "opt_inet6.h" | #include "opt_inet6.h" | ||||
#include "opt_tcpdebug.h" | #include "opt_tcpdebug.h" | ||||
/** | /** | ||||
* Some notes about usage. | * Some notes about usage. | ||||
* | * | ||||
* The tcp_hpts system is designed to provide a high precision timer | * The tcp_hpts system is designed to provide a high precision timer | ||||
* system for tcp. Its main purpose is to provide a mechanism for | * system for tcp. Its main purpose is to provide a mechanism for | ||||
* pacing packets out onto the wire. It can be used in two ways | * pacing packets out onto the wire. It can be used in two ways | ||||
* by a given TCP stack (and those two methods can be used simultaneously). | * by a given TCP stack (and those two methods can be used simultaneously). | ||||
* | * | ||||
* First, and probably the main way it is used by Rack and BBR, it can | * First, and probably the main way it is used by Rack and BBR, it can | ||||
* be used to call tcp_output() of a transport stack at some time in the future. | * be used to call tcp_output() of a transport stack at some time in the future. | ||||
* The normal way this is done is that tcp_output() of the stack schedules | * The normal way this is done is that tcp_output() of the stack schedules | ||||
* itself to be called again by calling tcp_hpts_insert(tcpcb, slot). The | * itself to be called again by calling tcp_hpts_insert(tcpcb, slot). The | ||||
* slot is the time from now that the stack wants to be called but it | * slot is the time from now that the stack wants to be called but it | ||||
Show All 9 Lines | |||||
* arrival of incoming ack's). So it would add something like: | * arrival of incoming ack's). So it would add something like: | ||||
* | * | ||||
* if (inp->inp_in_hpts) | * if (inp->inp_in_hpts) | ||||
* return; | * return; | ||||
* | * | ||||
* to prevent output processing until the time allotted has gone by. | * to prevent output processing until the time allotted has gone by. | ||||
* Of course this is a bare bones example and the stack will probably | * Of course this is a bare bones example and the stack will probably | ||||
* have more considerations than just the above. | * have more considerations than just the above. | ||||
* | * | ||||
* Now the second function (actually two functions I guess :D) | * Now the second function (actually two functions I guess :D) | ||||
* the tcp_hpts system provides is the ability to either abort | * the tcp_hpts system provides is the ability to either abort | ||||
* a connection (later) or process input on a connection. | * a connection (later) or process input on a connection. | ||||
* Why would you want to do this? To keep processor locality | * Why would you want to do this? To keep processor locality | ||||
* and or not have to worry about untangling any recursive | * and or not have to worry about untangling any recursive | ||||
* locks. The input function now is hooked to the new LRO | * locks. The input function now is hooked to the new LRO | ||||
* system as well. | * system as well. | ||||
* | * | ||||
* In order to use the input redirection function the | * In order to use the input redirection function the | ||||
* tcp stack must define an input function for | * tcp stack must define an input function for | ||||
* tfb_do_queued_segments(). This function understands | * tfb_do_queued_segments(). This function understands | ||||
* how to dequeue an array of packets that were input and | * how to dequeue an array of packets that were input and | ||||
* knows how to call the correct processing routine. | * knows how to call the correct processing routine. | ||||
* | * | ||||
* Locking in this is important as well so most likely the | * Locking in this is important as well so most likely the | ||||
* stack will need to define the tfb_do_segment_nounlock() | * stack will need to define the tfb_do_segment_nounlock() | ||||
* splitting tfb_do_segment() into two parts. The main processing | * splitting tfb_do_segment() into two parts. The main processing | ||||
* part that does not unlock the INP and returns a value of 1 or 0. | * part that does not unlock the INP and returns a value of 1 or 0. | ||||
* It returns 0 if all is well and the lock was not released. It | * It returns 0 if all is well and the lock was not released. It | ||||
* returns 1 if we had to destroy the TCB (a reset received etc). | * returns 1 if we had to destroy the TCB (a reset received etc). | ||||
* The remains of tfb_do_segment() then become just a simple call | * The remains of tfb_do_segment() then become just a simple call | ||||
* to the tfb_do_segment_nounlock() function and check the return | * to the tfb_do_segment_nounlock() function and check the return | ||||
* code and possibly unlock. | * code and possibly unlock. | ||||
* | * | ||||
* The stack must also set the flag on the INP that it supports this | * The stack must also set the flag on the INP that it supports this | ||||
* feature i.e. INP_SUPPORTS_MBUFQ. The LRO code recognizes | * feature i.e. INP_SUPPORTS_MBUFQ. The LRO code recognizes | ||||
* this flag as well and will queue packets when it is set. | * this flag as well and will queue packets when it is set. | ||||
* There are other flags as well INP_MBUF_QUEUE_READY and | * There are other flags as well INP_MBUF_QUEUE_READY and | ||||
* INP_DONT_SACK_QUEUE. The first flag tells the LRO code | * INP_DONT_SACK_QUEUE. The first flag tells the LRO code | ||||
* that we are in the pacer for output so there is no | * that we are in the pacer for output so there is no | ||||
* need to wake up the hpts system to get immediate | * need to wake up the hpts system to get immediate | ||||
* input. The second tells the LRO code that it's okay | * input. The second tells the LRO code that it's okay | ||||
* if a SACK arrives you can still defer input and let | * if a SACK arrives you can still defer input and let | ||||
* the current hpts timer run (this is usually set when | * the current hpts timer run (this is usually set when | ||||
* a rack timer is up so we know SACK's are happening | * a rack timer is up so we know SACK's are happening | ||||
* on the connection already and don't want to wakeup yet). | * on the connection already and don't want to wakeup yet). | ||||
* | * | ||||
* There is a common function within the rack_bbr_common code | * There is a common function within the rack_bbr_common code | ||||
* version i.e. ctf_do_queued_segments(). This function | * version i.e. ctf_do_queued_segments(). This function | ||||
* knows how to take the input queue of packets from | * knows how to take the input queue of packets from | ||||
* tp->t_in_pkts and process them digging out | * tp->t_in_pkts and process them digging out | ||||
* all the arguments, calling any bpf tap and | * all the arguments, calling any bpf tap and | ||||
* calling into tfb_do_segment_nounlock(). The common | * calling into tfb_do_segment_nounlock(). The common | ||||
* function (ctf_do_queued_segments()) requires that | * function (ctf_do_queued_segments()) requires that | ||||
* you have defined the tfb_do_segment_nounlock() as | * you have defined the tfb_do_segment_nounlock() as | ||||
* described above. | * described above. | ||||
* | * | ||||
* The second feature of the input side of hpts is the | * The second feature of the input side of hpts is the | ||||
* dropping of a connection. This is due to the way that | * dropping of a connection. This is due to the way that | ||||
* locking may have occurred on the INP_WLOCK. So if | * locking may have occurred on the INP_WLOCK. So if | ||||
* a stack wants to drop a connection it calls: | * a stack wants to drop a connection it calls: | ||||
* | * | ||||
* tcp_set_inp_to_drop(tp, ETIMEDOUT) | * tcp_set_inp_to_drop(tp, ETIMEDOUT) | ||||
* | * | ||||
* To schedule the tcp_hpts system to call | * To schedule the tcp_hpts system to call | ||||
* | * | ||||
* tcp_drop(tp, drop_reason) | * tcp_drop(tp, drop_reason) | ||||
* | * | ||||
* at a future point. This is quite handy to prevent locking | * at a future point. This is quite handy to prevent locking | ||||
* issues when dropping connections. | * issues when dropping connections. | ||||
* | * | ||||
*/ | */ | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
▲ Show 20 Lines • Show All 152 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
int error; | int error; | ||||
uint32_t new; | uint32_t new; | ||||
new = hpts_sleep_max; | new = hpts_sleep_max; | ||||
error = sysctl_handle_int(oidp, &new, 0, req); | error = sysctl_handle_int(oidp, &new, 0, req); | ||||
if (error == 0 && req->newptr) { | if (error == 0 && req->newptr) { | ||||
if ((new < (NUM_OF_HPTSI_SLOTS / 4)) || | if ((new < (NUM_OF_HPTSI_SLOTS / 4)) || | ||||
(new > HPTS_MAX_SLEEP_ALLOWED)) | (new > HPTS_MAX_SLEEP_ALLOWED)) | ||||
error = EINVAL; | error = EINVAL; | ||||
else | else | ||||
hpts_sleep_max = new; | hpts_sleep_max = new; | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
SYSCTL_PROC(_net_inet_tcp_hpts, OID_AUTO, maxsleep, | SYSCTL_PROC(_net_inet_tcp_hpts, OID_AUTO, maxsleep, | ||||
▲ Show 20 Lines • Show All 315 Lines • ▼ Show 20 Lines | |||||
* Called normally with the INP_LOCKED but it | * Called normally with the INP_LOCKED but it | ||||
* does not matter, the hpts lock is the key | * does not matter, the hpts lock is the key | ||||
* but the lock order allows us to hold the | * but the lock order allows us to hold the | ||||
* INP lock and then get the hpts lock. | * INP lock and then get the hpts lock. | ||||
* | * | ||||
* Valid values in the flags are | * Valid values in the flags are | ||||
* HPTS_REMOVE_OUTPUT - remove from the output of the hpts. | * HPTS_REMOVE_OUTPUT - remove from the output of the hpts. | ||||
* HPTS_REMOVE_INPUT - remove from the input of the hpts. | * HPTS_REMOVE_INPUT - remove from the input of the hpts. | ||||
* Note that you can use one or both values together | * Note that you can use one or both values together | ||||
* and get two actions. | * and get two actions. | ||||
*/ | */ | ||||
void | void | ||||
__tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line) | __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line) | ||||
{ | { | ||||
struct tcp_hpts_entry *hpts; | struct tcp_hpts_entry *hpts; | ||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
Show All 18 Lines | hpts_tick(uint32_t wheel_tick, uint32_t plus) | ||||
*/ | */ | ||||
KASSERT(wheel_tick < NUM_OF_HPTSI_SLOTS, ("Invalid tick %u not on wheel", wheel_tick)); | KASSERT(wheel_tick < NUM_OF_HPTSI_SLOTS, ("Invalid tick %u not on wheel", wheel_tick)); | ||||
return ((wheel_tick + plus) % NUM_OF_HPTSI_SLOTS); | return ((wheel_tick + plus) % NUM_OF_HPTSI_SLOTS); | ||||
} | } | ||||
static inline int | static inline int | ||||
tick_to_wheel(uint32_t cts_in_wticks) | tick_to_wheel(uint32_t cts_in_wticks) | ||||
{ | { | ||||
/* | /* | ||||
* Given a timestamp in wheel ticks (10usec inc's) | * Given a timestamp in wheel ticks (10usec inc's) | ||||
* map it to our limited space wheel. | * map it to our limited space wheel. | ||||
*/ | */ | ||||
return (cts_in_wticks % NUM_OF_HPTSI_SLOTS); | return (cts_in_wticks % NUM_OF_HPTSI_SLOTS); | ||||
} | } | ||||
static inline int | static inline int | ||||
hpts_ticks_diff(int prev_tick, int tick_now) | hpts_ticks_diff(int prev_tick, int tick_now) | ||||
{ | { | ||||
/* | /* | ||||
* Given two ticks that are someplace | * Given two ticks that are someplace | ||||
* on our wheel. How far are they apart? | * on our wheel. How far are they apart? | ||||
*/ | */ | ||||
if (tick_now > prev_tick) | if (tick_now > prev_tick) | ||||
return (tick_now - prev_tick); | return (tick_now - prev_tick); | ||||
else if (tick_now == prev_tick) | else if (tick_now == prev_tick) | ||||
/* | /* | ||||
* Special case, same means we can go all of our | * Special case, same means we can go all of our | ||||
* wheel less one slot. | * wheel less one slot. | ||||
*/ | */ | ||||
return (NUM_OF_HPTSI_SLOTS - 1); | return (NUM_OF_HPTSI_SLOTS - 1); | ||||
else | else | ||||
return ((NUM_OF_HPTSI_SLOTS - prev_tick) + tick_now); | return ((NUM_OF_HPTSI_SLOTS - prev_tick) + tick_now); | ||||
} | } | ||||
/* | /* | ||||
* Given a tick on the wheel that is the current time | * Given a tick on the wheel that is the current time | ||||
* mapped to the wheel (wheel_tick), what is the maximum | * mapped to the wheel (wheel_tick), what is the maximum | ||||
* distance forward that can be obtained without | * distance forward that can be obtained without | ||||
* wrapping past either prev_tick or running_tick | * wrapping past either prev_tick or running_tick | ||||
* depending on the hpts state? Also if passed | * depending on the hpts state? Also if passed | ||||
* a uint32_t *, fill it with the tick location. | * a uint32_t *, fill it with the tick location. | ||||
* | * | ||||
* Note if you do not give this function the current | * Note if you do not give this function the current | ||||
* time (that you think it is) mapped to the wheel | * time (that you think it is) mapped to the wheel | ||||
* then the results will not be what you expect and | * then the results will not be what you expect and | ||||
* could lead to invalid inserts. | * could lead to invalid inserts. | ||||
*/ | */ | ||||
static inline int32_t | static inline int32_t | ||||
max_ticks_available(struct tcp_hpts_entry *hpts, uint32_t wheel_tick, uint32_t *target_tick) | max_ticks_available(struct tcp_hpts_entry *hpts, uint32_t wheel_tick, uint32_t *target_tick) | ||||
{ | { | ||||
uint32_t dis_to_travel, end_tick, pacer_to_now, avail_on_wheel; | uint32_t dis_to_travel, end_tick, pacer_to_now, avail_on_wheel; | ||||
Show All 18 Lines | if ((hpts->p_hpts_active == 1) && | ||||
*/ | */ | ||||
end_tick = hpts->p_prev_slot; | end_tick = hpts->p_prev_slot; | ||||
if (end_tick == 0) | if (end_tick == 0) | ||||
end_tick = NUM_OF_HPTSI_SLOTS - 1; | end_tick = NUM_OF_HPTSI_SLOTS - 1; | ||||
else | else | ||||
end_tick--; | end_tick--; | ||||
if (target_tick) | if (target_tick) | ||||
*target_tick = end_tick; | *target_tick = end_tick; | ||||
/* | /* | ||||
* Now we have close to the full wheel left minus the | * Now we have close to the full wheel left minus the | ||||
* time it has been since the pacer went to sleep. Note | * time it has been since the pacer went to sleep. Note | ||||
* that wheel_tick, passed in, should be the current time | * that wheel_tick, passed in, should be the current time | ||||
* from the perspective of the caller, mapped to the wheel. | * from the perspective of the caller, mapped to the wheel. | ||||
*/ | */ | ||||
if (hpts->p_prev_slot != wheel_tick) | if (hpts->p_prev_slot != wheel_tick) | ||||
dis_to_travel = hpts_ticks_diff(hpts->p_prev_slot, wheel_tick); | dis_to_travel = hpts_ticks_diff(hpts->p_prev_slot, wheel_tick); | ||||
else | else | ||||
dis_to_travel = 1; | dis_to_travel = 1; | ||||
/* | /* | ||||
* dis_to_travel in this case is the space from when the | * dis_to_travel in this case is the space from when the | ||||
* pacer stopped (p_prev_slot) and where our wheel_tick | * pacer stopped (p_prev_slot) and where our wheel_tick | ||||
* is now. To know how many slots we can put it in we | * is now. To know how many slots we can put it in we | ||||
* subtract from the wheel size. We would not want | * subtract from the wheel size. We would not want | ||||
* to place something after p_prev_slot or it will | * to place something after p_prev_slot or it will | ||||
* get run too soon. | * get run too soon. | ||||
*/ | */ | ||||
return (NUM_OF_HPTSI_SLOTS - dis_to_travel); | return (NUM_OF_HPTSI_SLOTS - dis_to_travel); | ||||
} | } | ||||
/* | /* | ||||
* So how many slots are open between p_runningtick -> p_cur_slot | * So how many slots are open between p_runningtick -> p_cur_slot | ||||
* that is what is currently un-available for insertion. Special | * that is what is currently un-available for insertion. Special | ||||
* case when we are at the last slot, this gets 1, so that | * case when we are at the last slot, this gets 1, so that | ||||
* the answer to how many slots are available is all but 1. | * the answer to how many slots are available is all but 1. | ||||
*/ | */ | ||||
if (hpts->p_runningtick == hpts->p_cur_slot) | if (hpts->p_runningtick == hpts->p_cur_slot) | ||||
dis_to_travel = 1; | dis_to_travel = 1; | ||||
else | else | ||||
dis_to_travel = hpts_ticks_diff(hpts->p_runningtick, hpts->p_cur_slot); | dis_to_travel = hpts_ticks_diff(hpts->p_runningtick, hpts->p_cur_slot); | ||||
/* | /* | ||||
* How long has the pacer been running? | * How long has the pacer been running? | ||||
*/ | */ | ||||
if (hpts->p_cur_slot != wheel_tick) { | if (hpts->p_cur_slot != wheel_tick) { | ||||
/* The pacer is a bit late */ | /* The pacer is a bit late */ | ||||
pacer_to_now = hpts_ticks_diff(hpts->p_cur_slot, wheel_tick); | pacer_to_now = hpts_ticks_diff(hpts->p_cur_slot, wheel_tick); | ||||
} else { | } else { | ||||
/* The pacer is right on time, now == pacers start time */ | /* The pacer is right on time, now == pacers start time */ | ||||
pacer_to_now = 0; | pacer_to_now = 0; | ||||
} | } | ||||
/* | /* | ||||
* To get the number left we can insert into we simply | * To get the number left we can insert into we simply | ||||
* subtract the distance the pacer has to run from how | * subtract the distance the pacer has to run from how | ||||
* many slots there are. | * many slots there are. | ||||
*/ | */ | ||||
avail_on_wheel = NUM_OF_HPTSI_SLOTS - dis_to_travel; | avail_on_wheel = NUM_OF_HPTSI_SLOTS - dis_to_travel; | ||||
/* | /* | ||||
* Now how many of those we will eat due to the pacer's | * Now how many of those we will eat due to the pacer's | ||||
* time (p_cur_slot) of start being behind the | * time (p_cur_slot) of start being behind the | ||||
* real time (wheel_tick)? | * real time (wheel_tick)? | ||||
*/ | */ | ||||
if (avail_on_wheel <= pacer_to_now) { | if (avail_on_wheel <= pacer_to_now) { | ||||
/* | /* | ||||
* Wheel wrap, we can't fit on the wheel, that | * Wheel wrap, we can't fit on the wheel, that | ||||
* is unusual the system must be way overloaded! | * is unusual the system must be way overloaded! | ||||
* Insert into the assured tick, and return special | * Insert into the assured tick, and return special | ||||
* "0". | * "0". | ||||
*/ | */ | ||||
counter_u64_add(combined_wheel_wrap, 1); | counter_u64_add(combined_wheel_wrap, 1); | ||||
*target_tick = hpts->p_nxt_slot; | *target_tick = hpts->p_nxt_slot; | ||||
return (0); | return (0); | ||||
} else { | } else { | ||||
/* | /* | ||||
* We know how many slots are open | * We know how many slots are open | ||||
* on the wheel (the reverse of what | * on the wheel (the reverse of what | ||||
* is left to run). Take away the time | * is left to run). Take away the time | ||||
* the pacer started to now (wheel_tick) | * the pacer started to now (wheel_tick) | ||||
* and that tells you how many slots are | * and that tells you how many slots are | ||||
* open that can be inserted into that won't | * open that can be inserted into that won't | ||||
* be touched by the pacer until later. | * be touched by the pacer until later. | ||||
*/ | */ | ||||
return (avail_on_wheel - pacer_to_now); | return (avail_on_wheel - pacer_to_now); | ||||
} | } | ||||
} | } | ||||
static int | static int | ||||
tcp_queue_to_hpts_immediate_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line, int32_t noref) | tcp_queue_to_hpts_immediate_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line, int32_t noref) | ||||
{ | { | ||||
uint32_t need_wake = 0; | uint32_t need_wake = 0; | ||||
HPTS_MTX_ASSERT(hpts); | HPTS_MTX_ASSERT(hpts); | ||||
if (inp->inp_in_hpts == 0) { | if (inp->inp_in_hpts == 0) { | ||||
/* Ok we need to set it on the hpts in the current slot */ | /* Ok we need to set it on the hpts in the current slot */ | ||||
inp->inp_hpts_request = 0; | inp->inp_hpts_request = 0; | ||||
if ((hpts->p_hpts_active == 0) || | if ((hpts->p_hpts_active == 0) || | ||||
(hpts->p_wheel_complete)) { | (hpts->p_wheel_complete)) { | ||||
/* | /* | ||||
* A sleeping hpts we want in next slot to run | * A sleeping hpts we want in next slot to run | ||||
* note that in this state p_prev_slot == p_cur_slot | * note that in this state p_prev_slot == p_cur_slot | ||||
*/ | */ | ||||
inp->inp_hptsslot = hpts_tick(hpts->p_prev_slot, 1); | inp->inp_hptsslot = hpts_tick(hpts->p_prev_slot, 1); | ||||
if ((hpts->p_on_min_sleep == 0) && (hpts->p_hpts_active == 0)) | if ((hpts->p_on_min_sleep == 0) && (hpts->p_hpts_active == 0)) | ||||
need_wake = 1; | need_wake = 1; | ||||
} else if ((void *)inp == hpts->p_inp) { | } else if ((void *)inp == hpts->p_inp) { | ||||
/* | /* | ||||
* The hpts system is running and the caller | * The hpts system is running and the caller | ||||
* was awoken by the hpts system. | * was awoken by the hpts system. | ||||
* We can't allow you to go into the same slot we | * We can't allow you to go into the same slot we | ||||
* are in (we don't want a loop :-D). | * are in (we don't want a loop :-D). | ||||
*/ | */ | ||||
inp->inp_hptsslot = hpts->p_nxt_slot; | inp->inp_hptsslot = hpts->p_nxt_slot; | ||||
} else | } else | ||||
inp->inp_hptsslot = hpts->p_runningtick; | inp->inp_hptsslot = hpts->p_runningtick; | ||||
hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, noref); | hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, noref); | ||||
if (need_wake) { | if (need_wake) { | ||||
Show All 21 Lines | __tcp_queue_to_hpts_immediate(struct inpcb *inp, int32_t line) | ||||
return (ret); | return (ret); | ||||
} | } | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
static void | static void | ||||
check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t inp_hptsslot, int line) | check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t inp_hptsslot, int line) | ||||
{ | { | ||||
/* | /* | ||||
* Sanity checks for the pacer with invariants | * Sanity checks for the pacer with invariants | ||||
* on insert. | * on insert. | ||||
*/ | */ | ||||
if (inp_hptsslot >= NUM_OF_HPTSI_SLOTS) | if (inp_hptsslot >= NUM_OF_HPTSI_SLOTS) | ||||
panic("hpts:%p inp:%p slot:%d > max", | panic("hpts:%p inp:%p slot:%d > max", | ||||
hpts, inp, inp_hptsslot); | hpts, inp, inp_hptsslot); | ||||
if ((hpts->p_hpts_active) && | if ((hpts->p_hpts_active) && | ||||
(hpts->p_wheel_complete == 0)) { | (hpts->p_wheel_complete == 0)) { | ||||
/* | /* | ||||
* If the pacer is processing an arc | * If the pacer is processing an arc | ||||
* of the wheel, we need to make | * of the wheel, we need to make | ||||
* sure we are not inserting within | * sure we are not inserting within | ||||
* that arc. | * that arc. | ||||
*/ | */ | ||||
int distance, yet_to_run; | int distance, yet_to_run; | ||||
distance = hpts_ticks_diff(hpts->p_runningtick, inp_hptsslot); | distance = hpts_ticks_diff(hpts->p_runningtick, inp_hptsslot); | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | if (inp->inp_in_hpts == 0) { | ||||
if (diag) { | if (diag) { | ||||
diag->wheel_tick = wheel_tick; | diag->wheel_tick = wheel_tick; | ||||
diag->maxticks = maxticks; | diag->maxticks = maxticks; | ||||
diag->wheel_cts = wheel_cts; | diag->wheel_cts = wheel_cts; | ||||
} | } | ||||
if (maxticks == 0) { | if (maxticks == 0) { | ||||
/* The pacer is in a wheel wrap behind, yikes! */ | /* The pacer is in a wheel wrap behind, yikes! */ | ||||
if (slot > 1) { | if (slot > 1) { | ||||
/* | /* | ||||
* Reduce by 1 to prevent a forever loop in | * Reduce by 1 to prevent a forever loop in | ||||
* case something else is wrong. Note this | * case something else is wrong. Note this | ||||
* probably does not hurt because the pacer | * probably does not hurt because the pacer | ||||
* if its true is so far behind we will be | * if its true is so far behind we will be | ||||
* > 1second late calling anyway. | * > 1second late calling anyway. | ||||
*/ | */ | ||||
slot--; | slot--; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 381 Lines • ▼ Show 20 Lines | if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) && tp->t_in_pkt) { | ||||
if (inp->inp_in_input) | if (inp->inp_in_input) | ||||
tcp_hpts_remove(inp, HPTS_REMOVE_INPUT); | tcp_hpts_remove(inp, HPTS_REMOVE_INPUT); | ||||
dropped = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0); | dropped = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0); | ||||
if (dropped) { | if (dropped) { | ||||
/* Re-acquire the wlock so we can release the reference */ | /* Re-acquire the wlock so we can release the reference */ | ||||
INP_WLOCK(inp); | INP_WLOCK(inp); | ||||
} | } | ||||
} else if (tp->t_in_pkt) { | } else if (tp->t_in_pkt) { | ||||
/* | /* | ||||
* We reach here only if we had a | * We reach here only if we had a | ||||
* stack that supported INP_SUPPORTS_MBUFQ | * stack that supported INP_SUPPORTS_MBUFQ | ||||
* and then somehow switched to a stack that | * and then somehow switched to a stack that | ||||
* does not. The packets are basically stranded | * does not. The packets are basically stranded | ||||
* and would hang with the connection until | * and would hang with the connection until | ||||
* cleanup without this code. Its not the | * cleanup without this code. Its not the | ||||
* best way but I know of no other way to | * best way but I know of no other way to | ||||
* handle it since the stack needs functions | * handle it since the stack needs functions | ||||
* it does not have to handle queued packets. | * it does not have to handle queued packets. | ||||
Show All 35 Lines | tcp_hptsi(struct tcp_hpts_entry *hpts) | ||||
hpts->saved_curslot = hpts->p_cur_slot; | hpts->saved_curslot = hpts->p_cur_slot; | ||||
hpts->saved_prev_slot = hpts->p_prev_slot; | hpts->saved_prev_slot = hpts->p_prev_slot; | ||||
hpts->p_lasttick = hpts->p_curtick; | hpts->p_lasttick = hpts->p_curtick; | ||||
hpts->p_curtick = tcp_gethptstick(&tv); | hpts->p_curtick = tcp_gethptstick(&tv); | ||||
hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); | hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); | ||||
if ((hpts->p_on_queue_cnt == 0) || | if ((hpts->p_on_queue_cnt == 0) || | ||||
(hpts->p_lasttick == hpts->p_curtick)) { | (hpts->p_lasttick == hpts->p_curtick)) { | ||||
/* | /* | ||||
* No time has yet passed, | * No time has yet passed, | ||||
* or nothing to do. | * or nothing to do. | ||||
*/ | */ | ||||
hpts->p_prev_slot = hpts->p_cur_slot; | hpts->p_prev_slot = hpts->p_cur_slot; | ||||
hpts->p_lasttick = hpts->p_curtick; | hpts->p_lasttick = hpts->p_curtick; | ||||
goto no_run; | goto no_run; | ||||
} | } | ||||
again: | again: | ||||
hpts->p_wheel_complete = 0; | hpts->p_wheel_complete = 0; | ||||
HPTS_MTX_ASSERT(hpts); | HPTS_MTX_ASSERT(hpts); | ||||
ticks_to_run = hpts_ticks_diff(hpts->p_prev_slot, hpts->p_cur_slot); | ticks_to_run = hpts_ticks_diff(hpts->p_prev_slot, hpts->p_cur_slot); | ||||
if (((hpts->p_curtick - hpts->p_lasttick) > ticks_to_run) && | if (((hpts->p_curtick - hpts->p_lasttick) > ticks_to_run) && | ||||
(hpts->p_on_queue_cnt != 0)) { | (hpts->p_on_queue_cnt != 0)) { | ||||
/* | /* | ||||
* Wheel wrap is occurring, basically we | * Wheel wrap is occurring, basically we | ||||
* are behind and the distance between | * are behind and the distance between | ||||
* run's has spread so much it has exceeded | * run's has spread so much it has exceeded | ||||
* the time on the wheel (1.024 seconds). This | * the time on the wheel (1.024 seconds). This | ||||
* is ugly and should NOT be happening. We | * is ugly and should NOT be happening. We | ||||
* need to run the entire wheel. We last processed | * need to run the entire wheel. We last processed | ||||
* p_prev_slot, so that needs to be the last slot | * p_prev_slot, so that needs to be the last slot | ||||
* we run. The next slot after that should be our | * we run. The next slot after that should be our | ||||
* reserved first slot for new, and then starts | * reserved first slot for new, and then starts | ||||
* the running position. Now the problem is the | * the running position. Now the problem is the | ||||
* reserved "not to yet" place does not exist | * reserved "not to yet" place does not exist | ||||
* and there may be inp's in there that need | * and there may be inp's in there that need | ||||
* running. We can merge those into the | * running. We can merge those into the | ||||
* first slot at the head. | * first slot at the head. | ||||
*/ | */ | ||||
wrap_loop_cnt++; | wrap_loop_cnt++; | ||||
hpts->p_nxt_slot = hpts_tick(hpts->p_prev_slot, 1); | hpts->p_nxt_slot = hpts_tick(hpts->p_prev_slot, 1); | ||||
hpts->p_runningtick = hpts_tick(hpts->p_prev_slot, 2); | hpts->p_runningtick = hpts_tick(hpts->p_prev_slot, 2); | ||||
/* | /* | ||||
* Adjust p_cur_slot to be where we are starting from | * Adjust p_cur_slot to be where we are starting from | ||||
* hopefully we will catch up (fat chance if something | * hopefully we will catch up (fat chance if something | ||||
* is broken this bad :( ) | * is broken this bad :( ) | ||||
*/ | */ | ||||
hpts->p_cur_slot = hpts->p_prev_slot; | hpts->p_cur_slot = hpts->p_prev_slot; | ||||
/* | /* | ||||
* The next slot has guys to run too, and that would | * The next slot has guys to run too, and that would | ||||
* be where we would normally start, lets move them into | * be where we would normally start, lets move them into | ||||
* the next slot (p_prev_slot + 2) so that we will | * the next slot (p_prev_slot + 2) so that we will | ||||
* run them, the extra 10usecs of late (by being | * run them, the extra 10usecs of late (by being | ||||
* put behind) does not really matter in this situation. | * put behind) does not really matter in this situation. | ||||
*/ | */ | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
/* | /* | ||||
* To prevent a panic we need to update the inpslot to the | * To prevent a panic we need to update the inpslot to the | ||||
* new location. This is safe since it takes both the | * new location. This is safe since it takes both the | ||||
* INP lock and the pacer mutex to change the inp_hptsslot. | * INP lock and the pacer mutex to change the inp_hptsslot. | ||||
*/ | */ | ||||
TAILQ_FOREACH(inp, &hpts->p_hptss[hpts->p_nxt_slot], inp_hpts) { | TAILQ_FOREACH(inp, &hpts->p_hptss[hpts->p_nxt_slot], inp_hpts) { | ||||
inp->inp_hptsslot = hpts->p_runningtick; | inp->inp_hptsslot = hpts->p_runningtick; | ||||
} | } | ||||
#endif | #endif | ||||
TAILQ_CONCAT(&hpts->p_hptss[hpts->p_runningtick], | TAILQ_CONCAT(&hpts->p_hptss[hpts->p_runningtick], | ||||
&hpts->p_hptss[hpts->p_nxt_slot], inp_hpts); | &hpts->p_hptss[hpts->p_nxt_slot], inp_hpts); | ||||
ticks_to_run = NUM_OF_HPTSI_SLOTS - 1; | ticks_to_run = NUM_OF_HPTSI_SLOTS - 1; | ||||
counter_u64_add(wheel_wrap, 1); | counter_u64_add(wheel_wrap, 1); | ||||
} else { | } else { | ||||
/* | /* | ||||
* Nxt slot is always one after p_runningtick though | * Nxt slot is always one after p_runningtick though | ||||
* its not used usually unless we are doing wheel wrap. | * its not used usually unless we are doing wheel wrap. | ||||
*/ | */ | ||||
hpts->p_nxt_slot = hpts->p_prev_slot; | hpts->p_nxt_slot = hpts->p_prev_slot; | ||||
hpts->p_runningtick = hpts_tick(hpts->p_prev_slot, 1); | hpts->p_runningtick = hpts_tick(hpts->p_prev_slot, 1); | ||||
} | } | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
if (TAILQ_EMPTY(&hpts->p_input) && | if (TAILQ_EMPTY(&hpts->p_input) && | ||||
Show All 34 Lines | #endif | ||||
if ((ninp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_runningtick])) != NULL) { | if ((ninp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_runningtick])) != NULL) { | ||||
/* We prefetch the next inp if possible */ | /* We prefetch the next inp if possible */ | ||||
kern_prefetch(ninp, &prefetch_ninp); | kern_prefetch(ninp, &prefetch_ninp); | ||||
prefetch_ninp = 1; | prefetch_ninp = 1; | ||||
} | } | ||||
if (inp->inp_hpts_request) { | if (inp->inp_hpts_request) { | ||||
/* | /* | ||||
* This guy is deferred out further in time | * This guy is deferred out further in time | ||||
* than our wheel had available on it. | * than our wheel had available on it. | ||||
* Push him back on the wheel or run it | * Push him back on the wheel or run it | ||||
* depending. | * depending. | ||||
*/ | */ | ||||
uint32_t maxticks, last_tick, remaining_slots; | uint32_t maxticks, last_tick, remaining_slots; | ||||
remaining_slots = ticks_to_run - (i + 1); | remaining_slots = ticks_to_run - (i + 1); | ||||
if (inp->inp_hpts_request > remaining_slots) { | if (inp->inp_hpts_request > remaining_slots) { | ||||
/* | /* | ||||
Show All 12 Lines | #endif | ||||
hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], __LINE__, 1); | hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], __LINE__, 1); | ||||
hpts->p_inp = NULL; | hpts->p_inp = NULL; | ||||
continue; | continue; | ||||
} | } | ||||
inp->inp_hpts_request = 0; | inp->inp_hpts_request = 0; | ||||
/* Fall through, so we will do it now */ | /* Fall through, so we will do it now */ | ||||
} | } | ||||
/* | /* | ||||
* We clear the hpts flag here after dealing with | * We clear the hpts flag here after dealing with | ||||
* remaining slots. This way anyone looking with the | * remaining slots. This way anyone looking with the | ||||
* TCB lock will see it's on the hpts until just | * TCB lock will see it's on the hpts until just | ||||
* before we unlock. | * before we unlock. | ||||
*/ | */ | ||||
inp->inp_in_hpts = 0; | inp->inp_in_hpts = 0; | ||||
mtx_unlock(&hpts->p_mtx); | mtx_unlock(&hpts->p_mtx); | ||||
INP_WLOCK(inp); | INP_WLOCK(inp); | ||||
if (in_pcbrele_wlocked(inp)) { | if (in_pcbrele_wlocked(inp)) { | ||||
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines | #ifdef INVARIANTS | ||||
if (TAILQ_EMPTY(&hpts->p_input) && | if (TAILQ_EMPTY(&hpts->p_input) && | ||||
(hpts->p_on_inqueue_cnt != 0)) { | (hpts->p_on_inqueue_cnt != 0)) { | ||||
panic("tp:%p in_hpts input empty but cnt:%d", | panic("tp:%p in_hpts input empty but cnt:%d", | ||||
hpts, hpts->p_on_inqueue_cnt); | hpts, hpts->p_on_inqueue_cnt); | ||||
} | } | ||||
#endif | #endif | ||||
hpts->p_prev_slot = hpts->p_cur_slot; | hpts->p_prev_slot = hpts->p_cur_slot; | ||||
hpts->p_lasttick = hpts->p_curtick; | hpts->p_lasttick = hpts->p_curtick; | ||||
if (loop_cnt > max_pacer_loops) { | if (loop_cnt > max_pacer_loops) { | ||||
/* | /* | ||||
* Something is seriously slow; we have | * Something is seriously slow; we have | ||||
* looped through processing the wheel | * looped through processing the wheel | ||||
* and by the time we cleared the | * and by the time we cleared the | ||||
* needs to run max_pacer_loops time | * needs to run max_pacer_loops time | ||||
* we still needed to run. That means | * we still needed to run. That means | ||||
* the system is hopelessly behind and | * the system is hopelessly behind and | ||||
* can never catch up :( | * can never catch up :( | ||||
* | * | ||||
* We will just lie to this thread | * We will just lie to this thread | ||||
* and let it think p_curtick is | * and let it think p_curtick is | ||||
* correct. When it next awakens | * correct. When it next awakens | ||||
* it will find itself further behind. | * it will find itself further behind. | ||||
*/ | */ | ||||
counter_u64_add(hpts_hopelessly_behind, 1); | counter_u64_add(hpts_hopelessly_behind, 1); | ||||
goto no_run; | goto no_run; | ||||
} | } | ||||
hpts->p_curtick = tcp_gethptstick(&tv); | hpts->p_curtick = tcp_gethptstick(&tv); | ||||
hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); | hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); | ||||
if ((wrap_loop_cnt < 2) && | if ((wrap_loop_cnt < 2) && | ||||
(hpts->p_lasttick != hpts->p_curtick)) { | (hpts->p_lasttick != hpts->p_curtick)) { | ||||
counter_u64_add(hpts_loops, 1); | counter_u64_add(hpts_loops, 1); | ||||
loop_cnt++; | loop_cnt++; | ||||
goto again; | goto again; | ||||
} | } | ||||
no_run: | no_run: | ||||
/* | /* | ||||
* Set flag to tell that we are done for | * Set flag to tell that we are done for | ||||
* any slot input that happens during | * any slot input that happens during | ||||
* input. | * input. | ||||
*/ | */ | ||||
hpts->p_wheel_complete = 1; | hpts->p_wheel_complete = 1; | ||||
/* | /* | ||||
* Run any input that may be there not covered | * Run any input that may be there not covered | ||||
* in running data. | * in running data. | ||||
*/ | */ | ||||
if (!TAILQ_EMPTY(&hpts->p_input)) { | if (!TAILQ_EMPTY(&hpts->p_input)) { | ||||
tcp_input_data(hpts, &tv); | tcp_input_data(hpts, &tv); | ||||
/* | /* | ||||
* Now did we spend too long running | * Now did we spend too long running | ||||
* input and need to run more ticks? | * input and need to run more ticks? | ||||
▲ Show 20 Lines • Show All 287 Lines • Show Last 20 Lines |