Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_hpts.c
Show First 20 Lines • Show All 194 Lines • ▼ Show 20 Lines | |||||
*/ | */ | ||||
#define NUM_OF_HPTSI_SLOTS 102400 | #define NUM_OF_HPTSI_SLOTS 102400 | ||||
/* Each hpts has its own p_mtx which is used for locking */ | /* Each hpts has its own p_mtx which is used for locking */ | ||||
#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED) | #define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED) | ||||
#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx) | #define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx) | ||||
#define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx) | #define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx) | ||||
TAILQ_HEAD(hptsh, inpcb); | |||||
struct tcp_hpts_entry { | struct tcp_hpts_entry { | ||||
/* Cache line 0x00 */ | /* Cache line 0x00 */ | ||||
struct mtx p_mtx; /* Mutex for hpts */ | struct mtx p_mtx; /* Mutex for hpts */ | ||||
struct timeval p_mysleep; /* Our min sleep time */ | struct timeval p_mysleep; /* Our min sleep time */ | ||||
uint64_t syscall_cnt; | uint64_t syscall_cnt; | ||||
uint64_t sleeping; /* What the actual sleep was (if sleeping) */ | uint64_t sleeping; /* What the actual sleep was (if sleeping) */ | ||||
uint16_t p_hpts_active; /* Flag that says hpts is awake */ | uint16_t p_hpts_active; /* Flag that says hpts is awake */ | ||||
uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */ | uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */ | ||||
uint32_t p_curtick; /* Tick in 10 us the hpts is going to */ | uint32_t p_curtick; /* Tick in 10 us the hpts is going to */ | ||||
uint32_t p_runningslot; /* Current tick we are at if we are running */ | uint32_t p_runningslot; /* Current tick we are at if we are running */ | ||||
uint32_t p_prev_slot; /* Previous slot we were on */ | uint32_t p_prev_slot; /* Previous slot we were on */ | ||||
uint32_t p_cur_slot; /* Current slot in wheel hpts is draining */ | uint32_t p_cur_slot; /* Current slot in wheel hpts is draining */ | ||||
uint32_t p_nxt_slot; /* The next slot outside the current range of | uint32_t p_nxt_slot; /* The next slot outside the current range of | ||||
* slots that the hpts is running on. */ | * slots that the hpts is running on. */ | ||||
int32_t p_on_queue_cnt; /* Count on queue in this hpts */ | int32_t p_on_queue_cnt; /* Count on queue in this hpts */ | ||||
uint32_t p_lasttick; /* Last tick before the current one */ | uint32_t p_lasttick; /* Last tick before the current one */ | ||||
uint8_t p_direct_wake :1, /* boolean */ | uint8_t p_direct_wake :1, /* boolean */ | ||||
p_on_min_sleep:1, /* boolean */ | p_on_min_sleep:1, /* boolean */ | ||||
p_hpts_wake_scheduled:1, /* boolean */ | p_hpts_wake_scheduled:1, /* boolean */ | ||||
p_avail:5; | p_avail:5; | ||||
uint8_t p_fill[3]; /* Fill to 32 bits */ | uint8_t p_fill[3]; /* Fill to 32 bits */ | ||||
/* Cache line 0x40 */ | /* Cache line 0x40 */ | ||||
void *p_inp; | |||||
TAILQ_HEAD(, inpcb) p_dropq; /* Delayed drop queue */ | TAILQ_HEAD(, inpcb) p_dropq; /* Delayed drop queue */ | ||||
/* Hptsi wheel */ | struct hptsh { | ||||
struct hptsh *p_hptss; | TAILQ_HEAD(, inpcb) head; | ||||
uint32_t count; | |||||
uint32_t gencnt; | |||||
} *p_hptss; /* Hptsi wheel */ | |||||
uint32_t p_dropq_cnt; /* Count on drop queue */ | uint32_t p_dropq_cnt; /* Count on drop queue */ | ||||
uint32_t p_dropq_gencnt; | uint32_t p_dropq_gencnt; | ||||
uint32_t p_hpts_sleep_time; /* Current sleep interval having a max | uint32_t p_hpts_sleep_time; /* Current sleep interval having a max | ||||
* of 255ms */ | * of 255ms */ | ||||
uint32_t overidden_sleep; /* what was overridden by min-sleep, for logging */ | uint32_t overidden_sleep; /* what was overridden by min-sleep, for logging */ | ||||
uint32_t saved_lasttick; /* for logging */ | uint32_t saved_lasttick; /* for logging */ | ||||
uint32_t saved_curtick; /* for logging */ | uint32_t saved_curtick; /* for logging */ | ||||
uint32_t saved_curslot; /* for logging */ | uint32_t saved_curslot; /* for logging */ | ||||
uint32_t saved_prev_slot; /* for logging */ | uint32_t saved_prev_slot; /* for logging */ | ||||
uint32_t p_delayed_by; /* How much were we delayed by */ | uint32_t p_delayed_by; /* How much were we delayed by */ | ||||
/* Cache line 0x80 */ | /* Cache line 0x80 */ | ||||
struct sysctl_ctx_list hpts_ctx; | struct sysctl_ctx_list hpts_ctx; | ||||
struct sysctl_oid *hpts_root; | struct sysctl_oid *hpts_root; | ||||
struct intr_event *ie; | struct intr_event *ie; | ||||
void *ie_cookie; | void *ie_cookie; | ||||
uint16_t p_num; /* The hpts number, one per cpu */ | uint16_t p_num; /* The hpts number, one per cpu */ | ||||
uint16_t p_cpu; /* The hpts CPU */ | uint16_t p_cpu; /* The hpts CPU */ | ||||
/* There is extra space in here */ | /* There is extra space in here */ | ||||
/* Cache line 0x100 */ | /* Cache line 0x100 */ | ||||
struct callout co __aligned(CACHE_LINE_SIZE); | struct callout co __aligned(CACHE_LINE_SIZE); | ||||
} __aligned(CACHE_LINE_SIZE); | } __aligned(CACHE_LINE_SIZE); | ||||
struct tcp_hptsi { | static struct tcp_hptsi { | ||||
struct proc *rp_proc; /* Process structure for hpts */ | |||||
struct tcp_hpts_entry **rp_ent; /* Array of hptss */ | struct tcp_hpts_entry **rp_ent; /* Array of hptss */ | ||||
uint32_t *cts_last_ran; | uint32_t *cts_last_ran; | ||||
uint32_t rp_num_hptss; /* Number of hpts threads */ | uint32_t rp_num_hptss; /* Number of hpts threads */ | ||||
}; | } tcp_pace; | ||||
MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts"); | MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts"); | ||||
#ifdef RSS | #ifdef RSS | ||||
static int tcp_bind_threads = 1; | static int tcp_bind_threads = 1; | ||||
#else | #else | ||||
static int tcp_bind_threads = 2; | static int tcp_bind_threads = 2; | ||||
#endif | #endif | ||||
static int tcp_use_irq_cpu = 0; | static int tcp_use_irq_cpu = 0; | ||||
static struct tcp_hptsi tcp_pace; | |||||
static uint32_t *cts_last_ran; | static uint32_t *cts_last_ran; | ||||
static int hpts_does_tp_logging = 0; | static int hpts_does_tp_logging = 0; | ||||
static int hpts_use_assigned_cpu = 1; | static int hpts_use_assigned_cpu = 1; | ||||
static int32_t hpts_uses_oldest = OLDEST_THRESHOLD; | static int32_t hpts_uses_oldest = OLDEST_THRESHOLD; | ||||
static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout); | static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout); | ||||
static void tcp_hpts_thread(void *ctx); | static void tcp_hpts_thread(void *ctx); | ||||
static void tcp_init_hptsi(void *st); | static void tcp_init_hptsi(void *st); | ||||
Show All 22 Lines | |||||
static int32_t tcp_hpts_precision = 120; | static int32_t tcp_hpts_precision = 120; | ||||
static struct hpts_domain_info { | static struct hpts_domain_info { | ||||
int count; | int count; | ||||
int cpu[MAXCPU]; | int cpu[MAXCPU]; | ||||
} hpts_domains[MAXMEMDOM]; | } hpts_domains[MAXMEMDOM]; | ||||
enum { | |||||
IHPTS_NONE = 0, | |||||
IHPTS_ONQUEUE, | |||||
IHPTS_MOVING, | |||||
}; | |||||
counter_u64_t hpts_hopelessly_behind; | counter_u64_t hpts_hopelessly_behind; | ||||
SYSCTL_COUNTER_U64(_net_inet_tcp_hpts_stats, OID_AUTO, hopeless, CTLFLAG_RD, | SYSCTL_COUNTER_U64(_net_inet_tcp_hpts_stats, OID_AUTO, hopeless, CTLFLAG_RD, | ||||
&hpts_hopelessly_behind, | &hpts_hopelessly_behind, | ||||
"Number of times hpts could not catch up and was behind hopelessly"); | "Number of times hpts could not catch up and was behind hopelessly"); | ||||
counter_u64_t hpts_loops; | counter_u64_t hpts_loops; | ||||
▲ Show 20 Lines • Show All 203 Lines • ▼ Show 20 Lines | |||||
hpts_timeout_swi(void *arg) | hpts_timeout_swi(void *arg) | ||||
{ | { | ||||
struct tcp_hpts_entry *hpts; | struct tcp_hpts_entry *hpts; | ||||
hpts = (struct tcp_hpts_entry *)arg; | hpts = (struct tcp_hpts_entry *)arg; | ||||
swi_sched(hpts->ie_cookie, 0); | swi_sched(hpts->ie_cookie, 0); | ||||
} | } | ||||
static inline void | static void | ||||
hpts_sane_pace_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp, struct hptsh *head, int clear) | inp_hpts_insert(struct inpcb *inp, struct tcp_hpts_entry *hpts) | ||||
{ | { | ||||
HPTS_MTX_ASSERT(hpts); | struct hptsh *hptsh; | ||||
KASSERT(hpts->p_cpu == inp->inp_hpts_cpu, ("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp)); | |||||
KASSERT(inp->inp_in_hpts != 0, ("%s: hpts:%p inp:%p not on the hpts?", __FUNCTION__, hpts, inp)); | |||||
TAILQ_REMOVE(head, inp, inp_hpts); | |||||
hpts->p_on_queue_cnt--; | |||||
KASSERT(hpts->p_on_queue_cnt >= 0, | |||||
("Hpts goes negative inp:%p hpts:%p", | |||||
inp, hpts)); | |||||
if (clear) { | |||||
inp->inp_hpts_request = 0; | |||||
inp->inp_in_hpts = 0; | |||||
} | |||||
} | |||||
static inline void | INP_WLOCK_ASSERT(inp); | ||||
hpts_sane_pace_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, struct hptsh *head, int line, int noref) | |||||
{ | |||||
HPTS_MTX_ASSERT(hpts); | HPTS_MTX_ASSERT(hpts); | ||||
KASSERT(hpts->p_cpu == inp->inp_hpts_cpu, | MPASS(hpts->p_cpu == inp->inp_hpts_cpu); | ||||
("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp)); | MPASS(!(inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT))); | ||||
KASSERT(((noref == 1) && (inp->inp_in_hpts == 1)) || | |||||
((noref == 0) && (inp->inp_in_hpts == 0)), | hptsh = &hpts->p_hptss[inp->inp_hptsslot]; | ||||
("%s: hpts:%p inp:%p already on the hpts?", | |||||
__FUNCTION__, hpts, inp)); | if (inp->inp_in_hpts == IHPTS_NONE) { | ||||
TAILQ_INSERT_TAIL(head, inp, inp_hpts); | inp->inp_in_hpts = IHPTS_ONQUEUE; | ||||
inp->inp_in_hpts = 1; | |||||
hpts->p_on_queue_cnt++; | |||||
if (noref == 0) { | |||||
in_pcbref(inp); | in_pcbref(inp); | ||||
} else if (inp->inp_in_hpts == IHPTS_MOVING) { | |||||
inp->inp_in_hpts = IHPTS_ONQUEUE; | |||||
} else | |||||
MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE); | |||||
inp->inp_hpts_gencnt = hptsh->gencnt; | |||||
TAILQ_INSERT_TAIL(&hptsh->head, inp, inp_hpts); | |||||
hptsh->count++; | |||||
hpts->p_on_queue_cnt++; | |||||
} | } | ||||
} | |||||
static struct tcp_hpts_entry * | static struct tcp_hpts_entry * | ||||
tcp_hpts_lock(struct inpcb *inp) | tcp_hpts_lock(struct inpcb *inp) | ||||
{ | { | ||||
struct tcp_hpts_entry *hpts; | struct tcp_hpts_entry *hpts; | ||||
int32_t hpts_num; | |||||
again: | INP_LOCK_ASSERT(inp); | ||||
hpts_num = inp->inp_hpts_cpu; | |||||
hpts = tcp_pace.rp_ent[hpts_num]; | hpts = tcp_pace.rp_ent[inp->inp_hpts_cpu]; | ||||
KASSERT(mtx_owned(&hpts->p_mtx) == 0, | HPTS_LOCK(hpts); | ||||
("Hpts:%p owns mtx prior-to lock line:%d", | |||||
hpts, __LINE__)); | |||||
mtx_lock(&hpts->p_mtx); | |||||
if (hpts_num != inp->inp_hpts_cpu) { | |||||
mtx_unlock(&hpts->p_mtx); | |||||
goto again; | |||||
} | |||||
return (hpts); | return (hpts); | ||||
} | } | ||||
static struct tcp_hpts_entry * | static struct tcp_hpts_entry * | ||||
tcp_dropq_lock(struct inpcb *inp) | tcp_dropq_lock(struct inpcb *inp) | ||||
{ | { | ||||
struct tcp_hpts_entry *hpts; | struct tcp_hpts_entry *hpts; | ||||
int32_t hpts_num; | |||||
again: | INP_LOCK_ASSERT(inp); | ||||
hpts_num = inp->inp_dropq_cpu; | |||||
hpts = tcp_pace.rp_ent[hpts_num]; | hpts = tcp_pace.rp_ent[inp->inp_dropq_cpu]; | ||||
KASSERT(mtx_owned(&hpts->p_mtx) == 0, | HPTS_LOCK(hpts); | ||||
("Hpts:%p owns mtx prior-to lock line:%d", | |||||
hpts, __LINE__)); | |||||
mtx_lock(&hpts->p_mtx); | |||||
if (hpts_num != inp->inp_dropq_cpu) { | |||||
mtx_unlock(&hpts->p_mtx); | |||||
goto again; | |||||
} | |||||
return (hpts); | return (hpts); | ||||
} | } | ||||
static void | static void | ||||
tcp_remove_hpts_ref(struct inpcb *inp, struct tcp_hpts_entry *hpts, int line) | inp_hpts_release(struct inpcb *inp) | ||||
{ | { | ||||
int32_t ret; | bool released __diagused; | ||||
ret = in_pcbrele_wlocked(inp); | inp->inp_in_hpts = IHPTS_NONE; | ||||
KASSERT(ret != 1, ("inpcb:%p release ret 1", inp)); | released = in_pcbrele_wlocked(inp); | ||||
MPASS(released == false); | |||||
} | } | ||||
static void | static void | ||||
tcp_hpts_remove_locked_output(struct tcp_hpts_entry *hpts, struct inpcb *inp, int32_t flags, int32_t line) | |||||
{ | |||||
if (inp->inp_in_hpts) { | |||||
hpts_sane_pace_remove(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], 1); | |||||
tcp_remove_hpts_ref(inp, hpts, line); | |||||
} | |||||
} | |||||
static void | |||||
tcp_dropq_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp) | tcp_dropq_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp) | ||||
{ | { | ||||
bool released __diagused; | bool released __diagused; | ||||
HPTS_MTX_ASSERT(hpts); | HPTS_MTX_ASSERT(hpts); | ||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
if (inp->inp_in_dropq != IHPTS_ONQUEUE) | if (inp->inp_in_dropq != IHPTS_ONQUEUE) | ||||
Show All 34 Lines | |||||
* HPTS_REMOVE_DROPQ - remove from the drop queue of the hpts. | * HPTS_REMOVE_DROPQ - remove from the drop queue of the hpts. | ||||
* Note that you can use one or both values together | * Note that you can use one or both values together | ||||
* and get two actions. | * and get two actions. | ||||
*/ | */ | ||||
void | void | ||||
__tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line) | __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line) | ||||
{ | { | ||||
struct tcp_hpts_entry *hpts; | struct tcp_hpts_entry *hpts; | ||||
struct hptsh *hptsh; | |||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
if (flags & HPTS_REMOVE_OUTPUT) { | |||||
hpts = tcp_hpts_lock(inp); | |||||
tcp_hpts_remove_locked_output(hpts, inp, flags, line); | |||||
mtx_unlock(&hpts->p_mtx); | |||||
} | |||||
if (flags & HPTS_REMOVE_DROPQ) { | if (flags & HPTS_REMOVE_DROPQ) { | ||||
hpts = tcp_dropq_lock(inp); | hpts = tcp_dropq_lock(inp); | ||||
tcp_dropq_remove(hpts, inp); | tcp_dropq_remove(hpts, inp); | ||||
mtx_unlock(&hpts->p_mtx); | mtx_unlock(&hpts->p_mtx); | ||||
} | } | ||||
MPASS(flags & HPTS_REMOVE_OUTPUT); | |||||
hpts = tcp_hpts_lock(inp); | |||||
if (inp->inp_in_hpts == IHPTS_ONQUEUE) { | |||||
hptsh = &hpts->p_hptss[inp->inp_hptsslot]; | |||||
inp->inp_hpts_request = 0; | |||||
if (__predict_true(inp->inp_hpts_gencnt == hptsh->gencnt)) { | |||||
TAILQ_REMOVE(&hptsh->head, inp, inp_hpts); | |||||
MPASS(hptsh->count > 0); | |||||
hptsh->count--; | |||||
MPASS(hpts->p_on_queue_cnt > 0); | |||||
hpts->p_on_queue_cnt--; | |||||
inp_hpts_release(inp); | |||||
} else { | |||||
/* | |||||
* tcp_hptsi() now owns the TAILQ head of this inp. | |||||
* Can't TAILQ_REMOVE, just mark it. | |||||
*/ | |||||
#ifdef INVARIANTS | |||||
struct inpcb *tmp; | |||||
TAILQ_FOREACH(tmp, &hptsh->head, inp_hpts) | |||||
MPASS(tmp != inp); | |||||
#endif | |||||
inp->inp_in_hpts = IHPTS_MOVING; | |||||
inp->inp_hptsslot = -1; | |||||
} | } | ||||
} else if (inp->inp_in_hpts == IHPTS_MOVING) { | |||||
/* | |||||
* Handle a special race condition: | |||||
* tcp_hptsi() moves inpcb to detached tailq | |||||
* tcp_hpts_remove() marks as IHPTS_MOVING, slot = -1 | |||||
* tcp_hpts_insert() sets slot to a meaningful value | |||||
* tcp_hpts_remove() again (we are here!), then in_pcbdrop() | |||||
* tcp_hptsi() finds pcb with meaningful slot and INP_DROPPED | |||||
*/ | |||||
inp->inp_hptsslot = -1; | |||||
} | |||||
HPTS_UNLOCK(hpts); | |||||
} | |||||
bool | |||||
tcp_in_hpts(struct inpcb *inp) | |||||
{ | |||||
return (inp->inp_in_hpts == IHPTS_ONQUEUE); | |||||
} | |||||
static inline int | static inline int | ||||
hpts_slot(uint32_t wheel_slot, uint32_t plus) | hpts_slot(uint32_t wheel_slot, uint32_t plus) | ||||
{ | { | ||||
/* | /* | ||||
* Given a slot on the wheel, what slot | * Given a slot on the wheel, what slot | ||||
* is that plus ticks out? | * is that plus ticks out? | ||||
*/ | */ | ||||
KASSERT(wheel_slot < NUM_OF_HPTSI_SLOTS, ("Invalid tick %u not on wheel", wheel_slot)); | KASSERT(wheel_slot < NUM_OF_HPTSI_SLOTS, ("Invalid tick %u not on wheel", wheel_slot)); | ||||
▲ Show 20 Lines • Show All 146 Lines • ▼ Show 20 Lines | if (avail_on_wheel <= pacer_to_now) { | ||||
* and that tells you how many slots are | * and that tells you how many slots are | ||||
* open that can be inserted into that won't | * open that can be inserted into that won't | ||||
* be touched by the pacer until later. | * be touched by the pacer until later. | ||||
*/ | */ | ||||
return (avail_on_wheel - pacer_to_now); | return (avail_on_wheel - pacer_to_now); | ||||
} | } | ||||
} | } | ||||
static int | |||||
tcp_queue_to_hpts_immediate_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line, int32_t noref) | |||||
{ | |||||
uint32_t need_wake = 0; | |||||
HPTS_MTX_ASSERT(hpts); | |||||
if (inp->inp_in_hpts == 0) { | |||||
/* Ok we need to set it on the hpts in the current slot */ | |||||
inp->inp_hpts_request = 0; | |||||
if ((hpts->p_hpts_active == 0) || | |||||
(hpts->p_wheel_complete)) { | |||||
/* | |||||
* With a sleeping hpts we want to be in the next slot to run; | |||||
* note that in this state p_prev_slot == p_cur_slot. | |||||
*/ | |||||
inp->inp_hptsslot = hpts_slot(hpts->p_prev_slot, 1); | |||||
if ((hpts->p_on_min_sleep == 0) && (hpts->p_hpts_active == 0)) | |||||
need_wake = 1; | |||||
} else if ((void *)inp == hpts->p_inp) { | |||||
/* | |||||
* The hpts system is running and the caller | |||||
* was awoken by the hpts system. | |||||
* We can't allow you to go into the same slot we | |||||
* are in (we don't want a loop :-D). | |||||
*/ | |||||
inp->inp_hptsslot = hpts->p_nxt_slot; | |||||
} else | |||||
inp->inp_hptsslot = hpts->p_runningslot; | |||||
hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, noref); | |||||
if (need_wake) { | |||||
/* | |||||
* Activate the hpts if it is sleeping and its | |||||
* timeout is not 1. | |||||
*/ | |||||
hpts->p_direct_wake = 1; | |||||
tcp_wakehpts(hpts); | |||||
} | |||||
} | |||||
return (need_wake); | |||||
} | |||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
static void | static void | ||||
check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t inp_hptsslot, int line) | check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t inp_hptsslot, int line) | ||||
{ | { | ||||
/* | /* | ||||
* Sanity checks for the pacer with invariants | * Sanity checks for the pacer with invariants | ||||
* on insert. | * on insert. | ||||
*/ | */ | ||||
Show All 19 Lines | KASSERT(yet_to_run <= distance, | ||||
("hpts:%p inp:%p slot:%d distance:%d yet_to_run:%d rs:%d cs:%d", | ("hpts:%p inp:%p slot:%d distance:%d yet_to_run:%d rs:%d cs:%d", | ||||
hpts, inp, inp_hptsslot, | hpts, inp, inp_hptsslot, | ||||
distance, yet_to_run, | distance, yet_to_run, | ||||
hpts->p_runningslot, hpts->p_cur_slot)); | hpts->p_runningslot, hpts->p_cur_slot)); | ||||
} | } | ||||
} | } | ||||
#endif | #endif | ||||
static void | uint32_t | ||||
tcp_hpts_insert_locked(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t slot, int32_t line, | tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag) | ||||
struct hpts_diag *diag, struct timeval *tv) | |||||
{ | { | ||||
uint32_t need_new_to = 0; | struct tcp_hpts_entry *hpts; | ||||
uint32_t wheel_cts; | struct timeval tv; | ||||
int32_t wheel_slot, maxslots, last_slot; | uint32_t slot_on, wheel_cts, last_slot, need_new_to = 0; | ||||
int32_t wheel_slot, maxslots; | |||||
int cpu; | int cpu; | ||||
int8_t need_wakeup = 0; | bool need_wakeup = false; | ||||
HPTS_MTX_ASSERT(hpts); | INP_WLOCK_ASSERT(inp); | ||||
MPASS(!tcp_in_hpts(inp)); | |||||
MPASS(!(inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT))); | |||||
/* | |||||
* We now return the next-slot the hpts will be on, beyond its | |||||
* current run (if up) or where it was when it stopped if it is | |||||
* sleeping. | |||||
*/ | |||||
hpts = tcp_hpts_lock(inp); | |||||
microuptime(&tv); | |||||
if (diag) { | if (diag) { | ||||
memset(diag, 0, sizeof(struct hpts_diag)); | memset(diag, 0, sizeof(struct hpts_diag)); | ||||
diag->p_hpts_active = hpts->p_hpts_active; | diag->p_hpts_active = hpts->p_hpts_active; | ||||
diag->p_prev_slot = hpts->p_prev_slot; | diag->p_prev_slot = hpts->p_prev_slot; | ||||
diag->p_runningslot = hpts->p_runningslot; | diag->p_runningslot = hpts->p_runningslot; | ||||
diag->p_nxt_slot = hpts->p_nxt_slot; | diag->p_nxt_slot = hpts->p_nxt_slot; | ||||
diag->p_cur_slot = hpts->p_cur_slot; | diag->p_cur_slot = hpts->p_cur_slot; | ||||
diag->p_curtick = hpts->p_curtick; | diag->p_curtick = hpts->p_curtick; | ||||
diag->p_lasttick = hpts->p_lasttick; | diag->p_lasttick = hpts->p_lasttick; | ||||
diag->slot_req = slot; | diag->slot_req = slot; | ||||
diag->p_on_min_sleep = hpts->p_on_min_sleep; | diag->p_on_min_sleep = hpts->p_on_min_sleep; | ||||
diag->hpts_sleep_time = hpts->p_hpts_sleep_time; | diag->hpts_sleep_time = hpts->p_hpts_sleep_time; | ||||
} | } | ||||
KASSERT(inp->inp_in_hpts == 0, ("Hpts:%p tp:%p already on hpts and add?", hpts, inp)); | |||||
if (slot == 0) { | if (slot == 0) { | ||||
/* Immediate */ | /* Ok we need to set it on the hpts in the current slot */ | ||||
tcp_queue_to_hpts_immediate_locked(inp, hpts, line, 0); | inp->inp_hpts_request = 0; | ||||
return; | if ((hpts->p_hpts_active == 0) || (hpts->p_wheel_complete)) { | ||||
/* | |||||
* With a sleeping hpts we want to be in the next slot to run; | |||||
* note that in this state p_prev_slot == p_cur_slot. | |||||
*/ | |||||
inp->inp_hptsslot = hpts_slot(hpts->p_prev_slot, 1); | |||||
if ((hpts->p_on_min_sleep == 0) && | |||||
(hpts->p_hpts_active == 0)) | |||||
need_wakeup = true; | |||||
} else | |||||
inp->inp_hptsslot = hpts->p_runningslot; | |||||
if (__predict_true(inp->inp_in_hpts != IHPTS_MOVING)) | |||||
inp_hpts_insert(inp, hpts); | |||||
if (need_wakeup) { | |||||
/* | |||||
* Activate the hpts if it is sleeping and its | |||||
* timeout is not 1. | |||||
*/ | |||||
hpts->p_direct_wake = 1; | |||||
tcp_wakehpts(hpts); | |||||
} | } | ||||
slot_on = hpts->p_nxt_slot; | |||||
HPTS_UNLOCK(hpts); | |||||
return (slot_on); | |||||
} | |||||
/* Get the current time relative to the wheel */ | /* Get the current time relative to the wheel */ | ||||
wheel_cts = tcp_tv_to_hptstick(tv); | wheel_cts = tcp_tv_to_hptstick(&tv); | ||||
/* Map it onto the wheel */ | /* Map it onto the wheel */ | ||||
wheel_slot = tick_to_wheel(wheel_cts); | wheel_slot = tick_to_wheel(wheel_cts); | ||||
/* Now what's the max we can place it at? */ | /* Now what's the max we can place it at? */ | ||||
maxslots = max_slots_available(hpts, wheel_slot, &last_slot); | maxslots = max_slots_available(hpts, wheel_slot, &last_slot); | ||||
if (diag) { | if (diag) { | ||||
diag->wheel_slot = wheel_slot; | diag->wheel_slot = wheel_slot; | ||||
diag->maxslots = maxslots; | diag->maxslots = maxslots; | ||||
diag->wheel_cts = wheel_cts; | diag->wheel_cts = wheel_cts; | ||||
Show All 23 Lines | tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag) | ||||
} | } | ||||
if (diag) { | if (diag) { | ||||
diag->slot_remaining = inp->inp_hpts_request; | diag->slot_remaining = inp->inp_hpts_request; | ||||
diag->inp_hptsslot = inp->inp_hptsslot; | diag->inp_hptsslot = inp->inp_hptsslot; | ||||
} | } | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
check_if_slot_would_be_wrong(hpts, inp, inp->inp_hptsslot, line); | check_if_slot_would_be_wrong(hpts, inp, inp->inp_hptsslot, line); | ||||
#endif | #endif | ||||
hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, 0); | if (__predict_true(inp->inp_in_hpts != IHPTS_MOVING)) | ||||
inp_hpts_insert(inp, hpts); | |||||
if ((hpts->p_hpts_active == 0) && | if ((hpts->p_hpts_active == 0) && | ||||
(inp->inp_hpts_request == 0) && | (inp->inp_hpts_request == 0) && | ||||
(hpts->p_on_min_sleep == 0)) { | (hpts->p_on_min_sleep == 0)) { | ||||
/* | /* | ||||
* The hpts is sleeping and NOT on a minimum | * The hpts is sleeping and NOT on a minimum | ||||
* sleep time, we need to figure out where | * sleep time, we need to figure out where | ||||
* it will wake up at and if we need to reschedule | * it will wake up at and if we need to reschedule | ||||
* its time-out. | * its time-out. | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | if (need_wakeup) { | ||||
co_ret = callout_reset_sbt_on(&hpts->co, sb, 0, | co_ret = callout_reset_sbt_on(&hpts->co, sb, 0, | ||||
hpts_timeout_swi, hpts, cpu, | hpts_timeout_swi, hpts, cpu, | ||||
(C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); | (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); | ||||
if (diag) { | if (diag) { | ||||
diag->need_new_to = need_new_to; | diag->need_new_to = need_new_to; | ||||
diag->co_ret = co_ret; | diag->co_ret = co_ret; | ||||
} | } | ||||
} | } | ||||
} | |||||
uint32_t | |||||
tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag) | |||||
{ | |||||
struct tcp_hpts_entry *hpts; | |||||
uint32_t slot_on; | |||||
struct timeval tv; | |||||
/* | |||||
* We now return the next-slot the hpts will be on, beyond its | |||||
* current run (if up) or where it was when it stopped if it is | |||||
* sleeping. | |||||
*/ | |||||
INP_WLOCK_ASSERT(inp); | |||||
hpts = tcp_hpts_lock(inp); | |||||
microuptime(&tv); | |||||
tcp_hpts_insert_locked(hpts, inp, slot, line, diag, &tv); | |||||
slot_on = hpts->p_nxt_slot; | slot_on = hpts->p_nxt_slot; | ||||
mtx_unlock(&hpts->p_mtx); | HPTS_UNLOCK(hpts); | ||||
return (slot_on); | return (slot_on); | ||||
} | } | ||||
uint32_t | |||||
__tcp_hpts_insert(struct inpcb *inp, uint32_t slot, int32_t line){ | |||||
return (tcp_hpts_insert_diag(inp, slot, line, NULL)); | |||||
} | |||||
void | void | ||||
tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason) | tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason) | ||||
{ | { | ||||
struct tcp_hpts_entry *hpts; | struct tcp_hpts_entry *hpts; | ||||
struct tcpcb *tp = intotcpcb(inp); | struct tcpcb *tp = intotcpcb(inp); | ||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
inp->inp_hpts_drop_reas = reason; | inp->inp_hpts_drop_reas = reason; | ||||
Show All 10 Lines | tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason) | ||||
if ((hpts->p_hpts_active == 0) && (hpts->p_on_min_sleep == 0)){ | if ((hpts->p_hpts_active == 0) && (hpts->p_on_min_sleep == 0)){ | ||||
hpts->p_direct_wake = 1; | hpts->p_direct_wake = 1; | ||||
tcp_wakehpts(hpts); | tcp_wakehpts(hpts); | ||||
} | } | ||||
HPTS_UNLOCK(hpts); | HPTS_UNLOCK(hpts); | ||||
} | } | ||||
static uint16_t | uint16_t | ||||
hpts_random_cpu(struct inpcb *inp){ | hpts_random_cpu(struct inpcb *inp){ | ||||
/* | /* | ||||
* No flow type set, so distribute the load randomly. | * No flow type set, so distribute the load randomly. | ||||
*/ | */ | ||||
uint16_t cpuid; | uint16_t cpuid; | ||||
uint32_t ran; | uint32_t ran; | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines | while (m) { | ||||
m_freem(m); | m_freem(m); | ||||
m = n; | m = n; | ||||
if (m) | if (m) | ||||
n = m->m_nextpkt; | n = m->m_nextpkt; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Do NOT try to optimize the processing of inp's | * Delayed drop functionality is factored out into a separate function, | ||||
* by first pulling off all the inp's into a temporary | * but logic is similar to the logic of tcp_hptsi(). | ||||
* list (e.g. TAILQ_CONCAT). If you do that the subtle | |||||
* interactions of switching CPU's will kill because of | |||||
* problems in the linked list manipulation. Basically | |||||
* you would switch cpu's with the hpts mutex locked | |||||
* but then while you were processing one of the inp's | |||||
* some other one that you switch will get a new | |||||
* packet on the different CPU. It will insert it | |||||
* on the new hpts's input list. Creating a temporary | |||||
* link in the inp will not fix it either, since | |||||
* the other hpts will be doing the same thing and | |||||
* you will both end up using the temporary link. | |||||
* | |||||
* You will die in an ASSERT for tailq corruption if you | |||||
* run INVARIANTS or you will die horribly without | |||||
* INVARIANTS in some unknown way with a corrupt linked | |||||
* list. | |||||
*/ | */ | ||||
static void | static void | ||||
tcp_delayed_drop(struct tcp_hpts_entry *hpts) | tcp_delayed_drop(struct tcp_hpts_entry *hpts) | ||||
{ | { | ||||
TAILQ_HEAD(, inpcb) head = TAILQ_HEAD_INITIALIZER(head); | TAILQ_HEAD(, inpcb) head = TAILQ_HEAD_INITIALIZER(head); | ||||
struct inpcb *inp, *tmp; | struct inpcb *inp, *tmp; | ||||
struct tcpcb *tp; | struct tcpcb *tp; | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | tcp_hpts_set_max_sleep(struct tcp_hpts_entry *hpts, int wrap_loop_cnt) | ||||
uint32_t t = 0, i, fnd = 0; | uint32_t t = 0, i, fnd = 0; | ||||
if ((hpts->p_on_queue_cnt) && (wrap_loop_cnt < 2)) { | if ((hpts->p_on_queue_cnt) && (wrap_loop_cnt < 2)) { | ||||
/* | /* | ||||
* Find next slot that is occupied and use that to | * Find next slot that is occupied and use that to | ||||
* be the sleep time. | * be the sleep time. | ||||
*/ | */ | ||||
for (i = 0, t = hpts_slot(hpts->p_cur_slot, 1); i < NUM_OF_HPTSI_SLOTS; i++) { | for (i = 0, t = hpts_slot(hpts->p_cur_slot, 1); i < NUM_OF_HPTSI_SLOTS; i++) { | ||||
if (TAILQ_EMPTY(&hpts->p_hptss[t]) == 0) { | if (TAILQ_EMPTY(&hpts->p_hptss[t].head) == 0) { | ||||
fnd = 1; | fnd = 1; | ||||
break; | break; | ||||
} | } | ||||
t = (t + 1) % NUM_OF_HPTSI_SLOTS; | t = (t + 1) % NUM_OF_HPTSI_SLOTS; | ||||
} | } | ||||
KASSERT(fnd != 0, ("Hpts:%p cnt:%d but none found", hpts, hpts->p_on_queue_cnt)); | KASSERT(fnd != 0, ("Hpts:%p cnt:%d but none found", hpts, hpts->p_on_queue_cnt)); | ||||
hpts->p_hpts_sleep_time = min((i + 1), hpts_sleep_max); | hpts->p_hpts_sleep_time = min((i + 1), hpts_sleep_max); | ||||
} else { | } else { | ||||
/* No one on the wheel, sleep for all but 400 slots or sleep max */ | /* No one on the wheel, sleep for all but 400 slots or sleep max */ | ||||
hpts->p_hpts_sleep_time = hpts_sleep_max; | hpts->p_hpts_sleep_time = hpts_sleep_max; | ||||
} | } | ||||
} | } | ||||
static int32_t | static int32_t | ||||
tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout) | tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout) | ||||
{ | { | ||||
struct tcpcb *tp; | struct tcpcb *tp; | ||||
struct inpcb *inp = NULL, *ninp; | struct inpcb *inp; | ||||
struct timeval tv; | struct timeval tv; | ||||
uint64_t total_slots_processed = 0; | uint64_t total_slots_processed = 0; | ||||
int32_t slots_to_run, i, error; | int32_t slots_to_run, i, error; | ||||
int32_t paced_cnt = 0; | int32_t paced_cnt = 0; | ||||
int32_t loop_cnt = 0; | int32_t loop_cnt = 0; | ||||
int32_t did_prefetch = 0; | int32_t did_prefetch = 0; | ||||
int32_t prefetch_ninp = 0; | int32_t prefetch_ninp = 0; | ||||
int32_t prefetch_tp = 0; | int32_t prefetch_tp = 0; | ||||
int32_t wrap_loop_cnt = 0; | int32_t wrap_loop_cnt = 0; | ||||
int32_t slot_pos_of_endpoint = 0; | int32_t slot_pos_of_endpoint = 0; | ||||
int32_t orig_exit_slot; | int32_t orig_exit_slot; | ||||
int16_t set_cpu; | |||||
int8_t completed_measure = 0, seen_endpoint = 0; | int8_t completed_measure = 0, seen_endpoint = 0; | ||||
HPTS_MTX_ASSERT(hpts); | HPTS_MTX_ASSERT(hpts); | ||||
NET_EPOCH_ASSERT(); | NET_EPOCH_ASSERT(); | ||||
/* record previous info for any logging */ | /* record previous info for any logging */ | ||||
hpts->saved_lasttick = hpts->p_lasttick; | hpts->saved_lasttick = hpts->p_lasttick; | ||||
hpts->saved_curtick = hpts->p_curtick; | hpts->saved_curtick = hpts->p_curtick; | ||||
hpts->saved_curslot = hpts->p_cur_slot; | hpts->saved_curslot = hpts->p_cur_slot; | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | if (((hpts->p_curtick - hpts->p_lasttick) > | ||||
hpts->p_cur_slot = hpts->p_prev_slot; | hpts->p_cur_slot = hpts->p_prev_slot; | ||||
/* | /* | ||||
* The next slot has guys to run too, and that would | * The next slot has guys to run too, and that would | ||||
* be where we would normally start, so let's move them into | * be where we would normally start, so let's move them into | ||||
* the next slot (p_prev_slot + 2) so that we will | * the next slot (p_prev_slot + 2) so that we will | ||||
* run them; the extra 10 usecs of lateness (by being | * run them; the extra 10 usecs of lateness (by being | ||||
* put behind) does not really matter in this situation. | * put behind) does not really matter in this situation. | ||||
*/ | */ | ||||
#ifdef INVARIANTS | TAILQ_FOREACH(inp, &hpts->p_hptss[hpts->p_nxt_slot].head, | ||||
inp_hpts) { | |||||
MPASS(inp->inp_hptsslot == hpts->p_nxt_slot); | |||||
MPASS(inp->inp_hpts_gencnt == | |||||
hpts->p_hptss[hpts->p_nxt_slot].gencnt); | |||||
MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE); | |||||
/* | /* | ||||
* To prevent a panic we need to update the inpslot to the | * Update gencnt and nextslot accordingly to match | ||||
* new location. This is safe since it takes both the | * the new location. This is safe since it takes both | ||||
* INP lock and the pacer mutex to change the inp_hptsslot. | * the INP lock and the pacer mutex to change the | ||||
* inp_hptsslot and inp_hpts_gencnt. | |||||
*/ | */ | ||||
TAILQ_FOREACH(inp, &hpts->p_hptss[hpts->p_nxt_slot], inp_hpts) { | inp->inp_hpts_gencnt = | ||||
hpts->p_hptss[hpts->p_runningslot].gencnt; | |||||
inp->inp_hptsslot = hpts->p_runningslot; | inp->inp_hptsslot = hpts->p_runningslot; | ||||
} | } | ||||
#endif | TAILQ_CONCAT(&hpts->p_hptss[hpts->p_runningslot].head, | ||||
TAILQ_CONCAT(&hpts->p_hptss[hpts->p_runningslot], | &hpts->p_hptss[hpts->p_nxt_slot].head, inp_hpts); | ||||
&hpts->p_hptss[hpts->p_nxt_slot], inp_hpts); | hpts->p_hptss[hpts->p_runningslot].count += | ||||
hpts->p_hptss[hpts->p_nxt_slot].count; | |||||
hpts->p_hptss[hpts->p_nxt_slot].count = 0; | |||||
hpts->p_hptss[hpts->p_nxt_slot].gencnt++; | |||||
slots_to_run = NUM_OF_HPTSI_SLOTS - 1; | slots_to_run = NUM_OF_HPTSI_SLOTS - 1; | ||||
counter_u64_add(wheel_wrap, 1); | counter_u64_add(wheel_wrap, 1); | ||||
} else { | } else { | ||||
/* | /* | ||||
* Nxt slot is always one after p_runningslot though | * Nxt slot is always one after p_runningslot though | ||||
* it's not usually used unless we are doing wheel wrap. | * it's not usually used unless we are doing wheel wrap. | ||||
*/ | */ | ||||
hpts->p_nxt_slot = hpts->p_prev_slot; | hpts->p_nxt_slot = hpts->p_prev_slot; | ||||
hpts->p_runningslot = hpts_slot(hpts->p_prev_slot, 1); | hpts->p_runningslot = hpts_slot(hpts->p_prev_slot, 1); | ||||
} | } | ||||
KASSERT((((TAILQ_EMPTY(&hpts->p_dropq) != 0) && (hpts->p_dropq_cnt == 0)) || | KASSERT((((TAILQ_EMPTY(&hpts->p_dropq) != 0) && (hpts->p_dropq_cnt == 0)) || | ||||
((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))), | ((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))), | ||||
("%s hpts:%p in_hpts cnt:%d and queue state mismatch", | ("%s hpts:%p in_hpts cnt:%d and queue state mismatch", | ||||
__FUNCTION__, hpts, hpts->p_dropq_cnt)); | __FUNCTION__, hpts, hpts->p_dropq_cnt)); | ||||
HPTS_MTX_ASSERT(hpts); | |||||
if (hpts->p_on_queue_cnt == 0) { | if (hpts->p_on_queue_cnt == 0) { | ||||
goto no_one; | goto no_one; | ||||
} | } | ||||
HPTS_MTX_ASSERT(hpts); | |||||
for (i = 0; i < slots_to_run; i++) { | for (i = 0; i < slots_to_run; i++) { | ||||
struct inpcb *inp, *ninp; | |||||
TAILQ_HEAD(, inpcb) head = TAILQ_HEAD_INITIALIZER(head); | |||||
struct hptsh *hptsh; | |||||
uint32_t runningslot, gencnt; | |||||
/* | /* | ||||
* Calculate our delay, if there are no extra ticks there | * Calculate our delay, if there are no extra ticks there | ||||
* was not any (i.e. if slots_to_run == 1, no delay). | * was not any (i.e. if slots_to_run == 1, no delay). | ||||
*/ | */ | ||||
hpts->p_delayed_by = (slots_to_run - (i + 1)) * HPTS_TICKS_PER_SLOT; | hpts->p_delayed_by = (slots_to_run - (i + 1)) * | ||||
HPTS_MTX_ASSERT(hpts); | HPTS_TICKS_PER_SLOT; | ||||
while ((inp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_runningslot])) != NULL) { | |||||
HPTS_MTX_ASSERT(hpts); | runningslot = hpts->p_runningslot; | ||||
hptsh = &hpts->p_hptss[runningslot]; | |||||
TAILQ_SWAP(&head, &hptsh->head, inpcb, inp_hpts); | |||||
hpts->p_on_queue_cnt -= hptsh->count; | |||||
hptsh->count = 0; | |||||
gencnt = hptsh->gencnt++; | |||||
HPTS_UNLOCK(hpts); | |||||
TAILQ_FOREACH_SAFE(inp, &head, inp_hpts, ninp) { | |||||
bool set_cpu; | |||||
if (ninp != NULL) { | |||||
/* We prefetch the next inp if possible */ | |||||
kern_prefetch(ninp, &prefetch_ninp); | |||||
prefetch_ninp = 1; | |||||
} | |||||
/* For debugging */ | /* For debugging */ | ||||
if (seen_endpoint == 0) { | if (seen_endpoint == 0) { | ||||
seen_endpoint = 1; | seen_endpoint = 1; | ||||
orig_exit_slot = slot_pos_of_endpoint = hpts->p_runningslot; | orig_exit_slot = slot_pos_of_endpoint = | ||||
runningslot; | |||||
} else if (completed_measure == 0) { | } else if (completed_measure == 0) { | ||||
/* Record the new position */ | /* Record the new position */ | ||||
orig_exit_slot = hpts->p_runningslot; | orig_exit_slot = runningslot; | ||||
} | } | ||||
total_slots_processed++; | total_slots_processed++; | ||||
hpts->p_inp = inp; | |||||
paced_cnt++; | paced_cnt++; | ||||
KASSERT(hpts->p_runningslot == inp->inp_hptsslot, | |||||
("Hpts:%p inp:%p slot mis-aligned %u vs %u", | INP_WLOCK(inp); | ||||
hpts, inp, hpts->p_runningslot, inp->inp_hptsslot)); | |||||
/* Now pull it */ | |||||
if (inp->inp_hpts_cpu_set == 0) { | if (inp->inp_hpts_cpu_set == 0) { | ||||
set_cpu = 1; | set_cpu = true; | ||||
} else { | } else { | ||||
set_cpu = 0; | set_cpu = false; | ||||
} | } | ||||
hpts_sane_pace_remove(hpts, inp, &hpts->p_hptss[hpts->p_runningslot], 0); | |||||
if ((ninp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_runningslot])) != NULL) { | if (__predict_false(inp->inp_in_hpts == IHPTS_MOVING)) { | ||||
/* We prefetch the next inp if possible */ | if (inp->inp_hptsslot == -1) { | ||||
kern_prefetch(ninp, &prefetch_ninp); | inp->inp_in_hpts = IHPTS_NONE; | ||||
prefetch_ninp = 1; | if (in_pcbrele_wlocked(inp) == false) | ||||
INP_WUNLOCK(inp); | |||||
} else { | |||||
HPTS_LOCK(hpts); | |||||
inp_hpts_insert(inp, hpts); | |||||
HPTS_UNLOCK(hpts); | |||||
INP_WUNLOCK(inp); | |||||
} | } | ||||
continue; | |||||
} | |||||
MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE); | |||||
MPASS(!(inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT))); | |||||
KASSERT(runningslot == inp->inp_hptsslot, | |||||
("Hpts:%p inp:%p slot mis-aligned %u vs %u", | |||||
hpts, inp, runningslot, inp->inp_hptsslot)); | |||||
if (inp->inp_hpts_request) { | if (inp->inp_hpts_request) { | ||||
/* | /* | ||||
* This guy is deferred out further in time | * This guy is deferred out further in time | ||||
* then our wheel had available on it. | * then our wheel had available on it. | ||||
* Push him back on the wheel or run it | * Push him back on the wheel or run it | ||||
* depending. | * depending. | ||||
*/ | */ | ||||
uint32_t maxslots, last_slot, remaining_slots; | uint32_t maxslots, last_slot, remaining_slots; | ||||
remaining_slots = slots_to_run - (i + 1); | remaining_slots = slots_to_run - (i + 1); | ||||
if (inp->inp_hpts_request > remaining_slots) { | if (inp->inp_hpts_request > remaining_slots) { | ||||
HPTS_LOCK(hpts); | |||||
/* | /* | ||||
* How far out can we go? | * How far out can we go? | ||||
*/ | */ | ||||
maxslots = max_slots_available(hpts, hpts->p_cur_slot, &last_slot); | maxslots = max_slots_available(hpts, | ||||
hpts->p_cur_slot, &last_slot); | |||||
if (maxslots >= inp->inp_hpts_request) { | if (maxslots >= inp->inp_hpts_request) { | ||||
/* we can place it finally to be processed */ | /* We can place it finally to | ||||
inp->inp_hptsslot = hpts_slot(hpts->p_runningslot, inp->inp_hpts_request); | * be processed. */ | ||||
inp->inp_hptsslot = hpts_slot( | |||||
hpts->p_runningslot, | |||||
inp->inp_hpts_request); | |||||
inp->inp_hpts_request = 0; | inp->inp_hpts_request = 0; | ||||
} else { | } else { | ||||
/* Work off some more time */ | /* Work off some more time */ | ||||
inp->inp_hptsslot = last_slot; | inp->inp_hptsslot = last_slot; | ||||
inp->inp_hpts_request-= maxslots; | inp->inp_hpts_request -= | ||||
maxslots; | |||||
} | } | ||||
hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], __LINE__, 1); | inp_hpts_insert(inp, hpts); | ||||
hpts->p_inp = NULL; | HPTS_UNLOCK(hpts); | ||||
INP_WUNLOCK(inp); | |||||
continue; | continue; | ||||
} | } | ||||
inp->inp_hpts_request = 0; | inp->inp_hpts_request = 0; | ||||
/* Fall through; we will do it now */ | /* Fall through; we will do it now */ | ||||
} | } | ||||
/* | |||||
* We clear the hpts flag here after dealing with | inp_hpts_release(inp); | ||||
* remaining slots. This way anyone looking with the | |||||
* TCB lock will see its on the hpts until just | |||||
* before we unlock. | |||||
*/ | |||||
inp->inp_in_hpts = 0; | |||||
mtx_unlock(&hpts->p_mtx); | |||||
INP_WLOCK(inp); | |||||
if (in_pcbrele_wlocked(inp)) { | |||||
mtx_lock(&hpts->p_mtx); | |||||
hpts->p_inp = NULL; | |||||
continue; | |||||
} | |||||
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) { | |||||
out_now: | |||||
KASSERT(mtx_owned(&hpts->p_mtx) == 0, | |||||
("Hpts:%p owns mtx prior-to lock line:%d", | |||||
hpts, __LINE__)); | |||||
INP_WUNLOCK(inp); | |||||
mtx_lock(&hpts->p_mtx); | |||||
hpts->p_inp = NULL; | |||||
continue; | |||||
} | |||||
tp = intotcpcb(inp); | tp = intotcpcb(inp); | ||||
if ((tp == NULL) || (tp->t_inpcb == NULL)) { | MPASS(tp); | ||||
goto out_now; | |||||
} | |||||
if (set_cpu) { | if (set_cpu) { | ||||
/* | /* | ||||
* Setup so the next time we will move to | * Setup so the next time we will move to | ||||
* the right CPU. This should be a rare | * the right CPU. This should be a rare | ||||
* event. It will sometimes happen when we | * event. It will sometimes happen when we | ||||
* are the client side (usually not the | * are the client side (usually not the | ||||
* server). Somehow tcp_output() gets called | * server). Somehow tcp_output() gets called | ||||
* before the tcp_do_segment() sets the | * before the tcp_do_segment() sets the | ||||
* initial state. This means the r_cpu and | * initial state. This means the r_cpu and | ||||
* r_hpts_cpu is 0. We get on the hpts, and | * r_hpts_cpu is 0. We get on the hpts, and | ||||
* then tcp_input() gets called setting up | * then tcp_input() gets called setting up | ||||
* the r_cpu to the correct value. The hpts | * the r_cpu to the correct value. The hpts | ||||
* goes off and sees the mis-match. We | * goes off and sees the mis-match. We | ||||
* simply correct it here and the CPU will | * simply correct it here and the CPU will | ||||
* switch to the new hpts next time the tcb | * switch to the new hpts next time the tcb | ||||
* gets added to the hpts (not this one) | * gets added to the hpts (not this one) | ||||
* :-) | * :-) | ||||
*/ | */ | ||||
tcp_set_hpts(inp); | tcp_set_hpts(inp); | ||||
} | } | ||||
#ifdef VIMAGE | |||||
CURVNET_SET(inp->inp_vnet); | CURVNET_SET(inp->inp_vnet); | ||||
#endif | |||||
/* Lets do any logging that we might want to */ | /* Lets do any logging that we might want to */ | ||||
if (hpts_does_tp_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) { | if (hpts_does_tp_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) { | ||||
tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, from_callout); | tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, from_callout); | ||||
} | } | ||||
/* | |||||
* There is a hole here, we get the refcnt on the | |||||
* inp so it will still be preserved but to make | |||||
* sure we can get the INP we need to hold the p_mtx | |||||
* above while we pull out the tp/inp, as long as | |||||
* fini gets the lock first we are assured of having | |||||
* a sane INP we can lock and test. | |||||
*/ | |||||
KASSERT(mtx_owned(&hpts->p_mtx) == 0, | |||||
("Hpts:%p owns mtx prior-to tcp_output call line:%d", | |||||
hpts, __LINE__)); | |||||
if (tp->t_fb_ptr != NULL) { | if (tp->t_fb_ptr != NULL) { | ||||
kern_prefetch(tp->t_fb_ptr, &did_prefetch); | kern_prefetch(tp->t_fb_ptr, &did_prefetch); | ||||
did_prefetch = 1; | did_prefetch = 1; | ||||
} | } | ||||
if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) && tp->t_in_pkt) { | if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) && tp->t_in_pkt) { | ||||
error = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0); | error = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0); | ||||
if (error) { | if (error) { | ||||
Show All 36 Lines | TAILQ_FOREACH_SAFE(inp, &head, inp_hpts, ninp) { | ||||
* cause us to load cache with a useless | * cause us to load cache with a useless | ||||
* address (to us). | * address (to us). | ||||
*/ | */ | ||||
kern_prefetch(ninp->inp_ppcb, &prefetch_tp); | kern_prefetch(ninp->inp_ppcb, &prefetch_tp); | ||||
prefetch_tp = 1; | prefetch_tp = 1; | ||||
} | } | ||||
INP_WUNLOCK(inp); | INP_WUNLOCK(inp); | ||||
skip_pacing: | skip_pacing: | ||||
#ifdef VIMAGE | |||||
CURVNET_RESTORE(); | CURVNET_RESTORE(); | ||||
#endif | |||||
INP_UNLOCK_ASSERT(inp); | |||||
KASSERT(mtx_owned(&hpts->p_mtx) == 0, | |||||
("Hpts:%p owns mtx prior-to lock line:%d", | |||||
hpts, __LINE__)); | |||||
mtx_lock(&hpts->p_mtx); | |||||
hpts->p_inp = NULL; | |||||
} | } | ||||
if (seen_endpoint) { | if (seen_endpoint) { | ||||
/* | /* | ||||
* We now have an accurate distance between | * We now have an accurate distance between | ||||
* slot_pos_of_endpoint <-> orig_exit_slot | * slot_pos_of_endpoint <-> orig_exit_slot | ||||
* to tell us how late we were, orig_exit_slot | * to tell us how late we were, orig_exit_slot | ||||
* is where we calculated the end of our cycle to | * is where we calculated the end of our cycle to | ||||
* be when we first entered. | * be when we first entered. | ||||
*/ | */ | ||||
completed_measure = 1; | completed_measure = 1; | ||||
} | } | ||||
HPTS_MTX_ASSERT(hpts); | HPTS_LOCK(hpts); | ||||
hpts->p_inp = NULL; | |||||
hpts->p_runningslot++; | hpts->p_runningslot++; | ||||
if (hpts->p_runningslot >= NUM_OF_HPTSI_SLOTS) { | if (hpts->p_runningslot >= NUM_OF_HPTSI_SLOTS) { | ||||
hpts->p_runningslot = 0; | hpts->p_runningslot = 0; | ||||
} | } | ||||
} | } | ||||
no_one: | no_one: | ||||
HPTS_MTX_ASSERT(hpts); | HPTS_MTX_ASSERT(hpts); | ||||
hpts->p_delayed_by = 0; | hpts->p_delayed_by = 0; | ||||
▲ Show 20 Lines • Show All 386 Lines • ▼ Show 20 Lines | tcp_init_hptsi(void *st) | ||||
sbintime_t sb; | sbintime_t sb; | ||||
struct tcp_hpts_entry *hpts; | struct tcp_hpts_entry *hpts; | ||||
struct pcpu *pc; | struct pcpu *pc; | ||||
cpuset_t cs; | cpuset_t cs; | ||||
char unit[16]; | char unit[16]; | ||||
uint32_t ncpus = mp_ncpus ? mp_ncpus : MAXCPU; | uint32_t ncpus = mp_ncpus ? mp_ncpus : MAXCPU; | ||||
int count, domain, cpu; | int count, domain, cpu; | ||||
tcp_pace.rp_proc = NULL; | |||||
tcp_pace.rp_num_hptss = ncpus; | tcp_pace.rp_num_hptss = ncpus; | ||||
hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK); | hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK); | ||||
hpts_loops = counter_u64_alloc(M_WAITOK); | hpts_loops = counter_u64_alloc(M_WAITOK); | ||||
back_tosleep = counter_u64_alloc(M_WAITOK); | back_tosleep = counter_u64_alloc(M_WAITOK); | ||||
combined_wheel_wrap = counter_u64_alloc(M_WAITOK); | combined_wheel_wrap = counter_u64_alloc(M_WAITOK); | ||||
wheel_wrap = counter_u64_alloc(M_WAITOK); | wheel_wrap = counter_u64_alloc(M_WAITOK); | ||||
hpts_wake_timeout = counter_u64_alloc(M_WAITOK); | hpts_wake_timeout = counter_u64_alloc(M_WAITOK); | ||||
hpts_direct_awakening = counter_u64_alloc(M_WAITOK); | hpts_direct_awakening = counter_u64_alloc(M_WAITOK); | ||||
Show All 18 Lines | for (i = 0; i < tcp_pace.rp_num_hptss; i++) { | ||||
* Init all the hpts structures that are not specifically | * Init all the hpts structures that are not specifically | ||||
* zero'd by the allocations. Also lets attach them to the | * zero'd by the allocations. Also lets attach them to the | ||||
* appropriate sysctl block as well. | * appropriate sysctl block as well. | ||||
*/ | */ | ||||
mtx_init(&hpts->p_mtx, "tcp_hpts_lck", | mtx_init(&hpts->p_mtx, "tcp_hpts_lck", | ||||
"hpts", MTX_DEF | MTX_DUPOK); | "hpts", MTX_DEF | MTX_DUPOK); | ||||
TAILQ_INIT(&hpts->p_dropq); | TAILQ_INIT(&hpts->p_dropq); | ||||
for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) { | for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) { | ||||
TAILQ_INIT(&hpts->p_hptss[j]); | TAILQ_INIT(&hpts->p_hptss[j].head); | ||||
hpts->p_hptss[j].count = 0; | |||||
hpts->p_hptss[j].gencnt = 0; | |||||
} | } | ||||
sysctl_ctx_init(&hpts->hpts_ctx); | sysctl_ctx_init(&hpts->hpts_ctx); | ||||
sprintf(unit, "%d", i); | sprintf(unit, "%d", i); | ||||
hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx, | hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx, | ||||
SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts), | SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts), | ||||
OID_AUTO, | OID_AUTO, | ||||
unit, | unit, | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
▲ Show 20 Lines • Show All 125 Lines • Show Last 20 Lines |