diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -28,21 +28,6 @@
 #ifndef __tcp_hpts_h__
 #define __tcp_hpts_h__
 
-/*
- * The hpts uses a 102400 wheel. The wheel
- * defines the time in 10 usec increments (102400 x 10).
- * This gives a range of 10usec - 1024ms to place
- * an entry within. If the user requests more than
- * 1.024 second, a remaineder is attached and the hpts
- * when seeing the remainder will re-insert the
- * inpcb forward in time from where it is until
- * the remainder is zero.
- */
-
-#define NUM_OF_HPTSI_SLOTS 102400
-
-TAILQ_HEAD(hptsh, inpcb);
-
 /* Number of useconds in a hpts tick */
 #define HPTS_TICKS_PER_SLOT 10
 #define HPTS_MS_TO_SLOTS(x) ((x * 100) + 1)
@@ -85,68 +70,6 @@
 
 #define DEFAULT_CONNECTION_THESHOLD 100
 
-#ifdef _KERNEL
-/* Each hpts has its own p_mtx which is used for locking */
-struct tcp_hpts_entry {
-	/* Cache line 0x00 */
-	struct mtx p_mtx;	/* Mutex for hpts */
-	struct timeval p_mysleep;	/* Our min sleep time */
-	uint64_t syscall_cnt;
-	uint64_t sleeping;	/* What the actual sleep was (if sleeping) */
-	uint16_t p_hpts_active; /* Flag that says hpts is awake */
-	uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */
-	uint32_t p_curtick;	/* Tick in 10 us the hpts is going to */
-	uint32_t p_runningslot; /* Current tick we are at if we are running */
-	uint32_t p_prev_slot;	/* Previous slot we were on */
-	uint32_t p_cur_slot;	/* Current slot in wheel hpts is draining */
-	uint32_t p_nxt_slot;	/* The next slot outside the current range of
-				 * slots that the hpts is running on. */
-	int32_t p_on_queue_cnt;	/* Count on queue in this hpts */
-	uint32_t p_lasttick;	/* Last tick before the current one */
-	uint8_t p_direct_wake :1, /* boolean */
-		p_on_min_sleep:1, /* boolean */
-		p_hpts_wake_scheduled:1, /* boolean */
-		p_avail:5;
-	uint8_t p_fill[3];	/* Fill to 32 bits */
-	/* Cache line 0x40 */
-	void *p_inp;
-	struct hptsh p_input;	/* For the tcp-input runner */
-	/* Hptsi wheel */
-	struct hptsh *p_hptss;
-	int32_t p_on_inqueue_cnt; /* Count on input queue in this hpts */
-	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max
-					 * of 255ms */
-	uint32_t overidden_sleep;	/* what was overrided by min-sleep for logging */
-	uint32_t saved_lasttick;	/* for logging */
-	uint32_t saved_curtick;		/* for logging */
-	uint32_t saved_curslot;		/* for logging */
-	uint32_t saved_prev_slot;	/* for logging */
-	uint32_t p_delayed_by;	/* How much were we delayed by */
-	/* Cache line 0x80 */
-	struct sysctl_ctx_list hpts_ctx;
-	struct sysctl_oid *hpts_root;
-	struct intr_event *ie;
-	void *ie_cookie;
-	uint16_t p_num;		/* The hpts number one per cpu */
-	uint16_t p_cpu;		/* The hpts CPU */
-	/* There is extra space in here */
-	/* Cache line 0x100 */
-	struct callout co __aligned(CACHE_LINE_SIZE);
-} __aligned(CACHE_LINE_SIZE);
-
-struct tcp_hptsi {
-	struct proc *rp_proc;	/* Process structure for hpts */
-	struct tcp_hpts_entry **rp_ent;	/* Array of hptss */
-	uint32_t *cts_last_ran;
-	uint32_t rp_num_hptss;	/* Number of hpts threads */
-};
-
-#endif
-
-#define HPTS_REMOVE_INPUT 0x01
-#define HPTS_REMOVE_OUTPUT 0x02
-#define HPTS_REMOVE_ALL (HPTS_REMOVE_INPUT | HPTS_REMOVE_OUTPUT)
-
 /*
  * When using the hpts, a TCP stack must make sure
  * that once a INP_DROPPED flag is applied to a INP
@@ -191,15 +114,11 @@
 
 
 #ifdef _KERNEL
-#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED)
-struct tcp_hpts_entry *tcp_hpts_lock(struct inpcb *inp);
-struct tcp_hpts_entry *tcp_input_lock(struct inpcb *inp);
-int __tcp_queue_to_hpts_immediate(struct inpcb *inp, int32_t line);
-#define tcp_queue_to_hpts_immediate(a)__tcp_queue_to_hpts_immediate(a, __LINE__)
-
-struct tcp_hpts_entry *tcp_cur_hpts(struct inpcb *inp);
 #define tcp_hpts_remove(a, b) __tcp_hpts_remove(a, b, __LINE__)
 void __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line);
+#define HPTS_REMOVE_INPUT 0x01
+#define HPTS_REMOVE_OUTPUT 0x02
+#define HPTS_REMOVE_ALL (HPTS_REMOVE_INPUT | HPTS_REMOVE_OUTPUT)
 
 static inline bool
 tcp_in_hpts(struct inpcb *inp)
@@ -238,15 +157,6 @@
 uint32_t
 tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag);
 
-int
- __tcp_queue_to_input_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line);
-#define tcp_queue_to_input_locked(a, b) __tcp_queue_to_input_locked(a, b, __LINE__);
-int
-__tcp_queue_to_input(struct inpcb *inp, int32_t line);
-#define tcp_queue_to_input(a) __tcp_queue_to_input(a, __LINE__)
-
-uint16_t tcp_hpts_delayedby(struct inpcb *inp);
-
 void __tcp_set_hpts(struct inpcb *inp, int32_t line);
 #define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__)
 
@@ -255,8 +165,6 @@
 
 void tcp_run_hpts(void);
 
-uint16_t hpts_random_cpu(struct inpcb *inp);
-
 extern int32_t tcp_min_hptsi_time;
 
 #endif /* _KERNEL */
@@ -290,13 +198,6 @@
 }
 
 #ifdef _KERNEL
-
-static __inline void
-tcp_hpts_unlock(struct tcp_hpts_entry *hpts)
-{
-	mtx_unlock(&hpts->p_mtx);
-}
-
 static __inline uint32_t
 tcp_gethptstick(struct timeval *sv)
 {
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -187,6 +187,76 @@
 #include
 #endif
 
+/*
+ * The hpts uses a 102400 wheel. The wheel
+ * defines the time in 10 usec increments (102400 x 10).
+ * This gives a range of 10usec - 1024ms to place
+ * an entry within. If the user requests more than
+ * 1.024 seconds, a remainder is attached and the hpts
+ * when seeing the remainder will re-insert the
+ * inpcb forward in time from where it is until
+ * the remainder is zero.
+ */
+
+#define NUM_OF_HPTSI_SLOTS 102400
+
+/* Each hpts has its own p_mtx which is used for locking */
+#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED) /* asserts that p_mtx is owned */
+TAILQ_HEAD(hptsh, inpcb);
+struct tcp_hpts_entry {
+	/* Cache line 0x00 */
+	struct mtx p_mtx;	/* Mutex for hpts */
+	struct timeval p_mysleep;	/* Our min sleep time */
+	uint64_t syscall_cnt;
+	uint64_t sleeping;	/* What the actual sleep was (if sleeping) */
+	uint16_t p_hpts_active; /* Flag that says hpts is awake */
+	uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */
+	uint32_t p_curtick;	/* Tick in 10 us the hpts is going to */
+	uint32_t p_runningslot; /* Current tick we are at if we are running */
+	uint32_t p_prev_slot;	/* Previous slot we were on */
+	uint32_t p_cur_slot;	/* Current slot in wheel hpts is draining */
+	uint32_t p_nxt_slot;	/* The next slot outside the current range of
+				 * slots that the hpts is running on. */
+	int32_t p_on_queue_cnt;	/* Count on queue in this hpts */
+	uint32_t p_lasttick;	/* Last tick before the current one */
+	uint8_t p_direct_wake :1, /* boolean */
+		p_on_min_sleep:1, /* boolean */
+		p_hpts_wake_scheduled:1, /* boolean */
+		p_avail:5;
+	uint8_t p_fill[3];	/* Fill to 32 bits */
+	/* Cache line 0x40 */
+	void *p_inp;
+	struct hptsh p_input;	/* For the tcp-input runner */
+	/* Hptsi wheel */
+	struct hptsh *p_hptss;
+	int32_t p_on_inqueue_cnt; /* Count on input queue in this hpts */
+	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max
+					 * of 255ms */
+	uint32_t overidden_sleep;	/* what was overridden by min-sleep for logging */
+	uint32_t saved_lasttick;	/* for logging */
+	uint32_t saved_curtick;		/* for logging */
+	uint32_t saved_curslot;		/* for logging */
+	uint32_t saved_prev_slot;	/* for logging */
+	uint32_t p_delayed_by;	/* How much were we delayed by */
+	/* Cache line 0x80 */
+	struct sysctl_ctx_list hpts_ctx;
+	struct sysctl_oid *hpts_root;
+	struct intr_event *ie;
+	void *ie_cookie;
+	uint16_t p_num;		/* The hpts number one per cpu */
+	uint16_t p_cpu;		/* The hpts CPU */
+	/* There is extra space in here */
+	/* Cache line 0x100 */
+	struct callout co __aligned(CACHE_LINE_SIZE);
+} __aligned(CACHE_LINE_SIZE);
+
+struct tcp_hptsi {
+	struct proc *rp_proc;	/* Process structure for hpts */
+	struct tcp_hpts_entry **rp_ent;	/* Array of hptss */
+	uint32_t *cts_last_ran;
+	uint32_t rp_num_hptss;	/* Number of hpts threads */
+};
+
 MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");
 #ifdef RSS
 static int tcp_bind_threads = 1;
@@ -229,12 +299,10 @@
 
 static int32_t tcp_hpts_precision = 120;
 
-struct hpts_domain_info {
+static struct hpts_domain_info {
 	int count;
 	int cpu[MAXCPU];
-};
-
-struct hpts_domain_info hpts_domains[MAXMEMDOM];
+} hpts_domains[MAXMEMDOM];
 
 counter_u64_t hpts_hopelessly_behind;
 
@@ -525,18 +593,7 @@
 	in_pcbref(inp);
 }
 
-struct tcp_hpts_entry *
-tcp_cur_hpts(struct inpcb *inp)
-{
-	int32_t hpts_num;
-	struct tcp_hpts_entry *hpts;
-
-	hpts_num = inp->inp_hpts_cpu;
-	hpts = tcp_pace.rp_ent[hpts_num];
-	return (hpts);
-}
-
-struct tcp_hpts_entry *
+static struct tcp_hpts_entry *
 tcp_hpts_lock(struct inpcb *inp)
 {
 	struct tcp_hpts_entry *hpts;
@@ -556,7 +613,7 @@
 	return (hpts);
 }
 
-struct tcp_hpts_entry *
+static struct tcp_hpts_entry *
 tcp_input_lock(struct inpcb *inp)
 {
 	struct tcp_hpts_entry *hpts;
@@ -837,19 +894,6 @@
 	return (need_wake);
 }
 
-int
-__tcp_queue_to_hpts_immediate(struct inpcb *inp, int32_t line)
-{
-	int32_t ret;
-	struct tcp_hpts_entry *hpts;
-
-	INP_WLOCK_ASSERT(inp);
-	hpts = tcp_hpts_lock(inp);
-	ret = tcp_queue_to_hpts_immediate_locked(inp, hpts, line, 0);
-	mtx_unlock(&hpts->p_mtx);
-	return (ret);
-}
-
 #ifdef INVARIANTS
 static void
 check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t inp_hptsslot, int line)
@@ -1052,46 +1096,6 @@
 	return (tcp_hpts_insert_diag(inp, slot, line, NULL));
 }
 
-int
-__tcp_queue_to_input_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line)
-{
-	int32_t retval = 0;
-
-	HPTS_MTX_ASSERT(hpts);
-	if (inp->inp_in_input == 0) {
-		/* Ok we need to set it on the hpts in the current slot */
-		hpts_sane_input_insert(hpts, inp, line);
-		retval = 1;
-		if ((hpts->p_hpts_active == 0) &&
-		    (hpts->p_on_min_sleep == 0)){
-			/*
-			 * Activate the hpts if it is sleeping.
-			 */
-			retval = 2;
-			hpts->p_direct_wake = 1;
-			tcp_wakehpts(hpts);
-		}
-	} else if ((hpts->p_hpts_active == 0) &&
-	    (hpts->p_on_min_sleep == 0)){
-		retval = 4;
-		hpts->p_direct_wake = 1;
-		tcp_wakehpts(hpts);
-	}
-	return (retval);
-}
-
-int32_t
-__tcp_queue_to_input(struct inpcb *inp, int line)
-{
-	struct tcp_hpts_entry *hpts;
-	int32_t ret;
-
-	hpts = tcp_input_lock(inp);
-	ret = __tcp_queue_to_input_locked(inp, hpts, line);
-	mtx_unlock(&hpts->p_mtx);
-	return (ret);
-}
-
 void
 __tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason, int32_t line)
 {
@@ -1120,7 +1124,7 @@
 	mtx_unlock(&hpts->p_mtx);
 }
 
-uint16_t
+static uint16_t
 hpts_random_cpu(struct inpcb *inp){
 	/*
 	 * No flow type set distribute the load randomly.
@@ -1820,11 +1824,6 @@
 	mtx_unlock(&hpts->p_mtx);
 }
 
-uint16_t
-tcp_hpts_delayedby(struct inpcb *inp){
-	return (tcp_pace.rp_ent[inp->inp_hpts_cpu]->p_delayed_by);
-}
-
 static void
 __tcp_run_hpts(struct tcp_hpts_entry *hpts)
 {