Changeset View
Changeset View
Standalone View
Standalone View
netinet/tcp_subr.c
Context not available. | |||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/jail.h> | #include <sys/jail.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/refcount.h> | |||||
#include <sys/mbuf.h> | #include <sys/mbuf.h> | ||||
#ifdef INET6 | #ifdef INET6 | ||||
#include <sys/domain.h> | #include <sys/domain.h> | ||||
Context not available. | |||||
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; | VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; | ||||
#endif | #endif | ||||
/*
 * Read/write lock held (shared or exclusive) by the code in this file that
 * walks or modifies the t_functions list and that reads or changes
 * tcp_func_set_ptr.
 */
struct rwlock tcp_function_lock; | |||||
static int | static int | ||||
sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) | sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
Context not available. | |||||
void *ip4hdr, const void *ip6hdr); | void *ip4hdr, const void *ip6hdr); | ||||
static void tcp_timer_discard(struct tcpcb *, uint32_t); | static void tcp_timer_discard(struct tcpcb *, uint32_t); | ||||
static struct tcp_function_block tcp_def_funcblk = { | |||||
"default", | |||||
tcp_output, | |||||
tcp_do_segment, | |||||
tcp_default_ctloutput, | |||||
NULL, | |||||
NULL, | |||||
NULL, | |||||
NULL, | |||||
NULL, | |||||
NULL, | |||||
NULL, | |||||
0, | |||||
0 | |||||
}; | |||||
struct tcp_funchead t_functions; | |||||
static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk; | |||||
static struct tcp_function_block * | |||||
find_tcp_functions_locked(struct tcp_function_set *fs) | |||||
{ | |||||
struct tcp_function *f; | |||||
struct tcp_function_block *blk=NULL; | |||||
TAILQ_FOREACH(f, &t_functions, next) { | |||||
if (strcmp(f->fb->tcp_block_name, fs->function_set_name) == 0) { | |||||
blk = f->fb; | |||||
break; | |||||
} | |||||
} | |||||
return(blk); | |||||
} | |||||
static struct tcp_function_block * | |||||
find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s) | |||||
{ | |||||
struct tcp_function_block *rblk=NULL; | |||||
struct tcp_function *f; | |||||
TAILQ_FOREACH(f, &t_functions, next) { | |||||
if (f->fb == blk) { | |||||
rblk = blk; | |||||
if (s) { | |||||
*s = f; | |||||
} | |||||
break; | |||||
} | |||||
} | |||||
return (rblk); | |||||
} | |||||
struct tcp_function_block * | |||||
find_and_ref_tcp_functions(struct tcp_function_set *fs) | |||||
{ | |||||
struct tcp_function_block *blk; | |||||
rw_rlock(&tcp_function_lock); | |||||
blk = find_tcp_functions_locked(fs); | |||||
if (blk) | |||||
refcount_acquire(&blk->refcnt); | |||||
rw_runlock(&tcp_function_lock); | |||||
return(blk); | |||||
} | |||||
struct tcp_function_block * | |||||
find_and_ref_tcp_fb(struct tcp_function_block *blk) | |||||
{ | |||||
struct tcp_function_block *rblk; | |||||
rw_rlock(&tcp_function_lock); | |||||
rblk = find_tcp_fb_locked(blk, NULL); | |||||
if (rblk) | |||||
refcount_acquire(&rblk->refcnt); | |||||
rw_runlock(&tcp_function_lock); | |||||
return(rblk); | |||||
} | |||||
static int | |||||
sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
int error=ENOENT; | |||||
struct tcp_function_set fs; | |||||
struct tcp_function_block *blk; | |||||
memset(&fs, 0, sizeof(fs)); | |||||
rw_rlock(&tcp_function_lock); | |||||
blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL); | |||||
if (blk) { | |||||
/* Found him */ | |||||
strcpy(fs.function_set_name, blk->tcp_block_name); | |||||
fs.pcbcnt = blk->refcnt; | |||||
} | |||||
rw_runlock(&tcp_function_lock); | |||||
error = sysctl_handle_string(oidp, fs.function_set_name, | |||||
sizeof(fs.function_set_name), req); | |||||
/* Check for error or no change */ | |||||
if (error != 0 || req->newptr == NULL) | |||||
return(error); | |||||
rw_rlock(&tcp_function_lock); | |||||
blk = find_tcp_functions_locked(&fs); | |||||
if (blk == NULL) { | |||||
error = ENOENT; | |||||
goto done; | |||||
} | |||||
tcp_func_set_ptr = blk; | |||||
jtl: Does this require a write lock to change? (I honestly don't know; what does the lock cover?) | |||||
Not Done Inline ActionsWell it probably should be a write lock.. I will change. Build universe as also found a few problems.. will update. rrs: Well it probably should be a write lock.. I will change.
Build universe as also found a few… | |||||
done: | |||||
rw_runlock(&tcp_function_lock); | |||||
return (error); | |||||
} | |||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default, | |||||
CTLTYPE_STRING | CTLFLAG_RW, | |||||
NULL, 0, sysctl_net_inet_default_tcp_functions, "A", | |||||
"Set/get the default TCP functions"); | |||||
static int | |||||
sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
int error=0, cnt, at; | |||||
struct tcp_function *f, *n; | |||||
char *buffer=NULL; | |||||
size_t sz; | |||||
rw_rlock(&tcp_function_lock); | |||||
cnt = 0; | |||||
TAILQ_FOREACH(f, &t_functions, next) { | |||||
cnt++; | |||||
} | |||||
rw_runlock(&tcp_function_lock); | |||||
sz = ((cnt+1) * TCP_FUNCTION_NAME_LEN_MAX) + cnt; | |||||
buffer = malloc(sz, M_TEMP, M_WAITOK); | |||||
if (buffer == NULL) | |||||
return (ENOMEM); | |||||
memset(buffer, 0, sz); | |||||
error = at = 0; | |||||
n = NULL; | |||||
rw_rlock(&tcp_function_lock); | |||||
TAILQ_FOREACH_SAFE(f, &t_functions, next, n) { | |||||
if (at+TCP_FUNCTION_NAME_LEN_MAX > sz) { | |||||
error = EOVERFLOW; | |||||
break; | |||||
} | |||||
strcpy(&buffer[at], f->fb->tcp_block_name); | |||||
at += strlen(f->fb->tcp_block_name); | |||||
if (n) { | |||||
buffer[at] = ','; | |||||
at++; | |||||
} | |||||
} | |||||
rw_runlock(&tcp_function_lock); | |||||
if (error == 0) { | |||||
sz = strlen(buffer) + 1; | |||||
Not Done Inline ActionsCan't this be simplified to (at + 1)? Saves another strlen() walk of the buffer. jtl: Can't this be simplified to (at + 1)? Saves another strlen() walk of the buffer. | |||||
Not Done Inline Actionssure. rrs: sure.
| |||||
error = sysctl_handle_string(oidp, buffer, sz, req); | |||||
} | |||||
free(buffer, M_TEMP); | |||||
return (error); | |||||
} | |||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available, | |||||
CTLTYPE_STRING|CTLFLAG_RD, | |||||
NULL, 0, sysctl_net_inet_list_available, "A", | |||||
"list available TCP Function sets"); | |||||
static int | |||||
sysctl_net_inet_tcp_functions_list(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
int error=0, n, at; | |||||
struct tcp_function *f; | |||||
struct tcp_function_set *fs=NULL; | |||||
size_t sz; | |||||
rw_rlock(&tcp_function_lock); | |||||
n = 0; | |||||
TAILQ_FOREACH(f, &t_functions, next) { | |||||
n++; | |||||
} | |||||
rw_runlock(&tcp_function_lock); | |||||
if (req->oldptr == NULL) { | |||||
req->oldidx = ((n+2) * sizeof(struct tcp_function_set)); | |||||
return(0); | |||||
} | |||||
sz = n * sizeof(struct tcp_function_set); | |||||
fs = malloc(sz, M_TEMP, M_WAITOK); | |||||
if (fs == NULL) { | |||||
return(ENOMEM); | |||||
} | |||||
at = 0; | |||||
memset(fs, 0, sz); | |||||
rw_rlock(&tcp_function_lock); | |||||
TAILQ_FOREACH(f, &t_functions, next) { | |||||
strcpy(fs[at].function_set_name, f->fb->tcp_block_name); | |||||
fs[at].pcbcnt = f->fb->refcnt; | |||||
at++; | |||||
if (at >= n) | |||||
break; | |||||
} | |||||
rw_runlock(&tcp_function_lock); | |||||
error = SYSCTL_OUT(req, fs, (at * sizeof(struct tcp_function_set))); | |||||
free(fs, M_TEMP); | |||||
return (error); | |||||
} | |||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_list_detail, | |||||
CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RD, 0, 0, | |||||
&sysctl_net_inet_tcp_functions_list, "I", | |||||
"List the TCP function stacks and there reference counts"); | |||||
/* | /* | ||||
* Target size of TCP PCB hash tables. Must be a power of two. | * Target size of TCP PCB hash tables. Must be a power of two. | ||||
* | * | ||||
Context not available. | |||||
#define V_tcpcb_zone VNET(tcpcb_zone) | #define V_tcpcb_zone VNET(tcpcb_zone) | ||||
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); | MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); | ||||
/*
 * Malloc type used for the struct tcp_function list entries that
 * register_tcp_functions() allocates and deregister_tcp_functions() frees.
 */
MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory"); | |||||
static struct mtx isn_mtx; | static struct mtx isn_mtx; | ||||
#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF) | #define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF) | ||||
Context not available. | |||||
return (hashsize); | return (hashsize); | ||||
} | } | ||||
int | |||||
register_tcp_functions(struct tcp_function_block *blk, int wait) | |||||
{ | |||||
struct tcp_function_block *lblk; | |||||
struct tcp_function *n; | |||||
struct tcp_function_set fs; | |||||
if ((blk->tcp_output == NULL) || | |||||
(blk->tcp_do_segment == NULL) || | |||||
(blk->tcp_ctloutput == NULL) || | |||||
(strlen(blk->tcp_block_name) == 0)) { | |||||
/* | |||||
* These functions are required and you | |||||
* need a name. | |||||
*/ | |||||
return (EINVAL); | |||||
} | |||||
if (blk->tcp_timer_stop_all || | |||||
blk->tcp_timers_left || | |||||
blk->tcp_timer_activate || | |||||
blk->tcp_timer_active || | |||||
blk->tcp_timer_stop) { | |||||
/* | |||||
* If you define one timer function you | |||||
* must have them all. | |||||
*/ | |||||
if ((blk->tcp_timer_stop_all == NULL) || | |||||
(blk->tcp_timers_left == NULL) || | |||||
(blk->tcp_timer_activate == NULL) || | |||||
(blk->tcp_timer_active == NULL) || | |||||
(blk->tcp_timer_stop == NULL)) { | |||||
return (EINVAL); | |||||
} | |||||
} | |||||
n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); | |||||
if (n == NULL) { | |||||
return (ENOMEM); | |||||
} | |||||
n->fb = blk; | |||||
strcpy(fs.function_set_name, blk->tcp_block_name); | |||||
rw_wlock(&tcp_function_lock); | |||||
lblk = find_tcp_functions_locked(&fs); | |||||
if (lblk) { | |||||
/* Duplicate name space not allowed */ | |||||
rw_wunlock(&tcp_function_lock); | |||||
free(n, M_TCPFUNCTIONS); | |||||
return (EALREADY); | |||||
} | |||||
refcount_init(&blk->refcnt, 0); | |||||
blk->flags = 0; | |||||
TAILQ_INSERT_TAIL(&t_functions, n, next); | |||||
rw_wunlock(&tcp_function_lock); | |||||
return(0); | |||||
} | |||||
int | |||||
deregister_tcp_functions(struct tcp_function_block *blk) | |||||
{ | |||||
struct tcp_function_block *lblk; | |||||
struct tcp_function *f; | |||||
int error=ENOENT; | |||||
if (strcmp(blk->tcp_block_name, "default") == 0) { | |||||
/* You can't un-register the default */ | |||||
return (EPERM); | |||||
} | |||||
rw_wlock(&tcp_function_lock); | |||||
if (blk == tcp_func_set_ptr) { | |||||
/* You can't free the current default */ | |||||
rw_wunlock(&tcp_function_lock); | |||||
return (EBUSY); | |||||
} | |||||
if (blk->refcnt) { | |||||
/* Still tcb attached, mark it. */ | |||||
blk->flags |= TCP_FUNC_BEING_REMOVED; | |||||
rw_wunlock(&tcp_function_lock); | |||||
return (EBUSY); | |||||
} | |||||
lblk = find_tcp_fb_locked(blk, &f); | |||||
if (lblk) { | |||||
/* Found */ | |||||
TAILQ_REMOVE(&t_functions, f, next); | |||||
f->fb = NULL; | |||||
Not Done Inline ActionsWhy set this to NULL just before freeing? jtl: Why set this to NULL just before freeing? | |||||
Not Done Inline ActionsBecause I am paranoid :-) rrs: Because I am paranoid :-) | |||||
free(f, M_TCPFUNCTIONS); | |||||
error = 0; | |||||
} | |||||
rw_wunlock(&tcp_function_lock); | |||||
return (error); | |||||
} | |||||
void | void | ||||
tcp_init(void) | tcp_init(void) | ||||
{ | { | ||||
Context not available. | |||||
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, | if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, | ||||
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) | &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) | ||||
printf("%s: WARNING: unable to register helper hook\n", __func__); | printf("%s: WARNING: unable to register helper hook\n", __func__); | ||||
/* Setup the tcp function block list */ | |||||
TAILQ_INIT(&t_functions); | |||||
rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0); | |||||
register_tcp_functions(&tcp_def_funcblk, M_WAITOK); | |||||
hashsize = TCBHASHSIZE; | hashsize = TCBHASHSIZE; | ||||
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize); | TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize); | ||||
if (hashsize == 0) { | if (hashsize == 0) { | ||||
Context not available. | |||||
tp->ccv = &tm->ccv; | tp->ccv = &tm->ccv; | ||||
tp->ccv->type = IPPROTO_TCP; | tp->ccv->type = IPPROTO_TCP; | ||||
tp->ccv->ccvc.tcp = tp; | tp->ccv->ccvc.tcp = tp; | ||||
rw_rlock(&tcp_function_lock); | |||||
tp->t_fb = tcp_func_set_ptr; | |||||
refcount_acquire(&tp->t_fb->refcnt); | |||||
rw_runlock(&tcp_function_lock); | |||||
if (tp->t_fb->tcp_fb_init) { | |||||
(*tp->t_fb->tcp_fb_init)(tp); | |||||
} | |||||
/* | /* | ||||
* Use the current system default CC algorithm. | * Use the current system default CC algorithm. | ||||
*/ | */ | ||||
Context not available. | |||||
if (CC_ALGO(tp)->cb_init != NULL) | if (CC_ALGO(tp)->cb_init != NULL) | ||||
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { | if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { | ||||
if (tp->t_fb->tcp_fb_fini) | |||||
(*tp->t_fb->tcp_fb_fini)(tp); | |||||
refcount_release(&tp->t_fb->refcnt); | |||||
uma_zfree(V_tcpcb_zone, tm); | uma_zfree(V_tcpcb_zone, tm); | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
Context not available. | |||||
tp->osd = &tm->osd; | tp->osd = &tm->osd; | ||||
if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { | if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { | ||||
if (tp->t_fb->tcp_fb_fini) | |||||
(*tp->t_fb->tcp_fb_fini)(tp); | |||||
refcount_release(&tp->t_fb->refcnt); | |||||
uma_zfree(V_tcpcb_zone, tm); | uma_zfree(V_tcpcb_zone, tm); | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
Context not available. | |||||
if (TCPS_HAVERCVDSYN(tp->t_state)) { | if (TCPS_HAVERCVDSYN(tp->t_state)) { | ||||
tcp_state_change(tp, TCPS_CLOSED); | tcp_state_change(tp, TCPS_CLOSED); | ||||
(void) tcp_output(tp); | (void) tp->t_fb->tcp_output(tp); | ||||
TCPSTAT_INC(tcps_drops); | TCPSTAT_INC(tcps_drops); | ||||
} else | } else | ||||
TCPSTAT_INC(tcps_conndrops); | TCPSTAT_INC(tcps_conndrops); | ||||
Context not available. | |||||
tcp_timer_stop(tp, TT_KEEP); | tcp_timer_stop(tp, TT_KEEP); | ||||
tcp_timer_stop(tp, TT_2MSL); | tcp_timer_stop(tp, TT_2MSL); | ||||
tcp_timer_stop(tp, TT_DELACK); | tcp_timer_stop(tp, TT_DELACK); | ||||
if (tp->t_fb->tcp_timer_stop_all) { | |||||
/* Call the stop-all function of the methods */ | |||||
tp->t_fb->tcp_timer_stop_all(tp); | |||||
} | |||||
/* | /* | ||||
* If we got enough samples through the srtt filter, | * If we got enough samples through the srtt filter, | ||||
Context not available. | |||||
inp->inp_ppcb = NULL; | inp->inp_ppcb = NULL; | ||||
if ((tp->t_timers->tt_flags & TT_MASK) == 0) { | if ((tp->t_timers->tt_flags & TT_MASK) == 0) { | ||||
/* We own the last reference on tcpcb, let's free it. */ | /* We own the last reference on tcpcb, let's free it. */ | ||||
if ((tp->t_fb->tcp_timers_left) && | |||||
(tp->t_fb->tcp_timers_left(tp))) { | |||||
/* Some fb timers left running! */ | |||||
return; | |||||
} | |||||
if (tp->t_fb->tcp_fb_fini) | |||||
(*tp->t_fb->tcp_fb_fini)(tp); | |||||
refcount_release(&tp->t_fb->refcnt); | |||||
tp->t_inpcb = NULL; | tp->t_inpcb = NULL; | ||||
uma_zfree(V_tcpcb_zone, tp); | uma_zfree(V_tcpcb_zone, tp); | ||||
released = in_pcbrele_wlocked(inp); | released = in_pcbrele_wlocked(inp); | ||||
Context not available. | |||||
tp->t_timers->tt_flags &= ~timer_type; | tp->t_timers->tt_flags &= ~timer_type; | ||||
if ((tp->t_timers->tt_flags & TT_MASK) == 0) { | if ((tp->t_timers->tt_flags & TT_MASK) == 0) { | ||||
/* We own the last reference on this tcpcb, let's free it. */ | /* We own the last reference on this tcpcb, let's free it. */ | ||||
if ((tp->t_fb->tcp_timers_left) && | |||||
(tp->t_fb->tcp_timers_left(tp))) { | |||||
/* Some fb timers left running! */ | |||||
goto leave; | |||||
} | |||||
if (tp->t_fb->tcp_fb_fini) | |||||
(*tp->t_fb->tcp_fb_fini)(tp); | |||||
refcount_release(&tp->t_fb->refcnt); | |||||
tp->t_inpcb = NULL; | tp->t_inpcb = NULL; | ||||
uma_zfree(V_tcpcb_zone, tp); | uma_zfree(V_tcpcb_zone, tp); | ||||
if (in_pcbrele_wlocked(inp)) { | if (in_pcbrele_wlocked(inp)) { | ||||
Context not available. | |||||
return; | return; | ||||
} | } | ||||
} | } | ||||
leave: | |||||
INP_WUNLOCK(inp); | INP_WUNLOCK(inp); | ||||
INP_INFO_RUNLOCK(&V_tcbinfo); | INP_INFO_RUNLOCK(&V_tcbinfo); | ||||
CURVNET_RESTORE(); | CURVNET_RESTORE(); | ||||
Context not available. | |||||
tp->snd_recover = tp->snd_max; | tp->snd_recover = tp->snd_max; | ||||
if (tp->t_flags & TF_SACK_PERMIT) | if (tp->t_flags & TF_SACK_PERMIT) | ||||
EXIT_FASTRECOVERY(tp->t_flags); | EXIT_FASTRECOVERY(tp->t_flags); | ||||
tcp_output(tp); | tp->t_fb->tcp_output(tp); | ||||
} | } | ||||
#ifdef INET | #ifdef INET | ||||
Context not available. |
Does this require a write lock to change? (I honestly don't know; what does the lock cover?)