D21644.id62264.diff

Index: sys/netinet/tcp_syncache.h
===================================================================
--- sys/netinet/tcp_syncache.h
+++ sys/netinet/tcp_syncache.h
@@ -111,6 +111,9 @@
u_int lifetime;
};
+#define TCP_SYNCACHE_PAUSE_TIME SYNCOOKIE_LIFETIME
+#define TCP_SYNCACHE_MAX_BACKOFF 6 /* 16 minutes */
+
struct tcp_syncache {
struct syncache_head *hashbase;
uma_zone_t zone;
@@ -122,6 +125,11 @@
uint32_t hash_secret;
struct vnet *vnet;
struct syncookie_secret secret;
+ struct mtx pause_mtx;
+ struct callout pause_co;
+ time_t pause_until;
+ uint8_t pause_backoff;
+ volatile bool paused;
};
/* Internal use for the syncookie functions. */
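
For context: assuming SYNCOOKIE_LIFETIME is 15 seconds (its value elsewhere in tcp_syncache.h), these two constants give an escalating pause ladder capped at 16 minutes. A minimal userspace sketch of that ladder, reusing the kernel names purely as local macros:

#include <stdio.h>

/* Assumed value: SYNCOOKIE_LIFETIME is 15 seconds in tcp_syncache.h. */
#define	TCP_SYNCACHE_PAUSE_TIME		15
#define	TCP_SYNCACHE_MAX_BACKOFF	6	/* 15 s << 6 = 960 s = 16 min */

int
main(void)
{
	int backoff;

	/* Each consecutive attack doubles the pause, up to the cap. */
	for (backoff = 0; backoff <= TCP_SYNCACHE_MAX_BACKOFF; backoff++)
		printf("backoff %d: pause %d s\n",
		    backoff, TCP_SYNCACHE_PAUSE_TIME << backoff);
	return (0);
}
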
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -130,8 +130,7 @@
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
-static int syncache_respond(struct syncache *, struct syncache_head *,
- const struct mbuf *, int);
+static int syncache_respond(struct syncache *, const struct mbuf *, int);
static struct socket *syncache_socket(struct syncache *, struct socket *,
struct mbuf *m);
static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
@@ -145,6 +144,8 @@
*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
struct syncache *, struct tcphdr *, struct tcpopt *,
struct socket *);
+static void syncache_pause(struct in_conninfo *);
+static void syncache_unpause(void *);
static void syncookie_reseed(void *);
#ifdef INVARIANTS
static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
@@ -301,6 +302,14 @@
arc4rand(V_tcp_syncache.secret.key[1], SYNCOOKIE_SECRET_SIZE, 0);
callout_reset(&V_tcp_syncache.secret.reseed, SYNCOOKIE_LIFETIME * hz,
syncookie_reseed, &V_tcp_syncache);
+
+ /* Initialize the pause machinery. */
+ mtx_init(&V_tcp_syncache.pause_mtx, "tcp_sc_pause", NULL, MTX_DEF);
+ callout_init_mtx(&V_tcp_syncache.pause_co, &V_tcp_syncache.pause_mtx,
+ 0);
+ V_tcp_syncache.pause_until = time_uptime - TCP_SYNCACHE_PAUSE_TIME;
+ V_tcp_syncache.pause_backoff = 0;
+ V_tcp_syncache.paused = false;
}
#ifdef VIMAGE
@@ -317,6 +326,14 @@
*/
callout_drain(&V_tcp_syncache.secret.reseed);
+ /* Stop the SYN cache pause callout. */
+ mtx_lock(&V_tcp_syncache.pause_mtx);
+ if (callout_stop(&V_tcp_syncache.pause_co) == 0) {
+ mtx_unlock(&V_tcp_syncache.pause_mtx);
+ callout_drain(&V_tcp_syncache.pause_co);
+ } else
+ mtx_unlock(&V_tcp_syncache.pause_mtx);
+
/* Cleanup hash buckets: stop timers, free entries, destroy locks. */
for (i = 0; i < V_tcp_syncache.hashsize; i++) {
@@ -340,6 +357,7 @@
/* Free the allocated global resources. */
uma_zdestroy(V_tcp_syncache.zone);
free(V_tcp_syncache.hashbase, M_SYNCACHE);
+ mtx_destroy(&V_tcp_syncache.pause_mtx);
}
#endif
@@ -361,10 +379,10 @@
if (sch->sch_length >= V_tcp_syncache.bucket_limit) {
KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
("sch->sch_length incorrect"));
+ syncache_pause(&sc->sc_inc);
sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
sch->sch_last_overflow = time_uptime;
syncache_drop(sc2, sch);
- TCPSTAT_INC(tcps_sc_bucketoverflow);
}
/* Put it into the bucket. */
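
The hunk above moves the overflow accounting into syncache_pause() (so it is counted once per pause rather than once per overflow) while still evicting the oldest entry from the full bucket. A toy userspace model of the eviction using the same sys/queue.h macros; BUCKET_LIMIT and the entry type are hypothetical stand-ins, not kernel structures:

#include <sys/queue.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	TAILQ_ENTRY(entry)	link;
	int			id;
};
TAILQ_HEAD(bucket, entry);

#define	BUCKET_LIMIT	3	/* stands in for V_tcp_syncache.bucket_limit */

static void
bucket_insert(struct bucket *b, int *len, int id)
{
	struct entry *e, *victim;

	if (*len >= BUCKET_LIMIT) {
		/* Evict the oldest entry (the tail), then pause. */
		victim = TAILQ_LAST(b, bucket);
		TAILQ_REMOVE(b, victim, link);
		printf("overflow: dropped id %d, pausing the syncache\n",
		    victim->id);
		free(victim);
		(*len)--;
	}
	if ((e = malloc(sizeof(*e))) == NULL)
		err(1, "malloc");
	e->id = id;
	TAILQ_INSERT_HEAD(b, e, link);
	(*len)++;
}

int
main(void)
{
	struct bucket b = TAILQ_HEAD_INITIALIZER(b);
	int i, len = 0;

	for (i = 0; i < 5; i++)
		bucket_insert(&b, &len, i);
	return (0);
}
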
@@ -451,6 +469,7 @@
struct syncache *sc, *nsc;
int tick = ticks;
char *s;
+ bool paused;
CURVNET_SET(sch->sch_sc->vnet);
@@ -463,7 +482,19 @@
*/
sch->sch_nextc = tick + INT_MAX;
+ /*
+ * If we have paused processing, unconditionally remove
+ * all syncache entries.
+ */
+ mtx_lock(&V_tcp_syncache.pause_mtx);
+ paused = V_tcp_syncache.paused;
+ mtx_unlock(&V_tcp_syncache.pause_mtx);
+
TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
+ if (paused) {
+ syncache_drop(sc, sch);
+ continue;
+ }
/*
* We do not check if the listen socket still exists
* and accept the case where the listen socket may be
@@ -495,7 +526,7 @@
free(s, M_TCPLOG);
}
- syncache_respond(sc, sch, NULL, TH_SYN|TH_ACK);
+ syncache_respond(sc, NULL, TH_SYN|TH_ACK);
TCPSTAT_INC(tcps_sc_retransmitted);
syncache_timeout(sc, sch, 0);
}
@@ -506,14 +537,24 @@
}
/*
- * Find an entry in the syncache.
- * Returns always with locked syncache_head plus a matching entry or NULL.
+ * Returns true if the system is only using cookies at the moment.
+ * This could be due to a sysadmin decision to only use cookies, or it
+ * could be due to the system detecting an attack.
*/
-static struct syncache *
-syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
+static inline bool
+syncache_cookiesonly(void)
+{
+
+ return (V_tcp_syncookies && (V_tcp_syncache.paused ||
+ V_tcp_syncookiesonly));
+}
+
+/*
+ * Find the hash bucket for the given connection.
+ */
+static struct syncache_head *
+syncache_hashbucket(struct in_conninfo *inc)
{
- struct syncache *sc;
- struct syncache_head *sch;
uint32_t hash;
/*
@@ -526,8 +567,20 @@
hash = jenkins_hash32((uint32_t *)&inc->inc_ie, 5,
V_tcp_syncache.hash_secret) & V_tcp_syncache.hashmask;
- sch = &V_tcp_syncache.hashbase[hash];
- *schp = sch;
+ return (&V_tcp_syncache.hashbase[hash]);
+}
+
+/*
+ * Find an entry in the syncache.
+ * Returns always with locked syncache_head plus a matching entry or NULL.
+ */
+static struct syncache *
+syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
+{
+ struct syncache *sc;
+ struct syncache_head *sch;
+
+ *schp = sch = syncache_hashbucket(inc);
SCH_LOCK(sch);
/* Circle through bucket row to find matching entry. */
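
A hedged sketch of the new mode predicate: the cache is treated as cookies-only either when the administrator forces it or while a pause is active, but only if syncookies are enabled at all. Plain booleans stand in for the VNET globals:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for V_tcp_syncookies, V_tcp_syncookiesonly and paused. */
static bool
cookiesonly(bool syncookies, bool syncookiesonly, bool paused)
{

	return (syncookies && (paused || syncookiesonly));
}

int
main(void)
{

	printf("%d\n", cookiesonly(true, false, true));	 /* 1: paused */
	printf("%d\n", cookiesonly(false, false, true)); /* 0: cookies disabled */
	printf("%d\n", cookiesonly(true, true, false));	 /* 1: forced by sysctl */
	return (0);
}
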
@@ -552,6 +605,8 @@
struct syncache_head *sch;
char *s = NULL;
+ if (syncache_cookiesonly())
+ return;
sc = syncache_lookup(inc, &sch); /* returns locked sch */
SCH_LOCK_ASSERT(sch);
@@ -632,7 +687,7 @@
"sending challenge ACK\n",
s, __func__,
th->th_seq, sc->sc_irs + 1, sc->sc_wnd);
- syncache_respond(sc, sch, m, TH_ACK);
+ syncache_respond(sc, m, TH_ACK);
}
} else {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
@@ -655,6 +710,8 @@
struct syncache *sc;
struct syncache_head *sch;
+ if (syncache_cookiesonly())
+ return;
sc = syncache_lookup(inc, &sch); /* returns locked sch */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
@@ -670,6 +727,8 @@
struct syncache *sc;
struct syncache_head *sch;
+ if (syncache_cookiesonly())
+ return;
sc = syncache_lookup(inc, &sch); /* returns locked sch */
SCH_LOCK_ASSERT(sch);
if (sc == NULL)
@@ -1030,6 +1089,7 @@
struct syncache_head *sch;
struct syncache scs;
char *s;
+ bool locked;
/*
* Global TCP locks are held because we manipulate the PCB lists
@@ -1039,8 +1099,15 @@
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
("%s: can handle only ACK", __func__));
- sc = syncache_lookup(inc, &sch); /* returns locked sch */
- SCH_LOCK_ASSERT(sch);
+ if (syncache_cookiesonly()) {
+ sc = NULL;
+ sch = syncache_hashbucket(inc);
+ locked = false;
+ } else {
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ locked = true;
+ SCH_LOCK_ASSERT(sch);
+ }
#ifdef INVARIANTS
/*
@@ -1064,7 +1131,7 @@
* C. check that the syncookie is valid. If it is, then
* cobble up a fake syncache entry, and return.
*/
- if (!V_tcp_syncookies) {
+ if (locked && !V_tcp_syncookies) {
SCH_UNLOCK(sch);
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: Spurious ACK, "
@@ -1072,7 +1139,7 @@
s, __func__);
goto failed;
}
- if (!V_tcp_syncookiesonly &&
+ if (locked && !V_tcp_syncookiesonly &&
sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
SCH_UNLOCK(sch);
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
@@ -1083,7 +1150,8 @@
}
bzero(&scs, sizeof(scs));
sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
- SCH_UNLOCK(sch);
+ if (locked)
+ SCH_UNLOCK(sch);
if (sc == NULL) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: Segment failed "
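
In cookies-only mode syncache_expand() now skips the bucket lookup entirely, so the checks that previously assumed a locked bucket are gated on the new `locked` flag. A rough control-flow sketch of that split, with userspace stand-ins for the kernel helpers:

#include <stdbool.h>
#include <stdio.h>

static bool
expand(bool cookiesonly)
{
	bool locked;

	if (cookiesonly) {
		/* No lookup, no lock: validate the ACK as a cookie only. */
		locked = false;
	} else {
		/* syncache_lookup() returns with the bucket locked. */
		locked = true;
	}
	if (locked) {
		/* Bucket-state sanity checks only make sense under the
		 * lock, e.g. the sch_last_overflow recency test. */
		printf("checking bucket state under the lock\n");
	}
	printf("validating via %s\n",
	    cookiesonly ? "syncookie alone" : "cache entry, else syncookie");
	if (locked)
		printf("dropping the bucket lock\n");
	return (true);
}

int
main(void)
{

	expand(false);
	expand(true);
	return (0);
}
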
@@ -1332,6 +1400,7 @@
unsigned int *tfo_pending = NULL;
int tfo_cookie_valid = 0;
int tfo_response_cookie_valid = 0;
+ bool locked;
INP_WLOCK_ASSERT(inp); /* listen socket */
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
@@ -1437,8 +1506,15 @@
* how to handle such a case; either ignore it as spoofed, or
* drop the current entry and create a new one?
*/
- sc = syncache_lookup(inc, &sch); /* returns locked entry */
- SCH_LOCK_ASSERT(sch);
+ if (syncache_cookiesonly()) {
+ sc = NULL;
+ sch = syncache_hashbucket(inc);
+ locked = false;
+ } else {
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ locked = true;
+ SCH_LOCK_ASSERT(sch);
+ }
if (sc != NULL) {
if (tfo_cookie_valid)
INP_WUNLOCK(inp);
@@ -1475,7 +1551,7 @@
s, __func__);
free(s, M_TCPLOG);
}
- if (syncache_respond(sc, sch, m, TH_SYN|TH_ACK) == 0) {
+ if (syncache_respond(sc, m, TH_SYN|TH_ACK) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@@ -1491,7 +1567,15 @@
goto skip_alloc;
}
- sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
+ /*
+ * Skip allocating a syncache entry if we are just going to discard
+ * it later.
+ */
+ if (!locked) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ } else
+ sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
/*
* The zone allocator couldn't provide more entries.
@@ -1502,6 +1586,7 @@
if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) {
sch->sch_last_overflow = time_uptime;
syncache_drop(sc, sch);
+ syncache_pause(inc);
}
sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
@@ -1509,6 +1594,9 @@
bzero(&scs, sizeof(scs));
sc = &scs;
} else {
+ KASSERT(locked,
+ ("%s: bucket unexpectedly unlocked",
+ __func__));
SCH_UNLOCK(sch);
if (ipopts)
(void) m_free(ipopts);
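
The on-stack `scs` trick above avoids a zone allocation when the entry would be thrown away anyway (cookies-only mode) and doubles as the fallback when the zone is exhausted. A toy model, with calloc() standing in for uma_zalloc():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {
	int data;	/* hypothetical payload */
};

static void
add(bool cookiesonly)
{
	struct entry scs, *sc;

	if (cookiesonly) {
		/* Never inserted into the cache; dies with this frame. */
		memset(&scs, 0, sizeof(scs));
		sc = &scs;
	} else
		sc = calloc(1, sizeof(*sc));	/* stands in for uma_zalloc */
	if (sc == NULL)
		return;

	printf("using a %s entry\n", sc == &scs ? "stack" : "zone");
	if (sc != &scs)
		free(sc);
}

int
main(void)
{

	add(true);
	add(false);
	return (0);
}
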
@@ -1627,7 +1715,8 @@
sc->sc_flowlabel = htonl(sc->sc_flowlabel) & IPV6_FLOWLABEL_MASK;
}
#endif
- SCH_UNLOCK(sch);
+ if (locked)
+ SCH_UNLOCK(sch);
if (tfo_cookie_valid) {
syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
@@ -1640,7 +1729,7 @@
/*
* Do a standard 3-way handshake.
*/
- if (syncache_respond(sc, sch, m, TH_SYN|TH_ACK) == 0) {
+ if (syncache_respond(sc, m, TH_SYN|TH_ACK) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@@ -1685,8 +1774,7 @@
* i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL.
*/
static int
-syncache_respond(struct syncache *sc, struct syncache_head *sch,
- const struct mbuf *m0, int flags)
+syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
{
struct ip *ip = NULL;
struct mbuf *m;
@@ -2063,8 +2151,6 @@
uint8_t *secbits;
union syncookie cookie;
- SCH_LOCK_ASSERT(sch);
-
cookie.cookie = 0;
/* Map our computed MSS into the 3-bit index. */
@@ -2092,10 +2178,10 @@
cookie.flags.sack_ok = 1;
/* Which of the two secrets to use. */
- secbit = sch->sch_sc->secret.oddeven & 0x1;
+ secbit = V_tcp_syncache.secret.oddeven & 0x1;
cookie.flags.odd_even = secbit;
- secbits = sch->sch_sc->secret.key[secbit];
+ secbits = V_tcp_syncache.secret.key[secbit];
hash = syncookie_mac(&sc->sc_inc, sc->sc_irs, cookie.cookie, secbits,
(uintptr_t)sch);
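
These hunks drop the sch->sch_sc indirection in favor of the per-VNET V_tcp_syncache, since the bucket may no longer be locked (or even looked up). The odd/even selection itself is unchanged; a small sketch of how the parity bit recorded in the cookie picks the matching secret again on validation (the struct and values are made up):

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for struct syncookie_secret. */
struct secrets {
	uint8_t		oddeven;	/* flipped by syncookie_reseed() */
	uint32_t	key[2];		/* stand-ins for the real keys */
};

int
main(void)
{
	struct secrets s = { .oddeven = 1, .key = { 0x1111, 0x2222 } };
	int secbit;

	/* Generation stores the current parity in the cookie... */
	secbit = s.oddeven & 0x1;
	printf("generate with key[%d] = 0x%x\n", secbit, s.key[secbit]);

	/* ...and validation uses the stored bit, so a cookie still
	 * verifies after a reseed flips the parity. */
	printf("validate with key[%d] = 0x%x\n", secbit, s.key[secbit]);
	return (0);
}
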
@@ -2123,8 +2209,6 @@
int wnd, wscale = 0;
union syncookie cookie;
- SCH_LOCK_ASSERT(sch);
-
/*
* Pull information out of SYN-ACK/ACK and revert sequence number
* advances.
@@ -2139,7 +2223,7 @@
cookie.cookie = (ack & 0xff) ^ (ack >> 24);
/* Which of the two secrets to use. */
- secbits = sch->sch_sc->secret.key[cookie.flags.odd_even];
+ secbits = V_tcp_syncache.secret.key[cookie.flags.odd_even];
hash = syncookie_mac(inc, seq, cookie.cookie, secbits, (uintptr_t)sch);
@@ -2268,6 +2352,104 @@
callout_schedule(&sc->secret.reseed, SYNCOOKIE_LIFETIME * hz);
}
+/*
+ * We have overflowed a bucket. Let's pause dealing with the syncache.
+ * This function will increment the bucketoverflow statistics appropriately
+ * (once per pause when pausing is enabled; otherwise, once per overflow).
+ */
+static void
+syncache_pause(struct in_conninfo *inc)
+{
+ time_t delta;
+ const char *s;
+
+ /*
+ * XXX: Add a sysctl read here so we don't get the benefit of this
+ * change without the new sysctl.
+ */
+
+ /*
+ * Try an unlocked read. If we already know that another thread
+ * has activated the feature, there is no need to proceed.
+ */
+ if (V_tcp_syncache.paused)
+ return;
+
+ /* Are cookies enabled? If not, we can't pause. */
+ if (!V_tcp_syncookies) {
+ TCPSTAT_INC(tcps_sc_bucketoverflow);
+ return;
+ }
+
+ /*
+ * We may be the first thread to find an overflow. Get the lock
+ * and evaluate if we need to take action.
+ */
+ mtx_lock(&V_tcp_syncache.pause_mtx);
+ if (V_tcp_syncache.paused) {
+ mtx_unlock(&V_tcp_syncache.pause_mtx);
+ return;
+ }
+
+ /* Activate protection. */
+ V_tcp_syncache.paused = true;
+ TCPSTAT_INC(tcps_sc_bucketoverflow);
+
+ /*
+ * Determine the last backoff time. If we are seeing a renewed
+ * attack within that same time after last reactivating the syncache,
+ * consider it an extension of the same attack.
+ */
+ delta = TCP_SYNCACHE_PAUSE_TIME << V_tcp_syncache.pause_backoff;
+ if (V_tcp_syncache.pause_until + delta - time_uptime > 0) {
+ if (V_tcp_syncache.pause_backoff < TCP_SYNCACHE_MAX_BACKOFF) {
+ delta <<= 1;
+ V_tcp_syncache.pause_backoff++;
+ }
+ } else {
+ delta = TCP_SYNCACHE_PAUSE_TIME;
+ V_tcp_syncache.pause_backoff = 0;
+ }
+
+ /* Log a warning, including IP addresses, if able. */
+ if (inc != NULL)
+ s = tcp_log_addrs(inc, NULL, NULL, NULL);
+ else
+ s = NULL;
+ log(LOG_WARNING, "TCP syncache overflow detected; using syncookies for "
+ "the next %lld seconds%s%s%s\n", (long long)delta,
+ (s != NULL) ? " (last SYN: " : "", (s != NULL) ? s : "",
+ (s != NULL) ? ")" : "");
+ free(__DECONST(void *, s), M_TCPLOG);
+
+ /* Use the calculated delta to set a new pause time. */
+ V_tcp_syncache.pause_until = time_uptime + delta;
+ callout_reset(&V_tcp_syncache.pause_co, delta * hz, syncache_unpause,
+ &V_tcp_syncache);
+ mtx_unlock(&V_tcp_syncache.pause_mtx);
+}
+
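
A self-contained model of the decision above: a fresh overflow long after the last pause restarts at the minimum, while an overflow inside the previous window doubles the pause up to the cap. The names are local stand-ins, and PAUSE_TIME assumes the 15-second SYNCOOKIE_LIFETIME:

#include <stdio.h>
#include <time.h>

#define	PAUSE_TIME	15	/* TCP_SYNCACHE_PAUSE_TIME, assuming 15 s */
#define	MAX_BACKOFF	6	/* TCP_SYNCACHE_MAX_BACKOFF */

static time_t
next_pause(time_t now, time_t *pause_until, int *backoff)
{
	time_t delta;

	/* Same test as syncache_pause(): still inside the old window? */
	delta = (time_t)PAUSE_TIME << *backoff;
	if (*pause_until + delta - now > 0) {
		if (*backoff < MAX_BACKOFF) {
			delta <<= 1;
			(*backoff)++;
		}
	} else {
		delta = PAUSE_TIME;
		*backoff = 0;
	}
	*pause_until = now + delta;
	return (delta);
}

int
main(void)
{
	time_t now = 1000, until = now - PAUSE_TIME;
	int backoff = 0, i;

	/* Repeated overflows in quick succession escalate the pause:
	 * 15, 30, 60, 120, 240, 480, 960, 960 seconds. */
	for (i = 0; i < 8; i++) {
		now += 1;
		printf("pause %lld s (backoff %d)\n",
		    (long long)next_pause(now, &until, &backoff), backoff);
	}
	return (0);
}
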
+/* Evaluate whether we need to unpause. */
+static void
+syncache_unpause(void *arg)
+{
+ struct tcp_syncache *sc;
+ time_t delta;
+
+ sc = arg;
+ mtx_assert(&sc->pause_mtx, MA_OWNED | MA_NOTRECURSED);
+ callout_deactivate(&sc->pause_co);
+
+ /*
+ * Check to make sure we are not running early. If the pause
+ * time has expired, then deactivate the protection.
+ */
+ if ((delta = sc->pause_until - time_uptime) > 0)
+ callout_schedule(&sc->pause_co, delta * hz);
+ else
+ sc->paused = false;
+}
+
/*
* Exports the syncache entries to userland so that netstat can display
* them alongside the other sockets. This function is intended to be
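
Finally, a sketch of the guard in syncache_unpause() above: if the callout fires before pause_until (the deadline can move while a pause is extended), it re-arms itself for the remainder rather than clearing the flag early. Userspace stand-ins, with printf() in place of callout_schedule():

#include <stdio.h>
#include <time.h>

static void
unpause(time_t now, time_t pause_until, int *paused)
{
	time_t delta;

	/* Running early? Re-arm for the remaining time instead. */
	if ((delta = pause_until - now) > 0)
		printf("early by %lld s, rescheduling\n", (long long)delta);
	else
		*paused = 0;
}

int
main(void)
{
	int paused = 1;

	unpause(100, 105, &paused);	/* fires early: stays paused */
	unpause(105, 105, &paused);	/* deadline reached: unpauses */
	printf("paused = %d\n", paused);
	return (0);
}
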
