Differential D16416 Diff 46059 head/sysutils/xen-tools47/files/0001-xenstored-apply-a-write-transaction-rate-limit.patch
Changeset View
Changeset View
Standalone View
Standalone View
head/sysutils/xen-tools47/files/0001-xenstored-apply-a-write-transaction-rate-limit.patch
Property | Old Value | New Value |
---|---|---|
fbsd:nokeywords | null | yes \ No newline at end of property |
svn:eol-style | null | native \ No newline at end of property |
svn:mime-type | null | text/plain \ No newline at end of property |
From bfe42a836450591bb41f4f6393c42dbb0d72abb9 Mon Sep 17 00:00:00 2001 | |||||
From: Ian Jackson <ian.jackson@eu.citrix.com> | |||||
Date: Sat, 18 Mar 2017 16:12:26 +0000 | |||||
Subject: [PATCH 01/15] xenstored: apply a write transaction rate limit | |||||
This avoids a rogue client being about to stall another client (eg the | |||||
toolstack) indefinitely. | |||||
This is XSA-206. | |||||
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com> | |||||
Backported to 4.8 (not entirely trivial). | |||||
Reported-by: Juergen Gross <jgross@suse.com> | |||||
Signed-off-by: George Dunlap <george.dunlap@citrix.com> | |||||
Acked-by: Ian Jackson <Ian.Jackson@eu.citrix.com> | |||||
--- | |||||
tools/xenstore/Makefile | 3 +- | |||||
tools/xenstore/xenstored_core.c | 9 ++ | |||||
tools/xenstore/xenstored_core.h | 6 + | |||||
tools/xenstore/xenstored_domain.c | 215 +++++++++++++++++++++++++++++++++ | |||||
tools/xenstore/xenstored_domain.h | 25 ++++ | |||||
tools/xenstore/xenstored_transaction.c | 5 + | |||||
6 files changed, 262 insertions(+), 1 deletion(-) | |||||
diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile | |||||
index d691b78..b458729 100644 | |||||
--- a/tools/xenstore/Makefile | |||||
+++ b/tools/xenstore/Makefile | |||||
@@ -31,6 +31,7 @@ XENSTORED_OBJS_$(CONFIG_FreeBSD) = xenstored_posix.o | |||||
XENSTORED_OBJS_$(CONFIG_MiniOS) = xenstored_minios.o | |||||
XENSTORED_OBJS += $(XENSTORED_OBJS_y) | |||||
+LDLIBS_xenstored += -lrt | |||||
ifneq ($(XENSTORE_STATIC_CLIENTS),y) | |||||
LIBXENSTORE := libxenstore.so | |||||
@@ -72,7 +73,7 @@ endif | |||||
$(XENSTORED_OBJS): CFLAGS += $(CFLAGS_libxengnttab) | |||||
xenstored: $(XENSTORED_OBJS) | |||||
- $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenevtchn) $(LDLIBS_libxengnttab) $(LDLIBS_libxenctrl) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS) | |||||
+ $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenevtchn) $(LDLIBS_libxengnttab) $(LDLIBS_libxenctrl) $(LDLIBS_xenstored) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS) | |||||
xenstored.a: $(XENSTORED_OBJS) | |||||
$(AR) cr $@ $^ | |||||
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c | |||||
index 51fb0b3..1aabc93 100644 | |||||
--- a/tools/xenstore/xenstored_core.c | |||||
+++ b/tools/xenstore/xenstored_core.c | |||||
@@ -357,6 +357,7 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx, | |||||
int *ptimeout) | |||||
{ | |||||
struct connection *conn; | |||||
+ struct wrl_timestampt now; | |||||
if (fds) | |||||
memset(fds, 0, sizeof(struct pollfd) * current_array_size); | |||||
@@ -376,8 +377,11 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx, | |||||
xce_pollfd_idx = set_fd(xenevtchn_fd(xce_handle), | |||||
POLLIN|POLLPRI); | |||||
+ wrl_gettime_now(&now); | |||||
+ | |||||
list_for_each_entry(conn, &connections, list) { | |||||
if (conn->domain) { | |||||
+ wrl_check_timeout(conn->domain, now, ptimeout); | |||||
if (domain_can_read(conn) || | |||||
(domain_can_write(conn) && | |||||
!list_empty(&conn->out_list))) | |||||
@@ -810,6 +814,7 @@ static void delete_node_single(struct connection *conn, struct node *node) | |||||
corrupt(conn, "Could not delete '%s'", node->name); | |||||
return; | |||||
} | |||||
+ | |||||
domain_entry_dec(conn, node); | |||||
} | |||||
@@ -949,6 +954,7 @@ static void do_write(struct connection *conn, struct buffered_data *in) | |||||
} | |||||
add_change_node(conn->transaction, name, false); | |||||
+ wrl_apply_debit_direct(conn); | |||||
fire_watches(conn, name, false); | |||||
send_ack(conn, XS_WRITE); | |||||
} | |||||
@@ -973,6 +979,7 @@ static void do_mkdir(struct connection *conn, const char *name) | |||||
return; | |||||
} | |||||
add_change_node(conn->transaction, name, false); | |||||
+ wrl_apply_debit_direct(conn); | |||||
fire_watches(conn, name, false); | |||||
} | |||||
send_ack(conn, XS_MKDIR); | |||||
@@ -1098,6 +1105,7 @@ static void do_rm(struct connection *conn, const char *name) | |||||
if (_rm(conn, node, name)) { | |||||
add_change_node(conn->transaction, name, true); | |||||
+ wrl_apply_debit_direct(conn); | |||||
fire_watches(conn, name, true); | |||||
send_ack(conn, XS_RM); | |||||
} | |||||
@@ -1173,6 +1181,7 @@ static void do_set_perms(struct connection *conn, struct buffered_data *in) | |||||
} | |||||
add_change_node(conn->transaction, name, false); | |||||
+ wrl_apply_debit_direct(conn); | |||||
fire_watches(conn, name, false); | |||||
send_ack(conn, XS_SET_PERMS); | |||||
} | |||||
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h | |||||
index 3a497f7..a2a3427 100644 | |||||
--- a/tools/xenstore/xenstored_core.h | |||||
+++ b/tools/xenstore/xenstored_core.h | |||||
@@ -33,6 +33,12 @@ | |||||
#include "list.h" | |||||
#include "tdb.h" | |||||
+#define MIN(a, b) (((a) < (b))? (a) : (b)) | |||||
+ | |||||
+typedef int32_t wrl_creditt; | |||||
+#define WRL_CREDIT_MAX (1000*1000*1000) | |||||
+/* ^ satisfies non-overflow condition for wrl_xfer_credit */ | |||||
+ | |||||
struct buffered_data | |||||
{ | |||||
struct list_head list; | |||||
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c | |||||
index 47b4f03..486c96f 100644 | |||||
--- a/tools/xenstore/xenstored_domain.c | |||||
+++ b/tools/xenstore/xenstored_domain.c | |||||
@@ -21,6 +21,7 @@ | |||||
#include <unistd.h> | |||||
#include <stdlib.h> | |||||
#include <stdarg.h> | |||||
+#include <time.h> | |||||
#include "utils.h" | |||||
#include "talloc.h" | |||||
@@ -74,6 +75,10 @@ struct domain | |||||
/* number of watch for this domain */ | |||||
int nbwatch; | |||||
+ | |||||
+ /* write rate limit */ | |||||
+ wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */ | |||||
+ struct wrl_timestampt wrl_timestamp; | |||||
}; | |||||
static LIST_HEAD(domains); | |||||
@@ -206,6 +211,8 @@ static int destroy_domain(void *_domain) | |||||
fire_watches(NULL, "@releaseDomain", false); | |||||
+ wrl_domain_destroy(domain); | |||||
+ | |||||
return 0; | |||||
} | |||||
@@ -253,6 +260,9 @@ void handle_event(void) | |||||
bool domain_can_read(struct connection *conn) | |||||
{ | |||||
struct xenstore_domain_interface *intf = conn->domain->interface; | |||||
+ | |||||
+ if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0) | |||||
+ return false; | |||||
return (intf->req_cons != intf->req_prod); | |||||
} | |||||
@@ -284,6 +294,8 @@ static struct domain *new_domain(void *context, unsigned int domid, | |||||
domain->domid = domid; | |||||
domain->path = talloc_domain_path(domain, domid); | |||||
+ wrl_domain_new(domain); | |||||
+ | |||||
list_add(&domain->list, &domains); | |||||
talloc_set_destructor(domain, destroy_domain); | |||||
@@ -747,6 +759,209 @@ int domain_watch(struct connection *conn) | |||||
: 0; | |||||
} | |||||
+static wrl_creditt wrl_config_writecost = WRL_FACTOR; | |||||
+static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR; | |||||
+static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR; | |||||
+static wrl_creditt wrl_config_gburst = WRL_GBURST * WRL_FACTOR; | |||||
+static wrl_creditt wrl_config_newdoms_dburst = | |||||
+ WRL_DBURST * WRL_NEWDOMS * WRL_FACTOR; | |||||
+ | |||||
+long wrl_ntransactions; | |||||
+ | |||||
+static long wrl_ndomains; | |||||
+static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */ | |||||
+ | |||||
+void wrl_gettime_now(struct wrl_timestampt *now_wt) | |||||
+{ | |||||
+ struct timespec now_ts; | |||||
+ int r; | |||||
+ | |||||
+ r = clock_gettime(CLOCK_MONOTONIC, &now_ts); | |||||
+ if (r) | |||||
+ barf_perror("Could not find time (clock_gettime failed)"); | |||||
+ | |||||
+ now_wt->sec = now_ts.tv_sec; | |||||
+ now_wt->msec = now_ts.tv_nsec / 1000000; | |||||
+} | |||||
+ | |||||
+static void wrl_xfer_credit(wrl_creditt *debit, wrl_creditt debit_floor, | |||||
+ wrl_creditt *credit, wrl_creditt credit_ceil) | |||||
+ /* | |||||
+ * Transfers zero or more credit from "debit" to "credit". | |||||
+ * Transfers as much as possible while maintaining | |||||
+ * debit >= debit_floor and credit <= credit_ceil. | |||||
+ * (If that's violated already, does nothing.) | |||||
+ * | |||||
+ * Sufficient conditions to avoid overflow, either of: | |||||
+ * |every argument| <= 0x3fffffff | |||||
+ * |every argument| <= 1E9 | |||||
+ * |every argument| <= WRL_CREDIT_MAX | |||||
+ * (And this condition is preserved.) | |||||
+ */ | |||||
+{ | |||||
+ wrl_creditt xfer = MIN( *debit - debit_floor, | |||||
+ credit_ceil - *credit ); | |||||
+ if (xfer > 0) { | |||||
+ *debit -= xfer; | |||||
+ *credit += xfer; | |||||
+ } | |||||
+} | |||||
+ | |||||
+void wrl_domain_new(struct domain *domain) | |||||
+{ | |||||
+ domain->wrl_credit = 0; | |||||
+ wrl_gettime_now(&domain->wrl_timestamp); | |||||
+ wrl_ndomains++; | |||||
+ /* Steal up to DBURST from the reserve */ | |||||
+ wrl_xfer_credit(&wrl_reserve, -wrl_config_newdoms_dburst, | |||||
+ &domain->wrl_credit, wrl_config_dburst); | |||||
+} | |||||
+ | |||||
+void wrl_domain_destroy(struct domain *domain) | |||||
+{ | |||||
+ wrl_ndomains--; | |||||
+ /* | |||||
+ * Don't bother recalculating domain's credit - this just | |||||
+ * means we don't give the reserve the ending domain's credit | |||||
+ * for time elapsed since last update. | |||||
+ */ | |||||
+ wrl_xfer_credit(&domain->wrl_credit, 0, | |||||
+ &wrl_reserve, wrl_config_dburst); | |||||
+} | |||||
+ | |||||
+void wrl_credit_update(struct domain *domain, struct wrl_timestampt now) | |||||
+{ | |||||
+ /* | |||||
+ * We want to calculate | |||||
+ * credit += (now - timestamp) * RATE / ndoms; | |||||
+ * But we want it to saturate, and to avoid floating point. | |||||
+ * To avoid rounding errors from constantly adding small | |||||
+ * amounts of credit, we only add credit for whole milliseconds. | |||||
+ */ | |||||
+ long seconds = now.sec - domain->wrl_timestamp.sec; | |||||
+ long milliseconds = now.msec - domain->wrl_timestamp.msec; | |||||
+ long msec; | |||||
+ int64_t denom, num; | |||||
+ wrl_creditt surplus; | |||||
+ | |||||
+ seconds = MIN(seconds, 1000*1000); /* arbitrary, prevents overflow */ | |||||
+ msec = seconds * 1000 + milliseconds; | |||||
+ | |||||
+ if (msec < 0) | |||||
+ /* shouldn't happen with CLOCK_MONOTONIC */ | |||||
+ msec = 0; | |||||
+ | |||||
+ /* 32x32 -> 64 cannot overflow */ | |||||
+ denom = (int64_t)msec * wrl_config_rate; | |||||
+ num = (int64_t)wrl_ndomains * 1000; | |||||
+ /* denom / num <= 1E6 * wrl_config_rate, so with | |||||
+ reasonable wrl_config_rate, denom / num << 2^64 */ | |||||
+ | |||||
+ /* at last! */ | |||||
+ domain->wrl_credit = MIN( (int64_t)domain->wrl_credit + denom / num, | |||||
+ WRL_CREDIT_MAX ); | |||||
+ /* (maybe briefly violating the DBURST cap on wrl_credit) */ | |||||
+ | |||||
+ /* maybe take from the reserve to make us nonnegative */ | |||||
+ wrl_xfer_credit(&wrl_reserve, 0, | |||||
+ &domain->wrl_credit, 0); | |||||
+ | |||||
+ /* return any surplus (over DBURST) to the reserve */ | |||||
+ surplus = 0; | |||||
+ wrl_xfer_credit(&domain->wrl_credit, wrl_config_dburst, | |||||
+ &surplus, WRL_CREDIT_MAX); | |||||
+ wrl_xfer_credit(&surplus, 0, | |||||
+ &wrl_reserve, wrl_config_gburst); | |||||
+ /* surplus is now implicitly discarded */ | |||||
+ | |||||
+ domain->wrl_timestamp = now; | |||||
+ | |||||
+ trace("wrl: dom %4d %6ld msec %9ld credit %9ld reserve" | |||||
+ " %9ld discard\n", | |||||
+ domain->domid, | |||||
+ msec, | |||||
+ (long)domain->wrl_credit, (long)wrl_reserve, | |||||
+ (long)surplus); | |||||
+} | |||||
+ | |||||
+void wrl_check_timeout(struct domain *domain, | |||||
+ struct wrl_timestampt now, | |||||
+ int *ptimeout) | |||||
+{ | |||||
+ uint64_t num, denom; | |||||
+ int wakeup; | |||||
+ | |||||
+ wrl_credit_update(domain, now); | |||||
+ | |||||
+ if (domain->wrl_credit >= 0) | |||||
+ /* not blocked */ | |||||
+ return; | |||||
+ | |||||
+ if (!*ptimeout) | |||||
+ /* already decided on immediate wakeup, | |||||
+ so no need to calculate our timeout */ | |||||
+ return; | |||||
+ | |||||
+ /* calculate wakeup = now + -credit / (RATE / ndoms); */ | |||||
+ | |||||
+ /* credit cannot go more -ve than one transaction, | |||||
+ * so the first multiplication cannot overflow even 32-bit */ | |||||
+ num = (uint64_t)(-domain->wrl_credit * 1000) * wrl_ndomains; | |||||
+ denom = wrl_config_rate; | |||||
+ | |||||
+ wakeup = MIN( num / denom /* uint64_t */, INT_MAX ); | |||||
+ if (*ptimeout==-1 || wakeup < *ptimeout) | |||||
+ *ptimeout = wakeup; | |||||
+ | |||||
+ trace("wrl: domain %u credit=%ld (reserve=%ld) SLEEPING for %d\n", | |||||
+ domain->domid, | |||||
+ (long)domain->wrl_credit, (long)wrl_reserve, | |||||
+ wakeup); | |||||
+} | |||||
+ | |||||
+void wrl_apply_debit_actual(struct domain *domain) | |||||
+{ | |||||
+ struct wrl_timestampt now; | |||||
+ | |||||
+ if (!domain) | |||||
+ /* sockets escape the write rate limit */ | |||||
+ return; | |||||
+ | |||||
+ wrl_gettime_now(&now); | |||||
+ wrl_credit_update(domain, now); | |||||
+ | |||||
+ domain->wrl_credit -= wrl_config_writecost; | |||||
+ trace("wrl: domain %u credit=%ld (reserve=%ld)\n", | |||||
+ domain->domid, | |||||
+ (long)domain->wrl_credit, (long)wrl_reserve); | |||||
+} | |||||
+ | |||||
+void wrl_apply_debit_direct(struct connection *conn) | |||||
+{ | |||||
+ if (!conn) | |||||
+ /* some writes are generated internally */ | |||||
+ return; | |||||
+ | |||||
+ if (conn->transaction) | |||||
+ /* these are accounted for when the transaction ends */ | |||||
+ return; | |||||
+ | |||||
+ if (!wrl_ntransactions) | |||||
+ /* we don't conflict with anyone */ | |||||
+ return; | |||||
+ | |||||
+ wrl_apply_debit_actual(conn->domain); | |||||
+} | |||||
+ | |||||
+void wrl_apply_debit_trans_commit(struct connection *conn) | |||||
+{ | |||||
+ if (wrl_ntransactions <= 1) | |||||
+ /* our own transaction appears in the counter */ | |||||
+ return; | |||||
+ | |||||
+ wrl_apply_debit_actual(conn->domain); | |||||
+} | |||||
+ | |||||
/* | |||||
* Local variables: | |||||
* c-file-style: "linux" | |||||
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h | |||||
index 83488ed..bdc4044 100644 | |||||
--- a/tools/xenstore/xenstored_domain.h | |||||
+++ b/tools/xenstore/xenstored_domain.h | |||||
@@ -65,4 +65,29 @@ void domain_watch_inc(struct connection *conn); | |||||
void domain_watch_dec(struct connection *conn); | |||||
int domain_watch(struct connection *conn); | |||||
+/* Write rate limiting */ | |||||
+ | |||||
+#define WRL_FACTOR 1000 /* for fixed-point arithmetic */ | |||||
+#define WRL_RATE 200 | |||||
+#define WRL_DBURST 10 | |||||
+#define WRL_GBURST 1000 | |||||
+#define WRL_NEWDOMS 5 | |||||
+ | |||||
+struct wrl_timestampt { | |||||
+ time_t sec; | |||||
+ int msec; | |||||
+}; | |||||
+ | |||||
+extern long wrl_ntransactions; | |||||
+ | |||||
+void wrl_gettime_now(struct wrl_timestampt *now_ts); | |||||
+void wrl_domain_new(struct domain *domain); | |||||
+void wrl_domain_destroy(struct domain *domain); | |||||
+void wrl_credit_update(struct domain *domain, struct wrl_timestampt now); | |||||
+void wrl_check_timeout(struct domain *domain, | |||||
+ struct wrl_timestampt now, | |||||
+ int *ptimeout); | |||||
+void wrl_apply_debit_direct(struct connection *conn); | |||||
+void wrl_apply_debit_trans_commit(struct connection *conn); | |||||
+ | |||||
#endif /* _XENSTORED_DOMAIN_H */ | |||||
diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c | |||||
index d0e4739..a4b328f 100644 | |||||
--- a/tools/xenstore/xenstored_transaction.c | |||||
+++ b/tools/xenstore/xenstored_transaction.c | |||||
@@ -116,6 +116,7 @@ static int destroy_transaction(void *_transaction) | |||||
{ | |||||
struct transaction *trans = _transaction; | |||||
+ wrl_ntransactions--; | |||||
trace_destroy(trans, "transaction"); | |||||
if (trans->tdb) | |||||
tdb_close(trans->tdb); | |||||
@@ -179,6 +180,7 @@ void do_transaction_start(struct connection *conn, struct buffered_data *in) | |||||
talloc_steal(conn, trans); | |||||
talloc_set_destructor(trans, destroy_transaction); | |||||
conn->transaction_started++; | |||||
+ wrl_ntransactions++; | |||||
snprintf(id_str, sizeof(id_str), "%u", trans->id); | |||||
send_reply(conn, XS_TRANSACTION_START, id_str, strlen(id_str)+1); | |||||
@@ -213,6 +215,9 @@ void do_transaction_end(struct connection *conn, const char *arg) | |||||
send_error(conn, EAGAIN); | |||||
return; | |||||
} | |||||
+ | |||||
+ wrl_apply_debit_trans_commit(conn); | |||||
+ | |||||
if (!replace_tdb(trans->tdb_name, trans->tdb)) { | |||||
send_error(conn, errno); | |||||
return; | |||||
-- | |||||
2.1.4 | |||||