diff --git a/lib/librss/librss.3 b/lib/librss/librss.3 index 302d431b6c28..469132027b26 100644 --- a/lib/librss/librss.3 +++ b/lib/librss/librss.3 @@ -1,143 +1,134 @@ .\" $FreeBSD$ .\" .Dd October 23, 2016 .Dt LIBRSS 3 .Os .Sh NAME .Nm librss .Nd Provide Receive-side scaling awareness to userland applications .Sh LIBRARY .Lb librss .Sh SYNOPSIS .In sys/param.h .In sys/cpuset.h .In librss.h .Ft struct rss_config * .Fn rss_config_get "void" .Ft void .Fn rss_config_free "struct rss_config *cfg" .Ft int .Fn rss_config_get_bucket_count "struct rss_config *cfg" .Ft int .Fn rss_get_bucket_cpuset "struct rss_config *rc" "rss_bucket_type_t btype" "int bucket" "cpuset_t *cs" .Ft int .Fn rss_set_bucket_rebalance_cb "rss_bucket_rebalance_cb_t *cb" "void *cbdata" .Ft int -.Fn rss_sock_set_rss_bucket "int fd" "int af" "int rss_bucket" -.Ft int .Fn rss_sock_set_recvrss "int fd" "int af" "int val" .Sh DESCRIPTION The .Nm library and the functions it provides are used for both fetching the system RSS configuration and interacting with RSS aware sockets. .Pp Applications will typically call .Fn rss_config_get to fetch the current RSS configuration from the system and perform initial setup. This typically involves spawning worker threads, one per RSS bucket, and optionally binding them to the per-bucket CPU set. .Pp The .Vt rss_config struct is defined as: .Bd -literal struct rss_config { int rss_ncpus; int rss_nbuckets; int rss_basecpu; int *rss_bucket_map; }; .Ed .Pp Applications will typically use the .Fn rss_config_get_bucket_count function to fetch the number of RSS buckets, create one thread per RSS bucket for RSS aware work, then one RSS aware socket to receive UDP datagrams or TCP connections in each particular RSS bucket / thread. .Pp The .Fn rss_get_bucket_cpuset function sets the given cpuset up for the given RSS bucket and behaviour. 
Typically applications will wish to just query for .Vt RSS_BUCKET_TYPE_KERNEL_ALL unless they wish to potentially set up different worker threads for transmit and receive. .Pp The .Vt rss_bucket_type_t enum is defined as: .Bd -literal typedef enum { RSS_BUCKET_TYPE_NONE = 0, RSS_BUCKET_TYPE_KERNEL_ALL = 1, RSS_BUCKET_TYPE_KERNEL_TX = 2, RSS_BUCKET_TYPE_KERNEL_RX = 3, RSS_BUCKET_TYPE_MAX = 3, } rss_bucket_type_t; .Ed .Pp The rebalance callback .Vt rss_bucket_rebalance_cb_t is defined as: .Bd -literal typedef void rss_bucket_rebalance_cb_t(void *arg); .Ed .Pp The .Fn rss_set_bucket_rebalance_cb function sets an optional callback that will be called if the kernel rebalances RSS buckets. This is intended as a future expansion to rebalance buckets rather than reprogram the RSS key, so typically the only work to be performed is to rebind worker threads to an updated cpuset. .Pp Once RSS setup is completed, .Fn rss_config_free is called to free the RSS configuration structure. .Pp If .Vt val is set to 1, the socket can be placed in an RSS bucket and will only accept datagrams (for UDP) or connections (for TCP) that are received for that RSS bucket. If set to 0, the socket is placed in the default PCB and will see datagrams/connections that are not initially consumed by a PCB aware socket. .Pp The -.Fn rss_sock_set_rss_bucket -function configures the RSS bucket which a socket belongs in. -Note that TCP sockets created by -.Xr accept 2 -will automatically be assigned to the RSS bucket. -.Pp -The .Fn rss_sock_set_recvrss function enables or disables receiving RSS related information as socket options in .Xr recvmsg 2 calls. .Pp When enabled, UDP datagrams will have a message with the .Vt IP_RECVFLOWID option indicating the 32-bit receive flowid as a uint32_t, and the .Vt IP_RECVRSSBUCKETID option indicating the 32-bit RSS bucket id as a uint32_t. .Sh ERRORS The functions return either <0 or NULL as appropriate upon error. 
.Sh HISTORY The .Nm library first appeared in .Fx 11.0 . .Sh AUTHORS .An Adrian Chadd Aq Mt adrian@FreeBSD.org .Sh BUGS There is currently no kernel mechanism to rebalance the RSS bucket to CPU mapping, and so the callback mechanism is a no-op. diff --git a/lib/librss/librss.c b/lib/librss/librss.c index 4125e94305e2..fcaac4cc7366 100644 --- a/lib/librss/librss.c +++ b/lib/librss/librss.c @@ -1,289 +1,257 @@ /* * Copyright (c) 2016 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "librss.h" -int -rss_sock_set_rss_bucket(int fd, int af, int rss_bucket) -{ - int opt; - socklen_t optlen; - int retval; - int f, p; - - switch (af) { - case AF_INET: - p = IPPROTO_IP; - f = IP_RSS_LISTEN_BUCKET; - break; - case AF_INET6: - p = IPPROTO_IPV6; - f = IPV6_RSS_LISTEN_BUCKET; - break; - default: - return (-1); - } - - /* Set RSS bucket */ - opt = rss_bucket; - optlen = sizeof(opt); - retval = setsockopt(fd, p, f, &opt, optlen); - if (retval < 0) { - warn("%s: setsockopt(IP_RSS_LISTEN_BUCKET)", __func__); - return (-1); - } - return (0); -} - int rss_sock_set_recvrss(int fd, int af, int val) { int opt, retval; socklen_t optlen; int f1, f2, p; switch (af) { case AF_INET: p = IPPROTO_IP; f1 = IP_RECVFLOWID; f2 = IP_RECVRSSBUCKETID; break; case AF_INET6: p = IPPROTO_IPV6; f1 = IPV6_RECVFLOWID; f2 = IPV6_RECVRSSBUCKETID; break; default: return (-1); } /* Enable/disable flowid */ opt = val; optlen = sizeof(opt); retval = setsockopt(fd, p, f1, &opt, optlen); if (retval < 0) { warn("%s: setsockopt(IP_RECVFLOWID)", __func__); return (-1); } /* Enable/disable RSS bucket reception */ opt = val; optlen = sizeof(opt); retval = setsockopt(fd, p, f2, &opt, optlen); if (retval < 0) { warn("%s: setsockopt(IP_RECVRSSBUCKETID)", __func__); return (-1); } return (0); } static int rss_getsysctlint(const char *s) { int val, retval; size_t rlen; rlen = sizeof(int); retval = sysctlbyname(s, &val, &rlen, NULL, 0); if (retval < 0) { warn("sysctlbyname (%s)", s); return (-1); } return (val); } static int rss_getbucketmap(int *bucket_map, int nbuckets) { /* XXX I'm lazy; so static string it is */ char bstr[2048]; int retval; size_t rlen; char *s, *ss; int r, b, c; /* Paranoia */ memset(bstr, '\0', sizeof(bstr)); rlen = sizeof(bstr) - 1; retval = sysctlbyname("net.inet.rss.bucket_mapping", bstr, &rlen, NULL, 0); if 
(retval < 0) { warn("sysctlbyname (net.inet.rss.bucket_mapping)"); return (-1); } ss = bstr; while ((s = strsep(&ss, " ")) != NULL) { r = sscanf(s, "%d:%d", &b, &c); if (r != 2) { fprintf(stderr, "%s: string (%s) not parsable\n", __func__, s); return (-1); } if (b > nbuckets) { fprintf(stderr, "%s: bucket %d > nbuckets %d\n", __func__, b, nbuckets); return (-1); } /* XXX no maxcpu check */ bucket_map[b] = c; } return (0); } struct rss_config * rss_config_get(void) { struct rss_config *rc = NULL; rc = calloc(1, sizeof(*rc)); if (rc == NULL) { warn("%s: calloc", __func__); goto error; } rc->rss_ncpus = rss_getsysctlint("net.inet.rss.ncpus"); if (rc->rss_ncpus < 0) { fprintf(stderr, "%s: couldn't fetch net.inet.rss.ncpus\n", __func__); goto error; } rc->rss_nbuckets = rss_getsysctlint("net.inet.rss.buckets"); if (rc->rss_nbuckets < 0) { fprintf(stderr, "%s: couldn't fetch net.inet.rss.nbuckets\n", __func__); goto error; } rc->rss_basecpu = rss_getsysctlint("net.inet.rss.basecpu"); if (rc->rss_basecpu< 0) { fprintf(stderr, "%s: couldn't fetch net.inet.rss.basecpu\n", __func__); goto error; } rc->rss_bucket_map = calloc(rc->rss_nbuckets, sizeof(int)); if (rc->rss_bucket_map == NULL) { warn("%s: calloc (rss buckets; %d entries)", __func__, rc->rss_nbuckets); goto error; } if (rss_getbucketmap(rc->rss_bucket_map, rc->rss_nbuckets) != 0) { fprintf(stderr, "%s: rss_getbucketmap failed\n", __func__); goto error; } return (rc); error: if (rc != NULL) { free(rc->rss_bucket_map); free(rc); } return (NULL); } void rss_config_free(struct rss_config *rc) { if ((rc != NULL) && rc->rss_bucket_map) free(rc->rss_bucket_map); if (rc != NULL) free(rc); } int rss_config_get_bucket_count(struct rss_config *rc) { if (rc == NULL) return (-1); return (rc->rss_nbuckets); } int rss_get_bucket_cpuset(struct rss_config *rc, rss_bucket_type_t btype, int bucket, cpuset_t *cs) { if (bucket < 0 || bucket >= rc->rss_nbuckets) { errno = EINVAL; return (-1); } /* * For now all buckets are the same, but 
eventually we'll want * to allow administrators to set separate RSS cpusets for * {kernel,user} {tx, rx} combinations. */ if (btype <= RSS_BUCKET_TYPE_NONE || btype > RSS_BUCKET_TYPE_MAX) { errno = ENOTSUP; return (-1); } CPU_ZERO(cs); CPU_SET(rc->rss_bucket_map[bucket], cs); return (0); } int rss_set_bucket_rebalance_cb(rss_bucket_rebalance_cb_t *cb, void *cbdata) { (void) cb; (void) cbdata; /* * For now there's no rebalance callback, so * just return 0 and ignore it. */ return (0); } diff --git a/lib/librss/librss.h b/lib/librss/librss.h index 134ae28811a2..c90bff86ae50 100644 --- a/lib/librss/librss.h +++ b/lib/librss/librss.h @@ -1,93 +1,86 @@ /* * Copyright (c) 2016 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __LIBRSS_H__ #define __LIBRSS_H__ struct rss_config { int rss_ncpus; int rss_nbuckets; int rss_basecpu; int *rss_bucket_map; }; typedef enum { RSS_BUCKET_TYPE_NONE = 0, RSS_BUCKET_TYPE_KERNEL_ALL = 1, RSS_BUCKET_TYPE_KERNEL_TX = 2, RSS_BUCKET_TYPE_KERNEL_RX = 3, RSS_BUCKET_TYPE_MAX = 3, } rss_bucket_type_t; typedef void rss_bucket_rebalance_cb_t(void *arg); -/* - * Set the RSS bucket for the given file descriptor. - * - * This must be done before bind(). - */ -extern int rss_sock_set_rss_bucket(int fd, int af, int rss_bucket); - /* * Enable or disable receiving RSS/flowid information on * received UDP frames. */ extern int rss_sock_set_recvrss(int fd, int af, int val); /* * Fetch RSS configuration information. */ extern struct rss_config * rss_config_get(void); /* * Free an RSS configuration structure. */ extern void rss_config_free(struct rss_config *rc); /* * Return how many RSS buckets there are. */ extern int rss_config_get_bucket_count(struct rss_config *rc); /* * Fetch the cpuset configuration for the given RSS bucket and * type. */ extern int rss_get_bucket_cpuset(struct rss_config *rc, rss_bucket_type_t btype, int bucket, cpuset_t *cs); /* * Set a callback for bucket rebalancing. * * This will occur in a separate thread context rather than * a signal handler. */ extern int rss_set_bucket_rebalance_cb(rss_bucket_rebalance_cb_t *cb, void *cbdata); #endif /* __LIBRSS_H__ */