diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c
@@ -87,9 +87,17 @@
 	int		lastallocfail;
 } __aligned(CACHE_LINE_SIZE);
 
+struct ktls_alloc_thread {
+	uint64_t wakeups;
+	uint64_t allocs;
+	struct thread *td;
+	bool running;
+};
+
 struct ktls_domain_info {
 	int count;
 	int cpu[MAXCPU];
+	struct ktls_alloc_thread alloc_td;
 };
 
 struct ktls_domain_info ktls_domains[MAXMEMDOM];
@@ -142,6 +150,11 @@
     &ktls_sw_buffer_cache, 1,
     "Enable caching of output buffers for SW encryption");
 
+static int ktls_max_alloc = 128;
+SYSCTL_INT(_kern_ipc_tls, OID_AUTO, max_alloc, CTLFLAG_RWTUN,
+    &ktls_max_alloc, 128,
+    "Max number of 16k buffers to allocate in thread context");
+
 static COUNTER_U64_DEFINE_EARLY(ktls_tasks_active);
 SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD,
     &ktls_tasks_active, "Number of active tasks");
@@ -278,6 +291,7 @@
 static void ktls_reset_send_tag(void *context, int pending);
 #endif
 static void ktls_work_thread(void *ctx);
+static void ktls_alloc_thread(void *ctx);
 
 #if defined(INET) || defined(INET6)
 static u_int
@@ -397,10 +411,10 @@
 		 * Bind threads to cores.  If ktls_bind_threads is >
 		 * 1, then we bind to the NUMA domain.
 		 */
+		pc = pcpu_find(i);
+		domain = pc->pc_domain;
 		if (ktls_bind_threads) {
 			if (ktls_bind_threads > 1) {
-				pc = pcpu_find(i);
-				domain = pc->pc_domain;
 				CPU_COPY(&cpuset_domain[domain], &mask);
 				count = ktls_domains[domain].count;
 				ktls_domains[domain].cpu[count] = i;
@@ -414,6 +428,23 @@
 			    "Unable to bind KTLS thread for CPU %d error %d",
 			     i, error);
 		}
+		if (ktls_sw_buffer_cache &&
+		    ktls_domains[domain].alloc_td.td == NULL) {
+			error = kproc_kthread_add(ktls_alloc_thread,
+			    &ktls_domains[domain], &ktls_proc,
+			    &ktls_domains[domain].alloc_td.td,
+			    0, 0, "KTLS_alloc", "thr_%d", domain);
+			if (error)
+				panic("Can't add KTLS alloc thread %d error %d",
+				    domain, error);
+			CPU_COPY(&cpuset_domain[domain], &mask);
+			error = cpuset_setthread(ktls_domains[domain].alloc_td.td->td_tid,
+			    &mask);
+			if (error)
+				panic("Unable to bind KTLS alloc %d error %d",
+				    domain, error);
+
+		}
 		ktls_cpuid_lookup[ktls_number_threads] = i;
 		ktls_number_threads++;
 	}
@@ -1946,6 +1977,7 @@
 ktls_buffer_alloc(struct ktls_wq *wq, struct mbuf *m)
 {
 	void *buf;
+	int domain;
 
 	if (m->m_epg_npgs <= 2)
 		return (NULL);
@@ -1961,8 +1993,20 @@
 		return (NULL);
 	}
 	buf = uma_zalloc(ktls_buffer_zone, M_NOWAIT | M_NORECLAIM);
-	if (buf == NULL)
+	if (buf == NULL) {
+		domain = PCPU_GET(domain);
 		wq->lastallocfail = ticks;
+
+		/*
+		 * Note that this check is "racy", but the races are
+		 * harmless, and are either a spurious wakeup if
+		 * multiple threads fail allocations before the alloc
+		 * thread wakes, or waiting an extra second in case we
+		 * see an old value of running == true.
+		 */
+		if (ktls_domains[domain].alloc_td.running == false)
+			wakeup(&ktls_domains[domain].alloc_td);
+	}
 	return (buf);
 }
 
@@ -2154,6 +2198,56 @@
 	CURVNET_RESTORE();
 }
 
+static void
+ktls_alloc_thread(void *ctx)
+{
+	struct ktls_domain_info *ktls_domain = ctx;
+	struct ktls_alloc_thread *sc = &ktls_domain->alloc_td;
+	void **buf;
+	struct sysctl_oid *oid;
+	char name[80];
+	int i, nbufs;
+
+	curthread->td_domain.dr_policy =
+	    DOMAINSET_PREF(PCPU_GET(domain));
+	snprintf(name, sizeof(name), "domain%d", PCPU_GET(domain));
+
+
+	printf("Starting KTLS alloc thread for domain %d\n",
+	    PCPU_GET(domain));
+	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_ipc_tls), OID_AUTO,
+	    name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
+	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "allocs",
+	    CTLFLAG_RD, &sc->allocs, 0, "buffers allocated");
+	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "wakeups",
+	    CTLFLAG_RD, &sc->wakeups, 0, "thread wakeups");
+	SYSCTL_ADD_BOOL(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "running",
+	    CTLFLAG_RD, &sc->running, 0, "thread running");
+
+	buf = NULL;
+	nbufs = 0;
+	for (;;) {
+		sc->running = false;
+		tsleep(sc, PZERO, "waiting for work", 0);
+		sc->wakeups++;
+		sc->running = true;
+		if (nbufs != ktls_max_alloc) {
+			free(buf, M_KTLS);
+			nbufs = ktls_max_alloc;
+			buf = malloc(sizeof(void *) * nbufs, M_KTLS,
+			    M_WAITOK | M_ZERO);
+		}
+		for (i = 0; i < nbufs; i++) {
+			buf[i] = uma_zalloc(ktls_buffer_zone, M_WAITOK);
+			sc->allocs++;
+		}
+		for (i = 0; i < nbufs; i++) {
+			uma_zfree(ktls_buffer_zone, buf[i]);
+			buf[i] = NULL;
+		}
+	}
+}
+
 static void
 ktls_work_thread(void *ctx)
 {
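
For anyone who wants to experiment with the wakeup protocol above outside the
kernel, the sketch below is a minimal userland analogue: POSIX threads stand
in for tsleep()/wakeup(), and malloc()/free() of a batch of 16k buffers stands
in for populating the UMA zone. Every name in it (alloc_loop, alloc_failed,
wakeup_posted, and so on) is illustrative only and is not part of this patch.

/*
 * Userland sketch of the patch's wakeup protocol: a consumer whose
 * fast-path allocation fails pokes a refill thread, but only when it
 * believes that thread is idle.  Compile with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static atomic_bool running;	/* analogue of alloc_td.running */
static bool wakeup_posted;	/* latched wakeup; protected by lock */

/*
 * Refill thread: sleep until poked, then allocate a batch with a
 * blocking allocator and free it again, leaving the buffers in the
 * allocator's cache (the role uma_zfree() plays in the patch).
 */
static void *
alloc_loop(void *arg)
{
	int i, nbufs = 128;	/* analogue of ktls_max_alloc */
	void **buf = calloc(nbufs, sizeof(void *));

	(void)arg;
	for (;;) {
		atomic_store(&running, false);
		pthread_mutex_lock(&lock);
		while (!wakeup_posted)
			pthread_cond_wait(&cv, &lock);	/* tsleep() */
		wakeup_posted = false;
		pthread_mutex_unlock(&lock);
		atomic_store(&running, true);

		for (i = 0; i < nbufs; i++)
			buf[i] = malloc(16 * 1024);	/* "M_WAITOK" */
		for (i = 0; i < nbufs; i++) {
			free(buf[i]);
			buf[i] = NULL;
		}
		fprintf(stderr, "refilled %d buffers\n", nbufs);
	}
	return (NULL);
}

/*
 * Fast path on allocation failure: the unlocked read of "running" is
 * racy in exactly the way the patch's comment describes; the worst
 * outcomes are one spurious wakeup or one delayed refill.
 */
static void
alloc_failed(void)
{
	if (!atomic_load(&running)) {
		pthread_mutex_lock(&lock);
		wakeup_posted = true;
		pthread_cond_signal(&cv);	/* wakeup() */
		pthread_mutex_unlock(&lock);
	}
}

int
main(void)
{
	pthread_t td;

	pthread_create(&td, NULL, alloc_loop, NULL);
	alloc_failed();		/* simulate one failed fast-path allocation */
	sleep(1);		/* let the refill thread run once */
	return (0);
}

One deliberate difference from the kernel code: wakeup() on an ident with no
sleeper is simply lost, and the patch tolerates that because the next
allocation failure re-posts it (the "extra second" mentioned in the comment in
ktls_buffer_alloc()). pthread_cond_signal() behaves the same way, so the
sketch instead latches the request in wakeup_posted under the mutex.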