diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c
@@ -109,6 +109,9 @@
 static uma_zone_t ktls_session_zone;
 static uma_zone_t ktls_buffer_zone;
 static uint16_t ktls_cpuid_lookup[MAXCPU];
+static int ktls_init_state;
+static struct sx ktls_init_lock;
+SX_SYSINIT(ktls_init_lock, &ktls_init_lock, "ktls init");
 
 SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Kernel TLS offload");
@@ -379,12 +382,11 @@
 	uma_zfree(ktls_buffer_zone, (void *)PHYS_TO_DMAP(m->m_epg_pa[0]));
 }
 
-static void
-ktls_init(void *dummy __unused)
+static int
+ktls_init(void)
 {
 	struct thread *td;
 	struct pcpu *pc;
-	cpuset_t mask;
 	int count, domain, error, i;
 
 	ktls_wq = malloc(sizeof(*ktls_wq) * (mp_maxid + 1), M_KTLS,
@@ -410,36 +412,40 @@
 		STAILQ_INIT(&ktls_wq[i].m_head);
 		STAILQ_INIT(&ktls_wq[i].so_head);
 		mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF);
-		error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
-		    &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
-		if (error)
-			panic("Can't add KTLS thread %d error %d", i, error);
-
-		/*
-		 * Bind threads to cores.  If ktls_bind_threads is >
-		 * 1, then we bind to the NUMA domain.
-		 */
-		if (ktls_bind_threads) {
-			if (ktls_bind_threads > 1) {
-				pc = pcpu_find(i);
-				domain = pc->pc_domain;
-				CPU_COPY(&cpuset_domain[domain], &mask);
-				count = ktls_domains[domain].count;
-				ktls_domains[domain].cpu[count] = i;
-				ktls_domains[domain].count++;
-			} else {
-				CPU_SETOF(i, &mask);
-			}
-			error = cpuset_setthread(td->td_tid, &mask);
-			if (error)
-				panic(
-			    "Unable to bind KTLS thread for CPU %d error %d",
-				    i, error);
+		if (ktls_bind_threads > 1) {
+			pc = pcpu_find(i);
+			domain = pc->pc_domain;
+			count = ktls_domains[domain].count;
+			ktls_domains[domain].cpu[count] = i;
+			ktls_domains[domain].count++;
 		}
 		ktls_cpuid_lookup[ktls_number_threads] = i;
 		ktls_number_threads++;
 	}
 
+	/*
+	 * If we somehow have an empty domain, fall back to choosing
+	 * among all KTLS threads.
+	 */
+	if (ktls_bind_threads > 1) {
+		for (i = 0; i < vm_ndomains; i++) {
+			if (ktls_domains[i].count == 0) {
+				ktls_bind_threads = 1;
+				break;
+			}
+		}
+	}
+
+	/* Start kthreads for each workqueue. */
+	CPU_FOREACH(i) {
+		error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
+		    &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
+		if (error) {
+			printf("Can't add KTLS thread %d error %d\n", i, error);
+			return (error);
+		}
+	}
+
 	/*
 	 * Start an allocation thread per-domain to perform blocking allocations
 	 * of 16k physically contiguous TLS crypto destination buffers.
@@ -454,35 +460,46 @@
 			    &ktls_domains[domain], &ktls_proc,
 			    &ktls_domains[domain].alloc_td.td,
 			    0, 0, "KTLS", "alloc_%d", domain);
-			if (error)
-				panic("Can't add KTLS alloc thread %d error %d",
+			if (error) {
+				printf("Can't add KTLS alloc thread %d error %d\n",
 				    domain, error);
-			CPU_COPY(&cpuset_domain[domain], &mask);
-			error = cpuset_setthread(ktls_domains[domain].alloc_td.td->td_tid,
-			    &mask);
-			if (error)
-				panic("Unable to bind KTLS alloc %d error %d",
-				    domain, error);
-		}
-	}
-
-	/*
-	 * If we somehow have an empty domain, fall back to choosing
-	 * among all KTLS threads.
-	 */
-	if (ktls_bind_threads > 1) {
-		for (i = 0; i < vm_ndomains; i++) {
-			if (ktls_domains[i].count == 0) {
-				ktls_bind_threads = 1;
-				break;
+				return (error);
 			}
 		}
 	}
 
 	if (bootverbose)
 		printf("KTLS: Initialized %d threads\n", ktls_number_threads);
+	return (0);
+}
+
+static int
+ktls_start_kthreads(void)
+{
+	int error, state;
+
+start:
+	state = atomic_load_acq_int(&ktls_init_state);
+	if (__predict_true(state > 0))
+		return (0);
+	if (state < 0)
+		return (ENXIO);
+
+	sx_xlock(&ktls_init_lock);
+	if (ktls_init_state != 0) {
+		sx_xunlock(&ktls_init_lock);
+		goto start;
+	}
+
+	error = ktls_init();
+	if (error == 0)
+		state = 1;
+	else
+		state = -1;
+	atomic_store_rel_int(&ktls_init_state, state);
+	sx_xunlock(&ktls_init_lock);
+	return (error);
 }
-SYSINIT(ktls, SI_SUB_SMP + 1, SI_ORDER_ANY, ktls_init, NULL);
 
 #if defined(INET) || defined(INET6)
 static int
@@ -583,6 +600,10 @@
 		return (EINVAL);
 	}
 
+	error = ktls_start_kthreads();
+	if (error != 0)
+		return (error);
+
 	tls = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO);
 
 	counter_u64_add(ktls_offload_active, 1);
@@ -2457,6 +2478,18 @@
 	CURVNET_RESTORE();
 }
 
+static int
+ktls_bind_domain(int domain)
+{
+	int error;
+
+	error = cpuset_setthread(curthread->td_tid, &cpuset_domain[domain]);
+	if (error != 0)
+		return (error);
+	curthread->td_domain.dr_policy = DOMAINSET_PREF(domain);
+	return (0);
+}
+
 static void
 ktls_alloc_thread(void *ctx)
 {
@@ -2465,14 +2498,16 @@
 	void **buf;
 	struct sysctl_oid *oid;
 	char name[80];
-	int i, nbufs;
+	int domain, error, i, nbufs;
 
-	curthread->td_domain.dr_policy =
-	    DOMAINSET_PREF(PCPU_GET(domain));
-	snprintf(name, sizeof(name), "domain%d", PCPU_GET(domain));
+	domain = ktls_domain - ktls_domains;
 	if (bootverbose)
-		printf("Starting KTLS alloc thread for domain %d\n",
-		    PCPU_GET(domain));
+		printf("Starting KTLS alloc thread for domain %d\n", domain);
+	error = ktls_bind_domain(domain);
+	if (error)
+		printf("Unable to bind KTLS alloc thread for domain %d: error %d\n",
+		    domain, error);
+	snprintf(name, sizeof(name), "domain%d", domain);
 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_ipc_tls),
 	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "allocs",
@@ -2527,10 +2562,32 @@
 	struct socket *so, *son;
 	STAILQ_HEAD(, mbuf) local_m_head;
 	STAILQ_HEAD(, socket) local_so_head;
+	int cpu;
 
-	if (ktls_bind_threads > 1) {
-		curthread->td_domain.dr_policy =
-		    DOMAINSET_PREF(PCPU_GET(domain));
+	cpu = wq - ktls_wq;
+	if (bootverbose)
+		printf("Starting KTLS worker thread for CPU %d\n", cpu);
+
+	/*
+	 * Bind to a core. If ktls_bind_threads is > 1, then
+	 * we bind to the NUMA domain instead.
+	 */
+	if (ktls_bind_threads) {
+		int error;
+
+		if (ktls_bind_threads > 1) {
+			struct pcpu *pc = pcpu_find(cpu);
+
+			error = ktls_bind_domain(pc->pc_domain);
+		} else {
+			cpuset_t mask;
+
+			CPU_SETOF(cpu, &mask);
+			error = cpuset_setthread(curthread->td_tid, &mask);
+		}
+		if (error)
+			printf("Unable to bind KTLS worker thread for CPU %d: error %d\n",
+			    cpu, error);
 	}
 #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
 	fpu_kern_thread(0);
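
Note on the pattern (commentary, not part of the patch): ktls_start_kthreads() above performs lazy, first-use initialization with double-checked locking. The unlocked fast path is a single acquire load of ktls_init_state (positive means initialized, negative means a prior attempt failed and the failure is sticky, zero means not yet attempted); the slow path takes the sx lock, re-checks the state, runs ktls_init() exactly once, and publishes the outcome with a release store. The sketch below is a minimal userland C11 analogue of the same idea, assuming stdatomic and a pthread mutex stand in for the kernel's atomic_load_acq_int/atomic_store_rel_int and the sx lock; the names start_lazy, do_init, and init_state are illustrative only.

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int init_state;		/* 0 = untried, 1 = ok, -1 = failed */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for ktls_init(); returns 0 on success or an errno value. */
static int
do_init(void)
{
	printf("running one-time initialization\n");
	return (0);
}

static int
start_lazy(void)
{
	int error;

	for (;;) {
		/* Fast path: one acquire load, no lock taken. */
		int state = atomic_load_explicit(&init_state,
		    memory_order_acquire);
		if (state > 0)
			return (0);	/* already initialized */
		if (state < 0)
			return (ENXIO);	/* earlier attempt failed; stay failed */

		pthread_mutex_lock(&init_lock);
		if (atomic_load_explicit(&init_state,
		    memory_order_relaxed) != 0) {
			/* Another thread raced us; re-read the result. */
			pthread_mutex_unlock(&init_lock);
			continue;
		}
		error = do_init();
		/* Publish success (1) or sticky failure (-1). */
		atomic_store_explicit(&init_state, error == 0 ? 1 : -1,
		    memory_order_release);
		pthread_mutex_unlock(&init_lock);
		return (error);
	}
}

int
main(void)
{
	/* First call does the work; later calls hit only the fast path. */
	printf("first call: %d\n", start_lazy());
	printf("second call: %d\n", start_lazy());
	return (0);
}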