Index: sys/conf/files.amd64 =================================================================== --- sys/conf/files.amd64 +++ sys/conf/files.amd64 @@ -124,6 +124,7 @@ dev/axgbe/xgbe_osdep.c optional axp dev/axgbe/xgbe-i2c.c optional axp dev/axgbe/xgbe-phy-v2.c optional axp +dev/coredirector/coredirector.c optional coredirector dev/enic/enic_res.c optional enic dev/enic/enic_txrx.c optional enic dev/enic/if_enic.c optional enic Index: sys/dev/coredirector/coredirector.c =================================================================== --- /dev/null +++ sys/dev/coredirector/coredirector.c @@ -0,0 +1,641 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Koine Yuusuke + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include "opt_global.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#define COREDIRECTOR_NAME "coredirector" + +#ifdef DEBUG +#define DPRINTF(fmt, args...) printf("coredirector:" fmt, ##args) +#else +#define DPRINTF(fmt, args...) +#endif + +#define BITSCOUNT(n) (!!((n)&((uint8_t)1<<0)) \ + + !!((n)&((uint8_t)1<<1)) \ + + !!((n)&((uint8_t)1<<2)) \ + + !!((n)&((uint8_t)1<<3)) \ + + !!((n)&((uint8_t)1<<4)) \ + + !!((n)&((uint8_t)1<<5)) \ + + !!((n)&((uint8_t)1<<6)) \ + + !!((n)&((uint8_t)1<<7))) +#define ROUNDUP8(n) ((((n)+7)/8)*8) + +#define CPUID_LEAF6_ECX_CLASSES(c) (((c)&CPUTPM_C_TD_CLASSES)>>8) +#define CPUID_LEAF6_EDX_TBLPAGES(c) (((c)&CPUTPM_D_TD_TBLPAGES)>>8) + +/* + * Structure defines + */ +struct coredirector_instance { + int ref; + + struct { + uint32_t hfi :1; + uint32_t thdirector :1; + uint32_t pmaped :1; + uint32_t reserved :30; + } flags; + + uint32_t hdrsize; + uint32_t entrysize; + uint32_t allocsize; + uint32_t tblsize; + + int32_t cpugroups; + + uint64_t capabilities; + uint64_t classes; + + union { + void *ptr; + uint64_t *timestamp; + } hwtable; + struct mtx hwtbl_lock; + + union { + void *ptr; + uint64_t *timestamp; + } cache; + + struct mtx intr_lock; +}; + +struct coredirector_softc { + device_t sc_dev; + + int cpuid; + + struct coredirector_instance *inst; +}; + +/* + * Prototype defines + */ +static void coredirector_msr_setflag(uint64_t addr, int cpu, uint64_t flag); +static void coredirector_get_hwtable(struct coredirector_instance *inst); +static void coredirector_interrupt(int cpu, void *value); +static void coredirector_identify(driver_t *driver, device_t parent); +static int coredirector_probe(device_t dev); +static void coredirector_set_instance(struct coredirector_softc *sc); +static int coredirector_attach_notbsp(device_t dev); +static int 
coredirector_attach(device_t dev);
+static int	coredirector_detach(device_t dev);
+static int	coredirector_dumptable_sysctl(SYSCTL_HANDLER_ARGS);
+
+#if defined(SMP) && defined(SCHED_ULE)
+extern struct cpu_group *cpu_top;	/* CPU topology */
+#endif
+
+/*
+ * Device methods.
+ */
+static device_method_t coredirector_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_identify,	coredirector_identify),
+	DEVMETHOD(device_probe,		coredirector_probe),
+	DEVMETHOD(device_attach,	coredirector_attach),
+	DEVMETHOD(device_detach,	coredirector_detach),
+
+	DEVMETHOD_END
+};
+
+static driver_t coredirector_driver = {
+	COREDIRECTOR_NAME,
+	coredirector_methods,
+	sizeof(struct coredirector_softc),
+};
+
+/* The driver hangs off the "cpu" bus, so one device is created per cpu device. */
+DRIVER_MODULE(coredirector, cpu, coredirector_driver, NULL, NULL);
+
+static MALLOC_DEFINE(M_COREHFI, COREDIRECTOR_NAME, "Buffers for coredirector driver");
+
+/*
+ * Read-modify-write helper: OR "flag" into the MSR "addr" for CPU "cpu".
+ *
+ * Both accesses go through x86_msr_op() with
+ * MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu).
+ *
+ * The MSR number now comes from "addr"; it used to be hard-coded to
+ * MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, which silently ignored the argument.
+ * The only caller in this file passes that same MSR, so its behavior is
+ * unchanged.
+ */
+static void
+coredirector_msr_setflag(uint64_t addr, int cpu, uint64_t flag)
+{
+	uint64_t msr = 0;
+
+	x86_msr_op((u_int)addr, MSR_OP_RENDEZVOUS_ONE | MSR_OP_READ |
+	    MSR_OP_CPUID(cpu), 0, &msr);
+	x86_msr_op((u_int)addr, MSR_OP_RENDEZVOUS_ONE | MSR_OP_WRITE |
+	    MSR_OP_CPUID(cpu), msr | flag, NULL);
+}
+
+/*
+ * Refresh the local cache from the hardware feedback table.
+ *
+ * Does nothing unless IA32_PKG_THERM_STATUS_HFI_UPDATED is set in
+ * MSR_IA32_PKG_THERM_STATUS.  When it is set, the table is copied to
+ * inst->cache if the leading timestamps differ, the update flag is
+ * cleared, and (with CPUGRP_SCORE, SMP and SCHED_ULE) each table entry is
+ * copied into the cg_score field of the matching child of cpu_top.
+ *
+ * intr_lock is only tried, never waited for: if it is already held the
+ * call returns immediately without touching anything.
+ */
+static void
+coredirector_get_hwtable(struct coredirector_instance *inst)
+{
+	uint64_t msr_status;
+#if defined(CPUGRP_SCORE) && defined(SMP) && defined(SCHED_ULE)
+	int grp;
+	uint8_t *entry;
+	struct cpu_group *cg;
+#endif
+
+	if (!mtx_trylock_spin(&inst->intr_lock))
+		return;
+
+	/* Check HFI/ITD update status */
+	msr_status = rdmsr(MSR_IA32_PKG_THERM_STATUS);
+	if (msr_status & IA32_PKG_THERM_STATUS_HFI_UPDATED) {
+		DPRINTF("[DEBUG] HWtable UPDATED : status= %lx\n", msr_status );
+
+		/* Copy only when the table timestamp has actually changed. */
+		if (*(inst->cache.timestamp) != *(inst->hwtable.timestamp)) {
+			/* Copy HFI/ITD HW table to local cache */
+			mtx_lock_spin(&inst->hwtbl_lock);
+			memcpy(inst->cache.ptr, inst->hwtable.ptr, inst->tblsize);
+			mtx_unlock_spin(&inst->hwtbl_lock);
+		}
+
+		/* Clear the HFI/ITD update flag in the MSR */
+		msr_status &= ~IA32_PKG_THERM_STATUS_HFI_UPDATED;
+		wrmsr(MSR_IA32_PKG_THERM_STATUS, msr_status);
+
+		/* Set Perf./Eff. values in the cpu_group structures */
+#if defined(CPUGRP_SCORE) && defined(SMP) && defined(SCHED_ULE)
+		/* Entries start right after the table header, one per group. */
+		entry = (uint8_t *)inst->cache.ptr + inst->hdrsize;
+		for (grp = 0, cg = cpu_top->cg_child; grp < inst->cpugroups;
+		    grp++, cg++, entry += inst->entrysize)
+			memcpy(cg->cg_score, entry, inst->entrysize);
+#endif
+	}
+
+	mtx_unlock_spin(&inst->intr_lock);
+}
+
+/*
+ * Thermal interrupt callback registered through lapic_enable_thermal().
+ * "value" is the coredirector_instance passed at registration; "cpu" is
+ * unused.
+ */
+static void
+coredirector_interrupt(int cpu, void *value)
+{
+	coredirector_get_hwtable((struct coredirector_instance *)value);
+}
+
+/*
+ * Bus identify method: add a coredirector child to "parent" when the CPU
+ * is an Intel part whose CPUID leaf 6 reports Hardware Feedback or Thread
+ * Director, and no coredirector child exists yet.
+ *
+ * On kernels built without SMP or SCHED_ULE nothing is ever added.
+ */
+static void
+coredirector_identify(driver_t *driver, device_t parent)
+{
+#if defined(SMP) && defined(SCHED_ULE)
+	device_t child;
+	u_int regs[4];
+
+	/* Skip if this driver is already present under the parent. */
+	if (device_find_child(parent, COREDIRECTOR_NAME, -1) != NULL)
+		return;
+
+	/* Check Intel CPU */
+	if (cpu_high < 6 || cpu_vendor_id != CPU_VENDOR_INTEL)
+		return;
+
+	/* Check Intel Hardware Feedback & Thread Director feature */
+	do_cpuid(0x06, regs);
+	if (!(regs[0] & CPUTPM1_HW_FEEDBACK) && !(regs[0] & CPUTPM1_THREAD_DIRECTOR))
+		return;
+
+	/* Register the driver, reusing the parent's unit number. */
+	child = device_add_child(parent, COREDIRECTOR_NAME, device_get_unit(parent));
+	if (child == NULL)
+		device_printf(parent, "Failure add coredirector driver\n");
+#endif
+}
+
+/*
+ * Probe method.
+ *
+ * Returns ENXIO when the driver is disabled through device hints or when
+ * CPUID leaf 6 reports neither Hardware Feedback nor Thread Director;
+ * otherwise sets the device description and returns BUS_PROBE_GENERIC.
+ * Units other than 0 are quieted unless booting verbosely.
+ */
+static int
+coredirector_probe(device_t dev)
+{
+	u_int regs[4];
+
+	if (resource_disabled(COREDIRECTOR_NAME, 0))
+		return (ENXIO);
+
+	/* Check Intel Hardware Feedback & Thread Director feature */
+	do_cpuid(0x06, regs);
+	if (!(regs[0] & CPUTPM1_HW_FEEDBACK) && !(regs[0] & CPUTPM1_THREAD_DIRECTOR))
+		return (ENXIO);
+
+	/* Only unit 0 announces itself, unless bootverbose is set. */
+	if (!bootverbose && device_get_unit(dev) != 0)
+		device_quiet(dev);
+
+	if (regs[0] & CPUTPM1_HW_FEEDBACK) {
+		if (regs[0] & CPUTPM1_THREAD_DIRECTOR)
+			device_set_desc(dev, "Intel(R) Thread Director");
+		else
+			device_set_desc(dev, "Intel(R) Hardware-Feedback Interface");
+	}
+
+	return (BUS_PROBE_GENERIC);
+}
+
+/*
+ * Attach "sc" to the instance owned by unit 0 and take a reference on it.
+ * Does nothing when sc already has an instance or when unit 0 has none.
+ *
+ * NOTE(review): the body of the device lookup loop was unreadable in the
+ * reviewed copy of this patch (text between "cnt" and "inst;" was lost).
+ * It is rebuilt here as "find the unit 0 softc", which matches the scbsp
+ * name and the fact that coredirector_attach() allocates the instance only
+ * on unit 0 -- confirm against the author's tree.
+ */
+static void
+coredirector_set_instance(struct coredirector_softc *sc)
+{
+	struct coredirector_softc *scbsp;
+	struct coredirector_instance *inst;
+	device_t *devchild;
+	int childs, cnt;
+
+	if (sc->inst != NULL)
+		return;
+
+	scbsp = NULL;
+	if (devclass_get_devices(devclass_find(COREDIRECTOR_NAME), &devchild,
+	    &childs) != 0)
+		return;
+	for (cnt = 0; cnt < childs; cnt++) {
+		if (device_get_unit(devchild[cnt]) == 0) {
+			scbsp = device_get_softc(devchild[cnt]);
+			break;
+		}
+	}
+	/* devclass_get_devices() hands back an M_TEMP list the caller frees. */
+	free(devchild, M_TEMP);
+
+	if (scbsp == NULL)
+		return;
+
+	inst = scbsp->inst;
+	if (NULL != inst) {
+		/* Increment instance ref.counter */
+		mtx_lock_spin(&inst->intr_lock);
+		inst->ref++;
+		mtx_unlock_spin(&inst->intr_lock);
+
+		sc->inst = inst;
+	}
+}
+
+/*
+ * Attach path for every unit other than 0: share the existing instance.
+ * Always returns 0.
+ */
+static int
+coredirector_attach_notbsp(device_t dev)
+{
+	struct coredirector_softc *sc = device_get_softc(dev);
+
+	coredirector_set_instance(sc);
+
+	return (0);
+}
+
+/*
+ * Attach method.
+ *
+ * Every unit validates CPUID leaf 6 and, with Thread Director, sets bit 0
+ * of MSR_IA32_HW_FEEDBACK_THREAD_CONFIG for its CPU.  Units other than 0
+ * then just share the existing instance.  Unit 0 allocates the instance
+ * and the local cache, allocates or maps the hardware table, enables
+ * HFI/ITD and its interrupt, and publishes the "hwtable" sysctl.
+ *
+ * Returns 0 on success, ENXIO when the CPU features are unsupported or the
+ * table geometry is inconsistent, ENOMEM on allocation failure.
+ */
+static int
+coredirector_attach(device_t dev)
+{
+	int ret = 0;
+	struct coredirector_softc *sc = device_get_softc(dev);
+	struct coredirector_instance *inst;
+	u_int regs[4];
+	uint64_t msrval;
+	uint64_t capabilities;
+	uint64_t classes;
+	void *mapptr;
+
+	sc->sc_dev = dev;
+	sc->cpuid = device_get_unit(dev);
+	sc->inst = NULL;
+
+	/* Get CPUID Leaf 6 */
+	do_cpuid(0x06, regs);
+
+	/* Check performance reporting features */
+	if (!(regs[3] & CPUTPM_D_HF_PERFORMANCE)) {
+		device_printf(dev, "Not support performace reporting. - disable HFI/ITD.\n");
+		return (ENXIO);
+	}
+	/* Check efficiency reporting features */
+	if (!(regs[3] & CPUTPM_D_HF_EFFICIENCY)) {
+		/* This used to print the performance message (copy/paste). */
+		device_printf(dev, "Not support efficiency reporting. - disable HFI/ITD.\n");
+		return (ENXIO);
+	}
+
+	/* Check & Get ITD capabilities (CP) */
+	capabilities = 2;
+	if (regs[0] & CPUTPM1_THREAD_DIRECTOR) {
+		capabilities = BITSCOUNT(regs[3] & CPUTPM_D_TD_CAPABLITIES);
+
+		if (capabilities != 2) {
+			device_printf(dev, "Not support other than 2 capablities. - disable HFI/ITD.\n");
+			return (ENXIO);
+		}
+	}
+
+	/* Check & Get ITD classes (CL) */
+	classes = 1;
+	if (regs[0] & CPUTPM1_THREAD_DIRECTOR) {
+		classes = CPUID_LEAF6_ECX_CLASSES(regs[2]);
+
+#if defined(CPUGRP_SCORE)
+		if (classes > CG_SCORE_CLASS_MAX) {
+			/* The limit is on classes; the message said capabilities. */
+			device_printf(dev, "Not support more than %d classes. - disable HFI/ITD.\n",
+			    CG_SCORE_CLASS_MAX);
+			return (ENXIO);
+		}
+#endif
+
+		if (classes != 4) {
+			device_printf(dev, "Not support other than 4 classes. - fallback to HFI.\n");
+			classes = 1;
+		}
+	}
+
+	/* Enable Intel ThreadDirector features for each Core */
+	if (regs[0] & CPUTPM1_THREAD_DIRECTOR)
+		coredirector_msr_setflag(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, device_get_unit(dev), 1ULL);
+
+	/* Attach for not Bootstrap Processor */
+	if (device_get_unit(dev) != 0) {
+		coredirector_attach_notbsp(dev);
+		return (0);
+	}
+
+	/* Alloc driver instance structure */
+	inst = (struct coredirector_instance *)malloc(sizeof(struct coredirector_instance),
+	    M_COREHFI, M_NOWAIT | M_ZERO);
+	if (inst == NULL) {
+		device_printf(dev, "Not enough memory for instance structure. - disable HFI/ITD.\n");
+		return (ENOMEM);
+	}
+
+	/* Set Intel Hardware Feedback & Thread Director feature flags */
+	if (regs[0] & CPUTPM1_THREAD_DIRECTOR)
+		inst->flags.thdirector = 1;
+	if (regs[0] & CPUTPM1_HW_FEEDBACK)
+		inst->flags.hfi = 1;
+
+	/* Set Intel Hardware Feedback & Thread Director settings */
+	inst->capabilities = capabilities;
+	inst->classes = classes;
+	inst->ref = 1;
+
+	/* Get CPU groups */
+#if defined(SMP) && defined(SCHED_ULE)
+	inst->cpugroups = cpu_top->cg_children;
+#else
+	inst->cpugroups = 0;
+#endif
+
+	/* Calc. Header & Entry size */
+	inst->hdrsize = ROUNDUP8((capabilities * classes)+8);
+	inst->entrysize = ROUNDUP8(capabilities * classes);
+	inst->tblsize = ROUNDUP8(inst->hdrsize + (inst->entrysize * inst->cpugroups));
+
+	/* Size of the table area reported by CPUID, in bytes */
+	inst->allocsize = (CPUID_LEAF6_EDX_TBLPAGES(regs[3]) + 1) * PAGE_SIZE;
+
+	/*
+	 * The interrupt path copies tblsize bytes into a cache of allocsize
+	 * bytes, so refuse a geometry where the former exceeds the latter.
+	 */
+	if (inst->tblsize > inst->allocsize) {
+		device_printf(dev, "HFI/ITD table does not fit the hardware table area. - disable HFI/ITD.\n");
+		ret = ENXIO;
+		goto attach_err1;
+	}
+
+	/* Alloc Cache page */
+	inst->cache.ptr = malloc(inst->allocsize, M_COREHFI, M_NOWAIT | M_ZERO);
+	if (inst->cache.ptr == NULL) {
+		device_printf(dev, "Not enough memory for local table cache. - disable HFI/ITD.\n");
+		ret = ENOMEM;
+		goto attach_err1;
+	}
+
+	/* Init. spin-lock structure */
+	mtx_init(&inst->hwtbl_lock, "coredirector HWtable lock", NULL, MTX_SPIN);
+	mtx_init(&inst->intr_lock, "coredirector Interrupt lock", NULL, MTX_SPIN);
+
+	/* Set Hardware feedback interface table */
+	msrval = rdmsr(MSR_IA32_HW_FEEDBACK_PTR);
+	if (msrval == 0) {
+		/* No table registered yet: allocate the HFI page ourselves. */
+		inst->hwtable.ptr = kmem_alloc_contig(inst->allocsize, M_NOWAIT | M_ZERO, 0, BUS_SPACE_MAXADDR,
+		    PAGE_SIZE, 0, VM_MEMATTR_DEFAULT );
+		if (inst->hwtable.ptr == NULL) {
+			device_printf(dev, "Not enough memory for Hardware table. - disable HFI/ITD.\n");
+			ret = ENOMEM;
+			goto attach_err2;
+		}
+
+		inst->flags.pmaped = 0;
+
+		/* Set HFI memory page to MSR_IA32_HW_FEEDBACK_PTR MSR */
+		msrval = (uint64_t)vtophys(inst->hwtable.ptr) | IA32_HW_FEEDBACK_PTR_ENABLE;
+		wrmsr(MSR_IA32_HW_FEEDBACK_PTR, msrval);
+	} else {
+		/* A table is already registered: map its physical page. */
+		mapptr = pmap_mapdev((msrval & ~IA32_HW_FEEDBACK_PTR_ENABLE), inst->allocsize);
+		if (NULL == mapptr) {
+			device_printf(dev, "Not enough memory for Hardware mapping table. - disable HFI/ITD.\n");
+			ret = ENOMEM;
+			goto attach_err2;
+		}
+
+		inst->flags.pmaped = 1;
+		inst->hwtable.ptr = mapptr;
+	}
+
+	/* Enable HFI & Thread Director to MSR_IA32_HW_FEEDBACK_CONFIG MSR */
+	msrval = rdmsr(MSR_IA32_HW_FEEDBACK_CONFIG);
+	if (inst->flags.hfi)
+		msrval |= IA32_HW_FEEDBACK_CONFIG_ENABLE_HWFEEDBACK;
+	if (inst->flags.thdirector)
+		msrval |= IA32_HW_FEEDBACK_CONFIG_ENABLE_THDIRECTOR;
+	wrmsr(MSR_IA32_HW_FEEDBACK_CONFIG, msrval);
+
+	/* Seed the cache from the existing table when this module is reloaded. */
+	if (inst->flags.pmaped)
+		memcpy(inst->cache.ptr, inst->hwtable.ptr, inst->allocsize);
+
+	/* Enable Local APIC thermal interrupt handle */
+	lapic_enable_thermal(coredirector_interrupt, (void *)inst);
+
+	/* Set instance for softc structure */
+	sc->inst = inst;
+
+	/* Enable HFI/ITD interrupt */
+	msrval = rdmsr(MSR_IA32_PKG_THERM_INTERRUPT) | IA32_PKG_THERM_INTERRUPT_HFI_ENABLE;
+	wrmsr(MSR_IA32_PKG_THERM_INTERRUPT, msrval);
+
+	/* Set sysctl interface */
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "hwtable", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	    dev, sizeof(dev), coredirector_dumptable_sysctl, "A", "Printout HFI/ITD HW table.");
+
+	return (0);
+
+attach_err2:
+	mtx_destroy(&inst->hwtbl_lock);
+	mtx_destroy(&inst->intr_lock);
+
+attach_err1:
+	if (inst->cache.ptr != NULL) {
+		free(inst->cache.ptr, M_COREHFI);
+		inst->cache.ptr = NULL;
+	}
+
+	/*
+	 * sc->inst is still NULL on every path that reaches here, so the
+	 * local pointer must be freed; freeing sc->inst leaked the instance.
+	 */
+	free(inst, M_COREHFI);
+	sc->inst = NULL;
+
+	return (ret);
+}
+
+/*
+ * Detach method.
+ *
+ * Drops this unit's reference on the shared instance.  The unit that drops
+ * the last reference disables the HFI/ITD interrupt and the local APIC
+ * thermal interrupt, unmaps a mapped hardware table, and frees the cache
+ * and the instance.  Always returns 0.
+ */
+static int
+coredirector_detach(device_t dev)
+{
+	int ref;
+	uint64_t msr_intr;
+	struct coredirector_softc *sc = device_get_softc(dev);
+	struct coredirector_instance *inst;
+
+	inst = sc->inst;
+	if (inst == NULL)
+		return (0);
+
+	/* Decrement instance ref.counter */
+	mtx_lock_spin(&inst->intr_lock);
+	ref = --(inst->ref);
+	mtx_unlock_spin(&inst->intr_lock);
+
+	/* Return if the instance is still referenced. */
+	if (ref > 0) {
+		/* Do not keep a pointer another unit will free later. */
+		sc->inst = NULL;
+		return (0);
+	}
+
+	/* Disable HFI/ITD interrupt */
+	msr_intr = rdmsr(MSR_IA32_PKG_THERM_INTERRUPT);
+	msr_intr &= ~IA32_PKG_THERM_INTERRUPT_HFI_ENABLE;
+	wrmsr(MSR_IA32_PKG_THERM_INTERRUPT, msr_intr);
+
+	/* Disable Local APIC thermal interrupt handle */
+	/* TODO:
+	 *   Currently, Local APIC thermal interrupt handler is only used
+	 *   by this driver, so Local APIC thermal interrupt is disabled, but
+	 *   if other drivers are used in the future, instead of disabling Local
+	 *   APIC thermal interrupt itself, it will be necessary to disable the
+	 *   interrupt handler of this driver. Must make sure to unregister.
+	 */
+	lapic_disable_thermal();
+
+	/* Unmap Hardware feedback table physical page area */
+	if (inst->flags.pmaped)
+		pmap_unmapdev(inst->hwtable.ptr, inst->allocsize);
+
+	/*
+	 * The physical address set for MSR_IA32_HW_FEEDBACK_PTR MSR and the enable
+	 * flag set for MSR_IA32_HW_FEEDBACK_CONFIG MSR should also be disabled,
+	 * but the current CPU implementation is that the physical address once
+	 * set for MSR_IA32_HW_FEEDBACK_PTR MSR remains inside the CPU even after
+	 * being disabled.
+	 * For this reason, We have not intentionally disabled them at this time.
+	 */
+
+	/* Destroy spin-lock structure */
+	mtx_destroy(&inst->hwtbl_lock);
+	mtx_destroy(&inst->intr_lock);
+
+	/* Free cache area */
+	if (inst->cache.ptr != NULL) {
+		free(inst->cache.ptr, M_COREHFI);
+		inst->cache.ptr = NULL;
+	}
+
+	/* Free Instance */
+	free(sc->inst, M_COREHFI);
+	sc->inst = NULL;
+
+	return (0);
+}
+
+/*
+ * Handler for the "hwtable" sysctl: formats the cached HFI/ITD table as
+ * text, one row per CPU group and one Perf/Eff pair per class.  arg1 is
+ * the device the sysctl was registered on.
+ *
+ * Returns ENOMEM when the sbuf cannot be created, otherwise the result of
+ * sbuf_finish().
+ *
+ * NOTE(review): runs of spaces inside the string literals below may have
+ * been collapsed in the reviewed copy of this patch; check the column
+ * padding against the author's tree.
+ */
+static int
+coredirector_dumptable_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev;
+	struct coredirector_softc *sc;
+	struct coredirector_instance *inst;
+	struct sbuf *buf;
+	uint8_t *entry;
+	int grp, cl, err;
+#if defined(SMP) && defined(SCHED_ULE)
+	struct cpu_group *cg;
+#endif
+
+	dev = (device_t)arg1;
+	sc = device_get_softc(dev);
+
+	inst = sc->inst;
+	KASSERT(inst != NULL, ("Not initialized coredirector instance."));
+
+	buf = sbuf_new_for_sysctl(NULL, NULL, 512, req);
+	if (buf == NULL)
+		return (ENOMEM);
+
+	sbuf_printf(buf, "\n[Dump HFI/ITD table] TimeStamp=%lx\n", *(inst->cache.timestamp));
+
+	/* Column headers: one "Class N" and one "Perf: Eff" per class. */
+	sbuf_cat(buf, " ");
+	for (cl = 0; cl < inst->classes; cl++)
+		sbuf_printf(buf, " Class %d ", cl);
+
+	sbuf_cat(buf, "\n---------");
+	for (cl = 0; cl < inst->classes; cl++)
+		sbuf_cat(buf, " Perf: Eff");
+
+#if defined(SMP) && defined(SCHED_ULE)
+	cg = cpu_top->cg_child;
+	KASSERT(cg != NULL, ("CPU topology is single."));
+#endif
+
+	for (grp = 0; grp < inst->cpugroups; grp++) {
+		/* Row "grp" sits after the header, entrysize bytes per row. */
+		entry = (uint8_t *)inst->cache.ptr + inst->hdrsize + (grp * inst->entrysize);
+
+		sbuf_printf(buf, "\n Grp %2d:", grp);
+
+		/* Each class contributes two bytes: performance, efficiency. */
+		for (cl = 0; cl < inst->classes; cl++, entry += 2)
+			sbuf_printf(buf, " %3d : %3d", *(entry+0), *(entry+1));
+
+#if defined(SMP) && defined(SCHED_ULE)
+		if (cg->cg_first != cg->cg_last)
+			sbuf_printf(buf, " (Core #%d - #%d)", cg->cg_first, cg->cg_last);
+		else
+			sbuf_printf(buf, " (Core #%d)", cg->cg_first);
+
+		cg++;
+#endif
+	}
+
+	err = sbuf_finish(buf);
+
+	sbuf_delete(buf);
+
+	return (err);
+}
+
Index: sys/modules/Makefile
===================================================================
--- sys/modules/Makefile
+++ sys/modules/Makefile
@@ -84,6 +84,7 @@
 	${_cfi} \
 	${_chromebook_platform} \
 	${_ciss} \
+	${_coredirector} \
 	${_coretemp} \
 	${_cpsw} \
 	${_cpuctl} \
@@ -766,6 +767,7 @@
 _chvgpio=	chvgpio
 _ciss=		ciss
 _chromebook_platform=	chromebook_platform
+_coredirector=	coredirector
 _coretemp=	coretemp
 .if ${MK_SOURCELESS_HOST} != "no" && empty(KCSAN_ENABLED)
 _hpt27xx=	hpt27xx
Index: sys/modules/coredirector/Makefile
===================================================================
--- /dev/null
+++ sys/modules/coredirector/Makefile
@@ -0,0 +1,7 @@
+
+.PATH: ${SRCTOP}/sys/dev/coredirector
+
+KMOD=	coredirector
+SRCS=	coredirector.c bus_if.h device_if.h
+
+.include <bsd.kmod.mk>