Index: head/sys/arm/annapurna/alpine/alpine_machdep_mp.c
===================================================================
--- head/sys/arm/annapurna/alpine/alpine_machdep_mp.c (revision 290546)
+++ head/sys/arm/annapurna/alpine/alpine_machdep_mp.c (revision 290547)
@@ -1,335 +1,333 @@
/*-
* Copyright (c) 2013 Ruslan Bukin
* Copyright (c) 2015 Semihalf
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define AL_CPU_RESUME_WATERMARK_REG 0x00
#define AL_CPU_RESUME_FLAGS_REG 0x04
#define AL_CPU_RESUME_PCPU_RADDR_REG(cpu) (0x08 + 0x04 + 8*(cpu))
#define AL_CPU_RESUME_PCPU_FLAGS(cpu) (0x08 + 8*(cpu))
/* Per-CPU flags */
#define AL_CPU_RESUME_FLG_PERCPU_DONT_RESUME (1 << 2)
/* The expected magic number for validating the resume addresses */
#define AL_CPU_RESUME_MAGIC_NUM 0xf0e1d200
#define AL_CPU_RESUME_MAGIC_NUM_MASK 0xffffff00
/* The expected minimal version number for validating the capabilities */
#define AL_CPU_RESUME_MIN_VER 0x000000c3
#define AL_CPU_RESUME_MIN_VER_MASK 0x000000ff
/* Field controlling the boot-up of companion cores */
#define AL_NB_INIT_CONTROL (0x8)
#define AL_NB_CONFIG_STATUS_PWR_CTRL(cpu) (0x2020 + (cpu)*0x100)
#define SERDES_NUM_GROUPS 4
#define SERDES_GROUP_SIZE 0x400
extern bus_addr_t al_devmap_pa;
extern bus_addr_t al_devmap_size;
extern void mpentry(void);
int alpine_serdes_resource_get(uint32_t group, bus_space_tag_t *tag,
bus_addr_t *baddr);
static int platform_mp_get_core_cnt(void);
static int alpine_get_cpu_resume_base(u_long *pbase, u_long *psize);
static int alpine_get_nb_base(u_long *pbase, u_long *psize);
static int alpine_get_serdes_base(u_long *pbase, u_long *psize);
static boolean_t alpine_validate_cpu(u_int, phandle_t, u_int, pcell_t *);
static boolean_t
alpine_validate_cpu(u_int id, phandle_t child, u_int addr_cell, pcell_t *reg)
{
return fdt_is_compatible(child, "arm,cortex-a15");
}
static int
platform_mp_get_core_cnt(void)
{
static int ncores = 0;
int nchilds;
uint32_t reg;
/* Calculate ncores value only once */
if (ncores)
return (ncores);
reg = cp15_l2ctlr_get();
ncores = CPUV7_L2CTLR_NPROC(reg);
nchilds = ofw_cpu_early_foreach(alpine_validate_cpu, false);
/* Limit CPUs if DTS has configured less than available */
if ((nchilds > 0) && (nchilds < ncores)) {
printf("SMP: limiting number of active CPUs to %d out of %d\n",
nchilds, ncores);
ncores = nchilds;
}
return (ncores);
}
void
platform_mp_init_secondary(void)
{
arm_pic_init_secondary();
}
void
platform_mp_setmaxid(void)
{
- int core_cnt;
- core_cnt = platform_mp_get_core_cnt();
- mp_maxid = core_cnt - 1;
+ mp_ncpus = platform_mp_get_core_cnt();
+ mp_maxid = mp_ncpus - 1;
}
int
platform_mp_probe(void)
{
- mp_ncpus = platform_mp_get_core_cnt();
return (1);
}
static int
alpine_get_cpu_resume_base(u_long *pbase, u_long *psize)
{
phandle_t node;
u_long base = 0;
u_long size = 0;
if (pbase == NULL || psize == NULL)
return (EINVAL);
if ((node = OF_finddevice("/")) == -1)
return (EFAULT);
if ((node =
ofw_bus_find_compatible(node, "annapurna-labs,al-cpu-resume")) == 0)
return (EFAULT);
if (fdt_regsize(node, &base, &size))
return (EFAULT);
*pbase = base;
*psize = size;
return (0);
}
static int
alpine_get_nb_base(u_long *pbase, u_long *psize)
{
phandle_t node;
u_long base = 0;
u_long size = 0;
if (pbase == NULL || psize == NULL)
return (EINVAL);
if ((node = OF_finddevice("/")) == -1)
return (EFAULT);
if ((node =
ofw_bus_find_compatible(node, "annapurna-labs,al-nb-service")) == 0)
return (EFAULT);
if (fdt_regsize(node, &base, &size))
return (EFAULT);
*pbase = base;
*psize = size;
return (0);
}
void
platform_mp_start_ap(void)
{
uint32_t physaddr;
vm_offset_t vaddr;
uint32_t val;
uint32_t start_mask;
u_long cpu_resume_base;
u_long nb_base;
u_long cpu_resume_size;
u_long nb_size;
bus_addr_t cpu_resume_baddr;
bus_addr_t nb_baddr;
int a;
if (alpine_get_cpu_resume_base(&cpu_resume_base, &cpu_resume_size))
panic("Couldn't resolve cpu_resume_base address\n");
if (alpine_get_nb_base(&nb_base, &nb_size))
panic("Couldn't resolve_nb_base address\n");
/* Proceed with start addresses for additional CPUs */
if (bus_space_map(fdtbus_bs_tag, al_devmap_pa + cpu_resume_base,
cpu_resume_size, 0, &cpu_resume_baddr))
panic("Couldn't map CPU-resume area");
if (bus_space_map(fdtbus_bs_tag, al_devmap_pa + nb_base,
nb_size, 0, &nb_baddr))
panic("Couldn't map NB-service area");
/* Validate the resume firmware's magic number and version */
val = bus_space_read_4(fdtbus_bs_tag, cpu_resume_baddr,
AL_CPU_RESUME_WATERMARK_REG);
if (((val & AL_CPU_RESUME_MAGIC_NUM_MASK) != AL_CPU_RESUME_MAGIC_NUM) ||
((val & AL_CPU_RESUME_MIN_VER_MASK) < AL_CPU_RESUME_MIN_VER)) {
panic("CPU-resume device is not compatible");
}
vaddr = (vm_offset_t)mpentry;
physaddr = pmap_kextract(vaddr);
for (a = 1; a < platform_mp_get_core_cnt(); a++) {
/* Power up the core */
bus_space_write_4(fdtbus_bs_tag, nb_baddr,
AL_NB_CONFIG_STATUS_PWR_CTRL(a), 0);
mb();
/* Enable resume */
val = bus_space_read_4(fdtbus_bs_tag, cpu_resume_baddr,
AL_CPU_RESUME_PCPU_FLAGS(a));
val &= ~AL_CPU_RESUME_FLG_PERCPU_DONT_RESUME;
bus_space_write_4(fdtbus_bs_tag, cpu_resume_baddr,
AL_CPU_RESUME_PCPU_FLAGS(a), val);
mb();
/* Set resume physical address */
bus_space_write_4(fdtbus_bs_tag, cpu_resume_baddr,
AL_CPU_RESUME_PCPU_RADDR_REG(a), physaddr);
mb();
}
start_mask = (1 << platform_mp_get_core_cnt()) - 1;
/* Release cores from reset */
val = bus_space_read_4(fdtbus_bs_tag, nb_baddr, AL_NB_INIT_CONTROL);
val |= start_mask;
bus_space_write_4(fdtbus_bs_tag, nb_baddr, AL_NB_INIT_CONTROL, val);
dsb();
bus_space_unmap(fdtbus_bs_tag, nb_baddr, nb_size);
bus_space_unmap(fdtbus_bs_tag, cpu_resume_baddr, cpu_resume_size);
}
static int
alpine_get_serdes_base(u_long *pbase, u_long *psize)
{
phandle_t node;
u_long base = 0;
u_long size = 0;
if (pbase == NULL || psize == NULL)
return (EINVAL);
if ((node = OF_finddevice("/")) == -1)
return (EFAULT);
if ((node =
ofw_bus_find_compatible(node, "annapurna-labs,al-serdes")) == 0)
return (EFAULT);
if (fdt_regsize(node, &base, &size))
return (EFAULT);
*pbase = base;
*psize = size;
return (0);
}
int
alpine_serdes_resource_get(uint32_t group, bus_space_tag_t *tag, bus_addr_t *baddr)
{
u_long serdes_base, serdes_size;
int ret;
static bus_addr_t baddr_mapped[SERDES_NUM_GROUPS];
if (group >= SERDES_NUM_GROUPS)
return (EINVAL);
if (baddr_mapped[group]) {
*tag = fdtbus_bs_tag;
*baddr = baddr_mapped[group];
return (0);
}
ret = alpine_get_serdes_base(&serdes_base, &serdes_size);
if (ret)
return (ret);
ret = bus_space_map(fdtbus_bs_tag,
al_devmap_pa + serdes_base + group * SERDES_GROUP_SIZE,
(SERDES_NUM_GROUPS - group) * SERDES_GROUP_SIZE, 0, baddr);
if (ret)
return (ret);
baddr_mapped[group] = *baddr;
return (0);
}
void
platform_ipi_send(cpuset_t cpus, u_int ipi)
{
pic_ipi_send(cpus, ipi);
}
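
The per-platform changes in this commit all converge on one contract: platform_mp_setmaxid() becomes the single early-boot place that derives both mp_ncpus and mp_maxid, and platform_mp_probe() is reduced to reporting whether any APs should be started. A minimal sketch of that pattern, assuming a hypothetical platform_count_cores() in place of each port's board-specific counting:

/*
 * Illustrative sketch only -- not code from this commit.
 */
static int
platform_count_cores(void)
{

	/* Hypothetical: read L2CTLR, walk the FDT, etc. */
	return (1);
}

void
platform_mp_setmaxid(void)
{

	/* Derive both global counters in one place, early in boot. */
	mp_ncpus = platform_count_cores();
	mp_maxid = mp_ncpus - 1;
}

int
platform_mp_probe(void)
{

	/* No side effects anymore: just report whether APs exist. */
	return (mp_ncpus > 1);
}

This split lets the MI code in subr_smp.c (below) assert the relationship between the two counters as soon as SI_SUB_TUNABLES runs, instead of trusting each port to keep them consistent.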
Index: head/sys/arm/mv/armadaxp/armadaxp_mp.c
===================================================================
--- head/sys/arm/mv/armadaxp/armadaxp_mp.c (revision 290546)
+++ head/sys/arm/mv/armadaxp/armadaxp_mp.c (revision 290547)
@@ -1,195 +1,194 @@
/*-
* Copyright (c) 2011 Semihalf.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define MV_AXP_CPU_DIVCLK_BASE (MV_BASE + 0x18700)
#define CPU_DIVCLK_CTRL0 0x00
#define CPU_DIVCLK_CTRL2_RATIO_FULL0 0x08
#define CPU_DIVCLK_CTRL2_RATIO_FULL1 0x0c
#define CPU_DIVCLK_MASK(x) (~(0xff << (8 * (x))))
#define CPU_PMU(x) (MV_BASE + 0x22100 + (0x100 * (x)))
#define CPU_PMU_BOOT 0x24
#define MP (MV_BASE + 0x20800)
#define MP_SW_RESET(x) ((x) * 8)
#define CPU_RESUME_CONTROL (0x20988)
void armadaxp_init_coher_fabric(void);
int platform_get_ncpus(void);
/* Coherency Fabric registers */
static uint32_t
read_cpu_clkdiv(uint32_t reg)
{
return (bus_space_read_4(fdtbus_bs_tag, MV_AXP_CPU_DIVCLK_BASE, reg));
}
static void
write_cpu_clkdiv(uint32_t reg, uint32_t val)
{
bus_space_write_4(fdtbus_bs_tag, MV_AXP_CPU_DIVCLK_BASE, reg, val);
}
void
platform_mp_setmaxid(void)
{
- mp_maxid = 3;
+ mp_ncpus = platform_get_ncpus();
+ mp_maxid = mp_ncpus - 1;
}
int
platform_mp_probe(void)
{
-
- mp_ncpus = platform_get_ncpus();
return (mp_ncpus > 1);
}
void
platform_mp_init_secondary(void)
{
}
void mptramp(void);
void mptramp_end(void);
extern vm_offset_t mptramp_pmu_boot;
void
platform_mp_start_ap(void)
{
uint32_t reg, *src, *dst, cpu_num, div_val, cputype;
vm_offset_t pmu_boot_off;
/*
* Initialization procedure depends on core revision,
* in this step CHIP ID is checked to choose proper procedure
*/
cputype = cpufunc_id();
cputype &= CPU_ID_CPU_MASK;
/*
* Set the PA of CPU0 Boot Address Redirect register used in
* mptramp according to the actual SoC registers' base address.
*/
pmu_boot_off = (CPU_PMU(0) - MV_BASE) + CPU_PMU_BOOT;
mptramp_pmu_boot = fdt_immr_pa + pmu_boot_off;
dst = pmap_mapdev(0xffff0000, PAGE_SIZE);
for (src = (uint32_t *)mptramp; src < (uint32_t *)mptramp_end;
src++, dst++) {
*dst = *src;
}
pmap_unmapdev((vm_offset_t)dst, PAGE_SIZE);
if (cputype == CPU_ID_MV88SV584X_V7) {
/* Core rev A0 */
div_val = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1);
div_val &= 0x3f;
for (cpu_num = 1; cpu_num < mp_ncpus; cpu_num++ ) {
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1);
reg &= CPU_DIVCLK_MASK(cpu_num);
reg |= div_val << (cpu_num * 8);
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1, reg);
}
} else {
/* Core rev Z1 */
div_val = 0x01;
if (mp_ncpus > 1) {
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL0);
reg &= CPU_DIVCLK_MASK(3);
reg |= div_val << 24;
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL0, reg);
}
for (cpu_num = 2; cpu_num < mp_ncpus; cpu_num++ ) {
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1);
reg &= CPU_DIVCLK_MASK(cpu_num);
reg |= div_val << (cpu_num * 8);
write_cpu_clkdiv(CPU_DIVCLK_CTRL2_RATIO_FULL1, reg);
}
}
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL0);
reg |= ((0x1 << (mp_ncpus - 1)) - 1) << 21;
write_cpu_clkdiv(CPU_DIVCLK_CTRL0, reg);
reg = read_cpu_clkdiv(CPU_DIVCLK_CTRL0);
reg |= 0x01000000;
write_cpu_clkdiv(CPU_DIVCLK_CTRL0, reg);
DELAY(100);
reg &= ~(0xf << 21);
write_cpu_clkdiv(CPU_DIVCLK_CTRL0, reg);
DELAY(100);
bus_space_write_4(fdtbus_bs_tag, MV_BASE, CPU_RESUME_CONTROL, 0);
for (cpu_num = 1; cpu_num < mp_ncpus; cpu_num++ )
bus_space_write_4(fdtbus_bs_tag, CPU_PMU(cpu_num), CPU_PMU_BOOT,
pmap_kextract((vm_offset_t)mpentry));
cpu_idcache_wbinv_all();
for (cpu_num = 1; cpu_num < mp_ncpus; cpu_num++ )
bus_space_write_4(fdtbus_bs_tag, MP, MP_SW_RESET(cpu_num), 0);
/* XXX: Temporary workaround for hangup after releasing AP's */
wmb();
DELAY(10);
armadaxp_init_coher_fabric();
}
void
platform_ipi_send(cpuset_t cpus, u_int ipi)
{
pic_ipi_send(cpus, ipi);
}
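
The clock-divider updates above all share one read-modify-write shape: each core owns an 8-bit ratio field in the RATIO_FULL register, CPU_DIVCLK_MASK(x) clears that core's byte, and the new divider value is shifted back in. A standalone illustration of just the mask arithmetic, with a hypothetical register value (an unsigned constant keeps the shift well-defined):

#include <stdint.h>
#include <stdio.h>

#define CPU_DIVCLK_MASK(x)	(~(0xffu << (8 * (x))))

int
main(void)
{
	uint32_t reg = 0x44332211;	/* pretend RATIO_FULL contents */
	uint32_t div_val = 0x3f;	/* divider value, as in the A0 path */
	int cpu_num = 2;

	/* Clear CPU2's byte (bits 23:16), then insert the new ratio. */
	reg &= CPU_DIVCLK_MASK(cpu_num);
	reg |= div_val << (cpu_num * 8);
	printf("0x%08x\n", reg);	/* prints 0x443f2211 */
	return (0);
}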
Index: head/sys/arm/qemu/virt_mp.c
===================================================================
--- head/sys/arm/qemu/virt_mp.c (revision 290546)
+++ head/sys/arm/qemu/virt_mp.c (revision 290547)
@@ -1,119 +1,113 @@
/*-
* Copyright (c) 2015 Andrew Turner
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static int running_cpus;
int
platform_mp_probe(void)
{
- int ncpus;
- ncpus = ofw_cpu_early_foreach(NULL, true);
- if (ncpus <= 1) {
- mp_ncpus = 1;
- return (0);
- }
-
- mp_ncpus = MIN(ncpus, MAXCPU);
-
- return (1);
+ return (mp_ncpus > 1);
}
static boolean_t
virt_maxid(u_int id, phandle_t node, u_int addr_cells, pcell_t *reg)
{
if (mp_maxid < id)
mp_maxid = id;
return (true);
}
void
platform_mp_setmaxid(void)
{
mp_maxid = PCPU_GET(cpuid);
- ofw_cpu_early_foreach(virt_maxid, true);
+ mp_ncpus = ofw_cpu_early_foreach(virt_maxid, true);
+ if (mp_ncpus < 1)
+ mp_ncpus = 1;
+ mp_ncpus = MIN(mp_ncpus, MAXCPU);
}
static boolean_t
virt_start_ap(u_int id, phandle_t node, u_int addr_cells, pcell_t *reg)
{
int err;
if (running_cpus >= mp_ncpus)
return (false);
running_cpus++;
err = psci_cpu_on(*reg, pmap_kextract((vm_offset_t)mpentry), id);
if (err != PSCI_RETVAL_SUCCESS)
return (false);
return (true);
}
void
platform_mp_start_ap(void)
{
ofw_cpu_early_foreach(virt_start_ap, true);
}
void
platform_mp_init_secondary(void)
{
arm_pic_init_secondary();
}
void
platform_ipi_send(cpuset_t cpus, u_int ipi)
{
pic_ipi_send(cpus, ipi);
}
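
Both the Alpine and virt ports lean on ofw_cpu_early_foreach(): it walks the CPU nodes in the FDT, calls the callback for each one, and returns how many nodes the callback accepted (every runnable node when the callback is NULL, as the deleted virt code relied on). A hedged sketch of a counting callback in the same style as alpine_validate_cpu() and virt_maxid(); the compatible string is a placeholder, not a real binding:

/*
 * Sketch only: count CPUs whose node matches a compatible string.
 */
static boolean_t
count_compatible_cpu(u_int id, phandle_t node, u_int addr_cells,
    pcell_t *reg)
{

	/* Returning true makes ofw_cpu_early_foreach() count this node. */
	return (fdt_is_compatible(node, "vendor,example-cpu"));
}

static int
example_cpu_count(void)
{
	int n;

	n = ofw_cpu_early_foreach(count_compatible_cpu, true);
	return (n > 0 ? n : 1);	/* never report fewer than one CPU */
}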
Index: head/sys/arm/ti/omap4/omap4_mp.c
===================================================================
--- head/sys/arm/ti/omap4/omap4_mp.c (revision 290546)
+++ head/sys/arm/ti/omap4/omap4_mp.c (revision 290547)
@@ -1,87 +1,87 @@
/*-
* Copyright (c) 2012 Olivier Houchard. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
void
platform_mp_init_secondary(void)
{
arm_pic_init_secondary();
}
void
platform_mp_setmaxid(void)
{
- mp_maxid = 1;
+ mp_maxid = 1;
+ mp_ncpus = 2;
}
int
platform_mp_probe(void)
{
- mp_ncpus = 2;
return (1);
}
void
platform_mp_start_ap(void)
{
bus_addr_t scu_addr;
if (bus_space_map(fdtbus_bs_tag, 0x48240000, 0x1000, 0, &scu_addr) != 0)
panic("Couldn't map the SCU\n");
/* Enable the SCU */
*(volatile unsigned int *)scu_addr |= 1;
//*(volatile unsigned int *)(scu_addr + 0x30) |= 1;
cpu_idcache_wbinv_all();
cpu_l2cache_wbinv_all();
ti_smc0(0x200, 0xfffffdff, MODIFY_AUX_CORE_0);
ti_smc0(pmap_kextract((vm_offset_t)mpentry), 0, WRITE_AUX_CORE_1);
armv7_sev();
bus_space_unmap(fdtbus_bs_tag, scu_addr, 0x1000);
}
void
platform_ipi_send(cpuset_t cpus, u_int ipi)
{
pic_ipi_send(cpus, ipi);
}
Index: head/sys/arm/xilinx/zy7_mp.c
===================================================================
--- head/sys/arm/xilinx/zy7_mp.c (revision 290546)
+++ head/sys/arm/xilinx/zy7_mp.c (revision 290547)
@@ -1,119 +1,119 @@
/*-
* Copyright (c) 2013 Thomas Skibo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define ZYNQ7_CPU1_ENTRY 0xfffffff0
#define SCU_CONTROL_REG 0xf8f00000
#define SCU_CONTROL_ENABLE (1 << 0)
void
platform_mp_init_secondary(void)
{
arm_pic_init_secondary();
}
void
platform_mp_setmaxid(void)
{
mp_maxid = 1;
+ mp_ncpus = 2;
}
int
platform_mp_probe(void)
{
- mp_ncpus = 2;
return (1);
}
void
platform_mp_start_ap(void)
{
bus_space_handle_t scu_handle;
bus_space_handle_t ocm_handle;
uint32_t scu_ctrl;
/* Map in SCU control register. */
if (bus_space_map(fdtbus_bs_tag, SCU_CONTROL_REG, 4,
0, &scu_handle) != 0)
panic("platform_mp_start_ap: Couldn't map SCU config reg\n");
/* Set SCU enable bit. */
scu_ctrl = bus_space_read_4(fdtbus_bs_tag, scu_handle, 0);
scu_ctrl |= SCU_CONTROL_ENABLE;
bus_space_write_4(fdtbus_bs_tag, scu_handle, 0, scu_ctrl);
bus_space_unmap(fdtbus_bs_tag, scu_handle, 4);
/* Map in magic location to give entry address to CPU1. */
if (bus_space_map(fdtbus_bs_tag, ZYNQ7_CPU1_ENTRY, 4,
0, &ocm_handle) != 0)
panic("platform_mp_start_ap: Couldn't map OCM\n");
/* Write start address for CPU1. */
bus_space_write_4(fdtbus_bs_tag, ocm_handle, 0,
pmap_kextract((vm_offset_t)mpentry));
bus_space_unmap(fdtbus_bs_tag, ocm_handle, 4);
/*
* The SCU is enabled above but I think the second CPU doesn't
* turn on filtering until after the wake-up below. I think that's why
* things don't work if I don't put these cache ops here. Also, the
* magic location, 0xfffffff0, isn't in the SCU's filtering range so it
* needs a write-back too.
*/
cpu_idcache_wbinv_all();
cpu_l2cache_wbinv_all();
/* Wake up CPU1. */
armv7_sev();
}
void
platform_ipi_send(cpuset_t cpus, u_int ipi)
{
pic_ipi_send(cpus, ipi);
}
Index: head/sys/kern/subr_smp.c
===================================================================
--- head/sys/kern/subr_smp.c (revision 290546)
+++ head/sys/kern/subr_smp.c (revision 290547)
@@ -1,855 +1,863 @@
/*-
* Copyright (c) 2001, John Baldwin .
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This module holds the global variables and machine independent functions
* used for the kernel SMP support.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "opt_sched.h"
#ifdef SMP
volatile cpuset_t stopped_cpus;
volatile cpuset_t started_cpus;
volatile cpuset_t suspended_cpus;
cpuset_t hlt_cpus_mask;
cpuset_t logical_cpus_mask;
void (*cpustop_restartfunc)(void);
#endif
static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS);
/* This is used in modules that need to work in both SMP and UP. */
cpuset_t all_cpus;
int mp_ncpus;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;
volatile int smp_started;
u_int mp_maxid;
static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL,
"Kernel SMP");
SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0,
"Max CPU ID.");
SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus,
0, "Max number of CPUs that the system was compiled for.");
SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD | CTLTYPE_INT, NULL, 0,
sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode");
int smp_disabled = 0; /* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
&smp_disabled, 0, "SMP has been disabled from the loader");
int smp_cpus = 1; /* how many cpu's running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0,
"Number of CPUs online");
int smp_topology = 0; /* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0,
"Topology override setting; 0 is default provided by hardware.");
#ifdef SMP
/* Enable forwarding of a signal to a process running on a different CPU */
static int forward_signal_enabled = 1;
SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
&forward_signal_enabled, 0,
"Forwarding of a signal to a process on a different CPU");
/* Variables needed for SMP rendezvous. */
static volatile int smp_rv_ncpus;
static void (*volatile smp_rv_setup_func)(void *arg);
static void (*volatile smp_rv_action_func)(void *arg);
static void (*volatile smp_rv_teardown_func)(void *arg);
static void *volatile smp_rv_func_arg;
static volatile int smp_rv_waiters[4];
/*
* Shared mutex to restrict busywaits between smp_rendezvous() and
* smp(_targeted)_tlb_shootdown(). A deadlock occurs if both of these
* functions trigger at once and cause multiple CPUs to busywait with
* interrupts disabled.
*/
struct mtx smp_ipi_mtx;
/*
* Let the MD SMP code initialize mp_maxid very early if it can.
*/
static void
mp_setmaxid(void *dummy)
{
+
cpu_mp_setmaxid();
+
+ KASSERT(mp_ncpus >= 1, ("%s: CPU count < 1", __func__));
+ KASSERT(mp_ncpus > 1 || mp_maxid == 0,
+ ("%s: one CPU but mp_maxid is not zero", __func__));
+ KASSERT(mp_maxid >= mp_ncpus - 1,
+ ("%s: counters out of sync: max %d, count %d", __func__,
+ mp_maxid, mp_ncpus));
}
SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);
/*
* Call the MD SMP initialization code.
*/
static void
mp_start(void *dummy)
{
mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);
/* Probe for MP hardware. */
if (smp_disabled != 0 || cpu_mp_probe() == 0) {
mp_ncpus = 1;
CPU_SETOF(PCPU_GET(cpuid), &all_cpus);
return;
}
cpu_mp_start();
printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
mp_ncpus);
cpu_mp_announce();
}
SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);
void
forward_signal(struct thread *td)
{
int id;
/*
* signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on
* this thread, so all we need to do is poke it if it is currently
* executing so that it executes ast().
*/
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(TD_IS_RUNNING(td),
("forward_signal: thread is not TDS_RUNNING"));
CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);
if (!smp_started || cold || panicstr)
return;
if (!forward_signal_enabled)
return;
/* No need to IPI ourself. */
if (td == curthread)
return;
id = td->td_oncpu;
if (id == NOCPU)
return;
ipi_cpu(id, IPI_AST);
}
/*
* When called the executing CPU will send an IPI to all other CPUs
* requesting that they halt execution.
*
* Usually (but not necessarily) called with 'other_cpus' as its arg.
*
* - Signals all CPUs in map to stop.
* - Waits for each to stop.
*
* Returns:
* -1: error
* 0: NA
* 1: ok
*
*/
static int
generic_stop_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
char cpusetbuf[CPUSETBUFSIZ];
#endif
static volatile u_int stopping_cpu = NOCPU;
int i;
volatile cpuset_t *cpus;
KASSERT(
#if defined(__amd64__) || defined(__i386__)
type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
#else
type == IPI_STOP || type == IPI_STOP_HARD,
#endif
("%s: invalid stop type", __func__));
if (!smp_started)
return (0);
CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
cpusetobj_strprint(cpusetbuf, &map), type);
#if defined(__amd64__) || defined(__i386__)
/*
* When suspending, ensure there are no IPIs in progress.
* IPIs that have been issued, but not yet delivered (e.g.
* not pending on a vCPU when running under virtualization)
* will be lost, violating FreeBSD's assumption of reliable
* IPI delivery.
*/
if (type == IPI_SUSPEND)
mtx_lock_spin(&smp_ipi_mtx);
#endif
if (stopping_cpu != PCPU_GET(cpuid))
while (atomic_cmpset_int(&stopping_cpu, NOCPU,
PCPU_GET(cpuid)) == 0)
while (stopping_cpu != NOCPU)
cpu_spinwait(); /* spin */
/* send the stop IPI to all CPUs in map */
ipi_selected(map, type);
#if defined(__amd64__) || defined(__i386__)
if (type == IPI_SUSPEND)
cpus = &suspended_cpus;
else
#endif
cpus = &stopped_cpus;
i = 0;
while (!CPU_SUBSET(cpus, &map)) {
/* spin */
cpu_spinwait();
i++;
if (i == 100000000) {
printf("timeout stopping cpus\n");
break;
}
}
#if defined(__amd64__) || defined(__i386__)
if (type == IPI_SUSPEND)
mtx_unlock_spin(&smp_ipi_mtx);
#endif
stopping_cpu = NOCPU;
return (1);
}
int
stop_cpus(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_STOP));
}
int
stop_cpus_hard(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_STOP_HARD));
}
#if defined(__amd64__) || defined(__i386__)
int
suspend_cpus(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_SUSPEND));
}
#endif
/*
* Called by a CPU to restart stopped CPUs.
*
* Usually (but not necessarily) called with 'stopped_cpus' as its arg.
*
* - Signals all CPUs in map to restart.
* - Waits for each to restart.
*
* Returns:
* -1: error
* 0: NA
* 1: ok
*/
static int
generic_restart_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
char cpusetbuf[CPUSETBUFSIZ];
#endif
volatile cpuset_t *cpus;
KASSERT(
#if defined(__amd64__) || defined(__i386__)
type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
#else
type == IPI_STOP || type == IPI_STOP_HARD,
#endif
("%s: invalid stop type", __func__));
if (!smp_started)
return 0;
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
#if defined(__amd64__) || defined(__i386__)
if (type == IPI_SUSPEND)
cpus = &suspended_cpus;
else
#endif
cpus = &stopped_cpus;
/* signal other cpus to restart */
CPU_COPY_STORE_REL(&map, &started_cpus);
/* wait for each to clear its bit */
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
return 1;
}
int
restart_cpus(cpuset_t map)
{
return (generic_restart_cpus(map, IPI_STOP));
}
#if defined(__amd64__) || defined(__i386__)
int
resume_cpus(cpuset_t map)
{
return (generic_restart_cpus(map, IPI_SUSPEND));
}
#endif
/*
* All-CPU rendezvous. CPUs are signalled, all execute the setup function
* (if specified), rendezvous, execute the action function (if specified),
* rendezvous again, execute the teardown function (if specified), and then
* resume.
*
* Note that the supplied external functions _must_ be reentrant and aware
* that they are running in parallel and in an unknown lock context.
*/
void
smp_rendezvous_action(void)
{
struct thread *td;
void *local_func_arg;
void (*local_setup_func)(void*);
void (*local_action_func)(void*);
void (*local_teardown_func)(void*);
#ifdef INVARIANTS
int owepreempt;
#endif
/* Ensure we have up-to-date values. */
atomic_add_acq_int(&smp_rv_waiters[0], 1);
while (smp_rv_waiters[0] < smp_rv_ncpus)
cpu_spinwait();
/* Fetch rendezvous parameters after acquire barrier. */
local_func_arg = smp_rv_func_arg;
local_setup_func = smp_rv_setup_func;
local_action_func = smp_rv_action_func;
local_teardown_func = smp_rv_teardown_func;
/*
* Use a nested critical section to prevent any preemptions
* from occurring during a rendezvous action routine.
* Specifically, if a rendezvous handler is invoked via an IPI
* and the interrupted thread was in the critical_exit()
* function after setting td_critnest to 0 but before
* performing a deferred preemption, this routine can be
* invoked with td_critnest set to 0 and td_owepreempt true.
* In that case, a critical_exit() during the rendezvous
* action would trigger a preemption which is not permitted in
* a rendezvous action. To fix this, wrap all of the
* rendezvous action handlers in a critical section. We
* cannot use a regular critical section however as having
* critical_exit() preempt from this routine would also be
* problematic (the preemption must not occur before the IPI
* has been acknowledged via an EOI). Instead, we
* intentionally ignore td_owepreempt when leaving the
* critical section. This should be harmless because we do
* not permit rendezvous action routines to schedule threads,
* and thus td_owepreempt should never transition from 0 to 1
* during this routine.
*/
td = curthread;
td->td_critnest++;
#ifdef INVARIANTS
owepreempt = td->td_owepreempt;
#endif
/*
* If requested, run a setup function before the main action
* function. Ensure all CPUs have completed the setup
* function before moving on to the action function.
*/
if (local_setup_func != smp_no_rendevous_barrier) {
if (smp_rv_setup_func != NULL)
smp_rv_setup_func(smp_rv_func_arg);
atomic_add_int(&smp_rv_waiters[1], 1);
while (smp_rv_waiters[1] < smp_rv_ncpus)
cpu_spinwait();
}
if (local_action_func != NULL)
local_action_func(local_func_arg);
if (local_teardown_func != smp_no_rendevous_barrier) {
/*
* Signal that the main action has been completed. If a
* full exit rendezvous is requested, then all CPUs will
* wait here until all CPUs have finished the main action.
*/
atomic_add_int(&smp_rv_waiters[2], 1);
while (smp_rv_waiters[2] < smp_rv_ncpus)
cpu_spinwait();
if (local_teardown_func != NULL)
local_teardown_func(local_func_arg);
}
/*
* Signal that the rendezvous is fully completed by this CPU.
* This means that no member of smp_rv_* pseudo-structure will be
* accessed by this target CPU after this point; in particular,
* memory pointed by smp_rv_func_arg.
*
* The release semantic ensures that all accesses performed by
* the current CPU are visible when smp_rendezvous_cpus()
* returns, by synchronizing with the
* atomic_load_acq_int(&smp_rv_waiters[3]).
*/
atomic_add_rel_int(&smp_rv_waiters[3], 1);
td->td_critnest--;
KASSERT(owepreempt == td->td_owepreempt,
("rendezvous action changed td_owepreempt"));
}
void
smp_rendezvous_cpus(cpuset_t map,
void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void *arg)
{
int curcpumap, i, ncpus = 0;
/* See the comments in the !SMP case. */
if (!smp_started) {
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
return;
}
CPU_FOREACH(i) {
if (CPU_ISSET(i, &map))
ncpus++;
}
if (ncpus == 0)
panic("ncpus is 0 with non-zero map");
mtx_lock_spin(&smp_ipi_mtx);
/* Pass rendezvous parameters via global variables. */
smp_rv_ncpus = ncpus;
smp_rv_setup_func = setup_func;
smp_rv_action_func = action_func;
smp_rv_teardown_func = teardown_func;
smp_rv_func_arg = arg;
smp_rv_waiters[1] = 0;
smp_rv_waiters[2] = 0;
smp_rv_waiters[3] = 0;
atomic_store_rel_int(&smp_rv_waiters[0], 0);
/*
* Signal other processors, which will enter the IPI with
* interrupts off.
*/
curcpumap = CPU_ISSET(curcpu, &map);
CPU_CLR(curcpu, &map);
ipi_selected(map, IPI_RENDEZVOUS);
/* Check if the current CPU is in the map */
if (curcpumap != 0)
smp_rendezvous_action();
/*
* Ensure that the master CPU waits for all the other
* CPUs to finish the rendezvous, so that smp_rv_*
* pseudo-structure and the arg are guaranteed to not
* be in use.
*
* Load acquire synchronizes with the release add in
* smp_rendezvous_action(), which ensures that our caller sees
* all memory actions done by the called functions on other
* CPUs.
*/
while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus)
cpu_spinwait();
mtx_unlock_spin(&smp_ipi_mtx);
}
void
smp_rendezvous(void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void *arg)
{
smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
}
static struct cpu_group group[MAXCPU];
struct cpu_group *
smp_topo(void)
{
char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
struct cpu_group *top;
/*
* Check for a fake topology request for debugging purposes.
*/
switch (smp_topology) {
case 1:
/* Dual core with no sharing. */
top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
break;
case 2:
/* No topology, all cpus are equal. */
top = smp_topo_none();
break;
case 3:
/* Dual core with shared L2. */
top = smp_topo_1level(CG_SHARE_L2, 2, 0);
break;
case 4:
/* quad core, shared l3 among each package, private l2. */
top = smp_topo_1level(CG_SHARE_L3, 4, 0);
break;
case 5:
/* quad core, 2 dualcore parts on each package share l2. */
top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
break;
case 6:
/* Single-core 2xHTT */
top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
break;
case 7:
/* quad core with a shared l3, 8 threads sharing L2. */
top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
CG_FLAG_SMT);
break;
default:
/* Default, ask the system what it wants. */
top = cpu_topo();
break;
}
/*
* Verify the returned topology.
*/
if (top->cg_count != mp_ncpus)
panic("Built bad topology at %p. CPU count %d != %d",
top, top->cg_count, mp_ncpus);
if (CPU_CMP(&top->cg_mask, &all_cpus))
panic("Built bad topology at %p. CPU mask (%s) != (%s)",
top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
cpusetobj_strprint(cpusetbuf2, &all_cpus));
return (top);
}
struct cpu_group *
smp_topo_none(void)
{
struct cpu_group *top;
top = &group[0];
top->cg_parent = NULL;
top->cg_child = NULL;
top->cg_mask = all_cpus;
top->cg_count = mp_ncpus;
top->cg_children = 0;
top->cg_level = CG_SHARE_NONE;
top->cg_flags = 0;
return (top);
}
static int
smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
int count, int flags, int start)
{
char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
cpuset_t mask;
int i;
CPU_ZERO(&mask);
for (i = 0; i < count; i++, start++)
CPU_SET(start, &mask);
child->cg_parent = parent;
child->cg_child = NULL;
child->cg_children = 0;
child->cg_level = share;
child->cg_count = count;
child->cg_flags = flags;
child->cg_mask = mask;
parent->cg_children++;
for (; parent != NULL; parent = parent->cg_parent) {
if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
panic("Duplicate children in %p. mask (%s) child (%s)",
parent,
cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
CPU_OR(&parent->cg_mask, &child->cg_mask);
parent->cg_count += child->cg_count;
}
return (start);
}
struct cpu_group *
smp_topo_1level(int share, int count, int flags)
{
struct cpu_group *child;
struct cpu_group *top;
int packages;
int cpu;
int i;
cpu = 0;
top = &group[0];
packages = mp_ncpus / count;
top->cg_child = child = &group[1];
top->cg_level = CG_SHARE_NONE;
for (i = 0; i < packages; i++, child++)
cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
return (top);
}
struct cpu_group *
smp_topo_2level(int l2share, int l2count, int l1share, int l1count,
int l1flags)
{
struct cpu_group *top;
struct cpu_group *l1g;
struct cpu_group *l2g;
int cpu;
int i;
int j;
cpu = 0;
top = &group[0];
l2g = &group[1];
top->cg_child = l2g;
top->cg_level = CG_SHARE_NONE;
top->cg_children = mp_ncpus / (l2count * l1count);
l1g = l2g + top->cg_children;
for (i = 0; i < top->cg_children; i++, l2g++) {
l2g->cg_parent = top;
l2g->cg_child = l1g;
l2g->cg_level = l2share;
for (j = 0; j < l2count; j++, l1g++)
cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
l1flags, cpu);
}
return (top);
}
struct cpu_group *
smp_topo_find(struct cpu_group *top, int cpu)
{
struct cpu_group *cg;
cpuset_t mask;
int children;
int i;
CPU_SETOF(cpu, &mask);
cg = top;
for (;;) {
if (!CPU_OVERLAP(&cg->cg_mask, &mask))
return (NULL);
if (cg->cg_children == 0)
return (cg);
children = cg->cg_children;
for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
if (CPU_OVERLAP(&cg->cg_mask, &mask))
break;
}
return (NULL);
}
#else /* !SMP */
void
smp_rendezvous_cpus(cpuset_t map,
void (*setup_func)(void *),
void (*action_func)(void *),
void (*teardown_func)(void *),
void *arg)
{
/*
* In the !SMP case we just need to ensure the same initial conditions
* as the SMP case.
*/
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
}
void
smp_rendezvous(void (*setup_func)(void *),
void (*action_func)(void *),
void (*teardown_func)(void *),
void *arg)
{
/* See the comments in the smp_rendezvous_cpus() case. */
spinlock_enter();
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
spinlock_exit();
}
/*
* Provide dummy SMP support for UP kernels. Modules that need to use SMP
* APIs will still work using this dummy support.
*/
static void
mp_setvariables_for_up(void *dummy)
{
mp_ncpus = 1;
mp_maxid = PCPU_GET(cpuid);
CPU_SETOF(mp_maxid, &all_cpus);
KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
}
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
mp_setvariables_for_up, NULL);
#endif /* SMP */
void
smp_no_rendevous_barrier(void *dummy)
{
#ifdef SMP
KASSERT((!smp_started),("smp_no_rendevous called and smp is started"));
#endif
}
/*
* Wait specified idle threads to switch once. This ensures that even
* preempted threads have cycled through the switch function once,
* exiting their codepaths. This allows us to change global pointers
* with no other synchronization.
*/
int
quiesce_cpus(cpuset_t map, const char *wmesg, int prio)
{
struct pcpu *pcpu;
u_int gen[MAXCPU];
int error;
int cpu;
error = 0;
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
continue;
pcpu = pcpu_find(cpu);
gen[cpu] = pcpu->pc_idlethread->td_generation;
}
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu))
continue;
pcpu = pcpu_find(cpu);
thread_lock(curthread);
sched_bind(curthread, cpu);
thread_unlock(curthread);
while (gen[cpu] == pcpu->pc_idlethread->td_generation) {
error = tsleep(quiesce_cpus, prio, wmesg, 1);
if (error != EWOULDBLOCK)
goto out;
error = 0;
}
}
out:
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
return (error);
}
int
quiesce_all_cpus(const char *wmesg, int prio)
{
return quiesce_cpus(all_cpus, wmesg, prio);
}
/* Extra care is taken with this sysctl because the data type is volatile */
static int
sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS)
{
int error, active;
active = smp_started;
error = SYSCTL_OUT(req, &active, sizeof(active));
return (error);
}
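
For callers, the rendezvous machinery above collapses into one entry point: hand smp_rendezvous() up to three functions plus an argument, and every CPU runs them in IPI context with interrupts disabled, separated by the barriers documented in smp_rendezvous_action(). A minimal, hypothetical usage sketch (the counter is illustrative, not part of this commit):

static u_int rendezvous_hits;

static void
bump_counter(void *arg)
{

	/* Runs once on every CPU, interrupts disabled. */
	atomic_add_int((u_int *)arg, 1);
}

static void
count_all_cpus(void)
{

	rendezvous_hits = 0;
	/*
	 * NULL setup/teardown functions still act as barrier stages;
	 * pass smp_no_rendevous_barrier to skip a stage entirely.
	 */
	smp_rendezvous(NULL, bump_counter, NULL, &rendezvous_hits);
	KASSERT(rendezvous_hits == mp_ncpus,
	    ("%u of %d CPUs checked in", rendezvous_hits, mp_ncpus));
}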
Index: head/sys/x86/x86/mp_x86.c
===================================================================
--- head/sys/x86/x86/mp_x86.c (revision 290546)
+++ head/sys/x86/x86/mp_x86.c (revision 290547)
@@ -1,1120 +1,1092 @@
/*-
* Copyright (c) 1996, by Steve Passe
* Copyright (c) 2003, by Peter Wemm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. The name of the developer may NOT be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#ifdef __i386__
#include "opt_apic.h"
#endif
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"
#include
#include
#include
#include <sys/cons.h>	/* cngetc() */
#include
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define WARMBOOT_TARGET 0
#define WARMBOOT_OFF (KERNBASE + 0x0467)
#define WARMBOOT_SEG (KERNBASE + 0x0469)
#define CMOS_REG (0x70)
#define CMOS_DATA (0x71)
#define BIOS_RESET (0x0f)
#define BIOS_WARM (0x0a)
/* lock region used by kernel profiling */
int mcount_lock;
int mp_naps; /* # of Applications processors */
int boot_cpu_id = -1; /* designated BSP */
extern struct pcpu __pcpu[];
/* AP uses this during bootstrap. Do not staticize. */
char *bootSTK;
int bootAP;
/* Free these after use */
void *bootstacks[MAXCPU];
void *dpcpu;
struct pcb stoppcbs[MAXCPU];
struct susppcb **susppcbs;
#ifdef COUNT_IPIS
/* Interrupt counts. */
static u_long *ipi_preempt_counts[MAXCPU];
static u_long *ipi_ast_counts[MAXCPU];
u_long *ipi_invltlb_counts[MAXCPU];
u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
#endif
/* Default cpu_ops implementation. */
struct cpu_ops cpu_ops;
/*
* Local data and functions.
*/
static volatile cpuset_t ipi_stop_nmi_pending;
/* used to hold the AP's until we are ready to release them */
struct mtx ap_boot_mtx;
/* Set to 1 once we're ready to let the APs out of the pen. */
volatile int aps_ready = 0;
/*
* Store data from cpu_add() until later in the boot when we actually setup
* the APs.
*/
struct cpu_info cpu_info[MAX_APIC_ID + 1];
int cpu_apic_ids[MAXCPU];
int apic_cpuids[MAX_APIC_ID + 1];
/* Holds pending bitmap based IPIs per CPU */
volatile u_int cpu_ipi_pending[MAXCPU];
int cpu_logical; /* logical cpus per core */
int cpu_cores; /* cores per package */
static void release_aps(void *dummy);
static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
static int hyperthreading_allowed = 1;
void
mem_range_AP_init(void)
{
if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
mem_range_softc.mr_op->initAP(&mem_range_softc);
}
static void
topo_probe_amd(void)
{
int core_id_bits;
int id;
/* AMD processors do not support HTT. */
cpu_logical = 1;
if ((amd_feature2 & AMDID2_CMP) == 0) {
cpu_cores = 1;
return;
}
core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
AMDID_COREID_SIZE_SHIFT;
if (core_id_bits == 0) {
cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
return;
}
/* Fam 10h and newer should get here. */
for (id = 0; id <= MAX_APIC_ID; id++) {
/* Check logical CPU availability. */
if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
continue;
/* Check if logical CPU has the same package ID. */
if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
continue;
cpu_cores++;
}
}
/*
* Round up to the next power of two, if necessary, and then
* take log2.
* Returns -1 if argument is zero.
*/
static __inline int
mask_width(u_int x)
{
return (fls(x << (1 - powerof2(x))) - 1);
}
static void
topo_probe_0x4(void)
{
u_int p[4];
int pkg_id_bits;
int core_id_bits;
int max_cores;
int max_logical;
int id;
/* Both zero and one here mean one logical processor per package. */
max_logical = (cpu_feature & CPUID_HTT) != 0 ?
(cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
if (max_logical <= 1)
return;
/*
* Because of uniformity assumption we examine only
* those logical processors that belong to the same
* package as BSP. Further, we count number of
* logical processors that belong to the same core
* as BSP thus deducing number of threads per core.
*/
if (cpu_high >= 0x4) {
cpuid_count(0x04, 0, p);
max_cores = ((p[0] >> 26) & 0x3f) + 1;
} else
max_cores = 1;
core_id_bits = mask_width(max_logical/max_cores);
if (core_id_bits < 0)
return;
pkg_id_bits = core_id_bits + mask_width(max_cores);
for (id = 0; id <= MAX_APIC_ID; id++) {
/* Check logical CPU availability. */
if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
continue;
/* Check if logical CPU has the same package ID. */
if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
continue;
cpu_cores++;
/* Check if logical CPU has the same package and core IDs. */
if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
cpu_logical++;
}
KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
("topo_probe_0x4 couldn't find BSP"));
cpu_cores /= cpu_logical;
hyperthreading_cpus = cpu_logical;
}
static void
topo_probe_0xb(void)
{
u_int p[4];
int bits;
int cnt;
int i;
int logical;
int type;
int x;
/* We only support three levels for now. */
for (i = 0; i < 3; i++) {
cpuid_count(0x0b, i, p);
/* Fall back if CPU leaf 11 doesn't really exist. */
if (i == 0 && p[1] == 0) {
topo_probe_0x4();
return;
}
bits = p[0] & 0x1f;
logical = p[1] &= 0xffff;
type = (p[2] >> 8) & 0xff;
if (type == 0 || logical == 0)
break;
/*
* Because of uniformity assumption we examine only
* those logical processors that belong to the same
* package as BSP.
*/
for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
if (!cpu_info[x].cpu_present ||
cpu_info[x].cpu_disabled)
continue;
if (x >> bits == boot_cpu_id >> bits)
cnt++;
}
if (type == CPUID_TYPE_SMT)
cpu_logical = cnt;
else if (type == CPUID_TYPE_CORE)
cpu_cores = cnt;
}
if (cpu_logical == 0)
cpu_logical = 1;
cpu_cores /= cpu_logical;
}
/*
* Both topology discovery code and code that consumes topology
* information assume top-down uniformity of the topology.
* That is, all physical packages must be identical and each
* core in a package must have the same number of threads.
* Topology information is queried only on BSP, on which this
* code runs and for which it can query CPUID information.
* Then topology is extrapolated on all packages using the
* uniformity assumption.
*/
void
topo_probe(void)
{
static int cpu_topo_probed = 0;
if (cpu_topo_probed)
return;
CPU_ZERO(&logical_cpus_mask);
if (mp_ncpus <= 1)
cpu_cores = cpu_logical = 1;
else if (cpu_vendor_id == CPU_VENDOR_AMD)
topo_probe_amd();
else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
/*
* See Intel(R) 64 Architecture Processor
* Topology Enumeration article for details.
*
* Note that 0x1 <= cpu_high < 4 case should be
* compatible with topo_probe_0x4() logic when
* CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
* or it should trigger the fallback otherwise.
*/
if (cpu_high >= 0xb)
topo_probe_0xb();
else if (cpu_high >= 0x1)
topo_probe_0x4();
}
/*
* Fallback: assume each logical CPU is in separate
* physical package. That is, no multi-core, no SMT.
*/
if (cpu_cores == 0 || cpu_logical == 0)
cpu_cores = cpu_logical = 1;
cpu_topo_probed = 1;
}
struct cpu_group *
cpu_topo(void)
{
int cg_flags;
/*
* Determine whether any threading flags are
* necessary.
*/
topo_probe();
if (cpu_logical > 1 && hyperthreading_cpus)
cg_flags = CG_FLAG_HTT;
else if (cpu_logical > 1)
cg_flags = CG_FLAG_SMT;
else
cg_flags = 0;
if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
printf("WARNING: Non-uniform processors.\n");
printf("WARNING: Using suboptimal topology.\n");
return (smp_topo_none());
}
/*
* No multi-core or hyper-threaded.
*/
if (cpu_logical * cpu_cores == 1)
return (smp_topo_none());
/*
* Only HTT no multi-core.
*/
if (cpu_logical > 1 && cpu_cores == 1)
return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
/*
* Only multi-core no HTT.
*/
if (cpu_cores > 1 && cpu_logical == 1)
return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
/*
* Both HTT and multi-core.
*/
return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
CG_SHARE_L1, cpu_logical, cg_flags));
}
void
cpu_add(u_int apic_id, char boot_cpu)
{
if (apic_id > MAX_APIC_ID) {
panic("SMP: APIC ID %d too high", apic_id);
return;
}
KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
apic_id));
cpu_info[apic_id].cpu_present = 1;
if (boot_cpu) {
KASSERT(boot_cpu_id == -1,
("CPU %d claims to be BSP, but CPU %d already is", apic_id,
boot_cpu_id));
boot_cpu_id = apic_id;
cpu_info[apic_id].cpu_bsp = 1;
}
if (mp_ncpus < MAXCPU) {
mp_ncpus++;
mp_maxid = mp_ncpus - 1;
}
if (bootverbose)
printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
"AP");
}
void
cpu_mp_setmaxid(void)
{
/*
- * mp_maxid should be already set by calls to cpu_add().
- * Just sanity check its value here.
+ * mp_ncpus and mp_maxid should be already set by calls to cpu_add().
+ * If there were no calls to cpu_add() assume this is a UP system.
*/
if (mp_ncpus == 0)
- KASSERT(mp_maxid == 0,
- ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
- else if (mp_ncpus == 1)
- mp_maxid = 0;
- else
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__,
- mp_maxid, mp_ncpus));
+ mp_ncpus = 1;
}
int
cpu_mp_probe(void)
{
/*
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
*/
CPU_SETOF(0, &all_cpus);
- if (mp_ncpus == 0) {
- /*
- * No CPUs were found, so this must be a UP system. Setup
- * the variables to represent a system with a single CPU
- * with an id of 0.
- */
- mp_ncpus = 1;
- return (0);
- }
-
- /* At least one CPU was found. */
- if (mp_ncpus == 1) {
- /*
- * One CPU was found, so this must be a UP system with
- * an I/O APIC.
- */
- mp_maxid = 0;
- return (0);
- }
-
- /* At least two CPUs were found. */
- return (1);
+ return (mp_ncpus > 1);
}
/*
* Print various information about the SMP system hardware and setup.
*/
void
cpu_mp_announce(void)
{
const char *hyperthread;
int i;
printf("FreeBSD/SMP: %d package(s) x %d core(s)",
mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
if (hyperthreading_cpus > 1)
printf(" x %d HTT threads", cpu_logical);
else if (cpu_logical > 1)
printf(" x %d SMT threads", cpu_logical);
printf("\n");
/* List active CPUs first. */
printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
for (i = 1; i < mp_ncpus; i++) {
if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
hyperthread = "/HT";
else
hyperthread = "";
printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
cpu_apic_ids[i]);
}
/* List disabled CPUs last. */
for (i = 0; i <= MAX_APIC_ID; i++) {
if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
continue;
if (cpu_info[i].cpu_hyperthread)
hyperthread = "/HT";
else
hyperthread = "";
printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
i);
}
}
void
init_secondary_tail(void)
{
u_int cpuid;
/*
* On real hardware, switch to x2apic mode if possible. Do it
* after aps_ready was signalled, to avoid manipulating the
* mode while BSP might still want to send some IPI to us
* (second startup IPI is ignored on modern hardware etc).
*/
lapic_xapic_mode();
/* Initialize the PAT MSR. */
pmap_init_pat();
/* set up CPU registers and state */
cpu_setregs();
/* set up SSE/NX */
initializecpu();
/* set up FPU state on the AP */
#ifdef __amd64__
fpuinit();
#else
npxinit(false);
#endif
if (cpu_ops.cpu_init)
cpu_ops.cpu_init();
/* A quick check from sanity claus */
cpuid = PCPU_GET(cpuid);
if (PCPU_GET(apic_id) != lapic_id()) {
printf("SMP: cpuid = %d\n", cpuid);
printf("SMP: actual apic_id = %d\n", lapic_id());
printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
panic("cpuid mismatch! boom!!");
}
/* Initialize curthread. */
KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
PCPU_SET(curthread, PCPU_GET(idlethread));
mca_init();
mtx_lock_spin(&ap_boot_mtx);
/* Init local apic for irq's */
lapic_setup(1);
/* Set memory range attributes for this CPU to match the BSP */
mem_range_AP_init();
smp_cpus++;
CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
printf("SMP: AP CPU #%d Launched!\n", cpuid);
/* Determine if we are a logical CPU. */
/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
CPU_SET(cpuid, &logical_cpus_mask);
if (bootverbose)
lapic_dump("AP");
if (smp_cpus == mp_ncpus) {
/* enable IPI's, tlb shootdown, freezes etc */
atomic_store_rel_int(&smp_started, 1);
}
#ifdef __amd64__
/*
* Enable global pages TLB extension
* This also implicitly flushes the TLB
*/
load_cr4(rcr4() | CR4_PGE);
if (pmap_pcid_enabled)
load_cr4(rcr4() | CR4_PCIDE);
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_ufssel);
#endif
mtx_unlock_spin(&ap_boot_mtx);
/* Wait until all the APs are up. */
while (atomic_load_acq_int(&smp_started) == 0)
ia32_pause();
/* Start per-CPU event timers. */
cpu_initclocks_ap();
sched_throw(NULL);
panic("scheduler returned us to %s", __func__);
/* NOTREACHED */
}
/*******************************************************************
* local functions and data
*/
/*
* We tell the I/O APIC code which CPUs are to receive interrupts.
* If we don't want certain CPUs to receive IRQs we can simply
* omit them in this function.
* We also do not tell it about the BSP since it tells itself about
* the BSP internally to work with UP kernels and on UP machines.
*/
void
set_interrupt_apic_ids(void)
{
u_int i, apic_id;
for (i = 0; i < MAXCPU; i++) {
apic_id = cpu_apic_ids[i];
if (apic_id == -1)
continue;
if (cpu_info[apic_id].cpu_bsp)
continue;
if (cpu_info[apic_id].cpu_disabled)
continue;
/* Don't let hyperthreads service interrupts. */
if (cpu_logical > 1 &&
apic_id % cpu_logical != 0)
continue;
intr_add_cpu(i);
}
}
/*
* Assign logical CPU IDs to local APICs.
*/
void
assign_cpu_ids(void)
{
u_int i;
TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
&hyperthreading_allowed);
/* Check for explicitly disabled CPUs. */
for (i = 0; i <= MAX_APIC_ID; i++) {
if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
continue;
if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
cpu_info[i].cpu_hyperthread = 1;
/*
* Don't use this HT CPU if hyperthreading has been
* disabled via the tunable.
*/
if (hyperthreading_allowed == 0) {
cpu_info[i].cpu_disabled = 1;
continue;
}
}
/* Don't use this CPU if it has been disabled by a tunable. */
if (resource_disabled("lapic", i)) {
cpu_info[i].cpu_disabled = 1;
continue;
}
}
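/*
* If hyperthreading has been disabled via the tunable, forget the
* HT topology so the sibling threads are not counted below.
*/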
if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
hyperthreading_cpus = 0;
cpu_logical = 1;
}
/*
* Assign CPU IDs to local APIC IDs and disable any CPUs
* beyond MAXCPU. CPU 0 is always assigned to the BSP.
*
* To minimize confusion for userland, we attempt to number
* CPUs such that all threads and cores in a package are
* grouped together.  For now we assume that the BSP is always
* the first thread in a package, and simply begin numbering the
* APs from the BSP's APIC ID.
*/
mp_ncpus = 1;
cpu_apic_ids[0] = boot_cpu_id;
apic_cpuids[boot_cpu_id] = 0;
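/*
* Walk the APIC ID space starting just above the BSP and wrapping
* around, so that CPUs in the BSP's package tend to be numbered
* first.
*/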
for (i = boot_cpu_id + 1; i != boot_cpu_id;
i == MAX_APIC_ID ? i = 0 : i++) {
if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
cpu_info[i].cpu_disabled)
continue;
if (mp_ncpus < MAXCPU) {
cpu_apic_ids[mp_ncpus] = i;
apic_cpuids[i] = mp_ncpus;
mp_ncpus++;
} else
cpu_info[i].cpu_disabled = 1;
}
KASSERT(mp_maxid >= mp_ncpus - 1,
("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
mp_ncpus));
}
#ifdef COUNT_XINVLTLB_HITS
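/* TLB-shootdown IPI statistics, exported under the debug.xhits sysctl tree. */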
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif /* COUNT_XINVLTLB_HITS */
/*
* Send the INIT and STARTUP IPI sequence used to bring up an AP.
*/
void
ipi_startup(int apic_id, int vector)
{
/*
* This attempts to follow the algorithm described in the
* Intel Multiprocessor Specification v1.4 in section B.4.
* For each IPI, we allow the local APIC ~20us to deliver the
* IPI. If that times out, we panic.
*/
/*
* First we do an INIT IPI: this INIT IPI might be run, resetting
* and running the target CPU; or this INIT IPI might be latched (the
* P5 bug), leaving the CPU waiting for a STARTUP IPI; or this INIT
* IPI might be ignored.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
lapic_ipi_wait(100);
/* Explicitly deassert the INIT IPI. */
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
apic_id);
DELAY(10000); /* wait ~10ms */
/*
* Next we do a STARTUP IPI: the previous INIT IPI might still be
* latched (the P5 bug), in which case this first STARTUP IPI
* terminates immediately and the previously started INIT IPI
* continues; or the previous INIT IPI has already run and this
* STARTUP IPI will run; or the previous INIT IPI was ignored and
* this STARTUP IPI will run.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
if (!lapic_ipi_wait(100))
panic("Failed to deliver first STARTUP IPI to APIC %d",
apic_id);
DELAY(200); /* wait ~200us */
/*
* Finally we do a second STARTUP IPI: this second STARTUP IPI
* should run if the previous STARTUP IPI was cancelled by a latched
* INIT IPI; otherwise it will be ignored, as only ONE STARTUP IPI
* is recognized after a hardware RESET or an INIT IPI.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
if (!lapic_ipi_wait(100))
panic("Failed to deliver second STARTUP IPI to APIC %d",
apic_id);
DELAY(200); /* wait ~200us */
}
/*
* Send an IPI to the specified CPU, handling the bitmap logic.
*/
void
ipi_send_cpu(int cpu, u_int ipi)
{
u_int bitmap, old_pending, new_pending;
KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
if (IPI_IS_BITMAPED(ipi)) {
bitmap = 1 << ipi;
ipi = IPI_BITMAP_VECTOR;
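/*
* Atomically set our bit in the target's pending bitmap.  If
* other IPIs were already pending, the bitmap vector is
* already in flight and need not be sent again.
*/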
do {
old_pending = cpu_ipi_pending[cpu];
new_pending = old_pending | bitmap;
} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
old_pending, new_pending));
if (old_pending)
return;
}
lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
}
void
ipi_bitmap_handler(struct trapframe frame)
{
struct trapframe *oldframe;
struct thread *td;
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
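/*
* Dispatch the pending bitmapped IPIs in a critical section, with
* the interrupted frame made visible through td_intr_frame as in
* ordinary interrupt dispatch.
*/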
critical_enter();
td = curthread;
td->td_intr_nesting_level++;
oldframe = td->td_intr_frame;
td->td_intr_frame = &frame;
ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
(*ipi_preempt_counts[cpu])++;
#endif
sched_preempt(td);
}
if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
(*ipi_ast_counts[cpu])++;
#endif
/* Nothing to do for AST */
}
if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
#ifdef COUNT_IPIS
(*ipi_hardclock_counts[cpu])++;
#endif
hardclockintr();
}
td->td_intr_frame = oldframe;
td->td_intr_nesting_level--;
critical_exit();
}
/*
* Send an IPI to a set of CPUs.
*/
void
ipi_selected(cpuset_t cpus, u_int ipi)
{
int cpu;
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &cpus);
while ((cpu = CPU_FFS(&cpus)) != 0) {
cpu--;
CPU_CLR(cpu, &cpus);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
}
/*
* Send an IPI to a specific CPU.
*/
void
ipi_cpu(int cpu, u_int ipi)
{
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
CPU_SET_ATOMIC(cpu, &ipi_stop_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
/*
* Send an IPI to all CPUs EXCEPT myself.
*/
void
ipi_all_but_self(u_int ipi)
{
cpuset_t other_cpus;
other_cpus = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &other_cpus);
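/*
* Bitmapped IPIs must go through ipi_send_cpu() so that the
* per-CPU pending bitmaps are updated; everything else can use a
* single broadcast below.
*/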
if (IPI_IS_BITMAPED(ipi)) {
ipi_selected(other_cpus, ipi);
return;
}
/*
* IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
* of help in order to understand what the source is.
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &other_cpus);
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
int
ipi_nmi_handler(void)
{
u_int cpuid;
/*
* As long as there is no simple way to know an NMI's source,
* assume that if this CPU's bit is set in the global pending
* bitword, an IPI_STOP_HARD has been issued to us and should
* be handled.
*/
cpuid = PCPU_GET(cpuid);
if (!CPU_ISSET(cpuid, &ipi_stop_nmi_pending))
return (1);
CPU_CLR_ATOMIC(cpuid, &ipi_stop_nmi_pending);
cpustop_handler();
return (0);
}
/*
* Handle an IPI_STOP by saving our current context and spinning until we
* are resumed.
*/
void
cpustop_handler(void)
{
u_int cpu;
cpu = PCPU_GET(cpuid);
savectx(&stoppcbs[cpu]);
/* Indicate that we are stopped */
CPU_SET_ATOMIC(cpu, &stopped_cpus);
/* Wait for restart */
while (!CPU_ISSET(cpu, &started_cpus))
ia32_pause();
CPU_CLR_ATOMIC(cpu, &started_cpus);
CPU_CLR_ATOMIC(cpu, &stopped_cpus);
#if defined(__amd64__) && defined(DDB)
amd64_db_resume_dbreg();
#endif
if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
cpustop_restartfunc = NULL;
}
}
/*
* Handle an IPI_SUSPEND by saving our current context and spinning until we
* are resumed.
*/
void
cpususpend_handler(void)
{
u_int cpu;
mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
cpu = PCPU_GET(cpuid);
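/*
* savectx() returns non-zero when the context is first saved (the
* suspend path) and zero when execution returns here through the
* resumed context.
*/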
if (savectx(&susppcbs[cpu]->sp_pcb)) {
#ifdef __amd64__
fpususpend(susppcbs[cpu]->sp_fpususpend);
#else
npxsuspend(susppcbs[cpu]->sp_fpususpend);
#endif
wbinvd();
CPU_SET_ATOMIC(cpu, &suspended_cpus);
} else {
#ifdef __amd64__
fpuresume(susppcbs[cpu]->sp_fpususpend);
#else
npxresume(susppcbs[cpu]->sp_fpususpend);
#endif
pmap_init_pat();
initializecpu();
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
/* Indicate that we are resumed */
CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
/* Wait for resume */
while (!CPU_ISSET(cpu, &started_cpus))
ia32_pause();
if (cpu_ops.cpu_resume)
cpu_ops.cpu_resume();
#ifdef __amd64__
if (vmm_resume_p)
vmm_resume_p();
#endif
/* Resume MCA and local APIC */
lapic_xapic_mode();
mca_resume();
lapic_setup(0);
/* Indicate that we are resumed */
CPU_CLR_ATOMIC(cpu, &suspended_cpus);
CPU_CLR_ATOMIC(cpu, &started_cpus);
}
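/*
* Handle an IPI_INVLCACHE: write back and invalidate the caches, then
* acknowledge completion to the initiating CPU.
*/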
void
invlcache_handler(void)
{
#ifdef COUNT_IPIS
(*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
wbinvd();
atomic_add_int(&smp_tlb_wait, 1);
}
/*
* This is called once the rest of the system is up and running and we're
* ready to let the APs out of the pen.
*/
static void
release_aps(void *dummy __unused)
{
if (mp_ncpus == 1)
return;
atomic_store_rel_int(&aps_ready, 1);
while (smp_started == 0)
ia32_pause();
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
#ifdef COUNT_IPIS
/*
* Set up interrupt counters for IPI handlers.
*/
static void
mp_ipi_intrcnt(void *dummy)
{
char buf[64];
int i;
CPU_FOREACH(i) {
snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
intrcnt_add(buf, &ipi_invltlb_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
intrcnt_add(buf, &ipi_invlrng_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
intrcnt_add(buf, &ipi_invlpg_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
intrcnt_add(buf, &ipi_invlcache_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
intrcnt_add(buf, &ipi_preempt_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:ast", i);
intrcnt_add(buf, &ipi_ast_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
intrcnt_add(buf, &ipi_rendezvous_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
intrcnt_add(buf, &ipi_hardclock_counts[i]);
}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif