Index: head/sys/amd64/amd64/cpu_switch.S =================================================================== --- head/sys/amd64/amd64/cpu_switch.S +++ head/sys/amd64/amd64/cpu_switch.S @@ -188,8 +188,10 @@ /* Do we need to reload tss ? */ movq PCPU(TSSP),%rax movq PCB_TSSP(%r8),%rdx + movq PCPU(PRVSPACE),%r13 + addq $PC_COMMONTSS,%r13 testq %rdx,%rdx - cmovzq PCPU(COMMONTSSP),%rdx + cmovzq %r13,%rdx cmpq %rax,%rdx jne do_tss done_tss: Index: head/sys/amd64/amd64/db_interface.c =================================================================== --- head/sys/amd64/amd64/db_interface.c +++ head/sys/amd64/amd64/db_interface.c @@ -97,9 +97,9 @@ db_show_mdpcpu(struct pcpu *pc) { + db_printf("self = %p\n", pc->pc_prvspace); db_printf("curpmap = %p\n", pc->pc_curpmap); db_printf("tssp = %p\n", pc->pc_tssp); - db_printf("commontssp = %p\n", pc->pc_commontssp); db_printf("rsp0 = 0x%lx\n", pc->pc_rsp0); db_printf("kcr3 = 0x%lx\n", pc->pc_kcr3); db_printf("ucr3 = 0x%lx\n", pc->pc_ucr3); Index: head/sys/amd64/amd64/genassym.c =================================================================== --- head/sys/amd64/amd64/genassym.c +++ head/sys/amd64/amd64/genassym.c @@ -225,7 +225,7 @@ ASSYM(PC_FS32P, offsetof(struct pcpu, pc_fs32p)); ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p)); ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt)); -ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp)); +ASSYM(PC_COMMONTSS, offsetof(struct pcpu, pc_common_tss)); ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss)); ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt)); ASSYM(PC_KCR3, offsetof(struct pcpu, pc_kcr3)); Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c +++ head/sys/amd64/amd64/machdep.c @@ -669,8 +669,6 @@ static char dbg0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); -struct amd64tss common_tss[MAXCPU]; - /* * Software prototypes -- in more palatable form. 
* @@ -1550,8 +1548,7 @@ PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); - PCPU_SET(tssp, &common_tss[0]); - PCPU_SET(commontssp, &common_tss[0]); + PCPU_SET(tssp, PCPU_PTR(common_tss)); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); @@ -1572,9 +1569,12 @@ amd64_bsp_ist_init(struct pcpu *pc) { struct nmi_pcpu *np; + struct amd64tss *tssp; + tssp = &pc->pc_common_tss; + /* doublefault stack space, runs on ist1 */ - common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; + tssp->tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* * NMI stack, runs on ist2. The pcpu pointer is stored just @@ -1582,7 +1582,7 @@ */ np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t)pc; - common_tss[0].tss_ist2 = (long)np; + tssp->tss_ist2 = (long)np; /* * MC# stack, runs on ist3. The pcpu pointer is stored just @@ -1590,14 +1590,14 @@ */ np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1; np->np_pcpu = (register_t)pc; - common_tss[0].tss_ist3 = (long)np; + tssp->tss_ist3 = (long)np; /* * DB# stack, runs on ist4. 
*/ np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1; np->np_pcpu = (register_t)pc; - common_tss[0].tss_ist4 = (long)np; + tssp->tss_ist4 = (long)np; } u_int64_t @@ -1664,6 +1664,8 @@ */ pmap_thread_init_invl_gen(&thread0); + pc = &temp_bsp_pcpu; + /* * make gdt memory segments */ @@ -1672,14 +1674,13 @@ x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } - gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; + gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); - pc = &temp_bsp_pcpu; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); @@ -1781,7 +1782,8 @@ amd64_bsp_ist_init(pc); /* Set the IO permission bitmap (empty due to tss seg limit) */ - common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; + pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + + IOPERM_BITMAP_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); @@ -1865,7 +1867,7 @@ rsp0 = thread0.td_md.md_stack_base; /* Ensure the stack is aligned to 16 bytes */ rsp0 &= ~0xFul; - common_tss[0].tss_rsp0 = rsp0; + __pcpu[0].pc_common_tss.tss_rsp0 = rsp0; amd64_bsp_pcpu_init2(rsp0); /* transfer to user mode */ Index: head/sys/amd64/amd64/mp_machdep.c =================================================================== --- head/sys/amd64/amd64/mp_machdep.c +++ head/sys/amd64/amd64/mp_machdep.c @@ -285,26 +285,51 @@ /* Update microcode before doing anything else. 
*/ ucode_load_ap(cpu); + /* Get per-cpu data and save */ + pc = &__pcpu[cpu]; + + /* prime data page for it to use */ + pcpu_init(pc, cpu, sizeof(struct pcpu)); + dpcpu_init(dpcpu, cpu); + pc->pc_apic_id = cpu_apic_ids[cpu]; + pc->pc_prvspace = pc; + pc->pc_curthread = 0; + pc->pc_tssp = &pc->pc_common_tss; + pc->pc_rsp0 = 0; + pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack + + PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful); + pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu + + GPROC0_SEL]; + pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL]; + pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; + pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + + GUSERLDT_SEL]; + /* See comment in pmap_bootstrap(). */ + pc->pc_pcid_next = PMAP_PCID_KERN + 2; + pc->pc_pcid_gen = 1; + /* Init tss */ - common_tss[cpu] = common_tss[0]; - common_tss[cpu].tss_iobase = sizeof(struct amd64tss) + + pc->pc_common_tss = __pcpu[0].pc_common_tss; + pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; - common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; + pc->pc_common_tss.tss_rsp0 = 0; + pc->pc_common_tss.tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; + /* The NMI stack runs on IST2. */ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; - common_tss[cpu].tss_ist2 = (long) np; + pc->pc_common_tss.tss_ist2 = (long)np; /* The MC# stack runs on IST3. */ np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; - common_tss[cpu].tss_ist3 = (long) np; + pc->pc_common_tss.tss_ist3 = (long)np; /* The DB# stack runs on IST4. 
*/ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; - common_tss[cpu].tss_ist4 = (long) np; + pc->pc_common_tss.tss_ist4 = (long)np; /* Prepare private GDT */ - gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; + gdt_segs[GPROC0_SEL].ssd_base = (long)&pc->pc_common_tss; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1)) @@ -313,45 +338,20 @@ ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]); ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; - ap_gdt.rd_base = (long) &gdt[NGDT * cpu]; + ap_gdt.rd_base = (u_long)&gdt[NGDT * cpu]; lgdt(&ap_gdt); /* does magic intra-segment return */ - /* Get per-cpu data */ - pc = &__pcpu[cpu]; - - /* prime data page for it to use */ - pcpu_init(pc, cpu, sizeof(struct pcpu)); - dpcpu_init(dpcpu, cpu); - pc->pc_apic_id = cpu_apic_ids[cpu]; - pc->pc_prvspace = pc; - pc->pc_curthread = 0; - pc->pc_tssp = &common_tss[cpu]; - pc->pc_commontssp = &common_tss[cpu]; - pc->pc_rsp0 = 0; - pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack + - PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful); - pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu + - GPROC0_SEL]; - pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL]; - pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; - pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + - GUSERLDT_SEL]; - /* See comment in pmap_bootstrap(). */ - pc->pc_pcid_next = PMAP_PCID_KERN + 2; - pc->pc_pcid_gen = 1; - common_tss[cpu].tss_rsp0 = 0; - /* Save the per-cpu pointer for use by the NMI handler. */ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; - np->np_pcpu = (register_t) pc; + np->np_pcpu = (register_t)pc; /* Save the per-cpu pointer for use by the MC# handler. */ np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; - np->np_pcpu = (register_t) pc; + np->np_pcpu = (register_t)pc; /* Save the per-cpu pointer for use by the DB# handler. 
*/ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; - np->np_pcpu = (register_t) pc; + np->np_pcpu = (register_t)pc; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); Index: head/sys/amd64/amd64/pmap.c =================================================================== --- head/sys/amd64/amd64/pmap.c +++ head/sys/amd64/amd64/pmap.c @@ -1765,6 +1765,10 @@ pcpu_init(&__pcpu[0], 0, sizeof(struct pcpu)); amd64_bsp_pcpu_init1(&__pcpu[0]); amd64_bsp_ist_init(&__pcpu[0]); + gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&__pcpu[0].pc_common_tss; + ssdtosyssd(&gdt_segs[GPROC0_SEL], + (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); + ltr(GSEL(GPROC0_SEL, SEL_KPL)); __pcpu[0].pc_dynamic = temp_bsp_pcpu.pc_dynamic; __pcpu[0].pc_acpi_id = temp_bsp_pcpu.pc_acpi_id; @@ -9719,20 +9723,19 @@ sizeof(struct user_segment_descriptor) * NGDT * MAXCPU, false); pmap_pti_add_kva_locked((vm_offset_t)idt, (vm_offset_t)idt + sizeof(struct gate_descriptor) * NIDT, false); - pmap_pti_add_kva_locked((vm_offset_t)common_tss, - (vm_offset_t)common_tss + sizeof(struct amd64tss) * MAXCPU, false); CPU_FOREACH(i) { /* Doublefault stack IST 1 */ - va = common_tss[i].tss_ist1; + va = __pcpu[i].pc_common_tss.tss_ist1; pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); /* NMI stack IST 2 */ - va = common_tss[i].tss_ist2 + sizeof(struct nmi_pcpu); + va = __pcpu[i].pc_common_tss.tss_ist2 + sizeof(struct nmi_pcpu); pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); /* MC# stack IST 3 */ - va = common_tss[i].tss_ist3 + sizeof(struct nmi_pcpu); + va = __pcpu[i].pc_common_tss.tss_ist3 + + sizeof(struct nmi_pcpu); pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); /* DB# stack IST 4 */ - va = common_tss[i].tss_ist4 + sizeof(struct nmi_pcpu); + va = __pcpu[i].pc_common_tss.tss_ist4 + sizeof(struct nmi_pcpu); pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); } pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE, Index: head/sys/amd64/amd64/sys_machdep.c 
=================================================================== --- head/sys/amd64/amd64/sys_machdep.c +++ head/sys/amd64/amd64/sys_machdep.c @@ -426,8 +426,7 @@ memset(iomap, 0xff, IOPERM_BITMAP_SIZE); critical_enter(); /* Takes care of tss_rsp0. */ - memcpy(tssp, &common_tss[PCPU_GET(cpuid)], - sizeof(struct amd64tss)); + memcpy(tssp, PCPU_PTR(common_tss), sizeof(struct amd64tss)); tssp->tss_iobase = sizeof(*tssp); pcb->pcb_tssp = tssp; tss_sd = PCPU_GET(tss); Index: head/sys/amd64/include/pcpu.h =================================================================== --- head/sys/amd64/include/pcpu.h +++ head/sys/amd64/include/pcpu.h @@ -35,6 +35,8 @@ #error "sys/cdefs.h is a prerequisite for this file" #endif +#include <machine/tss.h> + #define PC_PTI_STACK_SZ 16 struct monitorbuf { @@ -56,7 +58,7 @@ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \ - struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \ + void *pc_pad0; \ uint64_t pc_kcr3; \ uint64_t pc_ucr3; \ uint64_t pc_saved_ucr3; \ @@ -89,7 +91,8 @@ uint32_t pc_pad[2]; \ uint8_t pc_mds_tmp[64]; \ u_int pc_ipi_bitmap; \ - char __pad[3172] /* pad to UMA_PCPU_ALLOC_SIZE */ + struct amd64tss pc_common_tss; \ + char __pad[3068] /* pad to UMA_PCPU_ALLOC_SIZE */ #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 Index: head/sys/amd64/include/tss.h =================================================================== --- head/sys/amd64/include/tss.h +++ head/sys/amd64/include/tss.h @@ -65,8 +65,4 @@ u_int16_t tss_iobase; /* io bitmap offset */ }; -#ifdef _KERNEL -extern struct amd64tss common_tss[]; -#endif - #endif /* _MACHINE_TSS_H_ */