Index: head/sys/amd64/amd64/genassym.c =================================================================== --- head/sys/amd64/amd64/genassym.c (revision 607) +++ head/sys/amd64/amd64/genassym.c (revision 608) @@ -1,189 +1,188 @@ /*- * Copyright (c) 1982, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 - * $Id: genassym.c,v 1.3 1993/10/10 02:09:44 rgrimes Exp $ + * $Id: genassym.c,v 1.4 1993/10/12 15:33:18 rgrimes Exp $ */ #include "sys/param.h" #include "sys/buf.h" #include "sys/vmmeter.h" #include "sys/proc.h" #include "sys/user.h" #include "sys/mbuf.h" #include "sys/msgbuf.h" #include "sys/resourcevar.h" #include "machine/cpu.h" #include "machine/trap.h" #include "machine/psl.h" #include "sys/syscall.h" #include "vm/vm_param.h" #include "vm/vm_map.h" #include "machine/pmap.h" main() { struct proc *p = (struct proc *)0; struct vmmeter *vm = (struct vmmeter *)0; struct user *up = (struct user *)0; struct rusage *rup = (struct rusage *)0; struct uprof *uprof = (struct uprof *)0; struct vmspace *vms = (struct vmspace *)0; vm_map_t map = (vm_map_t)0; pmap_t pmap = (pmap_t)0; struct pcb *pcb = (struct pcb *)0; struct trapframe *tf = (struct trapframe *)0; struct sigframe *sigf = (struct sigframe *)0; register unsigned i; printf("#define\tI386_CR3PAT %d\n", I386_CR3PAT); printf("#define\tUDOT_SZ %d\n", sizeof(struct user)); printf("#define\tP_LINK %d\n", &p->p_link); printf("#define\tP_RLINK %d\n", &p->p_rlink); printf("#define\tP_VMSPACE %d\n", &p->p_vmspace); printf("#define\tVM_PMAP %d\n", &vms->vm_pmap); printf("#define\tP_ADDR %d\n", &p->p_addr); printf("#define\tP_PRI %d\n", &p->p_pri); printf("#define\tP_STAT %d\n", &p->p_stat); printf("#define\tP_WCHAN %d\n", &p->p_wchan); printf("#define\tP_FLAG %d\n", &p->p_flag); printf("#define\tP_PID %d\n", &p->p_pid); printf("#define\tSSLEEP %d\n", SSLEEP); printf("#define\tSRUN %d\n", SRUN); printf("#define\tV_SWTCH %d\n", &vm->v_swtch); printf("#define\tV_TRAP %d\n", &vm->v_trap); printf("#define\tV_SYSCALL %d\n", &vm->v_syscall); printf("#define\tV_INTR %d\n", &vm->v_intr); printf("#define\tV_SOFT %d\n", &vm->v_soft); printf("#define\tV_PDMA %d\n", &vm->v_pdma); printf("#define\tV_FAULTS %d\n", &vm->v_faults); printf("#define\tV_PGREC %d\n", &vm->v_pgrec); printf("#define\tV_FASTPGREC %d\n", &vm->v_fastpgrec); printf("#define\tUPAGES %d\n", UPAGES); printf("#define\tHIGHPAGES %d\n", HIGHPAGES); printf("#define\tCLSIZE %d\n", CLSIZE); printf("#define\tNBPG %d\n", NBPG); printf("#define\tNPTEPG %d\n", NPTEPG); printf("#define\tNKPDE %d\n", NKPDE); printf("#define\tKPTDI %d\n", KPTDI); printf("#define\tPTDPTDI %d\n", PTDPTDI); printf("#define\tAPTDPTDI %d\n", APTDPTDI); printf("#define\tPGSHIFT %d\n", PGSHIFT); printf("#define\tPDRSHIFT %d\n", PDRSHIFT); printf("#define\tSYSPTSIZE %d\n", SYSPTSIZE); printf("#define\tUSRPTSIZE %d\n", USRPTSIZE); printf("#define\tUSRIOSIZE %d\n", USRIOSIZE); #ifdef SYSVSHM printf("#define\tSHMMAXPGS %d\n", SHMMAXPGS); #endif printf("#define\tUSRSTACK %d\n", USRSTACK); printf("#define\tKERNBASE %d\n", KERNBASE); printf("#define\tKERNSIZE %d\n", KERNSIZE); printf("#define\tMSGBUFPTECNT %d\n", btoc(sizeof (struct msgbuf))); printf("#define\tNMBCLUSTERS %d\n", NMBCLUSTERS); printf("#define\tMCLBYTES %d\n", MCLBYTES); printf("#define\tPCB_LINK %d\n", &pcb->pcb_tss.tss_link); printf("#define\tPCB_ESP0 %d\n", &pcb->pcb_tss.tss_esp0); printf("#define\tPCB_SS0 %d\n", &pcb->pcb_tss.tss_ss0); printf("#define\tPCB_ESP1 %d\n", &pcb->pcb_tss.tss_esp1); printf("#define\tPCB_SS1 %d\n", &pcb->pcb_tss.tss_ss1); printf("#define\tPCB_ESP2 %d\n", &pcb->pcb_tss.tss_esp2); printf("#define\tPCB_SS2 %d\n", &pcb->pcb_tss.tss_ss2); printf("#define\tPCB_CR3 %d\n", &pcb->pcb_tss.tss_cr3); printf("#define\tPCB_EIP %d\n", &pcb->pcb_tss.tss_eip); printf("#define\tPCB_EFLAGS %d\n", 
&pcb->pcb_tss.tss_eflags); printf("#define\tPCB_EAX %d\n", &pcb->pcb_tss.tss_eax); printf("#define\tPCB_ECX %d\n", &pcb->pcb_tss.tss_ecx); printf("#define\tPCB_EDX %d\n", &pcb->pcb_tss.tss_edx); printf("#define\tPCB_EBX %d\n", &pcb->pcb_tss.tss_ebx); printf("#define\tPCB_ESP %d\n", &pcb->pcb_tss.tss_esp); printf("#define\tPCB_EBP %d\n", &pcb->pcb_tss.tss_ebp); printf("#define\tPCB_ESI %d\n", &pcb->pcb_tss.tss_esi); printf("#define\tPCB_EDI %d\n", &pcb->pcb_tss.tss_edi); printf("#define\tPCB_ES %d\n", &pcb->pcb_tss.tss_es); printf("#define\tPCB_CS %d\n", &pcb->pcb_tss.tss_cs); printf("#define\tPCB_SS %d\n", &pcb->pcb_tss.tss_ss); printf("#define\tPCB_DS %d\n", &pcb->pcb_tss.tss_ds); printf("#define\tPCB_FS %d\n", &pcb->pcb_tss.tss_fs); printf("#define\tPCB_GS %d\n", &pcb->pcb_tss.tss_gs); printf("#define\tPCB_LDT %d\n", &pcb->pcb_tss.tss_ldt); printf("#define\tPCB_USERLDT %d\n", &pcb->pcb_ldt); printf("#define\tPCB_IOOPT %d\n", &pcb->pcb_tss.tss_ioopt); - printf("#define\tNKMEMCLUSTERS %d\n", NKMEMCLUSTERS); printf("#define\tU_PROF %d\n", &up->u_stats.p_prof); printf("#define\tU_PROFSCALE %d\n", &up->u_stats.p_prof.pr_scale); printf("#define\tPR_BASE %d\n", &uprof->pr_base); printf("#define\tPR_SIZE %d\n", &uprof->pr_size); printf("#define\tPR_OFF %d\n", &uprof->pr_off); printf("#define\tPR_SCALE %d\n", &uprof->pr_scale); printf("#define\tRU_MINFLT %d\n", &rup->ru_minflt); printf("#define\tPCB_FLAGS %d\n", &pcb->pcb_flags); printf("#define\tPCB_SAVEFPU %d\n", &pcb->pcb_savefpu); printf("#define\tFP_USESEMC %d\n", FP_USESEMC); printf("#define\tPCB_SAVEEMC %d\n", &pcb->pcb_saveemc); printf("#define\tPCB_CMAP2 %d\n", &pcb->pcb_cmap2); printf("#define\tPCB_IML %d\n", &pcb->pcb_iml); printf("#define\tPCB_ONFAULT %d\n", &pcb->pcb_onfault); printf("#define\tTF_ES %d\n", &tf->tf_es); printf("#define\tTF_DS %d\n", &tf->tf_ds); printf("#define\tTF_EDI %d\n", &tf->tf_edi); printf("#define\tTF_ESI %d\n", &tf->tf_esi); printf("#define\tTF_EBP %d\n", &tf->tf_ebp); printf("#define\tTF_ISP %d\n", &tf->tf_isp); printf("#define\tTF_EBX %d\n", &tf->tf_ebx); printf("#define\tTF_EDX %d\n", &tf->tf_edx); printf("#define\tTF_ECX %d\n", &tf->tf_ecx); printf("#define\tTF_EAX %d\n", &tf->tf_eax); printf("#define\tTF_TRAPNO %d\n", &tf->tf_trapno); printf("#define\tTF_ERR %d\n", &tf->tf_err); printf("#define\tTF_EIP %d\n", &tf->tf_eip); printf("#define\tTF_CS %d\n", &tf->tf_cs); printf("#define\tTF_EFLAGS %d\n", &tf->tf_eflags); printf("#define\tTF_ESP %d\n", &tf->tf_esp); printf("#define\tTF_SS %d\n", &tf->tf_ss); printf("#define\tSIGF_SIGNUM %d\n", &sigf->sf_signum); printf("#define\tSIGF_CODE %d\n", &sigf->sf_code); printf("#define\tSIGF_SCP %d\n", &sigf->sf_scp); printf("#define\tSIGF_HANDLER %d\n", &sigf->sf_handler); printf("#define\tSIGF_SC %d\n", &sigf->sf_sc); printf("#define\tB_READ %d\n", B_READ); printf("#define\tENOENT %d\n", ENOENT); printf("#define\tEFAULT %d\n", EFAULT); printf("#define\tENAMETOOLONG %d\n", ENAMETOOLONG); exit(0); } Index: head/sys/amd64/amd64/locore.S =================================================================== --- head/sys/amd64/amd64/locore.S (revision 607) +++ head/sys/amd64/amd64/locore.S (revision 608) @@ -1,2158 +1,2152 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $Id: locore.s,v 1.6 1993/10/10 06:07:57 rgrimes Exp $ + * $Id: locore.s,v 1.7 1993/10/13 07:11:11 rgrimes Exp $ */ /* * locore.s: 4BSD machine support for the Intel 386 * Preliminary version * Written by William F. Jolitz, 386BSD Project */ #include "npx.h" #include "assym.s" #include "machine/psl.h" #include "machine/pte.h" #include "errno.h" #include "machine/trap.h" #include "machine/specialreg.h" #include "i386/isa/debug.h" #include "machine/cputypes.h" #define KDSEL 0x10 #define SEL_RPL_MASK 0x0003 #define TRAPF_CS_OFF (13 * 4) /* * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. Perhaps newer versions * will have more pleasant appearance. */ .set IDXSHIFT,10 #define ALIGN_DATA .align 2 #define ALIGN_TEXT .align 2,0x90 /* 4-byte boundaries, NOP-filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte boundaries better for 486 */ #define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: #define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) #ifdef GPROF /* * ALTENTRY() must be before a corresponding ENTRY() so that it can jump * over the mcounting. */ #define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f #define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: /* * The call to mcount supports the usual (bad) conventions. We allocate * some data and pass a pointer to it although the 386BSD doesn't use * the data. We set up a frame before calling mcount because that is * the standard convention although it makes work for both mcount and * callers. */ #define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ pushl %ebp; movl %esp,%ebp; \ movl $1b,%eax; call mcount; popl %ebp #else /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align to because there may be no ALTENTRY() before it. * If there is a previous ALTENTRY() then the alignment code is empty. 
*/ #define ALTENTRY(name) GEN_ENTRY(_/**/name) #define ENTRY(name) GEN_ENTRY(_/**/name) #endif /* NB: NOP now preserves registers so NOPs can be inserted anywhere */ /* XXX: NOP and FASTER_NOP are misleadingly named */ #ifdef DUMMY_NOPS /* this will break some older machines */ #define FASTER_NOP #define NOP #else #define FASTER_NOP pushl %eax ; inb $0x84,%al ; popl %eax #define NOP pushl %eax ; inb $0x84,%al ; inb $0x84,%al ; popl %eax #endif /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). */ .globl _PTmap,_PTD,_PTDpde,_Sysmap .set _PTmap,PTDPTDI << PDRSHIFT .set _PTD,_PTmap + (PTDPTDI * NBPG) - .set _PTDpde,_PTD + (PTDPTDI * 4) /* XXX 4=sizeof pte */ + .set _PTDpde,_PTD + (PTDPTDI * 4) /* XXX 4=sizeof pde */ - .set _Sysmap,0xFDFF8000 + .set _Sysmap,_PTmap + (KPTDI * NBPG) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. */ .globl _APTmap,_APTD,_APTDpde .set _APTmap,APTDPTDI << PDRSHIFT .set _APTD,_APTmap + (APTDPTDI * NBPG) - .set _APTDpde,_PTD + (APTDPTDI * 4) /* XXX 4=sizeof pte */ + .set _APTDpde,_PTD + (APTDPTDI * 4) /* XXX 4=sizeof pde */ /* * Access to each processes kernel stack is via a region of * per-process address space (at the beginning), immediatly above * the user process stack. */ .set _kstack,USRSTACK .globl _kstack .set PPDROFF,0x3F6 .set PPTEOFF,0x400-UPAGES /* 0x3FE */ /* * Globals */ .data .globl _esym _esym: .long 0 /* ptr to end of syms */ .globl _boothowto,_bootdev,_curpcb .globl _cpu,_cold,_atdevbase _cpu: .long 0 /* are we 386, 386sx, or 486 */ _cold: .long 1 /* cold till we are not */ _atdevbase: .long 0 /* location of start of iomem in virtual */ _atdevphys: .long 0 /* location of device mapping ptes (phys) */ .globl _IdlePTD,_KPTphys _IdlePTD: .long 0 _KPTphys: .long 0 .globl _cyloffset,_proc0paddr _cyloffset: .long 0 _proc0paddr: .long 0 .space 512 tmpstk: /* * System Initialization */ .text /* * btext: beginning of text section. * Also the entry point (jumped to directly from the boot blocks). */ ENTRY(btext) movw $0x1234,0x472 /* warm boot */ jmp 1f .space 0x500 /* skip over warm boot shit */ /* * pass parameters on stack (howto, bootdev, unit, cyloffset, esym) * note: (%esp) is return address of boot * ( if we want to hold onto /boot, it's physical %esp up to _end) */ 1: movl 4(%esp),%eax movl %eax,_boothowto-KERNBASE movl 8(%esp),%eax movl %eax,_bootdev-KERNBASE movl 12(%esp),%eax movl %eax,_cyloffset-KERNBASE movl 16(%esp),%eax addl $KERNBASE,%eax movl %eax,_esym-KERNBASE /* find out our CPU type. */ pushfl popl %eax movl %eax,%ecx xorl $0x40000,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax shrl $18,%eax andl $1,%eax push %ecx popfl cmpl $0,%eax jne 1f movl $CPU_386,_cpu-KERNBASE jmp 2f 1: movl $CPU_486,_cpu-KERNBASE 2: /* * Finished with old stack; load new %esp now instead of later so * we can trace this code without having to worry about the trace * trap clobbering the memory test or the zeroing of the bss+bootstrap * page tables. * * XXX - wdboot clears the bss after testing that this is safe. * This is too wasteful - memory below 640K is scarce. The boot * program should check: * text+data <= &stack_variable - more_space_for_stack * text+data+bss+pad+space_for_page_tables <= end_of_memory * Oops, the gdt is in the carcass of the boot program so clearing * the rest of memory is still not possible. 
*/ movl $tmpstk-KERNBASE,%esp /* bootstrap stack end location */ -#ifdef garbage - /* count up memory */ - - xorl %eax,%eax /* start with base memory at 0x0 */ - #movl $0xA0000/NBPG,%ecx /* look every 4K up to 640K */ - movl $0xA0,%ecx /* look every 4K up to 640K */ -1: movl (%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,(%eax) /* write test pattern */ - /* flush stupid cache here! (with bcopy(0,0,512*1024) ) */ - cmpl $0xa55a5aa5,(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,(%eax) /* restore memory */ - addl $NBPG,%eax - loop 1b -2: shrl $12,%eax - movl %eax,_Maxmem-KERNBASE - - movl $0x100000,%eax /* next, talley remaining memory */ - #movl $((0xFFF000-0x100000)/NBPG),%ecx - movl $(0xFFF-0x100),%ecx -1: movl (%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,(%eax) /* write test pattern */ - cmpl $0xa55a5aa5,(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,(%eax) /* restore memory */ - addl $NBPG,%eax - loop 1b -2: shrl $12,%eax - movl %eax,_Maxmem-KERNBASE -#endif - /* * Virtual address space of kernel: * * text | data | bss | [syms] | page dir | proc0 kernel stack | usr stk map | Sysmap * 0 1 2 3 4 */ /* find end of kernel image */ movl $_end-KERNBASE,%ecx addl $NBPG-1,%ecx /* page align up */ andl $~(NBPG-1),%ecx movl %ecx,%esi /* esi=start of tables */ /* clear bss and memory for bootstrap pagetables. */ movl $_edata-KERNBASE,%edi subl %edi,%ecx addl $(UPAGES+5)*NBPG,%ecx /* size of tables */ xorl %eax,%eax /* pattern */ cld rep stosb +/* + * If we are loaded at 0x0 check to see if we have space for the + * page tables pages after the kernel and before the 640K ISA memory + * hole. If we do not have space relocate the page table pages and + * the kernel stack to start at 1MB. The value that ends up in esi + * is used by the rest of locore to build the tables. Locore adjusts + * esi each time it allocates a structure and then passes the final + * value to init386(first) as the value first. esi should ALWAYS + * be page aligned!! + */ + movl %esi,%ecx /* Get current first availiable address */ + cmpl $0x100000,%ecx /* Lets see if we are already above 1MB */ + jge 1f /* yep, don't need to check for room */ + addl $(NKPDE + 4) * NBPG,%ecx /* XXX the 4 is for kstack */ + /* space for kstack, PTD and PTE's */ + cmpl $(640*1024),%ecx + /* see if it fits in low memory */ + jle 1f /* yep, don't need to relocate it */ + movl $0x100000,%esi /* won't fit, so start it at 1MB */ +1: + /* physical address of Idle Address space */ movl %esi,_IdlePTD-KERNBASE /* * fillkpt * eax = (page frame address | control | status) == pte * ebx = address of page table * ecx = how many pages to map */ #define fillkpt \ 1: movl %eax,(%ebx) ; \ addl $NBPG,%eax ; /* increment physical address */ \ addl $4,%ebx ; /* next pte */ \ loop 1b ; /* * Map Kernel * N.B. don't bother with making kernel text RO, as 386 * ignores R/W AND U/S bits on kernel access (only v works) ! * * First step - build page tables */ movl %esi,%ecx /* this much memory, */ shrl $PGSHIFT,%ecx /* for this many pte s */ addl $UPAGES+4,%ecx /* including our early context */ cmpl $0xa0,%ecx /* XXX - cover debugger pages */ jae 1f movl $0xa0,%ecx 1: movl $PG_V|PG_KW,%eax /* having these bits set, */ lea (4*NBPG)(%esi),%ebx /* physical address of KPT in proc 0, */ movl %ebx,_KPTphys-KERNBASE /* in the kernel page table, */ fillkpt /* map I/O memory map */ movl $0x100-0xa0,%ecx /* for this many pte s, */ movl $(0xa0000|PG_V|PG_UW),%eax /* having these bits set,(perhaps URW?) 
XXX 06 Aug 92 */ movl %ebx,_atdevphys-KERNBASE /* remember phys addr of ptes */ fillkpt /* map proc 0's kernel stack into user page table page */ movl $UPAGES,%ecx /* for this many pte s, */ lea (1*NBPG)(%esi),%eax /* physical address in proc 0 */ lea (KERNBASE)(%eax),%edx movl %edx,_proc0paddr-KERNBASE /* remember VA for 0th process init */ orl $PG_V|PG_KW,%eax /* having these bits set, */ lea (3*NBPG)(%esi),%ebx /* physical address of stack pt in proc 0 */ addl $(PPTEOFF*4),%ebx fillkpt /* * Construct a page table directory * (of page directory elements - pde's) */ /* install a pde for temporary double map of bottom of VA */ lea (4*NBPG)(%esi),%eax /* physical address of kernel page table */ orl $PG_V|PG_UW,%eax /* pde entry is valid XXX 06 Aug 92 */ movl %eax,(%esi) /* which is where temp maps! */ /* kernel pde's */ movl $(NKPDE),%ecx /* for this many pde s, */ lea (KPTDI*4)(%esi),%ebx /* offset of pde for kernel */ fillkpt /* install a pde recursively mapping page directory as a page table! */ movl %esi,%eax /* phys address of ptd in proc 0 */ orl $PG_V|PG_UW,%eax /* pde entry is valid XXX 06 Aug 92 */ movl %eax,PTDPTDI*4(%esi) /* which is where PTmap maps! */ /* install a pde to map kernel stack for proc 0 */ lea (3*NBPG)(%esi),%eax /* physical address of pt in proc 0 */ orl $PG_V|PG_KW,%eax /* pde entry is valid */ movl %eax,PPDROFF*4(%esi) /* which is where kernel stack maps! */ /* copy and convert stuff from old gdt and idt for debugger */ cmpl $0x0375c339,0x96104 /* XXX - debugger signature */ jne 1f movb $1,_bdb_exists-KERNBASE 1: pushal subl $2*6,%esp sgdt (%esp) movl 2(%esp),%esi /* base address of current gdt */ movl $_gdt-KERNBASE,%edi movl %edi,2(%esp) movl $8*18/4,%ecx rep /* copy gdt */ movsl movl $_gdt-KERNBASE,-8+2(%edi) /* adjust gdt self-ptr */ movb $0x92,-8+5(%edi) sidt 6(%esp) movl 6+2(%esp),%esi /* base address of current idt */ movl 8+4(%esi),%eax /* convert dbg descriptor to ... */ movw 8(%esi),%ax movl %eax,bdb_dbg_ljmp+1-KERNBASE /* ... immediate offset ... */ movl 8+2(%esi),%eax movw %ax,bdb_dbg_ljmp+5-KERNBASE /* ... and selector for ljmp */ movl 24+4(%esi),%eax /* same for bpt descriptor */ movw 24(%esi),%ax movl %eax,bdb_bpt_ljmp+1-KERNBASE movl 24+2(%esi),%eax movw %ax,bdb_bpt_ljmp+5-KERNBASE movl $_idt-KERNBASE,%edi movl %edi,6+2(%esp) movl $8*4/4,%ecx rep /* copy idt */ movsl lgdt (%esp) lidt 6(%esp) addl $2*6,%esp popal /* load base of page directory and enable mapping */ movl %esi,%eax /* phys address of ptd in proc 0 */ orl $I386_CR3PAT,%eax movl %eax,%cr3 /* load ptd addr into mmu */ movl %cr0,%eax /* get control word */ /* * XXX it is now safe to always (attempt to) set CR0_WP and to set up * the page tables assuming it works, so USE_486_WRITE_PROTECT will go * away. The special 386 PTE checking needs to be conditional on * whatever distingiushes 486-only kernels from 386-486 kernels. */ #ifdef USE_486_WRITE_PROTECT orl $CR0_PE|CR0_PG|CR0_WP,%eax /* enable paging */ #else orl $CR0_PE|CR0_PG,%eax /* enable paging */ #endif movl %eax,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high mem */ ret begin: /* now running relocated at KERNBASE where the system is linked to run */ .globl _Crtat /* XXX - locore should not know about */ movl _Crtat,%eax /* variables of device drivers (pccons)! 
*/ subl $(KERNBASE+0xA0000),%eax movl _atdevphys,%edx /* get pte PA */ subl _KPTphys,%edx /* remove base of ptes, now have phys offset */ shll $PGSHIFT-2,%edx /* corresponding to virt offset */ addl $KERNBASE,%edx /* add virtual base */ movl %edx,_atdevbase addl %eax,%edx movl %edx,_Crtat /* set up bootstrap stack */ movl $_kstack+UPAGES*NBPG-4*12,%esp /* bootstrap stack end location */ xorl %eax,%eax /* mark end of frames */ movl %eax,%ebp movl _proc0paddr,%eax movl %esi,PCB_CR3(%eax) - lea 7*NBPG(%esi),%esi /* skip past stack. */ - pushl %esi - /* relocate debugger gdt entries */ movl $_gdt+8*9,%eax /* adjust slots 9-17 */ movl $9,%ecx reloc_gdt: movb $0xfe,7(%eax) /* top byte of base addresses, was 0, */ addl $8,%eax /* now KERNBASE>>24 */ loop reloc_gdt cmpl $0,_bdb_exists je 1f int $3 1: + /* + * Skip over the page tables and the kernel stack + * XXX 4 is kstack size + */ + lea (NKPDE + 4) * NBPG(%esi),%esi + + pushl %esi /* value of first for init386(first) */ call _init386 /* wire 386 chip for unix operation */ movl $0,_PTD call _main /* autoconfiguration, mountroot etc */ popl %esi /* * now we've run main() and determined what cpu-type we are, we can * enable WP mode on i486 cpus and above. * on return from main(), we are process 1 * set up address space and stack so that we can 'return' to user mode */ .globl __ucodesel,__udatasel movl __ucodesel,%eax movl __udatasel,%ecx /* build outer stack frame */ pushl %ecx /* user ss */ pushl $USRSTACK /* user esp */ pushl %eax /* user cs */ pushl $0 /* user ip */ movl %cx,%ds movl %cx,%es movl %ax,%fs /* double map cs to fs */ movl %cx,%gs /* and ds to gs */ lret /* goto user! */ pushl $lretmsg1 /* "should never get here!" */ call _panic lretmsg1: .asciz "lret: toinit\n" .set exec,59 .set exit,1 #define LCALL(x,y) .byte 0x9a ; .long y; .word x /* * Icode is copied out to process 1 and executed in user mode: * execve("/sbin/init", argv, envp); exit(0); * If the execve fails, process 1 exits and the system panics. 
*/ NON_GPROF_ENTRY(icode) pushl $0 /* envp for execve() */ # pushl $argv-_icode /* can't do this 'cos gas 1.38 is broken */ movl $argv,%eax subl $_icode,%eax pushl %eax /* argp for execve() */ # pushl $init-_icode movl $init,%eax subl $_icode,%eax pushl %eax /* fname for execve() */ pushl %eax /* dummy return address */ movl $exec,%eax LCALL(0x7,0x0) /* exit if something botches up in the above execve() */ pushl %eax /* execve failed, the errno will do for an */ /* exit code because errnos are < 128 */ pushl %eax /* dummy return address */ movl $exit,%eax LCALL(0x7,0x0) init: .asciz "/sbin/init" ALIGN_DATA argv: .long init+6-_icode /* argv[0] = "init" ("/sbin/init" + 6) */ .long eicode-_icode /* argv[1] follows icode after copyout */ .long 0 eicode: .globl _szicode _szicode: .long _szicode-_icode NON_GPROF_ENTRY(sigcode) call SIGF_HANDLER(%esp) lea SIGF_SC(%esp),%eax /* scp (the call may have clobbered the */ /* copy at 8(%esp)) */ pushl %eax pushl %eax /* junk to fake return address */ movl $103,%eax /* XXX sigreturn() */ LCALL(0x7,0) /* enter kernel with args on stack */ hlt /* never gets here */ .globl _szsigcode _szsigcode: .long _szsigcode-_sigcode /* * Support routines for GCC, general C-callable functions */ ENTRY(__udivsi3) movl 4(%esp),%eax xorl %edx,%edx divl 8(%esp) ret ENTRY(__divsi3) movl 4(%esp),%eax cltd idivl 8(%esp) ret /* * I/O bus instructions via C */ ENTRY(inb) /* val = inb(port) */ movl 4(%esp),%edx subl %eax,%eax NOP inb %dx,%al ret ENTRY(inw) /* val = inw(port) */ movl 4(%esp),%edx subl %eax,%eax NOP inw %dx,%ax ret ENTRY(insb) /* insb(port, addr, cnt) */ pushl %edi movw 8(%esp),%dx movl 12(%esp),%edi movl 16(%esp),%ecx cld NOP rep insb NOP movl %edi,%eax popl %edi ret ENTRY(insw) /* insw(port, addr, cnt) */ pushl %edi movw 8(%esp),%dx movl 12(%esp),%edi movl 16(%esp),%ecx cld NOP rep insw NOP movl %edi,%eax popl %edi ret ENTRY(rtcin) /* rtcin(val) */ movl 4(%esp),%eax outb %al,$0x70 subl %eax,%eax inb $0x71,%al ret ENTRY(outb) /* outb(port, val) */ movl 4(%esp),%edx NOP movl 8(%esp),%eax outb %al,%dx NOP ret ENTRY(outw) /* outw(port, val) */ movl 4(%esp),%edx NOP movl 8(%esp),%eax outw %ax,%dx NOP ret ENTRY(outsb) /* outsb(port, addr, cnt) */ pushl %esi movw 8(%esp),%dx movl 12(%esp),%esi movl 16(%esp),%ecx cld NOP rep outsb NOP movl %esi,%eax popl %esi ret ENTRY(outsw) /* outsw(port, addr, cnt) */ pushl %esi movw 8(%esp),%dx movl 12(%esp),%esi movl 16(%esp),%ecx cld NOP rep outsw NOP movl %esi,%eax popl %esi ret /* * bcopy family */ ENTRY(bzero) /* void bzero(void *base, u_int cnt) */ pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx cld rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret ENTRY(fillw) /* fillw(pat, base, cnt) */ pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx cld rep stosw popl %edi ret ENTRY(bcopyb) bcopyb: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards. */ addl %ecx,%esi std decl %edi decl %esi rep movsb popl %edi popl %esi cld ret ENTRY(bcopyw) bcopyw: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ shrl $1,%ecx /* copy by 16-bit words */ rep movsw adc %ecx,%ecx /* any bytes left? 
*/ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi std andl $1,%ecx /* any fractional bytes? */ decl %edi decl %esi rep movsb movl 20(%esp),%ecx /* copy remainder by 16-bit words */ shrl $1,%ecx decl %esi decl %edi rep movsw popl %edi popl %esi cld ret ENTRY(bcopyx) movl 16(%esp),%eax cmpl $2,%eax je bcopyw /* not _bcopyw, to avoid multiple mcounts */ cmpl $4,%eax je bcopy jmp bcopyb /* * (ov)bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ALTENTRY(ovbcopy) ENTRY(bcopy) bcopy: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ shrl $2,%ecx /* copy by 32-bit words */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi std andl $3,%ecx /* any fractional bytes? */ decl %edi decl %esi rep movsb movl 20(%esp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld ret ALTENTRY(ntohl) ENTRY(htonl) movl 4(%esp),%eax #ifdef i486 /* XXX */ /* Since Gas 1.38 does not grok bswap this has been coded as the * equivalent bytes. This can be changed back to bswap when we * upgrade to a newer version of Gas */ /* bswap %eax */ .byte 0x0f .byte 0xc8 #else xchgb %al,%ah roll $16,%eax xchgb %al,%ah #endif ret ALTENTRY(ntohs) ENTRY(htons) movzwl 4(%esp),%eax xchgb %al,%ah ret /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines and possibly * the math- and DOS emulators should be the only places that do this. * * We have to access the memory with user's permissions, so use a segment * selector with RPL 3. For writes to user space we have to additionally * check the PTE for write permission, because the 386 does not check * write permissions when we are executing with EPL 0. The 486 does check * this if the WP bit is set in CR0, so we can use a simpler version here. * * These routines set curpcb->onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->onfault instead of the function. */ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ movl _curpcb,%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi pushl %ebx movl 16(%esp),%esi movl 20(%esp),%edi movl 24(%esp),%ebx orl %ebx,%ebx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. * * Otherwise, it saves having to load and restore %es to get the * usual segment-based protection (the destination segment for movs * is always %es). The other explicit checks for user-writablility * are not quite sufficient. They fail for the user area because * we mapped the user area read/write to avoid having an #ifdef in * vm_machdep.c. They fail for user PTEs and/or PTDs! (107 * addresses including 0xff800000 and 0xfc000000). I'm not sure if * this can be fixed. Marking the PTEs supervisor mode and the * PDE's user mode would almost work, but there may be a problem * with the self-referential PDE. 
*/ movl %edi,%eax addl %ebx,%eax jc copyout_fault #define VM_END_USER_ADDRESS 0xFDBFE000 /* XXX */ cmpl $VM_END_USER_ADDRESS,%eax ja copyout_fault #ifndef USE_486_WRITE_PROTECT /* * We have to check each PTE for user write permission. * The checking may cause a page fault, so it is important to set * up everything for return via copyout_fault before here. */ /* compute number of pages */ movl %edi,%ecx andl $NBPG-1,%ecx addl %ebx,%ecx decl %ecx shrl $IDXSHIFT+2,%ecx incl %ecx /* compute PTE offset for start address */ movl %edi,%edx shrl $IDXSHIFT,%edx andb $0xfc,%dl 1: /* check PTE for each page */ movb _PTmap(%edx),%al andb $0x07,%al /* Pages must be VALID + USERACC + WRITABLE */ cmpb $0x07,%al je 2f /* simulate a trap */ pushl %edx pushl %ecx shll $IDXSHIFT,%edx pushl %edx call _trapwrite /* trapwrite(addr) */ popl %edx popl %ecx popl %edx orl %eax,%eax /* if not ok, return EFAULT */ jnz copyout_fault 2: addl $4,%edx decl %ecx jnz 1b /* check next page */ #endif /* ndef USE_486_WRITE_PROTECT */ /* bcopy(%esi, %edi, %ebx) */ cld movl %ebx,%ecx shrl $2,%ecx rep movsl movb %bl,%cl andb $3,%cl /* XXX can we trust the rest of %ecx on clones? */ rep movsb done_copyout: popl %ebx popl %edi popl %esi xorl %eax,%eax movl _curpcb,%edx movl %eax,PCB_ONFAULT(%edx) ret ALIGN_TEXT copyout_fault: popl %ebx popl %edi popl %esi movl _curpcb,%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret ENTRY(copyin) /* copyin(from_user, to_kernel, len) */ movl _curpcb,%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi movl 12(%esp),%esi /* caddr_t from */ movl 16(%esp),%edi /* caddr_t to */ movl 20(%esp),%ecx /* size_t len */ movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld gs rep movsl movb %al,%cl andb $3,%cl /* copy remaining bytes */ gs rep movsb popl %edi popl %esi xorl %eax,%eax movl _curpcb,%edx movl %eax,PCB_ONFAULT(%edx) ret ALIGN_TEXT copyin_fault: popl %edi popl %esi movl _curpcb,%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * fu{byte,sword,word} : fetch a byte(sword, word) from user memory */ ALTENTRY(fuiword) ENTRY(fuword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ENTRY(fusword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ALTENTRY(fuibyte) ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ALIGN_TEXT fusufault: movl _curpcb,%ecx xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) decl %eax ret /* * su{byte,sword,word}: write a byte(word, longword) to user memory */ #ifdef USE_486_WRITE_PROTECT /* * we only have to set the right segment selector. */ ALTENTRY(suiword) ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret ENTRY(susword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movw 8(%esp),%ax gs movw %ax,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret ALTENTRY(suibyte) ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movb 8(%esp),%al gs movb %al,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret #else /* USE_486_WRITE_PROTECT */ /* * here starts the trouble again: check PTE, twice if word crosses * a page boundary. 
*/ /* XXX - page boundary crossing is not handled yet */ ALTENTRY(suibyte) ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movb %al,(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret ENTRY(susword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movw %ax,(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret ALTENTRY(suiword) ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,0(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret #endif /* USE_486_WRITE_PROTECT */ /* * copyoutstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ #ifdef USE_486_WRITE_PROTECT ENTRY(copyoutstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f /* * gs override doesn't work for stosb. Use the same explicit check * as in copyout(). It's much slower now because it is per-char. * XXX - however, it would be faster to rewrite this function to use * strlen() and copyout(). */ cmpl $VM_END_USER_ADDRESS,%edi jae cpystrflt lodsb gs stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f #else /* ndef USE_486_WRITE_PROTECT */ ENTRY(copyoutstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ 1: /* * It suffices to check that the first byte is in user space, because * we look at a page at a time and the end address is on a page * boundary. 
*/ cmpl $VM_END_USER_ADDRESS,%edi jae cpystrflt movl %edi,%eax shrl $IDXSHIFT,%eax andb $0xfc,%al movb _PTmap(%eax),%al andb $7,%al cmpb $7,%al je 2f /* simulate trap */ pushl %edx pushl %edi call _trapwrite popl %edi popl %edx orl %eax,%eax jnz cpystrflt 2: /* copy up to end of this page */ movl %edi,%eax andl $NBPG-1,%eax movl $NBPG,%ecx subl %eax,%ecx /* ecx = NBPG - (src % NBPG) */ cmpl %ecx,%edx jge 3f movl %edx,%ecx /* ecx = min(ecx, edx) */ 3: orl %ecx,%ecx jz 4f decl %ecx decl %edx lodsb stosb orb %al,%al jnz 3b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* next page */ orl %edx,%edx jnz 1b /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f #endif /* USE_486_WRITE_PROTECT */ /* * copyinstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f gs lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f cpystrflt: movl $EFAULT,%eax 6: /* set *lencopied and return %eax */ movl _curpcb,%ecx movl $0,PCB_ONFAULT(%ecx) movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx orl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret /* * copystr(from, to, maxlen, int *lencopied) */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx orl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret /* * Handling of special 386 registers and descriptor tables etc */ ENTRY(lgdt) /* void lgdt(struct region_descriptor *rdp); */ /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %ax,%ds movl %ax,%es movl %ax,%ss /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax # movl $KCSEL,4(%esp) movl $8,4(%esp) lret /* * void lidt(struct region_descriptor *rdp); */ ENTRY(lidt) movl 4(%esp),%eax lidt (%eax) ret /* * void lldt(u_short sel) */ ENTRY(lldt) lldt 4(%esp) ret /* * void ltr(u_short sel) */ ENTRY(ltr) ltr 4(%esp) ret ENTRY(ssdtosd) /* ssdtosd(*ssdp,*sdp) */ pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll $16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret ENTRY(tlbflush) /* tlbflush() */ movl %cr3,%eax orl $I386_CR3PAT,%eax movl %eax,%cr3 ret ENTRY(load_cr0) /* load_cr0(cr0) */ movl 4(%esp),%eax movl %eax,%cr0 ret ENTRY(rcr0) /* rcr0() */ movl %cr0,%eax ret ENTRY(rcr2) /* rcr2() */ movl %cr2,%eax ret ENTRY(rcr3) /* rcr3() */ movl %cr3,%eax ret ENTRY(load_cr3) /* void load_cr3(caddr_t cr3) */ movl 4(%esp),%eax orl 
$I386_CR3PAT,%eax movl %eax,%cr3 ret /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* restore edi */ movl 20(%eax),%edx /* get rta */ movl %edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret /*****************************************************************************/ /* Scheduling */ /*****************************************************************************/ /* * The following primitives manipulate the run queues. * _whichqs tells which of the 32 queues _qs * have processes in them. Setrq puts processes into queues, Remrq * removes them from queues. The running process is on no queue, * other processes are on a queue related to p->p_pri, divided by 4 * actually to shrink the 0-127 range of priorities into the 32 available * queues. */ .globl _whichqs,_qs,_cnt,_panic .comm _noproc,4 .comm _runrun,4 /* * Setrq(p) * * Call should be made at spl6(), and p->p_stat should be SRUN */ ENTRY(setrq) movl 4(%esp),%eax cmpl $0,P_RLINK(%eax) /* should not be on q already */ je set1 pushl $set2 call _panic set1: movzbl P_PRI(%eax),%edx shrl $2,%edx btsl %edx,_whichqs /* set q full bit */ shll $3,%edx addl $_qs,%edx /* locate q hdr */ movl %edx,P_LINK(%eax) /* link process on tail of q */ movl P_RLINK(%edx),%ecx movl %ecx,P_RLINK(%eax) movl %eax,P_RLINK(%edx) movl %eax,P_LINK(%ecx) ret set2: .asciz "setrq" /* * Remrq(p) * * Call should be made at spl6(). */ ENTRY(remrq) movl 4(%esp),%eax movzbl P_PRI(%eax),%edx shrl $2,%edx btrl %edx,_whichqs /* clear full bit, panic if clear already */ jb rem1 pushl $rem3 call _panic rem1: pushl %edx movl P_LINK(%eax),%ecx /* unlink process */ movl P_RLINK(%eax),%edx movl %edx,P_RLINK(%ecx) movl P_RLINK(%eax),%ecx movl P_LINK(%eax),%edx movl %edx,P_LINK(%ecx) popl %edx movl $_qs,%ecx shll $3,%edx addl %edx,%ecx cmpl P_LINK(%ecx),%ecx /* q still has something? */ je rem2 shrl $3,%edx /* yes, set bit as still full */ btsl %edx,_whichqs rem2: movl $0,P_RLINK(%eax) /* zap reverse link to indicate off list */ ret rem3: .asciz "remrq" sw0: .asciz "swtch" /* * When no processes are on the runq, Swtch branches to idle * to wait for something to come ready. */ ALIGN_TEXT Idle: sti SHOW_STI ALIGN_TEXT idle_loop: call _spl0 cmpl $0,_whichqs jne sw1 hlt /* wait for interrupt */ jmp idle_loop badsw: pushl $sw0 call _panic /*NOTREACHED*/ /* * Swtch() */ SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ ENTRY(swtch) incl _cnt+V_SWTCH /* switch to new process. first, save context as needed */ movl _curproc,%ecx /* if no process to save, don't bother */ testl %ecx,%ecx je sw1 movl P_ADDR(%ecx),%ecx movl (%esp),%eax /* Hardware registers */ movl %eax,PCB_EIP(%ecx) movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) -#ifdef NPX +#if NNPX > 0 /* have we used fp, and need a save? 
*/ mov _curproc,%eax cmp %eax,_npxproc jne 1f pushl %ecx /* h/w bugs make saving complicated */ leal PCB_SAVEFPU(%ecx),%eax pushl %eax call _npxsave /* do it in a big C function */ popl %eax popl %ecx 1: -#endif +#endif /* NNPX > 0 */ movl _CMAP2,%eax /* save temporary map PTE */ movl %eax,PCB_CMAP2(%ecx) /* in our context */ movl $0,_curproc /* out of process */ # movw _cpl,%ax # movw %ax,PCB_IML(%ecx) /* save ipl */ /* save is done, now choose a new process or idle */ sw1: cli SHOW_CLI movl _whichqs,%edi 2: /* XXX - bsf is sloow */ bsfl %edi,%eax /* find a full q */ je Idle /* if none, idle */ /* XX update whichqs? */ swfnd: btrl %eax,%edi /* clear q full status */ jnb 2b /* if it was clear, look for another */ movl %eax,%ebx /* save which one we are using */ shll $3,%eax addl $_qs,%eax /* select q */ movl %eax,%esi #ifdef DIAGNOSTIC cmpl P_LINK(%eax),%eax /* linked to self? (e.g. not on list) */ je badsw /* not possible */ #endif movl P_LINK(%eax),%ecx /* unlink from front of process q */ movl P_LINK(%ecx),%edx movl %edx,P_LINK(%eax) movl P_RLINK(%ecx),%eax movl %eax,P_RLINK(%edx) cmpl P_LINK(%ecx),%esi /* q empty */ je 3f btsl %ebx,%edi /* nope, set to indicate full */ 3: movl %edi,_whichqs /* update q status */ movl $0,%eax movl %eax,_want_resched #ifdef DIAGNOSTIC cmpl %eax,P_WCHAN(%ecx) jne badsw cmpb $SRUN,P_STAT(%ecx) jne badsw #endif movl %eax,P_RLINK(%ecx) /* isolate process to run */ movl P_ADDR(%ecx),%edx movl PCB_CR3(%edx),%ebx /* switch address space */ movl %ebx,%cr3 /* restore context */ movl PCB_EBX(%edx),%ebx movl PCB_ESP(%edx),%esp movl PCB_EBP(%edx),%ebp movl PCB_ESI(%edx),%esi movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) movl PCB_CMAP2(%edx),%eax /* get temporary map */ movl %eax,_CMAP2 /* reload temporary map PTE */ movl %ecx,_curproc /* into next process */ movl %edx,_curpcb pushl %edx /* save p to return */ /* * XXX - 0.0 forgot to save it - is that why this was commented out in 0.1? * I think restoring the cpl is unnecessary, but we must turn off the cli * now that spl*() don't do it as a side affect. */ pushl PCB_IML(%edx) sti SHOW_STI #if 0 call _splx #endif addl $4,%esp /* * XXX - 0.0 gets here via swtch_to_inactive(). I think 0.1 gets here in the * same way. Better return a value. */ popl %eax /* return(p); */ ret ENTRY(mvesp) movl %esp,%eax ret /* * struct proc *swtch_to_inactive(p) ; struct proc *p; * * At exit of a process, move off the address space of the * process and onto a "safe" one. Then, on a temporary stack * return and run code that disposes of the old state. * Since this code requires a parameter from the "old" stack, * pass it back as a return value. */ ENTRY(swtch_to_inactive) popl %edx /* old pc */ popl %eax /* arg, our return value */ movl _IdlePTD,%ecx movl %ecx,%cr3 /* good bye address space */ #write buffer? movl $tmpstk-4,%esp /* temporary stack, compensated for call */ jmp %edx /* return, execute remainder of cleanup */ /* * savectx(pcb, altreturn) * Update pcb, saving current processor state and arranging * for alternate return ala longjmp in swtch if altreturn is true. */ ENTRY(savectx) movl 4(%esp),%ecx movw _cpl,%ax movw %ax,PCB_IML(%ecx) movl (%esp),%eax movl %eax,PCB_EIP(%ecx) movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) -#ifdef NPX +#if NNPX > 0 /* * If npxproc == NULL, then the npx h/w state is irrelevant and the * state had better already be in the pcb. 
This is true for forks * but not for dumps (the old book-keeping with FP flags in the pcb * always lost for dumps because the dump pcb has 0 flags). * * If npxproc != NULL, then we have to save the npx h/w state to * npxproc's pcb and copy it to the requested pcb, or save to the * requested pcb and reload. Copying is easier because we would * have to handle h/w bugs for reloading. We used to lose the * parent's npx state for forks by forgetting to reload. */ mov _npxproc,%eax testl %eax,%eax je 1f pushl %ecx movl P_ADDR(%eax),%eax leal PCB_SAVEFPU(%eax),%eax pushl %eax pushl %eax call _npxsave popl %eax popl %eax popl %ecx pushl %ecx pushl $108+8*2 /* XXX h/w state size + padding */ leal PCB_SAVEFPU(%ecx),%ecx pushl %ecx pushl %eax call _bcopy addl $12,%esp popl %ecx 1: -#endif +#endif /* NNPX > 0 */ movl _CMAP2,%edx /* save temporary map PTE */ movl %edx,PCB_CMAP2(%ecx) /* in our context */ cmpl $0,8(%esp) je 1f movl %esp,%edx /* relocate current sp relative to pcb */ subl $_kstack,%edx /* (sp is relative to kstack): */ addl %edx,%ecx /* pcb += sp - kstack; */ movl %eax,(%ecx) /* write return pc at (relocated) sp@ */ /* this mess deals with replicating register state gcc hides */ movl 12(%esp),%eax movl %eax,12(%ecx) movl 16(%esp),%eax movl %eax,16(%ecx) movl 20(%esp),%eax movl %eax,20(%ecx) movl 24(%esp),%eax movl %eax,24(%ecx) 1: xorl %eax,%eax /* return 0 */ ret /* * addupc(int pc, struct uprof *up, int ticks): * update profiling information for the user process. */ ENTRY(addupc) pushl %ebp movl %esp,%ebp movl 12(%ebp),%edx /* up */ movl 8(%ebp),%eax /* pc */ subl PR_OFF(%edx),%eax /* pc -= up->pr_off */ jl L1 /* if (pc < 0) return */ shrl $1,%eax /* praddr = pc >> 1 */ imull PR_SCALE(%edx),%eax /* praddr *= up->pr_scale */ shrl $15,%eax /* praddr = praddr << 15 */ andl $-2,%eax /* praddr &= ~1 */ cmpl PR_SIZE(%edx),%eax /* if (praddr > up->pr_size) return */ ja L1 /* addl %eax,%eax /* praddr -> word offset */ addl PR_BASE(%edx),%eax /* praddr += up-> pr_base */ movl 16(%ebp),%ecx /* ticks */ movl _curpcb,%edx movl $proffault,PCB_ONFAULT(%edx) addl %ecx,(%eax) /* storage location += ticks */ movl $0,PCB_ONFAULT(%edx) L1: leave ret ALIGN_TEXT proffault: /* if we get a fault, then kill profiling all together */ movl $0,PCB_ONFAULT(%edx) /* squish the fault handler */ movl 12(%ebp),%ecx movl $0,PR_SCALE(%ecx) /* up->pr_scale = 0 */ leave ret /* To be done: */ ENTRY(astoff) ret /*****************************************************************************/ /* Trap handling */ /*****************************************************************************/ /* * Trap and fault vector routines * * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose * control. The sti's give the standard losing behaviour for ddb and kgdb. 
*/ #define IDTVEC(name) ALIGN_TEXT; .globl _X/**/name; _X/**/name: #define TRAP(a) pushl $(a) ; jmp alltraps #ifdef KGDB # define BPTTRAP(a) sti; pushl $(a) ; jmp bpttraps #else # define BPTTRAP(a) sti; TRAP(a) #endif IDTVEC(div) pushl $0; TRAP(T_DIVIDE) IDTVEC(dbg) #ifdef BDBTRAP BDBTRAP(dbg) #endif pushl $0; BPTTRAP(T_TRCTRAP) IDTVEC(nmi) pushl $0; TRAP(T_NMI) IDTVEC(bpt) #ifdef BDBTRAP BDBTRAP(bpt) #endif pushl $0; BPTTRAP(T_BPTFLT) IDTVEC(ofl) pushl $0; TRAP(T_OFLOW) IDTVEC(bnd) pushl $0; TRAP(T_BOUND) IDTVEC(ill) pushl $0; TRAP(T_PRIVINFLT) IDTVEC(dna) pushl $0; TRAP(T_DNA) IDTVEC(dble) TRAP(T_DOUBLEFLT) /*PANIC("Double Fault");*/ IDTVEC(fpusegm) pushl $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) /*PANIC("TSS not valid");*/ IDTVEC(missing) TRAP(T_SEGNPFLT) IDTVEC(stk) TRAP(T_STKFLT) IDTVEC(prot) TRAP(T_PROTFLT) IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(rsvd) pushl $0; TRAP(T_RESERVED) IDTVEC(fpu) -#ifdef NPX +#if NNPX > 0 /* * Handle like an interrupt so that we can call npxintr to clear the * error. It would be better to handle npx interrupts as traps but * this is difficult for nested interrupts. */ pushl $0 /* dummy error code */ pushl $T_ASTFLT pushal nop /* silly, the bug is for popal and it only * bites when the next instruction has a * complicated address mode */ pushl %ds pushl %es /* now the stack frame is a trap frame */ movl $KDSEL,%eax movl %ax,%ds movl %ax,%es pushl _cpl pushl $0 /* dummy unit to finish building intr frame */ incl _cnt+V_TRAP call _npxintr jmp doreti -#else +#else /* NNPX > 0 */ pushl $0; TRAP(T_ARITHTRAP) -#endif +#endif /* NNPX > 0 */ /* 17 - 31 reserved for future exp */ IDTVEC(rsvd0) pushl $0; TRAP(17) IDTVEC(rsvd1) pushl $0; TRAP(18) IDTVEC(rsvd2) pushl $0; TRAP(19) IDTVEC(rsvd3) pushl $0; TRAP(20) IDTVEC(rsvd4) pushl $0; TRAP(21) IDTVEC(rsvd5) pushl $0; TRAP(22) IDTVEC(rsvd6) pushl $0; TRAP(23) IDTVEC(rsvd7) pushl $0; TRAP(24) IDTVEC(rsvd8) pushl $0; TRAP(25) IDTVEC(rsvd9) pushl $0; TRAP(26) IDTVEC(rsvd10) pushl $0; TRAP(27) IDTVEC(rsvd11) pushl $0; TRAP(28) IDTVEC(rsvd12) pushl $0; TRAP(29) IDTVEC(rsvd13) pushl $0; TRAP(30) IDTVEC(rsvd14) pushl $0; TRAP(31) SUPERALIGN_TEXT alltraps: pushal nop pushl %ds pushl %es movl $KDSEL,%eax movl %ax,%ds movl %ax,%es calltrap: incl _cnt+V_TRAP call _trap /* * Return through doreti to handle ASTs. Have to change trap frame * to interrupt frame. */ movl $T_ASTFLT,4+4+32(%esp) /* new trap type (err code not used) */ pushl _cpl pushl $0 /* dummy unit */ jmp doreti #ifdef KGDB /* * This code checks for a kgdb trap, then falls through * to the regular trap code. */ SUPERALIGN_TEXT bpttraps: pushal nop pushl %es pushl %ds movl $KDSEL,%eax movl %ax,%ds movl %ax,%es testb $SEL_RPL_MASK,TRAPF_CS_OFF(%esp) /* non-kernel mode? */ jne calltrap /* yes */ call _kgdb_trap_glue jmp calltrap #endif /* * Call gate entry for syscall */ SUPERALIGN_TEXT IDTVEC(syscall) pushfl /* only for stupid carry bit and more stupid wait3 cc kludge */ /* XXX - also for direction flag (bzero, etc. clear it) */ pushal /* only need eax,ecx,edx - trap resaves others */ nop movl $KDSEL,%eax /* switch to kernel segments */ movl %ax,%ds movl %ax,%es incl _cnt+V_SYSCALL /* kml 3/25/93 */ call _syscall /* * Return through doreti to handle ASTs. Have to change syscall frame * to interrupt frame. * * XXX - we should have set up the frame earlier to avoid the * following popal/pushal (not much can be done to avoid shuffling * the flags). Consistent frames would simplify things all over. 
*/ movl 32+0(%esp),%eax /* old flags, shuffle to above cs:eip */ movl 32+4(%esp),%ebx /* `int' frame should have been ef, eip, cs */ movl 32+8(%esp),%ecx movl %ebx,32+0(%esp) movl %ecx,32+4(%esp) movl %eax,32+8(%esp) popal nop pushl $0 /* dummy error code */ pushl $T_ASTFLT pushal nop movl __udatasel,%eax /* switch back to user segments */ pushl %eax /* XXX - better to preserve originals? */ pushl %eax pushl _cpl pushl $0 jmp doreti #ifdef SHOW_A_LOT /* * 'show_bits' was too big when defined as a macro. The line length for some * enclosing macro was too big for gas. Perhaps the code would have blown * the cache anyway. */ ALIGN_TEXT show_bits: pushl %eax SHOW_BIT(0) SHOW_BIT(1) SHOW_BIT(2) SHOW_BIT(3) SHOW_BIT(4) SHOW_BIT(5) SHOW_BIT(6) SHOW_BIT(7) SHOW_BIT(8) SHOW_BIT(9) SHOW_BIT(10) SHOW_BIT(11) SHOW_BIT(12) SHOW_BIT(13) SHOW_BIT(14) SHOW_BIT(15) popl %eax ret .data bit_colors: .byte GREEN,RED,0,0 .text #endif /* SHOW_A_LOT */ /* * include generated interrupt vectors and ISA intr code */ #include "i386/isa/vector.s" #include "i386/isa/icu.s" Index: head/sys/amd64/amd64/locore.s =================================================================== --- head/sys/amd64/amd64/locore.s (revision 607) +++ head/sys/amd64/amd64/locore.s (revision 608) @@ -1,2158 +1,2152 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $Id: locore.s,v 1.6 1993/10/10 06:07:57 rgrimes Exp $ + * $Id: locore.s,v 1.7 1993/10/13 07:11:11 rgrimes Exp $ */ /* * locore.s: 4BSD machine support for the Intel 386 * Preliminary version * Written by William F. 
Jolitz, 386BSD Project */ #include "npx.h" #include "assym.s" #include "machine/psl.h" #include "machine/pte.h" #include "errno.h" #include "machine/trap.h" #include "machine/specialreg.h" #include "i386/isa/debug.h" #include "machine/cputypes.h" #define KDSEL 0x10 #define SEL_RPL_MASK 0x0003 #define TRAPF_CS_OFF (13 * 4) /* * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. Perhaps newer versions * will have more pleasant appearance. */ .set IDXSHIFT,10 #define ALIGN_DATA .align 2 #define ALIGN_TEXT .align 2,0x90 /* 4-byte boundaries, NOP-filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte boundaries better for 486 */ #define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: #define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) #ifdef GPROF /* * ALTENTRY() must be before a corresponding ENTRY() so that it can jump * over the mcounting. */ #define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f #define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: /* * The call to mcount supports the usual (bad) conventions. We allocate * some data and pass a pointer to it although the 386BSD doesn't use * the data. We set up a frame before calling mcount because that is * the standard convention although it makes work for both mcount and * callers. */ #define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ pushl %ebp; movl %esp,%ebp; \ movl $1b,%eax; call mcount; popl %ebp #else /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align to because there may be no ALTENTRY() before it. * If there is a previous ALTENTRY() then the alignment code is empty. */ #define ALTENTRY(name) GEN_ENTRY(_/**/name) #define ENTRY(name) GEN_ENTRY(_/**/name) #endif /* NB: NOP now preserves registers so NOPs can be inserted anywhere */ /* XXX: NOP and FASTER_NOP are misleadingly named */ #ifdef DUMMY_NOPS /* this will break some older machines */ #define FASTER_NOP #define NOP #else #define FASTER_NOP pushl %eax ; inb $0x84,%al ; popl %eax #define NOP pushl %eax ; inb $0x84,%al ; inb $0x84,%al ; popl %eax #endif /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). */ .globl _PTmap,_PTD,_PTDpde,_Sysmap .set _PTmap,PTDPTDI << PDRSHIFT .set _PTD,_PTmap + (PTDPTDI * NBPG) - .set _PTDpde,_PTD + (PTDPTDI * 4) /* XXX 4=sizeof pte */ + .set _PTDpde,_PTD + (PTDPTDI * 4) /* XXX 4=sizeof pde */ - .set _Sysmap,0xFDFF8000 + .set _Sysmap,_PTmap + (KPTDI * NBPG) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. */ .globl _APTmap,_APTD,_APTDpde .set _APTmap,APTDPTDI << PDRSHIFT .set _APTD,_APTmap + (APTDPTDI * NBPG) - .set _APTDpde,_PTD + (APTDPTDI * 4) /* XXX 4=sizeof pte */ + .set _APTDpde,_PTD + (APTDPTDI * 4) /* XXX 4=sizeof pde */ /* * Access to each processes kernel stack is via a region of * per-process address space (at the beginning), immediatly above * the user process stack. 
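The _Sysmap change above replaces a magic constant with the same address computed from the recursive map. A quick arithmetic check, using values inferred for this era rather than taken from this revision's headers (PTDPTDI = 0x3F7, KPTDI = 0x3F8 for a 0xFE000000 KERNBASE, NBPG = 4096, PDRSHIFT = 22):

        #include <stdio.h>

        /* assumed constants; the real ones come from machine/pmap.h via assym.s */
        #define NBPG        4096
        #define PDRSHIFT    22
        #define PTDPTDI     0x3F7       /* pde slot that recursively maps the page directory */
        #define KPTDI       0x3F8       /* first kernel pde slot (KERNBASE >> PDRSHIFT) */

        int
        main(void)
        {
                unsigned long ptmap = (unsigned long)PTDPTDI << PDRSHIFT;

                printf("PTmap  = 0x%08lx\n", ptmap);                   /* 0xFDC00000 */
                printf("PTD    = 0x%08lx\n", ptmap + PTDPTDI * NBPG);  /* 0xFDFF7000 */
                printf("Sysmap = 0x%08lx\n", ptmap + KPTDI * NBPG);    /* 0xFDFF8000, the old literal */
                return 0;
        }

With those values the computed Sysmap reproduces the hard-coded 0xFDFF8000 exactly, so the change is a no-op in value but now tracks KPTDI automatically.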
*/ .set _kstack,USRSTACK .globl _kstack .set PPDROFF,0x3F6 .set PPTEOFF,0x400-UPAGES /* 0x3FE */ /* * Globals */ .data .globl _esym _esym: .long 0 /* ptr to end of syms */ .globl _boothowto,_bootdev,_curpcb .globl _cpu,_cold,_atdevbase _cpu: .long 0 /* are we 386, 386sx, or 486 */ _cold: .long 1 /* cold till we are not */ _atdevbase: .long 0 /* location of start of iomem in virtual */ _atdevphys: .long 0 /* location of device mapping ptes (phys) */ .globl _IdlePTD,_KPTphys _IdlePTD: .long 0 _KPTphys: .long 0 .globl _cyloffset,_proc0paddr _cyloffset: .long 0 _proc0paddr: .long 0 .space 512 tmpstk: /* * System Initialization */ .text /* * btext: beginning of text section. * Also the entry point (jumped to directly from the boot blocks). */ ENTRY(btext) movw $0x1234,0x472 /* warm boot */ jmp 1f .space 0x500 /* skip over warm boot shit */ /* * pass parameters on stack (howto, bootdev, unit, cyloffset, esym) * note: (%esp) is return address of boot * ( if we want to hold onto /boot, it's physical %esp up to _end) */ 1: movl 4(%esp),%eax movl %eax,_boothowto-KERNBASE movl 8(%esp),%eax movl %eax,_bootdev-KERNBASE movl 12(%esp),%eax movl %eax,_cyloffset-KERNBASE movl 16(%esp),%eax addl $KERNBASE,%eax movl %eax,_esym-KERNBASE /* find out our CPU type. */ pushfl popl %eax movl %eax,%ecx xorl $0x40000,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax shrl $18,%eax andl $1,%eax push %ecx popfl cmpl $0,%eax jne 1f movl $CPU_386,_cpu-KERNBASE jmp 2f 1: movl $CPU_486,_cpu-KERNBASE 2: /* * Finished with old stack; load new %esp now instead of later so * we can trace this code without having to worry about the trace * trap clobbering the memory test or the zeroing of the bss+bootstrap * page tables. * * XXX - wdboot clears the bss after testing that this is safe. * This is too wasteful - memory below 640K is scarce. The boot * program should check: * text+data <= &stack_variable - more_space_for_stack * text+data+bss+pad+space_for_page_tables <= end_of_memory * Oops, the gdt is in the carcass of the boot program so clearing * the rest of memory is still not possible. */ movl $tmpstk-KERNBASE,%esp /* bootstrap stack end location */ -#ifdef garbage - /* count up memory */ - - xorl %eax,%eax /* start with base memory at 0x0 */ - #movl $0xA0000/NBPG,%ecx /* look every 4K up to 640K */ - movl $0xA0,%ecx /* look every 4K up to 640K */ -1: movl (%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,(%eax) /* write test pattern */ - /* flush stupid cache here! (with bcopy(0,0,512*1024) ) */ - cmpl $0xa55a5aa5,(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,(%eax) /* restore memory */ - addl $NBPG,%eax - loop 1b -2: shrl $12,%eax - movl %eax,_Maxmem-KERNBASE - - movl $0x100000,%eax /* next, talley remaining memory */ - #movl $((0xFFF000-0x100000)/NBPG),%ecx - movl $(0xFFF-0x100),%ecx -1: movl (%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,(%eax) /* write test pattern */ - cmpl $0xa55a5aa5,(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,(%eax) /* restore memory */ - addl $NBPG,%eax - loop 1b -2: shrl $12,%eax - movl %eax,_Maxmem-KERNBASE -#endif - /* * Virtual address space of kernel: * * text | data | bss | [syms] | page dir | proc0 kernel stack | usr stk map | Sysmap * 0 1 2 3 4 */ /* find end of kernel image */ movl $_end-KERNBASE,%ecx addl $NBPG-1,%ecx /* page align up */ andl $~(NBPG-1),%ecx movl %ecx,%esi /* esi=start of tables */ /* clear bss and memory for bootstrap pagetables. 
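The cpu-type probe above toggles bit 18 of EFLAGS (the 486's AC flag); a 386 silently drops the write, a 486 keeps it. A rough C-with-inline-asm rendering of the same probe, 32-bit x86 only and purely illustrative:

        /* returns nonzero on a CPU that lets the AC bit (0x40000) stick, i.e. a 486 or later */
        static int
        cpu_is_486_or_later(void)
        {
                unsigned int orig, toggled;

                __asm__ __volatile__(
                        "pushfl; popl %0\n\t"           /* orig = EFLAGS */
                        "movl %0, %1\n\t"
                        "xorl $0x40000, %1\n\t"         /* try to flip AC */
                        "pushl %1; popfl\n\t"
                        "pushfl; popl %1\n\t"           /* read the flags back */
                        "pushl %0; popfl"               /* restore the original EFLAGS */
                        : "=r" (orig), "=r" (toggled)
                        :
                        : "cc");
                return ((orig ^ toggled) & 0x40000) != 0;
        }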
*/ movl $_edata-KERNBASE,%edi subl %edi,%ecx addl $(UPAGES+5)*NBPG,%ecx /* size of tables */ xorl %eax,%eax /* pattern */ cld rep stosb +/* + * If we are loaded at 0x0 check to see if we have space for the + * page tables pages after the kernel and before the 640K ISA memory + * hole. If we do not have space relocate the page table pages and + * the kernel stack to start at 1MB. The value that ends up in esi + * is used by the rest of locore to build the tables. Locore adjusts + * esi each time it allocates a structure and then passes the final + * value to init386(first) as the value first. esi should ALWAYS + * be page aligned!! + */ + movl %esi,%ecx /* Get current first availiable address */ + cmpl $0x100000,%ecx /* Lets see if we are already above 1MB */ + jge 1f /* yep, don't need to check for room */ + addl $(NKPDE + 4) * NBPG,%ecx /* XXX the 4 is for kstack */ + /* space for kstack, PTD and PTE's */ + cmpl $(640*1024),%ecx + /* see if it fits in low memory */ + jle 1f /* yep, don't need to relocate it */ + movl $0x100000,%esi /* won't fit, so start it at 1MB */ +1: + /* physical address of Idle Address space */ movl %esi,_IdlePTD-KERNBASE /* * fillkpt * eax = (page frame address | control | status) == pte * ebx = address of page table * ecx = how many pages to map */ #define fillkpt \ 1: movl %eax,(%ebx) ; \ addl $NBPG,%eax ; /* increment physical address */ \ addl $4,%ebx ; /* next pte */ \ loop 1b ; /* * Map Kernel * N.B. don't bother with making kernel text RO, as 386 * ignores R/W AND U/S bits on kernel access (only v works) ! * * First step - build page tables */ movl %esi,%ecx /* this much memory, */ shrl $PGSHIFT,%ecx /* for this many pte s */ addl $UPAGES+4,%ecx /* including our early context */ cmpl $0xa0,%ecx /* XXX - cover debugger pages */ jae 1f movl $0xa0,%ecx 1: movl $PG_V|PG_KW,%eax /* having these bits set, */ lea (4*NBPG)(%esi),%ebx /* physical address of KPT in proc 0, */ movl %ebx,_KPTphys-KERNBASE /* in the kernel page table, */ fillkpt /* map I/O memory map */ movl $0x100-0xa0,%ecx /* for this many pte s, */ movl $(0xa0000|PG_V|PG_UW),%eax /* having these bits set,(perhaps URW?) XXX 06 Aug 92 */ movl %ebx,_atdevphys-KERNBASE /* remember phys addr of ptes */ fillkpt /* map proc 0's kernel stack into user page table page */ movl $UPAGES,%ecx /* for this many pte s, */ lea (1*NBPG)(%esi),%eax /* physical address in proc 0 */ lea (KERNBASE)(%eax),%edx movl %edx,_proc0paddr-KERNBASE /* remember VA for 0th process init */ orl $PG_V|PG_KW,%eax /* having these bits set, */ lea (3*NBPG)(%esi),%ebx /* physical address of stack pt in proc 0 */ addl $(PPTEOFF*4),%ebx fillkpt /* * Construct a page table directory * (of page directory elements - pde's) */ /* install a pde for temporary double map of bottom of VA */ lea (4*NBPG)(%esi),%eax /* physical address of kernel page table */ orl $PG_V|PG_UW,%eax /* pde entry is valid XXX 06 Aug 92 */ movl %eax,(%esi) /* which is where temp maps! */ /* kernel pde's */ movl $(NKPDE),%ecx /* for this many pde s, */ lea (KPTDI*4)(%esi),%ebx /* offset of pde for kernel */ fillkpt /* install a pde recursively mapping page directory as a page table! */ movl %esi,%eax /* phys address of ptd in proc 0 */ orl $PG_V|PG_UW,%eax /* pde entry is valid XXX 06 Aug 92 */ movl %eax,PTDPTDI*4(%esi) /* which is where PTmap maps! */ /* install a pde to map kernel stack for proc 0 */ lea (3*NBPG)(%esi),%eax /* physical address of pt in proc 0 */ orl $PG_V|PG_KW,%eax /* pde entry is valid */ movl %eax,PPDROFF*4(%esi) /* which is where kernel stack maps! 
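The added block above is the functional change in this locore.s revision: when the kernel is loaded at physical 0, the bootstrap page tables and proc 0 kernel stack may no longer fit below the 640K ISA hole, so they are pushed up to 1MB. The same decision in C, with a stand-in NKPDE since the real value is generated into assym.s:

        #define NBPG    4096
        #define NKPDE   7               /* stand-in only; the real value comes from assym.s */

        /* 'first' is the page-aligned physical address just past the kernel image (%esi) */
        static unsigned long
        place_bootstrap_tables(unsigned long first)
        {
                if (first < 0x100000 &&                         /* loaded below 1MB, and */
                    first + (NKPDE + 4) * NBPG > 640 * 1024)    /* kstack + PTD + PTEs spill past 640K */
                        first = 0x100000;                       /* then start the tables at 1MB instead */
                return first;
        }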
*/ /* copy and convert stuff from old gdt and idt for debugger */ cmpl $0x0375c339,0x96104 /* XXX - debugger signature */ jne 1f movb $1,_bdb_exists-KERNBASE 1: pushal subl $2*6,%esp sgdt (%esp) movl 2(%esp),%esi /* base address of current gdt */ movl $_gdt-KERNBASE,%edi movl %edi,2(%esp) movl $8*18/4,%ecx rep /* copy gdt */ movsl movl $_gdt-KERNBASE,-8+2(%edi) /* adjust gdt self-ptr */ movb $0x92,-8+5(%edi) sidt 6(%esp) movl 6+2(%esp),%esi /* base address of current idt */ movl 8+4(%esi),%eax /* convert dbg descriptor to ... */ movw 8(%esi),%ax movl %eax,bdb_dbg_ljmp+1-KERNBASE /* ... immediate offset ... */ movl 8+2(%esi),%eax movw %ax,bdb_dbg_ljmp+5-KERNBASE /* ... and selector for ljmp */ movl 24+4(%esi),%eax /* same for bpt descriptor */ movw 24(%esi),%ax movl %eax,bdb_bpt_ljmp+1-KERNBASE movl 24+2(%esi),%eax movw %ax,bdb_bpt_ljmp+5-KERNBASE movl $_idt-KERNBASE,%edi movl %edi,6+2(%esp) movl $8*4/4,%ecx rep /* copy idt */ movsl lgdt (%esp) lidt 6(%esp) addl $2*6,%esp popal /* load base of page directory and enable mapping */ movl %esi,%eax /* phys address of ptd in proc 0 */ orl $I386_CR3PAT,%eax movl %eax,%cr3 /* load ptd addr into mmu */ movl %cr0,%eax /* get control word */ /* * XXX it is now safe to always (attempt to) set CR0_WP and to set up * the page tables assuming it works, so USE_486_WRITE_PROTECT will go * away. The special 386 PTE checking needs to be conditional on * whatever distingiushes 486-only kernels from 386-486 kernels. */ #ifdef USE_486_WRITE_PROTECT orl $CR0_PE|CR0_PG|CR0_WP,%eax /* enable paging */ #else orl $CR0_PE|CR0_PG,%eax /* enable paging */ #endif movl %eax,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high mem */ ret begin: /* now running relocated at KERNBASE where the system is linked to run */ .globl _Crtat /* XXX - locore should not know about */ movl _Crtat,%eax /* variables of device drivers (pccons)! */ subl $(KERNBASE+0xA0000),%eax movl _atdevphys,%edx /* get pte PA */ subl _KPTphys,%edx /* remove base of ptes, now have phys offset */ shll $PGSHIFT-2,%edx /* corresponding to virt offset */ addl $KERNBASE,%edx /* add virtual base */ movl %edx,_atdevbase addl %eax,%edx movl %edx,_Crtat /* set up bootstrap stack */ movl $_kstack+UPAGES*NBPG-4*12,%esp /* bootstrap stack end location */ xorl %eax,%eax /* mark end of frames */ movl %eax,%ebp movl _proc0paddr,%eax movl %esi,PCB_CR3(%eax) - lea 7*NBPG(%esi),%esi /* skip past stack. */ - pushl %esi - /* relocate debugger gdt entries */ movl $_gdt+8*9,%eax /* adjust slots 9-17 */ movl $9,%ecx reloc_gdt: movb $0xfe,7(%eax) /* top byte of base addresses, was 0, */ addl $8,%eax /* now KERNBASE>>24 */ loop reloc_gdt cmpl $0,_bdb_exists je 1f int $3 1: + /* + * Skip over the page tables and the kernel stack + * XXX 4 is kstack size + */ + lea (NKPDE + 4) * NBPG(%esi),%esi + + pushl %esi /* value of first for init386(first) */ call _init386 /* wire 386 chip for unix operation */ movl $0,_PTD call _main /* autoconfiguration, mountroot etc */ popl %esi /* * now we've run main() and determined what cpu-type we are, we can * enable WP mode on i486 cpus and above. * on return from main(), we are process 1 * set up address space and stack so that we can 'return' to user mode */ .globl __ucodesel,__udatasel movl __ucodesel,%eax movl __udatasel,%ecx /* build outer stack frame */ pushl %ecx /* user ss */ pushl $USRSTACK /* user esp */ pushl %eax /* user cs */ pushl $0 /* user ip */ movl %cx,%ds movl %cx,%es movl %ax,%fs /* double map cs to fs */ movl %cx,%gs /* and ds to gs */ lret /* goto user! 
*/ pushl $lretmsg1 /* "should never get here!" */ call _panic lretmsg1: .asciz "lret: toinit\n" .set exec,59 .set exit,1 #define LCALL(x,y) .byte 0x9a ; .long y; .word x /* * Icode is copied out to process 1 and executed in user mode: * execve("/sbin/init", argv, envp); exit(0); * If the execve fails, process 1 exits and the system panics. */ NON_GPROF_ENTRY(icode) pushl $0 /* envp for execve() */ # pushl $argv-_icode /* can't do this 'cos gas 1.38 is broken */ movl $argv,%eax subl $_icode,%eax pushl %eax /* argp for execve() */ # pushl $init-_icode movl $init,%eax subl $_icode,%eax pushl %eax /* fname for execve() */ pushl %eax /* dummy return address */ movl $exec,%eax LCALL(0x7,0x0) /* exit if something botches up in the above execve() */ pushl %eax /* execve failed, the errno will do for an */ /* exit code because errnos are < 128 */ pushl %eax /* dummy return address */ movl $exit,%eax LCALL(0x7,0x0) init: .asciz "/sbin/init" ALIGN_DATA argv: .long init+6-_icode /* argv[0] = "init" ("/sbin/init" + 6) */ .long eicode-_icode /* argv[1] follows icode after copyout */ .long 0 eicode: .globl _szicode _szicode: .long _szicode-_icode NON_GPROF_ENTRY(sigcode) call SIGF_HANDLER(%esp) lea SIGF_SC(%esp),%eax /* scp (the call may have clobbered the */ /* copy at 8(%esp)) */ pushl %eax pushl %eax /* junk to fake return address */ movl $103,%eax /* XXX sigreturn() */ LCALL(0x7,0) /* enter kernel with args on stack */ hlt /* never gets here */ .globl _szsigcode _szsigcode: .long _szsigcode-_sigcode /* * Support routines for GCC, general C-callable functions */ ENTRY(__udivsi3) movl 4(%esp),%eax xorl %edx,%edx divl 8(%esp) ret ENTRY(__divsi3) movl 4(%esp),%eax cltd idivl 8(%esp) ret /* * I/O bus instructions via C */ ENTRY(inb) /* val = inb(port) */ movl 4(%esp),%edx subl %eax,%eax NOP inb %dx,%al ret ENTRY(inw) /* val = inw(port) */ movl 4(%esp),%edx subl %eax,%eax NOP inw %dx,%ax ret ENTRY(insb) /* insb(port, addr, cnt) */ pushl %edi movw 8(%esp),%dx movl 12(%esp),%edi movl 16(%esp),%ecx cld NOP rep insb NOP movl %edi,%eax popl %edi ret ENTRY(insw) /* insw(port, addr, cnt) */ pushl %edi movw 8(%esp),%dx movl 12(%esp),%edi movl 16(%esp),%ecx cld NOP rep insw NOP movl %edi,%eax popl %edi ret ENTRY(rtcin) /* rtcin(val) */ movl 4(%esp),%eax outb %al,$0x70 subl %eax,%eax inb $0x71,%al ret ENTRY(outb) /* outb(port, val) */ movl 4(%esp),%edx NOP movl 8(%esp),%eax outb %al,%dx NOP ret ENTRY(outw) /* outw(port, val) */ movl 4(%esp),%edx NOP movl 8(%esp),%eax outw %ax,%dx NOP ret ENTRY(outsb) /* outsb(port, addr, cnt) */ pushl %esi movw 8(%esp),%dx movl 12(%esp),%esi movl 16(%esp),%ecx cld NOP rep outsb NOP movl %esi,%eax popl %esi ret ENTRY(outsw) /* outsw(port, addr, cnt) */ pushl %esi movw 8(%esp),%dx movl 12(%esp),%esi movl 16(%esp),%ecx cld NOP rep outsw NOP movl %esi,%eax popl %esi ret /* * bcopy family */ ENTRY(bzero) /* void bzero(void *base, u_int cnt) */ pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx cld rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret ENTRY(fillw) /* fillw(pat, base, cnt) */ pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx cld rep stosw popl %edi ret ENTRY(bcopyb) bcopyb: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards. 
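bcopyb above (and bcopyw/bcopy further on) all apply the same overlap rule: copy forward unless the destination starts at or above the source, in which case copy from the end backwards so unread source bytes are never overwritten. A plain C sketch of that rule:

        #include <stddef.h>

        /* sketch of the direction choice the bcopy family makes */
        static void
        ovbcopy_sketch(const char *src, char *dst, size_t len)
        {
                if (dst < src) {                /* no overlap hazard: forward copy */
                        while (len--)
                                *dst++ = *src++;
                } else {                        /* dst may overlap the tail of src: go backwards */
                        src += len;
                        dst += len;
                        while (len--)
                                *--dst = *--src;
                }
        }

The real routines additionally move in 16-bit or 32-bit chunks and mop up the remainder bytewise, but the direction test is the same cmpl/jnb shown above.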
*/ addl %ecx,%esi std decl %edi decl %esi rep movsb popl %edi popl %esi cld ret ENTRY(bcopyw) bcopyw: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ shrl $1,%ecx /* copy by 16-bit words */ rep movsw adc %ecx,%ecx /* any bytes left? */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi std andl $1,%ecx /* any fractional bytes? */ decl %edi decl %esi rep movsb movl 20(%esp),%ecx /* copy remainder by 16-bit words */ shrl $1,%ecx decl %esi decl %edi rep movsw popl %edi popl %esi cld ret ENTRY(bcopyx) movl 16(%esp),%eax cmpl $2,%eax je bcopyw /* not _bcopyw, to avoid multiple mcounts */ cmpl $4,%eax je bcopy jmp bcopyb /* * (ov)bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ALTENTRY(ovbcopy) ENTRY(bcopy) bcopy: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ shrl $2,%ecx /* copy by 32-bit words */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi std andl $3,%ecx /* any fractional bytes? */ decl %edi decl %esi rep movsb movl 20(%esp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld ret ALTENTRY(ntohl) ENTRY(htonl) movl 4(%esp),%eax #ifdef i486 /* XXX */ /* Since Gas 1.38 does not grok bswap this has been coded as the * equivalent bytes. This can be changed back to bswap when we * upgrade to a newer version of Gas */ /* bswap %eax */ .byte 0x0f .byte 0xc8 #else xchgb %al,%ah roll $16,%eax xchgb %al,%ah #endif ret ALTENTRY(ntohs) ENTRY(htons) movzwl 4(%esp),%eax xchgb %al,%ah ret /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines and possibly * the math- and DOS emulators should be the only places that do this. * * We have to access the memory with user's permissions, so use a segment * selector with RPL 3. For writes to user space we have to additionally * check the PTE for write permission, because the 386 does not check * write permissions when we are executing with EPL 0. The 486 does check * this if the WP bit is set in CR0, so we can use a simpler version here. * * These routines set curpcb->onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->onfault instead of the function. */ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ movl _curpcb,%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi pushl %ebx movl 16(%esp),%esi movl 20(%esp),%edi movl 24(%esp),%ebx orl %ebx,%ebx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. * * Otherwise, it saves having to load and restore %es to get the * usual segment-based protection (the destination segment for movs * is always %es). The other explicit checks for user-writablility * are not quite sufficient. 
They fail for the user area because * we mapped the user area read/write to avoid having an #ifdef in * vm_machdep.c. They fail for user PTEs and/or PTDs! (107 * addresses including 0xff800000 and 0xfc000000). I'm not sure if * this can be fixed. Marking the PTEs supervisor mode and the * PDE's user mode would almost work, but there may be a problem * with the self-referential PDE. */ movl %edi,%eax addl %ebx,%eax jc copyout_fault #define VM_END_USER_ADDRESS 0xFDBFE000 /* XXX */ cmpl $VM_END_USER_ADDRESS,%eax ja copyout_fault #ifndef USE_486_WRITE_PROTECT /* * We have to check each PTE for user write permission. * The checking may cause a page fault, so it is important to set * up everything for return via copyout_fault before here. */ /* compute number of pages */ movl %edi,%ecx andl $NBPG-1,%ecx addl %ebx,%ecx decl %ecx shrl $IDXSHIFT+2,%ecx incl %ecx /* compute PTE offset for start address */ movl %edi,%edx shrl $IDXSHIFT,%edx andb $0xfc,%dl 1: /* check PTE for each page */ movb _PTmap(%edx),%al andb $0x07,%al /* Pages must be VALID + USERACC + WRITABLE */ cmpb $0x07,%al je 2f /* simulate a trap */ pushl %edx pushl %ecx shll $IDXSHIFT,%edx pushl %edx call _trapwrite /* trapwrite(addr) */ popl %edx popl %ecx popl %edx orl %eax,%eax /* if not ok, return EFAULT */ jnz copyout_fault 2: addl $4,%edx decl %ecx jnz 1b /* check next page */ #endif /* ndef USE_486_WRITE_PROTECT */ /* bcopy(%esi, %edi, %ebx) */ cld movl %ebx,%ecx shrl $2,%ecx rep movsl movb %bl,%cl andb $3,%cl /* XXX can we trust the rest of %ecx on clones? */ rep movsb done_copyout: popl %ebx popl %edi popl %esi xorl %eax,%eax movl _curpcb,%edx movl %eax,PCB_ONFAULT(%edx) ret ALIGN_TEXT copyout_fault: popl %ebx popl %edi popl %esi movl _curpcb,%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret ENTRY(copyin) /* copyin(from_user, to_kernel, len) */ movl _curpcb,%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi movl 12(%esp),%esi /* caddr_t from */ movl 16(%esp),%edi /* caddr_t to */ movl 20(%esp),%ecx /* size_t len */ movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld gs rep movsl movb %al,%cl andb $3,%cl /* copy remaining bytes */ gs rep movsb popl %edi popl %esi xorl %eax,%eax movl _curpcb,%edx movl %eax,PCB_ONFAULT(%edx) ret ALIGN_TEXT copyin_fault: popl %edi popl %esi movl _curpcb,%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * fu{byte,sword,word} : fetch a byte(sword, word) from user memory */ ALTENTRY(fuiword) ENTRY(fuword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ENTRY(fusword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ALTENTRY(fuibyte) ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ALIGN_TEXT fusufault: movl _curpcb,%ecx xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) decl %eax ret /* * su{byte,sword,word}: write a byte(word, longword) to user memory */ #ifdef USE_486_WRITE_PROTECT /* * we only have to set the right segment selector. 
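On a 386, which ignores write protection in ring 0, copyout has to check each destination PTE by hand through the recursive PTmap window, as the loop above does; a page that is not simultaneously valid, user-accessible and writable is pushed through trapwrite() to simulate the fault. A sketch of the per-page test, with 0x07 standing for the low three pte bits (valid + user + writable):

        #define IDXSHIFT        10
        extern unsigned char    PTmap[];        /* recursive window onto every pte (set up in locore) */

        /* is the user page containing va already valid, user-accessible and writable? */
        static int
        user_page_writable(unsigned int va)
        {
                /* byte offset of va's pte inside PTmap: (va >> PGSHIFT) * 4 == (va >> 10) & ~3 */
                unsigned char pte_low = PTmap[(va >> IDXSHIFT) & ~3U];

                return (pte_low & 0x07) == 0x07;
        }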
*/ ALTENTRY(suiword) ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret ENTRY(susword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movw 8(%esp),%ax gs movw %ax,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret ALTENTRY(suibyte) ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movb 8(%esp),%al gs movb %al,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret #else /* USE_486_WRITE_PROTECT */ /* * here starts the trouble again: check PTE, twice if word crosses * a page boundary. */ /* XXX - page boundary crossing is not handled yet */ ALTENTRY(suibyte) ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movb %al,(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret ENTRY(susword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movw %ax,(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret ALTENTRY(suiword) ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,0(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret #endif /* USE_486_WRITE_PROTECT */ /* * copyoutstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ #ifdef USE_486_WRITE_PROTECT ENTRY(copyoutstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f /* * gs override doesn't work for stosb. Use the same explicit check * as in copyout(). It's much slower now because it is per-char. * XXX - however, it would be faster to rewrite this function to use * strlen() and copyout(). */ cmpl $VM_END_USER_ADDRESS,%edi jae cpystrflt lodsb gs stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f #else /* ndef USE_486_WRITE_PROTECT */ ENTRY(copyoutstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ 1: /* * It suffices to check that the first byte is in user space, because * we look at a page at a time and the end address is on a page * boundary. 
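copyoutstr, copyinstr and copystr all share the contract spelled out in the header comments: copy at most maxlen bytes including the terminating NUL, return ENAMETOOLONG if the NUL did not fit, EFAULT on a protection fault, and report the copied length through lencopied when it is non-NULL. A straight C rendering of that contract, closest to copystr since it involves no user/kernel crossing:

        #include <errno.h>
        #include <stddef.h>

        static int
        copystr_sketch(const char *from, char *to, size_t maxlen, size_t *lencopied)
        {
                size_t n = 0;
                int error = ENAMETOOLONG;       /* assume the string will not fit */

                while (n < maxlen) {
                        n++;
                        if ((*to++ = *from++) == '\0') {
                                error = 0;      /* NUL copied: success */
                                break;
                        }
                }
                if (lencopied != NULL)
                        *lencopied = n;         /* bytes copied, including the NUL on success */
                return error;
        }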
*/ cmpl $VM_END_USER_ADDRESS,%edi jae cpystrflt movl %edi,%eax shrl $IDXSHIFT,%eax andb $0xfc,%al movb _PTmap(%eax),%al andb $7,%al cmpb $7,%al je 2f /* simulate trap */ pushl %edx pushl %edi call _trapwrite popl %edi popl %edx orl %eax,%eax jnz cpystrflt 2: /* copy up to end of this page */ movl %edi,%eax andl $NBPG-1,%eax movl $NBPG,%ecx subl %eax,%ecx /* ecx = NBPG - (src % NBPG) */ cmpl %ecx,%edx jge 3f movl %edx,%ecx /* ecx = min(ecx, edx) */ 3: orl %ecx,%ecx jz 4f decl %ecx decl %edx lodsb stosb orb %al,%al jnz 3b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* next page */ orl %edx,%edx jnz 1b /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f #endif /* USE_486_WRITE_PROTECT */ /* * copyinstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f gs lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f cpystrflt: movl $EFAULT,%eax 6: /* set *lencopied and return %eax */ movl _curpcb,%ecx movl $0,PCB_ONFAULT(%ecx) movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx orl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret /* * copystr(from, to, maxlen, int *lencopied) */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx orl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret /* * Handling of special 386 registers and descriptor tables etc */ ENTRY(lgdt) /* void lgdt(struct region_descriptor *rdp); */ /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %ax,%ds movl %ax,%es movl %ax,%ss /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax # movl $KCSEL,4(%esp) movl $8,4(%esp) lret /* * void lidt(struct region_descriptor *rdp); */ ENTRY(lidt) movl 4(%esp),%eax lidt (%eax) ret /* * void lldt(u_short sel) */ ENTRY(lldt) lldt 4(%esp) ret /* * void ltr(u_short sel) */ ENTRY(ltr) ltr 4(%esp) ret ENTRY(ssdtosd) /* ssdtosd(*ssdp,*sdp) */ pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll $16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret ENTRY(tlbflush) /* tlbflush() */ movl %cr3,%eax orl $I386_CR3PAT,%eax movl %eax,%cr3 ret ENTRY(load_cr0) /* load_cr0(cr0) */ movl 4(%esp),%eax movl %eax,%cr0 ret ENTRY(rcr0) /* rcr0() */ movl %cr0,%eax ret ENTRY(rcr2) /* rcr2() */ movl %cr2,%eax ret ENTRY(rcr3) /* rcr3() */ movl %cr3,%eax ret ENTRY(load_cr3) /* void load_cr3(caddr_t cr3) */ movl 4(%esp),%eax orl 
$I386_CR3PAT,%eax movl %eax,%cr3 ret /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* restore edi */ movl 20(%eax),%edx /* get rta */ movl %edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret /*****************************************************************************/ /* Scheduling */ /*****************************************************************************/ /* * The following primitives manipulate the run queues. * _whichqs tells which of the 32 queues _qs * have processes in them. Setrq puts processes into queues, Remrq * removes them from queues. The running process is on no queue, * other processes are on a queue related to p->p_pri, divided by 4 * actually to shrink the 0-127 range of priorities into the 32 available * queues. */ .globl _whichqs,_qs,_cnt,_panic .comm _noproc,4 .comm _runrun,4 /* * Setrq(p) * * Call should be made at spl6(), and p->p_stat should be SRUN */ ENTRY(setrq) movl 4(%esp),%eax cmpl $0,P_RLINK(%eax) /* should not be on q already */ je set1 pushl $set2 call _panic set1: movzbl P_PRI(%eax),%edx shrl $2,%edx btsl %edx,_whichqs /* set q full bit */ shll $3,%edx addl $_qs,%edx /* locate q hdr */ movl %edx,P_LINK(%eax) /* link process on tail of q */ movl P_RLINK(%edx),%ecx movl %ecx,P_RLINK(%eax) movl %eax,P_RLINK(%edx) movl %eax,P_LINK(%ecx) ret set2: .asciz "setrq" /* * Remrq(p) * * Call should be made at spl6(). */ ENTRY(remrq) movl 4(%esp),%eax movzbl P_PRI(%eax),%edx shrl $2,%edx btrl %edx,_whichqs /* clear full bit, panic if clear already */ jb rem1 pushl $rem3 call _panic rem1: pushl %edx movl P_LINK(%eax),%ecx /* unlink process */ movl P_RLINK(%eax),%edx movl %edx,P_RLINK(%ecx) movl P_RLINK(%eax),%ecx movl P_LINK(%eax),%edx movl %edx,P_LINK(%ecx) popl %edx movl $_qs,%ecx shll $3,%edx addl %edx,%ecx cmpl P_LINK(%ecx),%ecx /* q still has something? */ je rem2 shrl $3,%edx /* yes, set bit as still full */ btsl %edx,_whichqs rem2: movl $0,P_RLINK(%eax) /* zap reverse link to indicate off list */ ret rem3: .asciz "remrq" sw0: .asciz "swtch" /* * When no processes are on the runq, Swtch branches to idle * to wait for something to come ready. */ ALIGN_TEXT Idle: sti SHOW_STI ALIGN_TEXT idle_loop: call _spl0 cmpl $0,_whichqs jne sw1 hlt /* wait for interrupt */ jmp idle_loop badsw: pushl $sw0 call _panic /*NOTREACHED*/ /* * Swtch() */ SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ ENTRY(swtch) incl _cnt+V_SWTCH /* switch to new process. first, save context as needed */ movl _curproc,%ecx /* if no process to save, don't bother */ testl %ecx,%ecx je sw1 movl P_ADDR(%ecx),%ecx movl (%esp),%eax /* Hardware registers */ movl %eax,PCB_EIP(%ecx) movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) -#ifdef NPX +#if NNPX > 0 /* have we used fp, and need a save? 
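setrq()/remrq() above keep 32 doubly-linked run queues plus a summary bitmask (_whichqs) so the scheduler can find a runnable process with a single bsfl. A small C model of the enqueue side, assuming the usual trick of letting each queue header pose as a struct proc:

        #define NQS     32

        struct proc {
                struct proc     *p_link;        /* forward run-queue link */
                struct proc     *p_rlink;       /* backward run-queue link */
                unsigned char   p_pri;          /* 0..127, smaller runs sooner */
        };

        static unsigned int     whichqs;        /* bit i set <=> qs[i] is non-empty */
        static struct proc      qs[NQS];        /* queue headers */

        static void
        rqinit_sketch(void)                     /* headers start out self-linked (empty) */
        {
                int i;

                for (i = 0; i < NQS; i++)
                        qs[i].p_link = qs[i].p_rlink = &qs[i];
        }

        /* sketch of setrq(): fold the priority onto 32 queues and tail-insert */
        static void
        setrq_sketch(struct proc *p)
        {
                struct proc *q = &qs[p->p_pri >> 2];

                whichqs |= 1U << (p->p_pri >> 2);
                p->p_link = q;                  /* p goes just before the header, */
                p->p_rlink = q->p_rlink;        /* i.e. at the tail of the queue */
                q->p_rlink->p_link = p;
                q->p_rlink = p;
        }

swtch() then does the inverse: bsfl on whichqs picks the lowest-numbered non-empty queue (the best priority) and the process at its head is unlinked and run.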
*/ mov _curproc,%eax cmp %eax,_npxproc jne 1f pushl %ecx /* h/w bugs make saving complicated */ leal PCB_SAVEFPU(%ecx),%eax pushl %eax call _npxsave /* do it in a big C function */ popl %eax popl %ecx 1: -#endif +#endif /* NNPX > 0 */ movl _CMAP2,%eax /* save temporary map PTE */ movl %eax,PCB_CMAP2(%ecx) /* in our context */ movl $0,_curproc /* out of process */ # movw _cpl,%ax # movw %ax,PCB_IML(%ecx) /* save ipl */ /* save is done, now choose a new process or idle */ sw1: cli SHOW_CLI movl _whichqs,%edi 2: /* XXX - bsf is sloow */ bsfl %edi,%eax /* find a full q */ je Idle /* if none, idle */ /* XX update whichqs? */ swfnd: btrl %eax,%edi /* clear q full status */ jnb 2b /* if it was clear, look for another */ movl %eax,%ebx /* save which one we are using */ shll $3,%eax addl $_qs,%eax /* select q */ movl %eax,%esi #ifdef DIAGNOSTIC cmpl P_LINK(%eax),%eax /* linked to self? (e.g. not on list) */ je badsw /* not possible */ #endif movl P_LINK(%eax),%ecx /* unlink from front of process q */ movl P_LINK(%ecx),%edx movl %edx,P_LINK(%eax) movl P_RLINK(%ecx),%eax movl %eax,P_RLINK(%edx) cmpl P_LINK(%ecx),%esi /* q empty */ je 3f btsl %ebx,%edi /* nope, set to indicate full */ 3: movl %edi,_whichqs /* update q status */ movl $0,%eax movl %eax,_want_resched #ifdef DIAGNOSTIC cmpl %eax,P_WCHAN(%ecx) jne badsw cmpb $SRUN,P_STAT(%ecx) jne badsw #endif movl %eax,P_RLINK(%ecx) /* isolate process to run */ movl P_ADDR(%ecx),%edx movl PCB_CR3(%edx),%ebx /* switch address space */ movl %ebx,%cr3 /* restore context */ movl PCB_EBX(%edx),%ebx movl PCB_ESP(%edx),%esp movl PCB_EBP(%edx),%ebp movl PCB_ESI(%edx),%esi movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) movl PCB_CMAP2(%edx),%eax /* get temporary map */ movl %eax,_CMAP2 /* reload temporary map PTE */ movl %ecx,_curproc /* into next process */ movl %edx,_curpcb pushl %edx /* save p to return */ /* * XXX - 0.0 forgot to save it - is that why this was commented out in 0.1? * I think restoring the cpl is unnecessary, but we must turn off the cli * now that spl*() don't do it as a side affect. */ pushl PCB_IML(%edx) sti SHOW_STI #if 0 call _splx #endif addl $4,%esp /* * XXX - 0.0 gets here via swtch_to_inactive(). I think 0.1 gets here in the * same way. Better return a value. */ popl %eax /* return(p); */ ret ENTRY(mvesp) movl %esp,%eax ret /* * struct proc *swtch_to_inactive(p) ; struct proc *p; * * At exit of a process, move off the address space of the * process and onto a "safe" one. Then, on a temporary stack * return and run code that disposes of the old state. * Since this code requires a parameter from the "old" stack, * pass it back as a return value. */ ENTRY(swtch_to_inactive) popl %edx /* old pc */ popl %eax /* arg, our return value */ movl _IdlePTD,%ecx movl %ecx,%cr3 /* good bye address space */ #write buffer? movl $tmpstk-4,%esp /* temporary stack, compensated for call */ jmp %edx /* return, execute remainder of cleanup */ /* * savectx(pcb, altreturn) * Update pcb, saving current processor state and arranging * for alternate return ala longjmp in swtch if altreturn is true. */ ENTRY(savectx) movl 4(%esp),%ecx movw _cpl,%ax movw %ax,PCB_IML(%ecx) movl (%esp),%eax movl %eax,PCB_EIP(%ecx) movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) -#ifdef NPX +#if NNPX > 0 /* * If npxproc == NULL, then the npx h/w state is irrelevant and the * state had better already be in the pcb. 
This is true for forks * but not for dumps (the old book-keeping with FP flags in the pcb * always lost for dumps because the dump pcb has 0 flags). * * If npxproc != NULL, then we have to save the npx h/w state to * npxproc's pcb and copy it to the requested pcb, or save to the * requested pcb and reload. Copying is easier because we would * have to handle h/w bugs for reloading. We used to lose the * parent's npx state for forks by forgetting to reload. */ mov _npxproc,%eax testl %eax,%eax je 1f pushl %ecx movl P_ADDR(%eax),%eax leal PCB_SAVEFPU(%eax),%eax pushl %eax pushl %eax call _npxsave popl %eax popl %eax popl %ecx pushl %ecx pushl $108+8*2 /* XXX h/w state size + padding */ leal PCB_SAVEFPU(%ecx),%ecx pushl %ecx pushl %eax call _bcopy addl $12,%esp popl %ecx 1: -#endif +#endif /* NNPX > 0 */ movl _CMAP2,%edx /* save temporary map PTE */ movl %edx,PCB_CMAP2(%ecx) /* in our context */ cmpl $0,8(%esp) je 1f movl %esp,%edx /* relocate current sp relative to pcb */ subl $_kstack,%edx /* (sp is relative to kstack): */ addl %edx,%ecx /* pcb += sp - kstack; */ movl %eax,(%ecx) /* write return pc at (relocated) sp@ */ /* this mess deals with replicating register state gcc hides */ movl 12(%esp),%eax movl %eax,12(%ecx) movl 16(%esp),%eax movl %eax,16(%ecx) movl 20(%esp),%eax movl %eax,20(%ecx) movl 24(%esp),%eax movl %eax,24(%ecx) 1: xorl %eax,%eax /* return 0 */ ret /* * addupc(int pc, struct uprof *up, int ticks): * update profiling information for the user process. */ ENTRY(addupc) pushl %ebp movl %esp,%ebp movl 12(%ebp),%edx /* up */ movl 8(%ebp),%eax /* pc */ subl PR_OFF(%edx),%eax /* pc -= up->pr_off */ jl L1 /* if (pc < 0) return */ shrl $1,%eax /* praddr = pc >> 1 */ imull PR_SCALE(%edx),%eax /* praddr *= up->pr_scale */ shrl $15,%eax /* praddr = praddr << 15 */ andl $-2,%eax /* praddr &= ~1 */ cmpl PR_SIZE(%edx),%eax /* if (praddr > up->pr_size) return */ ja L1 /* addl %eax,%eax /* praddr -> word offset */ addl PR_BASE(%edx),%eax /* praddr += up-> pr_base */ movl 16(%ebp),%ecx /* ticks */ movl _curpcb,%edx movl $proffault,PCB_ONFAULT(%edx) addl %ecx,(%eax) /* storage location += ticks */ movl $0,PCB_ONFAULT(%edx) L1: leave ret ALIGN_TEXT proffault: /* if we get a fault, then kill profiling all together */ movl $0,PCB_ONFAULT(%edx) /* squish the fault handler */ movl 12(%ebp),%ecx movl $0,PR_SCALE(%ecx) /* up->pr_scale = 0 */ leave ret /* To be done: */ ENTRY(astoff) ret /*****************************************************************************/ /* Trap handling */ /*****************************************************************************/ /* * Trap and fault vector routines * * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose * control. The sti's give the standard losing behaviour for ddb and kgdb. 
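addupc() above converts a user pc into a profiling-buffer offset with the fixed-point scale from profil(2) and bumps the counter under PCB_ONFAULT protection. The same arithmetic in C, minus the fault handling, with a pared-down uprof holding just the fields used here:

        struct uprof {
                char            *pr_base;       /* buffer base */
                unsigned int    pr_size;        /* buffer size */
                unsigned int    pr_off;         /* pc offset */
                unsigned int    pr_scale;       /* pc scaling, 16-bit fixed point */
        };

        static void
        addupc_sketch(unsigned int pc, struct uprof *up, int ticks)
        {
                unsigned int off;

                if (pc < up->pr_off)
                        return;                                 /* below the profiled range */
                off = (((pc - up->pr_off) >> 1) * up->pr_scale >> 15) & ~1U;
                if (off > up->pr_size)
                        return;                                 /* past the end of the buffer */
                *(int *)(up->pr_base + off) += ticks;           /* the real code guards this store
                                                                   with curpcb->pcb_onfault */
        }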
*/ #define IDTVEC(name) ALIGN_TEXT; .globl _X/**/name; _X/**/name: #define TRAP(a) pushl $(a) ; jmp alltraps #ifdef KGDB # define BPTTRAP(a) sti; pushl $(a) ; jmp bpttraps #else # define BPTTRAP(a) sti; TRAP(a) #endif IDTVEC(div) pushl $0; TRAP(T_DIVIDE) IDTVEC(dbg) #ifdef BDBTRAP BDBTRAP(dbg) #endif pushl $0; BPTTRAP(T_TRCTRAP) IDTVEC(nmi) pushl $0; TRAP(T_NMI) IDTVEC(bpt) #ifdef BDBTRAP BDBTRAP(bpt) #endif pushl $0; BPTTRAP(T_BPTFLT) IDTVEC(ofl) pushl $0; TRAP(T_OFLOW) IDTVEC(bnd) pushl $0; TRAP(T_BOUND) IDTVEC(ill) pushl $0; TRAP(T_PRIVINFLT) IDTVEC(dna) pushl $0; TRAP(T_DNA) IDTVEC(dble) TRAP(T_DOUBLEFLT) /*PANIC("Double Fault");*/ IDTVEC(fpusegm) pushl $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) /*PANIC("TSS not valid");*/ IDTVEC(missing) TRAP(T_SEGNPFLT) IDTVEC(stk) TRAP(T_STKFLT) IDTVEC(prot) TRAP(T_PROTFLT) IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(rsvd) pushl $0; TRAP(T_RESERVED) IDTVEC(fpu) -#ifdef NPX +#if NNPX > 0 /* * Handle like an interrupt so that we can call npxintr to clear the * error. It would be better to handle npx interrupts as traps but * this is difficult for nested interrupts. */ pushl $0 /* dummy error code */ pushl $T_ASTFLT pushal nop /* silly, the bug is for popal and it only * bites when the next instruction has a * complicated address mode */ pushl %ds pushl %es /* now the stack frame is a trap frame */ movl $KDSEL,%eax movl %ax,%ds movl %ax,%es pushl _cpl pushl $0 /* dummy unit to finish building intr frame */ incl _cnt+V_TRAP call _npxintr jmp doreti -#else +#else /* NNPX > 0 */ pushl $0; TRAP(T_ARITHTRAP) -#endif +#endif /* NNPX > 0 */ /* 17 - 31 reserved for future exp */ IDTVEC(rsvd0) pushl $0; TRAP(17) IDTVEC(rsvd1) pushl $0; TRAP(18) IDTVEC(rsvd2) pushl $0; TRAP(19) IDTVEC(rsvd3) pushl $0; TRAP(20) IDTVEC(rsvd4) pushl $0; TRAP(21) IDTVEC(rsvd5) pushl $0; TRAP(22) IDTVEC(rsvd6) pushl $0; TRAP(23) IDTVEC(rsvd7) pushl $0; TRAP(24) IDTVEC(rsvd8) pushl $0; TRAP(25) IDTVEC(rsvd9) pushl $0; TRAP(26) IDTVEC(rsvd10) pushl $0; TRAP(27) IDTVEC(rsvd11) pushl $0; TRAP(28) IDTVEC(rsvd12) pushl $0; TRAP(29) IDTVEC(rsvd13) pushl $0; TRAP(30) IDTVEC(rsvd14) pushl $0; TRAP(31) SUPERALIGN_TEXT alltraps: pushal nop pushl %ds pushl %es movl $KDSEL,%eax movl %ax,%ds movl %ax,%es calltrap: incl _cnt+V_TRAP call _trap /* * Return through doreti to handle ASTs. Have to change trap frame * to interrupt frame. */ movl $T_ASTFLT,4+4+32(%esp) /* new trap type (err code not used) */ pushl _cpl pushl $0 /* dummy unit */ jmp doreti #ifdef KGDB /* * This code checks for a kgdb trap, then falls through * to the regular trap code. */ SUPERALIGN_TEXT bpttraps: pushal nop pushl %es pushl %ds movl $KDSEL,%eax movl %ax,%ds movl %ax,%es testb $SEL_RPL_MASK,TRAPF_CS_OFF(%esp) /* non-kernel mode? */ jne calltrap /* yes */ call _kgdb_trap_glue jmp calltrap #endif /* * Call gate entry for syscall */ SUPERALIGN_TEXT IDTVEC(syscall) pushfl /* only for stupid carry bit and more stupid wait3 cc kludge */ /* XXX - also for direction flag (bzero, etc. clear it) */ pushal /* only need eax,ecx,edx - trap resaves others */ nop movl $KDSEL,%eax /* switch to kernel segments */ movl %ax,%ds movl %ax,%es incl _cnt+V_SYSCALL /* kml 3/25/93 */ call _syscall /* * Return through doreti to handle ASTs. Have to change syscall frame * to interrupt frame. * * XXX - we should have set up the frame earlier to avoid the * following popal/pushal (not much can be done to avoid shuffling * the flags). Consistent frames would simplify things all over. 
*/ movl 32+0(%esp),%eax /* old flags, shuffle to above cs:eip */ movl 32+4(%esp),%ebx /* `int' frame should have been ef, eip, cs */ movl 32+8(%esp),%ecx movl %ebx,32+0(%esp) movl %ecx,32+4(%esp) movl %eax,32+8(%esp) popal nop pushl $0 /* dummy error code */ pushl $T_ASTFLT pushal nop movl __udatasel,%eax /* switch back to user segments */ pushl %eax /* XXX - better to preserve originals? */ pushl %eax pushl _cpl pushl $0 jmp doreti #ifdef SHOW_A_LOT /* * 'show_bits' was too big when defined as a macro. The line length for some * enclosing macro was too big for gas. Perhaps the code would have blown * the cache anyway. */ ALIGN_TEXT show_bits: pushl %eax SHOW_BIT(0) SHOW_BIT(1) SHOW_BIT(2) SHOW_BIT(3) SHOW_BIT(4) SHOW_BIT(5) SHOW_BIT(6) SHOW_BIT(7) SHOW_BIT(8) SHOW_BIT(9) SHOW_BIT(10) SHOW_BIT(11) SHOW_BIT(12) SHOW_BIT(13) SHOW_BIT(14) SHOW_BIT(15) popl %eax ret .data bit_colors: .byte GREEN,RED,0,0 .text #endif /* SHOW_A_LOT */ /* * include generated interrupt vectors and ISA intr code */ #include "i386/isa/vector.s" #include "i386/isa/icu.s" Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c (revision 607) +++ head/sys/amd64/amd64/machdep.c (revision 608) @@ -1,1277 +1,1285 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.10 1993/10/10 06:01:44 rgrimes Exp $ + * $Id: machdep.c,v 1.11 1993/10/14 18:15:35 rgrimes Exp $ */ #include "npx.h" #include "isa.h" #include #include "param.h" #include "systm.h" #include "signalvar.h" #include "kernel.h" #include "map.h" #include "proc.h" #include "user.h" #include "exec.h" /* for PS_STRINGS */ #include "buf.h" #include "reboot.h" #include "conf.h" #include "file.h" #include "callout.h" #include "malloc.h" #include "mbuf.h" #include "msgbuf.h" #include "net/netisr.h" #ifdef SYSVSHM #include "sys/shm.h" #endif #include "vm/vm.h" #include "vm/vm_kern.h" #include "vm/vm_page.h" #include "sys/exec.h" #include "sys/vnode.h" #ifndef MACHINE_NONCONTIG extern vm_offset_t avail_end; #else extern vm_offset_t avail_start, avail_end; static vm_offset_t hole_start, hole_end; static vm_offset_t avail_next; static unsigned int avail_remaining; #endif /* MACHINE_NONCONTIG */ #include "machine/cpu.h" #include "machine/reg.h" #include "machine/psl.h" #include "machine/specialreg.h" #include "machine/sysarch.h" #include "i386/isa/isa.h" #include "i386/isa/rtc.h" #define EXPECT_BASEMEM 640 /* The expected base memory*/ #define INFORM_WAIT 1 /* Set to pause berfore crash in weird cases*/ /* * Declare these as initialized data so we can patch them. */ int nswbuf = 0; #ifdef NBUF int nbuf = NBUF; #else int nbuf = 0; #endif #ifdef BUFPAGES int bufpages = BUFPAGES; #else int bufpages = 0; #endif extern int freebufspace; int _udatasel, _ucodesel; /* * Machine-dependent startup code */ int boothowto = 0, Maxmem = 0; long dumplo; int physmem, maxmem; extern int bootdev; #ifdef SMALL extern int forcemaxmem; #endif int biosmem; extern cyloffset; int cpu_class; void dumpsys __P((void)); void cpu_startup() { register int unixsize; register unsigned i; register struct pte *pte; int mapaddr, j; register caddr_t v; int maxbufs, base, residual; extern long Usrptsize; vm_offset_t minaddr, maxaddr; vm_size_t size; int firstaddr; /* * Initialize error message buffer (at end of core). */ /* avail_end was pre-decremented in pmap_bootstrap to compensate */ for (i = 0; i < btoc(sizeof (struct msgbuf)); i++) #ifndef MACHINE_NONCONTIG pmap_enter(pmap_kernel(), msgbufp, avail_end + i * NBPG, VM_PROT_ALL, TRUE); #else pmap_enter(pmap_kernel(), (caddr_t)msgbufp + i * NBPG, avail_end + i * NBPG, VM_PROT_ALL, TRUE); #endif msgbufmapped = 1; /* * Good {morning,afternoon,evening,night}. */ printf(version); identifycpu(); printf("real mem = %d\n", ctob(physmem)); /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) /* valloc(cfree, struct cblock, nclist); no clists any more!!! 
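cpu_startup() above sizes its tables with the classic two-pass valloc() idiom: run the allocation sequence once from a null base purely to measure, kmem_alloc() that much, then run it again (the goto again) to hand out the real addresses. A toy version of the idiom, keeping the kernel's measure-from-zero trick even though it is only a convention:

        #include <stdio.h>

        typedef char *caddr_t;

        struct callout { int c_dummy; };
        struct buf     { int b_dummy; };

        #define valloc(name, type, num) \
                ((name) = (type *)v, v = (caddr_t)((name) + (num)))

        int
        main(void)
        {
                static char region[65536];      /* stands in for kmem_alloc(kernel_map, size) */
                struct callout *callout;
                struct buf *buf;
                int ncallout = 16, nbuf = 32;
                caddr_t v;
                long size;

                v = (caddr_t)0;                 /* pass 1: measure only (the firstaddr == 0 pass) */
                valloc(callout, struct callout, ncallout);
                valloc(buf, struct buf, nbuf);
                size = (long)v;

                v = region;                     /* pass 2: same walk, real addresses this time */
                valloc(callout, struct callout, ncallout);
                valloc(buf, struct buf, nbuf);

                printf("tables need %ld bytes; callout at %p, buf at %p\n",
                    size, (void *)callout, (void *)buf);
                return 0;
        }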
- cgd */ valloc(callout, struct callout, ncallout); #ifdef NetBSD valloc(swapmap, struct map, nswapmap = maxproc * 2); #endif #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif /* * Determine how many buffers to allocate. - * Use 10% of memory for the first 2 Meg, 5% of the remaining - * memory. Insure a minimum of 16 buffers. + * Use 20% of memory of memory beyond the first 2MB + * Insure a minimum of 16 fs buffers. * We allocate 1/2 as many swap buffer headers as file i/o buffers. */ if (bufpages == 0) - if (physmem < btoc(2 * 1024 * 1024)) - bufpages = physmem / 10 / CLSIZE; - else - bufpages = (btoc(2 * 1024 * 1024) + physmem) / 20 / CLSIZE; + bufpages = (ctob(physmem) - 2048*1024) / NBPG / 5; + if (bufpages < 32) + bufpages = 32; + /* + * We must still limit the maximum number of buffers to be no + * more than 2/5's of the size of the kernal malloc region, this + * will only take effect for machines with lots of memory + */ + bufpages = min(bufpages, (VM_KMEM_SIZE / NBPG) * 2 / 5); if (nbuf == 0) { nbuf = bufpages / 2; if (nbuf < 16) nbuf = 16; } freebufspace = bufpages * NBPG; if (nswbuf == 0) { nswbuf = (nbuf / 2) &~ 1; /* force even */ if (nswbuf > 256) nswbuf = 256; /* sanity */ } valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); /* * Allocate a submap for buffer space allocations. + * XXX we are NOT using buffer_map, but due to + * the references to it we will just allocate 1 page of + * vm (not real memory) to make things happy... */ buffer_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, - bufpages*NBPG, TRUE); + /* bufpages * */NBPG, TRUE); /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. */ /* exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, * 16*NCARGS, TRUE); * NOT CURRENTLY USED -- cgd */ /* * Allocate a submap for physio */ phys_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, VM_PHYS_SIZE, TRUE); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ mclrefcnt = (char *)malloc(NMBCLUSTERS+CLBYTES/MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES); mb_map = kmem_suballoc(kernel_map, (vm_offset_t)&mbutl, &maxaddr, VM_MBUF_SIZE, FALSE); /* * Initialize callouts */ callfree = callout; for (i = 1; i < ncallout; i++) callout[i-1].c_next = &callout[i]; printf("avail mem = %d\n", ptoa(vm_page_free_count)); printf("using %d buffers containing %d bytes of memory\n", nbuf, bufpages * CLBYTES); /* * Set up CPU-specific registers, cache, etc. */ initcpu(); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); /* * Configure the system. 
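The buffer-cache sizing is the functional change in this machdep.c revision: 20% of the memory beyond the first 2MB, a floor of 32 pages, and never more than 2/5 of the kernel malloc arena. A worked example of the new rule; the VM_KMEM_SIZE value below is only illustrative, not this kernel's actual setting:

        #include <stdio.h>

        #define NBPG            4096
        #define VM_KMEM_SIZE    (16 * 1024 * 1024)      /* illustrative value */

        static long
        bufpages_for(long membytes)
        {
                long pages = (membytes - 2048 * 1024) / NBPG / 5;   /* 20% of memory past 2MB */

                if (pages < 32)
                        pages = 32;                                  /* floor: 16 fs buffers' worth */
                if (pages > (VM_KMEM_SIZE / NBPG) * 2 / 5)
                        pages = (VM_KMEM_SIZE / NBPG) * 2 / 5;       /* cap: 2/5 of the malloc arena */
                return pages;
        }

        int
        main(void)
        {
                printf("4MB -> %ld pages, 8MB -> %ld pages, 32MB -> %ld pages\n",
                    bufpages_for(4L << 20), bufpages_for(8L << 20), bufpages_for(32L << 20));
                return 0;
        }

For 4MB, 8MB and 32MB machines this yields 102, 307 and 1536 pages respectively, instead of the old 10%/5% split that scaled poorly on small memories.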
*/ configure(); } struct cpu_nameclass i386_cpus[] = { { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ { "i386DX", CPUCLASS_386 }, /* CPU_386 */ { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ { "i486DX", CPUCLASS_486 }, /* CPU_486 */ { "i586", CPUCLASS_586 }, /* CPU_586 */ }; identifycpu() /* translated from hp300 -- cgd */ { printf("CPU: "); if (cpu >= 0 && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) { printf("%s", i386_cpus[cpu].cpu_name); cpu_class = i386_cpus[cpu].cpu_class; } else { printf("unknown cpu type %d\n", cpu); panic("startup: bad cpu id"); } printf(" ("); switch(cpu_class) { case CPUCLASS_286: printf("286"); break; case CPUCLASS_386: printf("386"); break; case CPUCLASS_486: printf("486"); break; case CPUCLASS_586: printf("586"); break; default: printf("unknown"); /* will panic below... */ } printf("-class CPU)"); printf("\n"); /* cpu speed would be nice, but how? */ /* * Now that we have told the user what they have, * let them know if that machine type isn't configured. */ switch (cpu_class) { case CPUCLASS_286: /* a 286 should not make it this far, anyway */ #if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) #error This kernel is not configured for one of the supported CPUs #endif #if !defined(I386_CPU) case CPUCLASS_386: #endif #if !defined(I486_CPU) case CPUCLASS_486: #endif #if !defined(I586_CPU) case CPUCLASS_586: #endif panic("CPU class not configured"); default: break; } } #ifdef PGINPROF /* * Return the difference (in microseconds) * between the current time and a previous * time as represented by the arguments. * If there is a pending clock interrupt * which has not been serviced due to high * ipl, return error code. */ /*ARGSUSED*/ vmtime(otime, olbolt, oicr) register int otime, olbolt, oicr; { return (((time.tv_sec-otime)*60 + lbolt-olbolt)*16667); } #endif extern int kstack[]; /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig, mask; unsigned code; { register struct proc *p = curproc; register int *regs; register struct sigframe *fp; struct sigacts *ps = p->p_sigacts; int oonstack, frmtrap; regs = p->p_regs; oonstack = ps->ps_onstack; frmtrap = curpcb->pcb_flags & FM_TRAP; /* * Allocate and validate space for the signal handler * context. Note that if the stack is in P0 space, the * call to grow() is a nop, and the useracc() check * will fail if the process has not already allocated * the space with a `brk'. */ if (!ps->ps_onstack && (ps->ps_sigonstack & sigmask(sig))) { fp = (struct sigframe *)(ps->ps_sigsp - sizeof(struct sigframe)); ps->ps_onstack = 1; } else { if (frmtrap) fp = (struct sigframe *)(regs[tESP] - sizeof(struct sigframe)); else fp = (struct sigframe *)(regs[sESP] - sizeof(struct sigframe)); } if ((unsigned)fp <= (unsigned)p->p_vmspace->vm_maxsaddr + MAXSSIZ - ctob(p->p_vmspace->vm_ssize)) (void)grow(p, (unsigned)fp); if (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; sig = sigmask(SIGILL); p->p_sigignore &= ~sig; p->p_sigcatch &= ~sig; p->p_sigmask &= ~sig; psignal(p, SIGILL); return; } /* * Build the argument list for the signal handler. 
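sendsig() below fills in a sigframe on the user stack so that the sigcode trampoline in locore.s can call the catcher and then enter sigreturn(). The field names follow the code; the exact layout lives in the machine frame header, so the ordering and the stand-in types here are assumptions for illustration only:

        typedef void (*sig_t)(int);                     /* stand-in for the BSD sig_t */

        struct sigcontext_sketch {                      /* the pieces sigreturn() actually reloads */
                int     sc_onstack;
                int     sc_mask;
                int     sc_sp, sc_fp, sc_pc, sc_ps;
        };

        struct sigframe_sketch {
                int     sf_signum;                      /* first argument to the catcher */
                int     sf_code;                        /* second argument: fault/trap code */
                struct sigcontext_sketch *sf_scp;       /* third argument: points at sf_sc below */
                sig_t   sf_handler;                     /* the catcher, called by the trampoline */
                int     sf_eax, sf_edx, sf_ecx;         /* scratch registers, put back by sigreturn() */
                struct sigcontext_sketch sf_sc;         /* cpu state sigreturn() restores */
        };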
*/ fp->sf_signum = sig; fp->sf_code = code; fp->sf_scp = &fp->sf_sc; fp->sf_handler = catcher; /* save scratch registers */ if(frmtrap) { fp->sf_eax = regs[tEAX]; fp->sf_edx = regs[tEDX]; fp->sf_ecx = regs[tECX]; } else { fp->sf_eax = regs[sEAX]; fp->sf_edx = regs[sEDX]; fp->sf_ecx = regs[sECX]; } /* * Build the signal context to be used by sigreturn. */ fp->sf_sc.sc_onstack = oonstack; fp->sf_sc.sc_mask = mask; if(frmtrap) { fp->sf_sc.sc_sp = regs[tESP]; fp->sf_sc.sc_fp = regs[tEBP]; fp->sf_sc.sc_pc = regs[tEIP]; fp->sf_sc.sc_ps = regs[tEFLAGS]; regs[tESP] = (int)fp; regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc; } else { fp->sf_sc.sc_sp = regs[sESP]; fp->sf_sc.sc_fp = regs[sEBP]; fp->sf_sc.sc_pc = regs[sEIP]; fp->sf_sc.sc_ps = regs[sEFLAGS]; regs[sESP] = (int)fp; regs[sEIP] = (int)((struct pcb *)kstack)->pcb_sigc; } } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper priviledges or to cause * a machine fault. */ struct sigreturn_args { struct sigcontext *sigcntxp; }; sigreturn(p, uap, retval) struct proc *p; struct sigreturn_args *uap; int *retval; { register struct sigcontext *scp; register struct sigframe *fp; register int *regs = p->p_regs; /* * (XXX old comment) regs[sESP] points to the return address. * The user scp pointer is above that. * The return address is faked in the signal trampoline code * for consistency. */ scp = uap->sigcntxp; fp = (struct sigframe *) ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0) return(EINVAL); /* restore scratch registers */ regs[sEAX] = fp->sf_eax ; regs[sEDX] = fp->sf_edx ; regs[sECX] = fp->sf_ecx ; if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0) return(EINVAL); #ifdef notyet if ((scp->sc_ps & PSL_MBZ) != 0 || (scp->sc_ps & PSL_MBO) != PSL_MBO) { return(EINVAL); } #endif p->p_sigacts->ps_onstack = scp->sc_onstack & 01; p->p_sigmask = scp->sc_mask &~ (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP)); regs[sEBP] = scp->sc_fp; regs[sESP] = scp->sc_sp; regs[sEIP] = scp->sc_pc; regs[sEFLAGS] = scp->sc_ps; return(EJUSTRETURN); } /* * a simple function to make the system panic (and dump a vmcore) * in a predictable fashion */ void diediedie() { panic("because you said to!"); } int waittime = -1; struct pcb dumppcb; void boot(arghowto) int arghowto; { register long dummy; /* r12 is reserved */ register int howto; /* r11 == how to boot */ register int devtype; /* r10 == major of root dev */ extern int cold; int nomsg = 1; if(cold) { printf("hit reset please"); for(;;); } howto = arghowto; if ((howto&RB_NOSYNC) == 0 && waittime < 0 && bfreelist[0].b_forw) { register struct buf *bp; int iter, nbusy; waittime = 0; (void) splnet(); printf("syncing disks... "); /* * Release inodes held by texts before update. */ if (panicstr == 0) vnode_pager_umount(NULL); sync((struct sigcontext *)0); /* * Unmount filesystems */ #if 0 if (panicstr == 0) vfs_unmountall(); #endif for (iter = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) nbusy++; if (nbusy == 0) break; if (nomsg) { printf("updating disks before rebooting... 
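sigreturn() above recovers the whole signal frame from the user-supplied sigcontext pointer by subtracting the member offset with offsetof(). The same container-from-member idiom in a self-contained form; the structure and field names below are made up purely for illustration.

#include <stddef.h>
#include <assert.h>

struct ctx   { int pc; int sp; };                 /* stand-in for sigcontext */
struct frame { int signum; int code; struct ctx sc; };

/* Step back from a pointer to the embedded member to its enclosing frame. */
static struct frame *
frame_from_ctx(struct ctx *scp)
{
        return (struct frame *)((char *)scp - offsetof(struct frame, sc));
}

int
main(void)
{
        struct frame f;

        assert(frame_from_ctx(&f.sc) == &f);
        return 0;
}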
"); nomsg = 0; } printf("%d ", nbusy); DELAY(40000 * iter); } if (nbusy) printf("giving up\n"); else printf("done\n"); DELAY(10000); /* wait for printf to finish */ } splhigh(); devtype = major(rootdev); if (howto&RB_HALT) { printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); cngetc(); } else { if (howto & RB_DUMP) { savectx(&dumppcb, 0); dumppcb.pcb_ptd = rcr3(); dumpsys(); /*NOTREACHED*/ } } #ifdef lint dummy = 0; dummy = dummy; printf("howto %d, devtype %d\n", arghowto, devtype); #endif cpu_reset(); for(;;) ; /*NOTREACHED*/ } unsigned dumpmag = 0x8fca0101; /* magic number for savecore */ int dumpsize = 0; /* also for savecore */ /* * Doadump comes here after turning off memory management and * getting on the dump stack, either when called above, or by * the auto-restart code. */ void dumpsys() { if (dumpdev == NODEV) return; if ((minor(dumpdev)&07) != 1) return; dumpsize = physmem; printf("\ndumping to dev %x, offset %d\n", dumpdev, dumplo); printf("dump "); switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) { case ENXIO: printf("device bad\n"); break; case EFAULT: printf("device not ready\n"); break; case EINVAL: printf("area improper\n"); break; case EIO: printf("i/o error\n"); break; case EINTR: printf("aborted from console\n"); break; default: printf("succeeded\n"); break; } printf("\n\n"); DELAY(1000); } #ifdef HZ /* * If HZ is defined we use this code, otherwise the code in * /sys/i386/i386/microtime.s is used. The othercode only works * for HZ=100. */ microtime(tvp) register struct timeval *tvp; { int s = splhigh(); *tvp = time; tvp->tv_usec += tick; while (tvp->tv_usec > 1000000) { tvp->tv_sec++; tvp->tv_usec -= 1000000; } splx(s); } #endif /* HZ */ physstrat(bp, strat, prio) struct buf *bp; int (*strat)(), prio; { register int s; caddr_t baddr; /* * vmapbuf clobbers b_addr so we must remember it so that it * can be restored after vunmapbuf. This is truely rude, we * should really be storing this in a field in the buf struct * but none are available and I didn't want to add one at * this time. Note that b_addr for dirty page pushes is * restored in vunmapbuf. (ugh!) 
*/ baddr = bp->b_un.b_addr; vmapbuf(bp); (*strat)(bp); /* pageout daemon doesn't wait for pushed pages */ if (bp->b_flags & B_DIRTY) return; s = splbio(); while ((bp->b_flags & B_DONE) == 0) sleep((caddr_t)bp, prio); splx(s); vunmapbuf(bp); bp->b_un.b_addr = baddr; } initcpu() { } /* * Clear registers on exec */ void setregs(p, entry) struct proc *p; u_long entry; { p->p_regs[sEBP] = 0; /* bottom of the fp chain */ p->p_regs[sEIP] = entry; p->p_addr->u_pcb.pcb_flags = 0; /* no fp at all */ load_cr0(rcr0() | CR0_TS); /* start emulating */ #if NNPX > 0 npxinit(__INITIAL_NPXCW__); -#endif +#endif /* NNPX > 0 */ } /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ #define DESCRIPTOR_SIZE 8 #define GNULL_SEL 0 /* Null Descriptor */ #define GCODE_SEL 1 /* Kernel Code Descriptor */ #define GDATA_SEL 2 /* Kernel Data Descriptor */ #define GLDT_SEL 3 /* LDT - eventually one per process */ #define GTGATE_SEL 4 /* Process task switch gate */ #define GPANIC_SEL 5 /* Task state to consider panic from */ #define GPROC0_SEL 6 /* Task state process slot zero and up */ #define NGDT GPROC0_SEL+1 unsigned char gdt[GPROC0_SEL+1][DESCRIPTOR_SIZE]; /* interrupt descriptor table */ struct gate_descriptor idt[NIDT]; /* local descriptor table */ unsigned char ldt[5][DESCRIPTOR_SIZE]; #define LSYS5CALLS_SEL 0 /* forced by intel BCS */ #define LSYS5SIGR_SEL 1 #define L43BSDCALLS_SEL 2 /* notyet */ #define LUCODE_SEL 3 #define LUDATA_SEL 4 /* seperate stack, es,fs,gs sels ? */ /* #define LPOSIXCALLS_SEL 5 /* notyet */ struct i386tss tss, panic_tss; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Panic Tss Descriptor */ { (int) &panic_tss, /* segment base address */ sizeof(tss)-1, /* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Proc 0 Tss 
Descriptor */ { (int) kstack, /* segment base address */ sizeof(tss)-1, /* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }}; struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ } }; setidt(idx, func, typ, dpl) char *func; { struct gate_descriptor *ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = 8; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X, name) extern IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(rsvd0), IDTVEC(rsvd1), IDTVEC(rsvd2), IDTVEC(rsvd3), IDTVEC(rsvd4), IDTVEC(rsvd5), IDTVEC(rsvd6), IDTVEC(rsvd7), IDTVEC(rsvd8), IDTVEC(rsvd9), IDTVEC(rsvd10), IDTVEC(rsvd11), IDTVEC(rsvd12), IDTVEC(rsvd13), IDTVEC(rsvd14), IDTVEC(rsvd14), IDTVEC(syscall); int lcr0(), lcr3(), rcr0(), rcr2(); int _gsel_tss; init386(first) { extern ssdtosd(), lgdt(), lidt(), lldt(), etext; int x, *pi; unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; extern int sigcode,szsigcode; /* table descriptors - used to load tables by microp */ unsigned short r_gdt[3], r_idt[3]; int pagesinbase, pagesinext; proc0.p_addr = proc0paddr; /* * Initialize the console before we print anything out. */ cninit (); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(i386_round_page(&etext)) - 1; gdt_segs[GDATA_SEL].ssd_limit = 0xffffffff; /* XXX constant? 
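setidt() above splits the 32-bit handler address across the two 16-bit offset fields of a 386 gate descriptor. A simplified self-contained version of that packing; the real descriptor carries separate type, DPL and present bits, which are collapsed into a single flags word here as an illustrative shortcut.

#include <stdint.h>

struct gate {                   /* simplified 8-byte gate layout */
        uint16_t off_lo;        /* handler offset, bits 0..15 */
        uint16_t selector;      /* code segment selector */
        uint16_t flags;         /* type, DPL, present (collapsed) */
        uint16_t off_hi;        /* handler offset, bits 16..31 */
};

static void
set_gate(struct gate *g, uint32_t handler, uint16_t selector, uint16_t flags)
{
        g->off_lo   = (uint16_t)(handler & 0xffff);
        g->selector = selector;
        g->flags    = flags;
        g->off_hi   = (uint16_t)(handler >> 16);
}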
*/ for (x=0; x < NGDT; x++) ssdtosd(gdt_segs+x, gdt+x); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. */ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * NBPG) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; /* Note. eventually want private ldts per process */ for (x=0; x < 5; x++) ssdtosd(ldt_segs+x, ldt+x); /* exceptions */ setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_KPL); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL); setidt(8, &IDTVEC(dble), SDT_SYS386TGT, SEL_KPL); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL); setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL); setidt(17, &IDTVEC(rsvd0), SDT_SYS386TGT, SEL_KPL); setidt(18, &IDTVEC(rsvd1), SDT_SYS386TGT, SEL_KPL); setidt(19, &IDTVEC(rsvd2), SDT_SYS386TGT, SEL_KPL); setidt(20, &IDTVEC(rsvd3), SDT_SYS386TGT, SEL_KPL); setidt(21, &IDTVEC(rsvd4), SDT_SYS386TGT, SEL_KPL); setidt(22, &IDTVEC(rsvd5), SDT_SYS386TGT, SEL_KPL); setidt(23, &IDTVEC(rsvd6), SDT_SYS386TGT, SEL_KPL); setidt(24, &IDTVEC(rsvd7), SDT_SYS386TGT, SEL_KPL); setidt(25, &IDTVEC(rsvd8), SDT_SYS386TGT, SEL_KPL); setidt(26, &IDTVEC(rsvd9), SDT_SYS386TGT, SEL_KPL); setidt(27, &IDTVEC(rsvd10), SDT_SYS386TGT, SEL_KPL); setidt(28, &IDTVEC(rsvd11), SDT_SYS386TGT, SEL_KPL); setidt(29, &IDTVEC(rsvd12), SDT_SYS386TGT, SEL_KPL); setidt(30, &IDTVEC(rsvd13), SDT_SYS386TGT, SEL_KPL); setidt(31, &IDTVEC(rsvd14), SDT_SYS386TGT, SEL_KPL); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif r_gdt[0] = (unsigned short) (sizeof(gdt) - 1); r_gdt[1] = (unsigned short) ((int) gdt & 0xffff); r_gdt[2] = (unsigned short) ((int) gdt >> 16); lgdt(&r_gdt); r_idt[0] = (unsigned short) (sizeof(idt) - 1); r_idt[1] = (unsigned short) ((int) idt & 0xfffff); r_idt[2] = (unsigned short) ((int) idt >> 16); lidt(&r_idt); lldt(GSEL(GLDT_SEL, SEL_KPL)); #include "ddb.h" #if NDDB > 0 kdb_init(); if (boothowto & RB_KDB) Debugger(); #endif /* Use BIOS values stored in RTC CMOS RAM, since probing * breaks certain 386 AT relics. 
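The three-element r_gdt/r_idt arrays above are the 6-byte operand that lgdt/lidt expect: a 16-bit limit followed by the 32-bit linear base split into two 16-bit halves. (The & 0xfffff mask on the IDT base looks like a slip for 0xffff, but the cast to unsigned short makes the extra bits harmless.) A standalone sketch of the packing, with names invented for the example:

#include <stdint.h>

/* out[0] = limit, out[1] = base bits 0..15, out[2] = base bits 16..31 */
static void
region_descriptor(uint16_t out[3], uint32_t base, uint32_t nbytes)
{
        out[0] = (uint16_t)(nbytes - 1);
        out[1] = (uint16_t)(base & 0xffff);
        out[2] = (uint16_t)(base >> 16);
}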
*/ biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); /*printf("bios base %d ext %d ", biosbasemem, biosextmem);*/ /* * 15 Aug 92 Terry Lambert The real fix for the CMOS bug */ if( biosbasemem != EXPECT_BASEMEM) { printf( "Warning: Base memory %dK, assuming %dK\n", biosbasemem, EXPECT_BASEMEM); biosbasemem = EXPECT_BASEMEM; /* assume base*/ } if( biosextmem > 65536) { printf( "Warning: Extended memory %dK(>64M), assuming 0K\n", biosextmem); biosextmem = 0; /* assume none*/ } /* * Go into normal calculation; Note that we try to run in 640K, and * that invalid CMOS values of non 0xffff are no longer a cause of * ptdi problems. I have found a gutted kernel can run in 640K. */ pagesinbase = 640/4 - first/NBPG; pagesinext = biosextmem/4; /* use greater of either base or extended memory. do this * until I reinstitue discontiguous allocation of vm_page * array. */ if (pagesinbase > pagesinext) Maxmem = 640/4; else { Maxmem = pagesinext + 0x100000/NBPG; if (first < 0x100000) first = 0x100000; /* skip hole */ } /* This used to explode, since Maxmem used to be 0 for bas CMOS*/ maxmem = Maxmem - 1; /* highest page of usable memory */ physmem = maxmem; /* number of pages of physmem addr space */ /*printf("using first 0x%x to 0x%x\n ", first, maxmem*NBPG);*/ if (maxmem < 2048/4) { printf("Too little RAM memory. Warning, running in degraded mode.\n"); #ifdef INFORM_WAIT /* * People with less than 2 Meg have to hit return; this way * we see the messages and can tell them why they blow up later. * If they get working well enough to recompile, they can unset * the flag; otherwise, it's a toy and they have to lump it. */ cngetc(); #endif /* !INFORM_WAIT*/ } /* call pmap initialization to make new kernel address space */ #ifndef MACHINCE_NONCONTIG pmap_bootstrap (first, 0); #else pmap_bootstrap ((vm_offset_t)atdevbase + IOM_SIZE); #endif /* MACHINE_NONCONTIG */ /* now running on new page tables, configured,and u/iom is accessible */ /* make a initial tss so microp can get interrupt stack on syscall! 
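The CMOS values read above are in kilobytes, so dividing by 4 converts them to 4KB pages, and extended memory is counted upward from the 1MB mark. A sketch of the page-count arithmetic under those assumptions; the 4096-byte page size is a placeholder for NBPG, and 'first' (the end of the kernel, in bytes) is assumed to lie below 640K as the original code assumes.

#define NBPG_SKETCH 4096u       /* assumed page size */

/*
 * Return the highest usable page, mirroring the logic above: use the
 * larger of base memory (640K) or extended memory (from 1MB up).
 */
static unsigned
max_page(unsigned first, unsigned ext_kb)
{
        unsigned pagesinbase = 640 / 4 - first / NBPG_SKETCH;
        unsigned pagesinext  = ext_kb / 4;

        if (pagesinbase > pagesinext)
                return 640 / 4;                       /* stay in base memory */
        return pagesinext + 0x100000 / NBPG_SKETCH;   /* extended + low 1MB */
}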
*/ proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG; proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; _gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = (sizeof(tss))<<16; ltr(_gsel_tss); /* make a call gate to reenter kernel with */ gdp = (struct gate_descriptor *) &ldt[LSYS5CALLS_SEL][0]; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 0; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode); proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_ptd = IdlePTD; } extern struct pte *CMAP1, *CMAP2; extern caddr_t CADDR1, CADDR2; /* * zero out physical memory * specified in relocation units (NBPG bytes) */ clearseg(n) { *(int *)CMAP2 = PG_V | PG_KW | ctob(n); load_cr3(rcr3()); bzero(CADDR2,NBPG); #ifndef MACHINE_NONCONTIG *(int *) CADDR2 = 0; #endif /* MACHINE_NONCONTIG */ } /* * copy a page of physical memory * specified in relocation units (NBPG bytes) */ void copyseg(frm, n) { *(int *)CMAP2 = PG_V | PG_KW | ctob(n); load_cr3(rcr3()); bcopy((void *)frm, (void *)CADDR2, NBPG); } /* * copy a page of physical memory * specified in relocation units (NBPG bytes) */ void physcopyseg(frm, to) { *(int *)CMAP1 = PG_V | PG_KW | ctob(frm); *(int *)CMAP2 = PG_V | PG_KW | ctob(to); load_cr3(rcr3()); bcopy(CADDR1, CADDR2, NBPG); } /*aston() { schednetisr(NETISR_AST); }*/ void setsoftclock() { schednetisr(NETISR_SCLK); } /* * insert an element into a queue */ #undef insque _insque(element, head) register struct prochd *element, *head; { element->ph_link = head->ph_link; head->ph_link = (struct proc *)element; element->ph_rlink = (struct proc *)head; ((struct prochd *)(element->ph_link))->ph_rlink=(struct proc *)element; } /* * remove an element from a queue */ #undef remque _remque(element) register struct prochd *element; { ((struct prochd *)(element->ph_link))->ph_rlink = element->ph_rlink; ((struct prochd *)(element->ph_rlink))->ph_link = element->ph_link; element->ph_rlink = (struct proc *)0; } #ifdef SLOW_OLD_COPYSTRS vmunaccess() {} #if 0 /* assembler versions now in locore.s */ /* * Below written in C to allow access to debugging code */ copyinstr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; void *toaddr, *fromaddr; { int c,tally; tally = 0; while (maxlength--) { c = fubyte(fromaddr++); if (c == -1) { if(lencopied) *lencopied = tally; return(EFAULT); } tally++; *(char *)toaddr++ = (char) c; if (c == 0){ if(lencopied) *lencopied = (u_int)tally; return(0); } } if(lencopied) *lencopied = (u_int)tally; return(ENAMETOOLONG); } copyoutstr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; void *fromaddr, *toaddr; { int c; int tally; tally = 0; while (maxlength--) { c = subyte(toaddr++, *(char *)fromaddr); if (c == -1) return(EFAULT); tally++; if (*(char *)fromaddr++ == 0){ if(lencopied) *lencopied = tally; return(0); } } if(lencopied) *lencopied = tally; return(ENAMETOOLONG); } #endif /* SLOW_OLD_COPYSTRS */ copystr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; void *fromaddr, *toaddr; { u_int tally; tally = 0; while (maxlength--) { *(u_char *)toaddr = *(u_char *)fromaddr++; tally++; if (*(u_char *)toaddr++ == 0) { if(lencopied) *lencopied = 
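_insque()/_remque() above are the classic circular doubly-linked-list splice operations, written against struct prochd and struct proc. The same two operations on a neutral node type, which is easier to test in isolation; the node type and function names are invented for the example.

#include <assert.h>

struct node { struct node *next, *prev; };

static void
node_insque(struct node *elem, struct node *head)
{
        elem->next = head->next;        /* link after the head */
        head->next = elem;
        elem->prev = head;
        elem->next->prev = elem;        /* back-pointer of the old first node */
}

static void
node_remque(struct node *elem)
{
        elem->next->prev = elem->prev;  /* unlink in both directions */
        elem->prev->next = elem->next;
        elem->prev = 0;
}

int
main(void)
{
        struct node head = { &head, &head }, a;

        node_insque(&a, &head);
        assert(head.next == &a && a.next == &head && a.prev == &head);
        node_remque(&a);
        assert(head.next == &head && head.prev == &head);
        return 0;
}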
tally; return(0); } } if(lencopied) *lencopied = tally; return(ENAMETOOLONG); } #endif Index: head/sys/amd64/amd64/pmap.c =================================================================== --- head/sys/amd64/amd64/pmap.c (revision 607) +++ head/sys/amd64/amd64/pmap.c (revision 608) @@ -1,1725 +1,1722 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.5 1993/10/12 13:53:25 rgrimes Exp $ + * $Id: pmap.c,v 1.6 1993/10/12 15:09:37 rgrimes Exp $ */ -static char rcsid[] = "$Id: pmap.c,v 1.5 1993/10/12 13:53:25 rgrimes Exp $"; /* * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * Derived from: hp300/@(#)pmap.c 7.1 (Berkeley) 12/5/90 */ /* * Reno i386 version, from Mike Hibler's hp300 version. */ /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. 
However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "param.h" #include "proc.h" #include "malloc.h" #include "user.h" #include "vm/vm.h" #include "vm/vm_kern.h" #include "vm/vm_page.h" /*#include "vm/vm_pageout.h"*/ #include "i386/isa/isa.h" /* * Allocate various and sundry SYSMAPs used in the days of old VM * and not yet converted. XXX. */ #define BSDVM_COMPAT 1 #ifdef DEBUG struct { int kernel; /* entering kernel mapping */ int user; /* entering user mapping */ int ptpneeded; /* needed to allocate a PT page */ int pwchange; /* no mapping change, just wiring or protection */ int wchange; /* no mapping change, just wiring */ int mchange; /* was mapped but mapping to different page */ int managed; /* a managed page */ int firstpv; /* first mapping for this PA */ int secondpv; /* second mapping for this PA */ int ci; /* cache inhibited */ int unmanaged; /* not a managed page */ int flushes; /* cache flushes */ } enter_stats; struct { int calls; int removes; int pvfirst; int pvsearch; int ptinvalid; int uflushes; int sflushes; } remove_stats; int debugmap = 0; int pmapdebug = 0 /* 0xffff */; #define PDB_FOLLOW 0x0001 #define PDB_INIT 0x0002 #define PDB_ENTER 0x0004 #define PDB_REMOVE 0x0008 #define PDB_CREATE 0x0010 #define PDB_PTPAGE 0x0020 #define PDB_CACHE 0x0040 #define PDB_BITS 0x0080 #define PDB_COLLECT 0x0100 #define PDB_PROTECT 0x0200 #define PDB_PDRTAB 0x0400 #define PDB_PARANOIA 0x2000 #define PDB_WIRING 0x4000 #define PDB_PVDUMP 0x8000 int pmapvacflush = 0; #define PVF_ENTER 0x01 #define PVF_REMOVE 0x02 #define PVF_PROTECT 0x04 #define PVF_TOTAL 0x80 #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023])) #define pmap_pte_pa(pte) (*(int *)(pte) & PG_FRAME) #define pmap_pde_v(pte) ((pte)->pd_v) #define pmap_pte_w(pte) ((pte)->pg_w) /* #define pmap_pte_ci(pte) ((pte)->pg_ci) */ #define pmap_pte_m(pte) ((pte)->pg_m) #define pmap_pte_u(pte) ((pte)->pg_u) #define pmap_pte_v(pte) ((pte)->pg_v) #define pmap_pte_set_w(pte, v) ((pte)->pg_w = (v)) #define pmap_pte_set_prot(pte, v) ((pte)->pg_prot = (v)) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) int protection_codes[8]; struct pmap kernel_pmap_store; pmap_t kernel_pmap; vm_offset_t avail_start; /* PA of first available physical page */ vm_offset_t avail_end; /* PA of last available physical page */ vm_size_t mem_size; /* memory size in bytes */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss)*/ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t vm_first_phys; /* PA of first managed page */ vm_offset_t vm_last_phys; /* PA just past last managed page */ int i386pagesperpage; /* PAGE_SIZE / NBPG */ boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
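The recursive-map trick described above (a PDE that points back at the page directory) makes every page table visible as ordinary memory inside one 4MB window of the kernel address space, so the PTE for any virtual address can be located by arithmetic alone. A sketch of that arithmetic with illustrative constants; the recursive slot number below is a placeholder chosen for the example, not the value this kernel actually uses.

#include <stdint.h>

#define PG_SHIFT_SK  12          /* 4KB pages */
#define PD_SHIFT_SK  22          /* each PDE covers 4MB */
#define PTD_SLOT_SK  0x3bf       /* placeholder recursive PD slot */

/*
 * With PD slot PTD_SLOT_SK pointing back at the page directory itself,
 * the PTE for 'va' appears at a fixed virtual address computed from the
 * page number of the address it maps.
 */
static uint32_t
vtopte_va(uint32_t va)
{
        uint32_t ptmap_base = (uint32_t)PTD_SLOT_SK << PD_SHIFT_SK;

        return ptmap_base + (va >> PG_SHIFT_SK) * sizeof(uint32_t);
}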
*/ char *pmap_attributes; /* reference and modify bits */ boolean_t pmap_testbit(); void pmap_clear_modify(); #if BSDVM_COMPAT #include "msgbuf.h" /* * All those kernel PT submaps that BSD is so fond of */ struct pte *CMAP1, *CMAP2, *mmap; caddr_t CADDR1, CADDR2, vmmap; struct pte *msgbufmap; struct msgbuf *msgbufp; #endif /* * Bootstrap the system enough to run with virtual memory. * Map the kernel's code and data, and allocate the system page table. * * On the I386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address KERNBASE to the actual * (physical) address starting relative to 0] */ struct pte *pmap_pte(); void pmap_bootstrap(firstaddr, loadaddr) vm_offset_t firstaddr; vm_offset_t loadaddr; { #if BSDVM_COMPAT vm_offset_t va; struct pte *pte; #endif extern vm_offset_t maxmem, physmem; extern int IdlePTD; avail_start = firstaddr + 8 * NBPG; avail_end = maxmem << PG_SHIFT; /* XXX: allow for msgbuf */ avail_end -= i386_round_page(sizeof(struct msgbuf)); mem_size = physmem << PG_SHIFT; virtual_avail = (vm_offset_t)atdevbase + 0x100000 - 0xa0000 + 10*NBPG; virtual_end = VM_MAX_KERNEL_ADDRESS; i386pagesperpage = PAGE_SIZE / NBPG; /* * Initialize protection array. */ i386_protection_init(); /* * The kernel's pmap is statically allocated so we don't * have to use pmap_create, which is unlikely to work * correctly at this part of the boot sequence. */ kernel_pmap = &kernel_pmap_store; #ifdef notdef /* * Create Kernel page directory table and page maps. * [ currently done in locore. i have wild and crazy ideas -wfj ] * XXX IF THIS IS EVER USED, IT MUST BE MOVED TO THE TOP * OF THIS ROUTINE -- cgd */ bzero(firstaddr, 4*NBPG); kernel_pmap->pm_pdir = firstaddr + VM_MIN_KERNEL_ADDRESS; kernel_pmap->pm_ptab = firstaddr + VM_MIN_KERNEL_ADDRESS + NBPG; firstaddr += NBPG; for (x = i386_btod(VM_MIN_KERNEL_ADDRESS); x < i386_btod(VM_MIN_KERNEL_ADDRESS)+3; x++) { struct pde *pde; pde = kernel_pmap->pm_pdir + x; *(int *)pde = firstaddr + x*NBPG | PG_V | PG_KW; } #else kernel_pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD); #endif simple_lock_init(&kernel_pmap->pm_lock); kernel_pmap->pm_count = 1; #if BSDVM_COMPAT /* * Allocate all the submaps we need */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*NBPG); p = pte; pte += (n); va = virtual_avail; pte = pmap_pte(kernel_pmap, va); SYSMAP(caddr_t ,CMAP1 ,CADDR1 ,1 ) SYSMAP(caddr_t ,CMAP2 ,CADDR2 ,1 ) SYSMAP(caddr_t ,mmap ,vmmap ,1 ) SYSMAP(struct msgbuf * ,msgbufmap ,msgbufp ,1 ) virtual_avail = va; #endif /* * reserve special hunk of memory for use by bus dma as a bounce * buffer (contiguous virtual *and* physical memory). * do it from firstaddr -> firstaddr+8 pages. note that * avail_start was bumped up 8 pages, above, to accomodate this. */ { extern vm_offset_t isaphysmem; isaphysmem = va; virtual_avail = pmap_map(va, firstaddr, firstaddr + 8*NBPG, VM_PROT_ALL); } *(int *)PTD = 0; load_cr3(rcr3()); } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. 
*/ void pmap_init(phys_start, phys_end) vm_offset_t phys_start, phys_end; { vm_offset_t addr, addr2; vm_size_t npg, s; int rv; extern int KPTphys; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_init(%x, %x)\n", phys_start, phys_end); #endif /* * Now that kernel map has been allocated, we can mark as * unavailable regions which we have mapped in locore. */ addr = atdevbase; (void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0, &addr, (0x100000-0xa0000), FALSE); addr = (vm_offset_t) KERNBASE + KPTphys/* *NBPG */; vm_object_reference(kernel_object); (void) vm_map_find(kernel_map, kernel_object, addr, &addr, 2*NBPG, FALSE); /* * Allocate memory for random pmap data structures. Includes the * pv_head_table and pmap_attributes. */ npg = atop(phys_end - phys_start); s = (vm_size_t) (sizeof(struct pv_entry) * npg + npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); pv_table = (pv_entry_t) addr; addr += sizeof(struct pv_entry) * npg; pmap_attributes = (char *) addr; #ifdef DEBUG if (pmapdebug & PDB_INIT) printf("pmap_init: %x bytes (%x pgs): tbl %x attr %x\n", s, npg, pv_table, pmap_attributes); #endif /* * Now it is safe to enable pv_table recording. */ vm_first_phys = phys_start; vm_last_phys = phys_end; pmap_initialized = TRUE; } /* * Used to map a range of physical addresses into kernel * virtual address space. * * For now, VM is already on, we only need to map the * specified memory. */ vm_offset_t pmap_map(virt, start, end, prot) vm_offset_t virt; vm_offset_t start; vm_offset_t end; int prot; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_map(%x, %x, %x, %x)\n", virt, start, end, prot); #endif while (start < end) { pmap_enter(kernel_pmap, virt, start, prot, FALSE); virt += PAGE_SIZE; start += PAGE_SIZE; } return(virt); } /* * Create and return a physical map. * * If the size specified for the map * is zero, the map is an actual physical * map, and may be referenced by the * hardware. * * If the size specified is non-zero, * the map will be used in software only, and * is bounded by that size. * * [ just allocate a ptd and mark it uninitialize -- should we track * with a table which process has which ptd? -wfj ] */ pmap_t pmap_create(size) vm_size_t size; { register pmap_t pmap; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) printf("pmap_create(%x)\n", size); #endif /* * Software use map does not need a pmap */ if (size) return(NULL); /* XXX: is it ok to wait here? */ pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK); #ifdef notifwewait if (pmap == NULL) panic("pmap_create: cannot allocate a pmap"); #endif bzero(pmap, sizeof(*pmap)); pmap_pinit(pmap); return (pmap); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) pg("pmap_pinit(%x)\n", pmap); #endif /* * No need to allocate page table space yet but we do need a * valid page directory table. */ pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, NBPG); /* wire in kernel global address entries */ bcopy(PTD+KPTDI, pmap->pm_pdir+KPTDI, NKPDE*4); /* install self-referential address mapping entry */ *(int *)(pmap->pm_pdir+PTDPTDI) = (int)pmap_extract(kernel_pmap, pmap->pm_pdir) | PG_V | PG_KW; pmap->pm_count = 1; simple_lock_init(&pmap->pm_lock); } /* * Retire the given physical map from service. * Should only be called if the map contains * no valid mappings. 
*/ void pmap_destroy(pmap) register pmap_t pmap; { int count; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_destroy(%x)\n", pmap); #endif if (pmap == NULL) return; simple_lock(&pmap->pm_lock); count = --pmap->pm_count; simple_unlock(&pmap->pm_lock); if (count == 0) { pmap_release(pmap); free((caddr_t)pmap, M_VMPMAP); } } /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap) register struct pmap *pmap; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) pg("pmap_release(%x)\n", pmap); #endif #ifdef notdef /* DIAGNOSTIC */ /* count would be 0 from pmap_destroy... */ simple_lock(&pmap->pm_lock); if (pmap->pm_count != 1) panic("pmap_release count"); #endif kmem_free(kernel_map, (vm_offset_t)pmap->pm_pdir, NBPG); } /* * Add a reference to the specified pmap. */ void pmap_reference(pmap) pmap_t pmap; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_reference(%x)", pmap); #endif if (pmap != NULL) { simple_lock(&pmap->pm_lock); pmap->pm_count++; simple_unlock(&pmap->pm_lock); } } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap, sva, eva) struct pmap *pmap; register vm_offset_t sva; register vm_offset_t eva; { register pt_entry_t *ptp,*ptq; vm_offset_t va; vm_offset_t pa; pt_entry_t *pte; pv_entry_t pv, npv; int ix; int s, bits; #ifdef DEBUG pt_entry_t opte; if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) pg("pmap_remove(%x, %x, %x)", pmap, sva, eva); #endif if (pmap == NULL) return; /* are we current address space or kernel? */ if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum || pmap == kernel_pmap) ptp=PTmap; /* otherwise, we are alternate address space */ else { if (pmap->pm_pdir[PTDPTDI].pd_pfnum != APTDpde.pd_pfnum) { APTDpde = pmap->pm_pdir[PTDPTDI]; tlbflush(); } ptp=APTmap; } #ifdef DEBUG remove_stats.calls++; #endif /* this is essential since we must check the PDE(sva) for precense */ while (sva <= eva && !pmap_pde_v(pmap_pde(pmap, sva))) sva = (sva & PD_MASK) + (1<= eva) return; ptq++; } if(!(sva & 0x3ff)) /* Only check once in a while */ { if (!pmap_pde_v(pmap_pde(pmap, i386_ptob(sva)))) { /* We can race ahead here, straight to next pde.. */ sva = (sva & 0xffc00) + (1<<10) -1 ; continue; } } if(!pmap_pte_pa(ptp+sva)) continue; pte = ptp + sva; pa = pmap_pte_pa(pte); va = i386_ptob(sva); #ifdef DEBUG opte = *pte; remove_stats.removes++; #endif /* * Update statistics */ if (pmap_pte_w(pte)) pmap->pm_stats.wired_count--; pmap->pm_stats.resident_count--; /* * Invalidate the PTEs. * XXX: should cluster them up and invalidate as many * as possible at once. */ #ifdef DEBUG if (pmapdebug & PDB_REMOVE) printf("remove: inv %x ptes at %x(%x) ", i386pagesperpage, pte, *(int *)pte); #endif bits = ix = 0; do { bits |= *(int *)pte & (PG_U|PG_M); *(int *)pte++ = 0; /*TBIS(va + ix * NBPG);*/ } while (++ix != i386pagesperpage); if (curproc && pmap == &curproc->p_vmspace->vm_pmap) pmap_activate(pmap, (struct pcb *)curproc->p_addr); /* are we current address space or kernel? */ /*if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum || pmap == kernel_pmap) load_cr3(curpcb->pcb_ptd);*/ tlbflush(); #ifdef needednotdone reduce wiring count on page table pages as references drop #endif /* * Remove from the PV table (raise IPL since we * may be called at interrupt time). 
*/ if (pa < vm_first_phys || pa >= vm_last_phys) continue; pv = pa_to_pvh(pa); s = splimp(); /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ if (pmap == pv->pv_pmap && va == pv->pv_va) { npv = pv->pv_next; if (npv) { *pv = *npv; free((caddr_t)npv, M_VMPVENT); } else pv->pv_pmap = NULL; #ifdef DEBUG remove_stats.pvfirst++; #endif } else { for (npv = pv->pv_next; npv; npv = npv->pv_next) { #ifdef DEBUG remove_stats.pvsearch++; #endif if (pmap == npv->pv_pmap && va == npv->pv_va) break; pv = npv; } #ifdef DEBUG if (npv == NULL) panic("pmap_remove: PA not in pv_tab"); #endif pv->pv_next = npv->pv_next; free((caddr_t)npv, M_VMPVENT); pv = pa_to_pvh(pa); } #ifdef notdef [tally number of pagetable pages, if sharing of ptpages adjust here] #endif /* * Update saved attributes for managed page */ pmap_attributes[pa_index(pa)] |= bits; splx(s); } #ifdef notdef [cache and tlb flushing, if needed] #endif } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. */ void pmap_remove_all(pa) vm_offset_t pa; { register pv_entry_t pv; int s; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) printf("pmap_remove_all(%x)", pa); /*pmap_pvdump(pa);*/ #endif /* * Not one of ours */ if (pa < vm_first_phys || pa >= vm_last_phys) return; pv = pa_to_pvh(pa); s = splimp(); /* * Do it the easy way for now */ while (pv->pv_pmap != NULL) { #ifdef DEBUG if (!pmap_pde_v(pmap_pde(pv->pv_pmap, pv->pv_va)) || pmap_pte_pa(pmap_pte(pv->pv_pmap, pv->pv_va)) != pa) panic("pmap_remove_all: bad mapping"); #endif pmap_remove(pv->pv_pmap, pv->pv_va, pv->pv_va + PAGE_SIZE); } splx(s); } /* * Routine: pmap_copy_on_write * Function: * Remove write privileges from all * physical maps for this physical page. */ void pmap_copy_on_write(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) printf("pmap_copy_on_write(%x)", pa); #endif pmap_changebit(pa, PG_RO, TRUE); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap, sva, eva, prot) register pmap_t pmap; vm_offset_t sva, eva; vm_prot_t prot; { register pt_entry_t *pte; register vm_offset_t va; register int ix; int i386prot; boolean_t firstpage = TRUE; register pt_entry_t *ptp; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) printf("pmap_protect(%x, %x, %x, %x)", pmap, sva, eva, prot); #endif if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; /* are we current address space or kernel? */ if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum || pmap == kernel_pmap) ptp=PTmap; /* otherwise, we are alternate address space */ else { if (pmap->pm_pdir[PTDPTDI].pd_pfnum != APTDpde.pd_pfnum) { APTDpde = pmap->pm_pdir[PTDPTDI]; tlbflush(); } ptp=APTmap; } for (va = sva; va < eva; va += PAGE_SIZE) { /* * Page table page is not allocated. * Skip it, we don't want to force allocation * of unnecessary PTE pages just to set the protection. */ if (!pmap_pde_v(pmap_pde(pmap, va))) { /* XXX: avoid address wrap around */ if (va >= i386_trunc_pdr((vm_offset_t)-1)) break; va = i386_round_pdr(va + PAGE_SIZE) - PAGE_SIZE; continue; } pte = ptp + i386_btop(va); /* * Page not valid. Again, skip it. * Should we do this? 
Or set protection anyway? */ if (!pmap_pte_v(pte)) continue; ix = 0; i386prot = pte_prot(pmap, prot); if(va < UPT_MAX_ADDRESS) i386prot |= 2 /*PG_u*/; do { /* clear VAC here if PG_RO? */ pmap_pte_set_prot(pte++, i386prot); /*TBIS(va + ix * NBPG);*/ } while (++ix != i386pagesperpage); } if (curproc && pmap == &curproc->p_vmspace->vm_pmap) pmap_activate(pmap, (struct pcb *)curproc->p_addr); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap, va, pa, prot, wired) register pmap_t pmap; vm_offset_t va; register vm_offset_t pa; vm_prot_t prot; boolean_t wired; { register pt_entry_t *pte; register int npte, ix; vm_offset_t opa; boolean_t cacheable = TRUE; boolean_t checkpv = TRUE; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) printf("pmap_enter(%x, %x, %x, %x, %x)", pmap, va, pa, prot, wired); #endif if (pmap == NULL) return; if(va > VM_MAX_KERNEL_ADDRESS)panic("pmap_enter: toobig"); /* also, should not muck with PTD va! */ #ifdef DEBUG if (pmap == kernel_pmap) enter_stats.kernel++; else enter_stats.user++; #endif /* * Page Directory table entry not valid, we need a new PT page */ if (!pmap_pde_v(pmap_pde(pmap, va))) { printf("ptdi %x\n", pmap->pm_pdir[PTDPTDI]); panic("Page Table Directory Invalid (ptdi)"); } pte = pmap_pte(pmap, va); opa = pmap_pte_pa(pte); #ifdef DEBUG if (pmapdebug & PDB_ENTER) printf("enter: pte %x, *pte %x ", pte, *(int *)pte); #endif /* * Mapping has not changed, must be protection or wiring change. */ if (opa == pa) { #ifdef DEBUG enter_stats.pwchange++; #endif /* * Wiring change, just update stats. * We don't worry about wiring PT pages as they remain * resident as long as there are valid mappings in them. * Hence, if a user page is wired, the PT page will be also. */ if (wired && !pmap_pte_w(pte) || !wired && pmap_pte_w(pte)) { #ifdef DEBUG if (pmapdebug & PDB_ENTER) pg("enter: wiring change -> %x ", wired); #endif if (wired) pmap->pm_stats.wired_count++; else pmap->pm_stats.wired_count--; #ifdef DEBUG enter_stats.wchange++; #endif } goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { #ifdef DEBUG if (pmapdebug & PDB_ENTER) printf("enter: removing old mapping %x pa %x ", va, opa); #endif pmap_remove(pmap, va, va + PAGE_SIZE); #ifdef DEBUG enter_stats.mchange++; #endif } /* * Enter on the PV list if part of our managed memory * Note that we raise IPL while manipulating pv_table * since pmap_enter can be called at interrupt time. */ if (pa >= vm_first_phys && pa < vm_last_phys) { register pv_entry_t pv, npv; int s; #ifdef DEBUG enter_stats.managed++; #endif pv = pa_to_pvh(pa); s = splimp(); #ifdef DEBUG if (pmapdebug & PDB_ENTER) printf("enter: pv at %x: %x/%x/%x ", pv, pv->pv_va, pv->pv_pmap, pv->pv_next); #endif /* * No entries yet, use header as the first entry */ if (pv->pv_pmap == NULL) { #ifdef DEBUG enter_stats.firstpv++; #endif pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_next = NULL; pv->pv_flags = 0; } /* * There is at least one other VA mapping this page. * Place this entry after the header. 
*/ else { /*printf("second time: ");*/ #ifdef DEBUG for (npv = pv; npv; npv = npv->pv_next) if (pmap == npv->pv_pmap && va == npv->pv_va) panic("pmap_enter: already in pv_tab"); #endif npv = (pv_entry_t) malloc(sizeof *npv, M_VMPVENT, M_NOWAIT); npv->pv_va = va; npv->pv_pmap = pmap; npv->pv_next = pv->pv_next; pv->pv_next = npv; #ifdef DEBUG if (!npv->pv_next) enter_stats.secondpv++; #endif } splx(s); } /* * Assumption: if it is not part of our managed memory * then it must be device memory which may be volitile. */ if (pmap_initialized) { checkpv = cacheable = FALSE; #ifdef DEBUG enter_stats.unmanaged++; #endif } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. * Assume uniform modified and referenced status for all * I386 pages in a MACH page. */ npte = (pa & PG_FRAME) | pte_prot(pmap, prot) | PG_V; npte |= (*(int *)pte & (PG_M|PG_U)); if (wired) npte |= PG_W; if(va < UPT_MIN_ADDRESS) npte |= PG_u; else if(va < UPT_MAX_ADDRESS) npte |= PG_u | PG_RW; #ifdef DEBUG if (pmapdebug & PDB_ENTER) printf("enter: new pte value %x ", npte); #endif ix = 0; do { *(int *)pte++ = npte; /*TBIS(va);*/ npte += NBPG; va += NBPG; } while (++ix != i386pagesperpage); pte--; -#ifdef DEBUGx -cache, tlb flushes -#endif /*pads(pmap);*/ /*load_cr3(((struct pcb *)curproc->p_addr)->pcb_ptd);*/ tlbflush(); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(phys, prot) vm_offset_t phys; vm_prot_t prot; { switch (prot) { case VM_PROT_READ: case VM_PROT_READ|VM_PROT_EXECUTE: pmap_copy_on_write(phys); break; case VM_PROT_ALL: break; default: pmap_remove_all(phys); break; } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register pt_entry_t *pte; register int ix; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_change_wiring(%x, %x, %x)", pmap, va, wired); #endif if (pmap == NULL) return; pte = pmap_pte(pmap, va); #ifdef DEBUG /* * Page table page is not allocated. * Should this ever happen? Ignore it for now, * we don't want to force allocation of unnecessary PTE pages. */ if (!pmap_pde_v(pmap_pde(pmap, va))) { if (pmapdebug & PDB_PARANOIA) pg("pmap_change_wiring: invalid PDE for %x ", va); return; } /* * Page not valid. Should this ever happen? * Just continue and change wiring anyway. */ if (!pmap_pte_v(pte)) { if (pmapdebug & PDB_PARANOIA) pg("pmap_change_wiring: invalid PTE for %x ", va); } #endif if (wired && !pmap_pte_w(pte) || !wired && pmap_pte_w(pte)) { if (wired) pmap->pm_stats.wired_count++; else pmap->pm_stats.wired_count--; } /* * Wiring is not a hardware characteristic so there is no need * to invalidate TLB. */ ix = 0; do { pmap_pte_set_w(pte++, wired); } while (++ix != i386pagesperpage); } /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. * [ what about induced faults -wfj] */ struct pte *pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { #ifdef DEBUGx if (pmapdebug & PDB_FOLLOW) printf("pmap_pte(%x, %x) ->\n", pmap, va); #endif + if (pmap && pmap_pde_v(pmap_pde(pmap, va))) { /* are we current address space or kernel? 
*/ if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum || pmap == kernel_pmap) return ((struct pte *) vtopte(va)); /* otherwise, we are alternate address space */ else { if (pmap->pm_pdir[PTDPTDI].pd_pfnum != APTDpde.pd_pfnum) { APTDpde = pmap->pm_pdir[PTDPTDI]; tlbflush(); } return((struct pte *) avtopte(va)); } } return(0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_offset_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { register vm_offset_t pa; #ifdef DEBUGx if (pmapdebug & PDB_FOLLOW) pg("pmap_extract(%x, %x) -> ", pmap, va); #endif pa = 0; if (pmap && pmap_pde_v(pmap_pde(pmap, va))) { pa = *(int *) pmap_pte(pmap, va); } if (pa) pa = (pa & PG_FRAME) | (va & ~PG_FRAME); #ifdef DEBUGx if (pmapdebug & PDB_FOLLOW) printf("%x\n", pa); #endif return(pa); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) pmap_t dst_pmap; pmap_t src_pmap; vm_offset_t dst_addr; vm_size_t len; vm_offset_t src_addr; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_copy(%x, %x, %x, %x, %x)", dst_pmap, src_pmap, dst_addr, len, src_addr); #endif } /* * Require that all active physical maps contain no * incorrect entries NOW. [This update includes * forcing updates of any address map caching.] * * Generally used to insure that a thread about * to run will see a semantically correct world. */ void pmap_update() { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_update()"); #endif tlbflush(); } /* * Routine: pmap_collect * Function: * Garbage collects the physical map system for * pages which are no longer used. * Success need not be guaranteed -- that is, there * may well be pages which are not referenced, but * others may be collected. * Usage: * Called by the pageout daemon when pages are scarce. * [ needs to be written -wfj ] */ void pmap_collect(pmap) pmap_t pmap; { register vm_offset_t pa; register pv_entry_t pv; register int *pte; vm_offset_t kpa; int s; #ifdef DEBUG int *pde; int opmapdebug; printf("pmap_collect(%x) ", pmap); #endif if (pmap != kernel_pmap) return; } /* [ macro again?, should I force kstack into user map here? -wfj ] */ void pmap_activate(pmap, pcbp) register pmap_t pmap; struct pcb *pcbp; { int x; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_PDRTAB)) pg("pmap_activate(%x, %x) ", pmap, pcbp); #endif PMAP_ACTIVATE(pmap, pcbp); /*printf("pde "); for(x=0x3f6; x < 0x3fA; x++) printf("%x ", pmap->pm_pdir[x]);*/ /*pads(pmap);*/ /*pg(" pcb_cr3 %x", pcbp->pcb_cr3);*/ } /* * Routine: pmap_kernel * Function: * Returns the physical map handle for the kernel. */ pmap_t pmap_kernel() { return (kernel_pmap); } /* * pmap_zero_page zeros the specified (machine independent) * page by mapping the page into virtual memory and using * bzero to clear its contents, one machine dependent page * at a time. */ pmap_zero_page(phys) register vm_offset_t phys; { register int ix; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_zero_page(%x)", phys); #endif phys >>= PG_SHIFT; ix = 0; do { clearseg(phys++); } while (++ix != i386pagesperpage); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
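pmap_extract() above combines the page-frame bits taken from the PTE with the page-offset bits taken from the virtual address. The masking step on its own, with the frame mask assumed to be the usual top-20-bit mask for 4KB pages rather than the kernel's PG_FRAME definition:

#include <stdint.h>

#define PG_FRAME_SK 0xfffff000u  /* assumed frame mask for 4KB pages */

static uint32_t
extract_pa(uint32_t pte, uint32_t va)
{
        if (pte == 0)
                return 0;                /* no mapping present */
        return (pte & PG_FRAME_SK) | (va & ~PG_FRAME_SK);
}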
*/ pmap_copy_page(src, dst) register vm_offset_t src, dst; { register int ix; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_copy_page(%x, %x)", src, dst); #endif src >>= PG_SHIFT; dst >>= PG_SHIFT; ix = 0; do { physcopyseg(src++, dst++); } while (++ix != i386pagesperpage); } /* * Routine: pmap_pageable * Function: * Make the specified pages (by pmap, offset) * pageable (or not) as requested. * * A page which is not pageable may not take * a fault; therefore, its page table entry * must remain valid for the duration. * * This routine is merely advisory; pmap_enter * will specify that these pages are to be wired * down (or not) as appropriate. */ pmap_pageable(pmap, sva, eva, pageable) pmap_t pmap; vm_offset_t sva, eva; boolean_t pageable; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_pageable(%x, %x, %x, %x)", pmap, sva, eva, pageable); #endif /* * If we are making a PT page pageable then all valid * mappings must be gone from that page. Hence it should * be all zeros and there is no need to clean it. * Assumptions: * - we are called with only one page at a time * - PT pages have only one pv_table entry */ if (pmap == kernel_pmap && pageable && sva + PAGE_SIZE == eva) { register pv_entry_t pv; register vm_offset_t pa; #ifdef DEBUG if ((pmapdebug & (PDB_FOLLOW|PDB_PTPAGE)) == PDB_PTPAGE) printf("pmap_pageable(%x, %x, %x, %x)", pmap, sva, eva, pageable); #endif /*if (!pmap_pde_v(pmap_pde(pmap, sva))) return;*/ if(pmap_pte(pmap, sva) == 0) return; pa = pmap_pte_pa(pmap_pte(pmap, sva)); if (pa < vm_first_phys || pa >= vm_last_phys) return; pv = pa_to_pvh(pa); /*if (!ispt(pv->pv_va)) return;*/ #ifdef DEBUG if (pv->pv_va != sva || pv->pv_next) { pg("pmap_pageable: bad PT page va %x next %x\n", pv->pv_va, pv->pv_next); return; } #endif /* * Mark it unmodified to avoid pageout */ pmap_clear_modify(pa); #ifdef needsomethinglikethis if (pmapdebug & PDB_PTPAGE) pg("pmap_pageable: PT page %x(%x) unmodified\n", sva, *(int *)pmap_pte(pmap, sva)); if (pmapdebug & PDB_WIRING) pmap_check_wiring("pageable", sva); #endif } } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_clear_modify(%x)", pa); #endif pmap_changebit(pa, PG_M, FALSE); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_clear_reference(%x)", pa); #endif pmap_changebit(pa, PG_U, FALSE); } /* * pmap_is_referenced: * * Return whether or not the specified physical page is referenced * by any physical maps. */ boolean_t pmap_is_referenced(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) { boolean_t rv = pmap_testbit(pa, PG_U); printf("pmap_is_referenced(%x) -> %c", pa, "FT"[rv]); return(rv); } #endif return(pmap_testbit(pa, PG_U)); } /* * pmap_is_modified: * * Return whether or not the specified physical page is modified * by any physical maps. 
*/ boolean_t pmap_is_modified(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) { boolean_t rv = pmap_testbit(pa, PG_M); printf("pmap_is_modified(%x) -> %c", pa, "FT"[rv]); return(rv); } #endif return(pmap_testbit(pa, PG_M)); } vm_offset_t pmap_phys_address(ppn) int ppn; { return(i386_ptob(ppn)); } /* * Miscellaneous support routines follow */ i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: *kp++ = 0; break; case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = PG_RO; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } boolean_t pmap_testbit(pa, bit) register vm_offset_t pa; int bit; { register pv_entry_t pv; register int *pte, ix; int s; if (pa < vm_first_phys || pa >= vm_last_phys) return(FALSE); pv = pa_to_pvh(pa); s = splimp(); /* * Check saved info first */ if (pmap_attributes[pa_index(pa)] & bit) { splx(s); return(TRUE); } /* * Not found, check current mappings returning * immediately if found. */ if (pv->pv_pmap != NULL) { for (; pv; pv = pv->pv_next) { pte = (int *) pmap_pte(pv->pv_pmap, pv->pv_va); ix = 0; do { if (*pte++ & bit) { splx(s); return(TRUE); } } while (++ix != i386pagesperpage); } } splx(s); return(FALSE); } pmap_changebit(pa, bit, setem) register vm_offset_t pa; int bit; boolean_t setem; { register pv_entry_t pv; register int *pte, npte, ix; vm_offset_t va; int s; boolean_t firstpage = TRUE; #ifdef DEBUG if (pmapdebug & PDB_BITS) printf("pmap_changebit(%x, %x, %s)", pa, bit, setem ? "set" : "clear"); #endif if (pa < vm_first_phys || pa >= vm_last_phys) return; pv = pa_to_pvh(pa); s = splimp(); /* * Clear saved attributes (modify, reference) */ if (!setem) pmap_attributes[pa_index(pa)] &= ~bit; /* * Loop over all current mappings setting/clearing as appropos * If setting RO do we need to clear the VAC? */ if (pv->pv_pmap != NULL) { #ifdef DEBUG int toflush = 0; #endif for (; pv; pv = pv->pv_next) { #ifdef DEBUG toflush |= (pv->pv_pmap == kernel_pmap) ? 
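i386_protection_init() above collapses the eight VM_PROT combinations onto the only distinctions a 386 PTE can express: no mapping, read-only, or read/write (execute permission is not representable separately). The same mapping written as a pure function; the bit values below are illustrative constants for the sketch, not the kernel's PG_RO/PG_RW definitions.

#define PROT_READ_SK   0x1
#define PROT_WRITE_SK  0x2
#define PROT_EXEC_SK   0x4

#define PG_RO_SK       0x000    /* illustrative read-only PTE bits */
#define PG_RW_SK       0x002    /* illustrative writable PTE bits */

static int
prot_to_pte_bits(int prot)
{
        if (prot & PROT_WRITE_SK)
                return PG_RW_SK;        /* any writable combination */
        if (prot & (PROT_READ_SK | PROT_EXEC_SK))
                return PG_RO_SK;        /* readable and/or executable */
        return 0;                       /* no access requested */
}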
2 : 1; #endif va = pv->pv_va; /* * XXX don't write protect pager mappings */ if (bit == PG_RO) { extern vm_offset_t pager_sva, pager_eva; if (va >= pager_sva && va < pager_eva) continue; } pte = (int *) pmap_pte(pv->pv_pmap, va); ix = 0; do { if (setem) npte = *pte | bit; else npte = *pte & ~bit; if (*pte != npte) { *pte = npte; /*TBIS(va);*/ } va += NBPG; pte++; } while (++ix != i386pagesperpage); if (curproc && pv->pv_pmap == &curproc->p_vmspace->vm_pmap) pmap_activate(pv->pv_pmap, (struct pcb *)curproc->p_addr); } #ifdef somethinglikethis if (setem && bit == PG_RO && (pmapvacflush & PVF_PROTECT)) { if ((pmapvacflush & PVF_TOTAL) || toflush == 3) DCIA(); else if (toflush == 2) DCIS(); else DCIU(); } #endif } splx(s); } #ifdef DEBUG pmap_pvdump(pa) vm_offset_t pa; { register pv_entry_t pv; printf("pa %x", pa); for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) { printf(" -> pmap %x, va %x, flags %x", pv->pv_pmap, pv->pv_va, pv->pv_flags); pads(pv->pv_pmap); } printf(" "); } #ifdef notyet pmap_check_wiring(str, va) char *str; vm_offset_t va; { vm_map_entry_t entry; register int count, *pte; va = trunc_page(va); if (!pmap_pde_v(pmap_pde(kernel_pmap, va)) || !pmap_pte_v(pmap_pte(kernel_pmap, va))) return; if (!vm_map_lookup_entry(pt_map, va, &entry)) { pg("wired_check: entry for %x not found\n", va); return; } count = 0; for (pte = (int *)va; pte < (int *)(va+PAGE_SIZE); pte++) if (*pte) count++; if (entry->wired_count != count) pg("*%s*: %x: w%d/a%d\n", str, va, entry->wired_count, count); } #endif /* print address space of pmap*/ pads(pm) pmap_t pm; { unsigned va, i, j; struct pte *ptep; if(pm == kernel_pmap) return; for (i = 0; i < 1024; i++) if(pm->pm_pdir[i].pd_v) for (j = 0; j < 1024 ; j++) { - va = (i<<22)+(j<<12); + va = (i<<PD_SHIFT)+(j<<PG_SHIFT); if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte(pm, va); if(pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *)ptep); } ; } #endif Index: head/sys/amd64/amd64/trap.c =================================================================== --- head/sys/amd64/amd64/trap.c (revision 607) +++ head/sys/amd64/amd64/trap.c (revision 608) @@ -1,613 +1,585 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)trap.c 7.4 (Berkeley) 5/13/91 - * - * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE - * -------------------- ----- ---------------------- - * CURRENT PATCH LEVEL: 1 00137 - * -------------------- ----- ---------------------- - * - * 08 Apr 93 Bruce Evans Several VM system fixes - * Paul Kranenburg Add counter for vmstat + * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 + * $Id$ */ -static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/trap.c,v 1.2 1993/07/27 10:52:20 davidg Exp $"; /* * 386 Trap and System call handleing */ +#include "npx.h" #include "machine/cpu.h" #include "machine/psl.h" #include "machine/reg.h" #include "param.h" #include "systm.h" #include "proc.h" #include "user.h" #include "acct.h" #include "kernel.h" #ifdef KTRACE #include "ktrace.h" #endif #include "vm/vm_param.h" #include "vm/pmap.h" #include "vm/vm_map.h" #include "sys/vmmeter.h" #include "machine/trap.h" #ifdef __GNUC__ /* * The "r" contraint could be "rm" except for fatal bugs in gas. As usual, * we omit the size from the mov instruction to avoid nonfatal bugs in gas. */ #define read_gs() ({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; }) #define write_gs(gs) __asm("mov %0,%%gs" : : "r" ((u_short) gs)) #else /* not __GNUC__ */ u_short read_gs __P((void)); void write_gs __P((/* promoted u_short */ int gs)); #endif /* __GNUC__ */ struct sysent sysent[]; int nsysent; int dostacklimits; unsigned rcr2(); extern short cpl; /* * trap(frame): * Exception, fault, and trap interface to BSD kernel. This * common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. Note that the * effect is as if the arguments were passed call by reference. */ /*ARGSUSED*/ trap(frame) struct trapframe frame; { register int i; register struct proc *p = curproc; struct timeval syst; int ucode, type, code, eva; frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */ type = frame.tf_trapno; #include "ddb.h" #if NDDB > 0 if (curpcb && curpcb->pcb_onfault) { if (frame.tf_trapno == T_BPTFLT || frame.tf_trapno == T_TRCTRAP) if (kdb_trap (type, 0, &frame)) return; } #endif /*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x", frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs, rcr2(), frame.tf_esp);*/ if(curpcb == 0 || curproc == 0) goto we_re_toast; if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) { extern int _udatasel; if (read_gs() != (u_short) _udatasel) /* * Some user has corrupted %gs but we depend on it in * copyout() etc. Fix it up and retry. * * (We don't preserve %fs or %gs, so users can change * them to either _ucodesel, _udatasel or a not-present * selector, possibly ORed with 0 to 3, making them * volatile for other users. Not preserving them saves * time and doesn't lose functionality or open security * holes.) 
*/ write_gs(_udatasel); else copyfault: frame.tf_eip = (int)curpcb->pcb_onfault; return; } syst = p->p_stime; if (ISPL(frame.tf_cs) == SEL_UPL) { type |= T_USER; p->p_regs = (int *)&frame; curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ } ucode=0; eva = rcr2(); code = frame.tf_err; switch (type) { default: we_re_toast: #ifdef KDB if (kdb_trap(&psl)) return; #endif #if NDDB > 0 if (kdb_trap (type, 0, &frame)) return; #endif printf("trap type %d code = %x eip = %x cs = %x eflags = %x ", frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags); eva = rcr2(); printf("cr2 %x cpl %x\n", eva, cpl); /* type &= ~T_USER; */ /* XXX what the hell is this */ panic("trap"); /*NOTREACHED*/ case T_SEGNPFLT|T_USER: case T_STKFLT|T_USER: case T_PROTFLT|T_USER: /* protection fault */ ucode = code + BUS_SEGM_FAULT ; i = SIGBUS; break; case T_PRIVINFLT|T_USER: /* privileged instruction fault */ case T_RESADFLT|T_USER: /* reserved addressing fault */ case T_RESOPFLT|T_USER: /* reserved operand fault */ case T_FPOPFLT|T_USER: /* coprocessor operand fault */ ucode = type &~ T_USER; i = SIGILL; break; case T_ASTFLT|T_USER: /* Allow process switch */ astoff(); cnt.v_soft++; if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) { addupc(frame.tf_eip, &p->p_stats->p_prof, 1); p->p_flag &= ~SOWEUPC; } goto out; case T_DNA|T_USER: -#ifdef NPX +#if NNPX > 0 /* if a transparent fault (due to context switch "late") */ if (npxdna()) return; -#endif +#endif /* NNPX > 0 */ #ifdef MATH_EMULATE i = math_emulate(&frame); if (i == 0) return; #else /* MATH_EMULTATE */ panic("trap: math emulation necessary!"); #endif /* MATH_EMULTATE */ ucode = FPE_FPU_NP_TRAP; break; case T_BOUND|T_USER: ucode = FPE_SUBRNG_TRAP; i = SIGFPE; break; case T_OFLOW|T_USER: ucode = FPE_INTOVF_TRAP; i = SIGFPE; break; case T_DIVIDE|T_USER: ucode = FPE_INTDIV_TRAP; i = SIGFPE; break; case T_ARITHTRAP|T_USER: ucode = code; i = SIGFPE; break; case T_PAGEFLT: /* allow page faults in kernel mode */ #if 0 /* XXX - check only applies to 386's and 486's with WP off */ if (code & PGEX_P) goto we_re_toast; #endif /* fall into */ case T_PAGEFLT|T_USER: /* page fault */ { register vm_offset_t va; register struct vmspace *vm = p->p_vmspace; register vm_map_t map; int rv; vm_prot_t ftype; extern vm_map_t kernel_map; unsigned nss,v; va = trunc_page((vm_offset_t)eva); - /* - * Avoid even looking at pde_v(va) for high va's. va's - * above VM_MAX_KERNEL_ADDRESS don't correspond to normal - * PDE's (half of them correspond to APDEpde and half to - * an unmapped kernel PDE). va's betweeen 0xFEC00000 and - * VM_MAX_KERNEL_ADDRESS correspond to unmapped kernel PDE's - * (XXX - why are only 3 initialized when 6 are required to - * reach VM_MAX_KERNEL_ADDRESS?). Faulting in an unmapped - * kernel page table would give inconsistent PTD's. - * - * XXX - faulting in unmapped page tables wastes a page if - * va turns out to be invalid. - * - * XXX - should "kernel address space" cover the kernel page - * tables? Might have same problem with PDEpde as with - * APDEpde (or there may be no problem with APDEpde). - */ - if (va > 0xFEBFF000) { - rv = KERN_FAILURE; /* becomes SIGBUS */ - goto nogo; - } /* * It is only a kernel address space fault iff: * 1. (type & T_USER) == 0 and * 2. pcb_onfault not set or * 3. pcb_onfault set but supervisor space fault * The last can occur during an exec() copyin where the * argument space is lazy-allocated. 
*/ if (type == T_PAGEFLT && va >= KERNBASE) map = kernel_map; else map = &vm->vm_map; if (code & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; #ifdef DEBUG if (map == kernel_map && va == 0) { printf("trap: bad kernel access at %x\n", va); goto we_re_toast; } #endif /* * XXX: rude hack to make stack limits "work" */ nss = 0; if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map && dostacklimits) { nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ - (unsigned)va)); if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) { /*pg("trap rlimit %d, maxsaddr %x va %x ", nss, vm->vm_maxsaddr, va);*/ rv = KERN_FAILURE; goto nogo; } } /* check if page table is mapped, if not, fault it first */ #define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v) if (!pde_v(va)) { v = trunc_page(vtopte(va)); rv = vm_fault(map, v, ftype, FALSE); if (rv != KERN_SUCCESS) goto nogo; /* check if page table fault, increment wiring */ vm_map_pageable(map, v, round_page(v+1), FALSE); } else v=0; rv = vm_fault(map, va, ftype, FALSE); if (rv == KERN_SUCCESS) { /* * XXX: continuation of rude stack hack */ if (nss > vm->vm_ssize) vm->vm_ssize = nss; va = trunc_page(vtopte(va)); /* for page table, increment wiring as long as not a page table fault as well */ if (!v && type != T_PAGEFLT) vm_map_pageable(map, va, round_page(va+1), FALSE); if (type == T_PAGEFLT) return; goto out; } nogo: if (type == T_PAGEFLT) { if (curpcb->pcb_onfault) goto copyfault; printf("vm_fault(%x, %x, %x, 0) -> %x\n", map, va, ftype, rv); printf(" type %x, code %x\n", type, code); goto we_re_toast; } i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; break; } #if NDDB == 0 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ frame.tf_eflags &= ~PSL_T; /* Q: how do we turn it on again? */ return; #endif case T_BPTFLT|T_USER: /* bpt instruction fault */ case T_TRCTRAP|T_USER: /* trace trap */ frame.tf_eflags &= ~PSL_T; i = SIGTRAP; break; #include "isa.h" #if NISA > 0 case T_NMI: case T_NMI|T_USER: #if NDDB > 0 /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* machine/parity/power fail/"kitchen sink" faults */ if(isa_nmi(code) == 0) return; else goto we_re_toast; #endif } trapsignal(p, i, ucode); if ((type & T_USER) == 0) return; out: while (i = CURSIG(p)) psig(i); p->p_pri = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrq ourselves but before we * swtch()'ed, we might not be on the queue indicated by * our priority. */ (void) splclock(); setrq(p); p->p_stats->p_ru.ru_nivcsw++; swtch(); (void) splnone(); while (i = CURSIG(p)) psig(i); } if (p->p_stats->p_prof.pr_scale) { int ticks; struct timeval *tv = &p->p_stime; ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); if (ticks) { #ifdef PROFTIMER extern int profscale; addupc(frame.tf_eip, &p->p_stats->p_prof, ticks * profscale); #else addupc(frame.tf_eip, &p->p_stats->p_prof, ticks); #endif } } curpri = p->p_pri; curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ } /* * Compensate for 386 brain damage (missing URKR). * This is a little simpler than the pagefault handler in trap() because * it the page tables have already been faulted in and high addresses * are thrown out early for other reasons. 
*/ int trapwrite(addr) unsigned addr; { unsigned nss; struct proc *p; vm_offset_t va; struct vmspace *vm; va = trunc_page((vm_offset_t)addr); /* * XXX - MAX is END. Changed > to >= for temp. fix. */ if (va >= VM_MAXUSER_ADDRESS) return (1); /* * XXX: rude stack hack adapted from trap(). */ nss = 0; p = curproc; vm = p->p_vmspace; if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) { nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ - (unsigned)va)); if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) return (1); } if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE) != KERN_SUCCESS) return (1); /* * XXX: continuation of rude stack hack */ if (nss > vm->vm_ssize) vm->vm_ssize = nss; return (0); } /* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ /*ARGSUSED*/ syscall(frame) volatile struct syscframe frame; { register int *locr0 = ((int *)&frame); register caddr_t params; register int i; register struct sysent *callp; register struct proc *p = curproc; struct timeval syst; int error, opc; int args[8], rval[2]; int code; #ifdef lint r0 = 0; r0 = r0; r1 = 0; r1 = r1; #endif syst = p->p_stime; if (ISPL(frame.sf_cs) != SEL_UPL) panic("syscall"); code = frame.sf_eax; curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ p->p_regs = (int *)&frame; params = (caddr_t)frame.sf_esp + sizeof (int) ; /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. */ opc = frame.sf_eip - 7; callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; if (callp == sysent) { i = fuword(params); params += sizeof (int); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; } if ((i = callp->sy_narg * sizeof (int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { frame.sf_eax = error; frame.sf_eflags |= PSL_C; /* carry bit */ #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); #endif goto done; } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); #endif rval[0] = 0; rval[1] = frame.sf_edx; /*pg("%d. s %d\n", p->p_pid, code);*/ error = (*callp->sy_call)(p, args, rval); if (error == ERESTART) frame.sf_eip = opc; else if (error != EJUSTRETURN) { if (error) { /*pg("error %d", error);*/ frame.sf_eax = error; frame.sf_eflags |= PSL_C; /* carry bit */ } else { frame.sf_eax = rval[0]; frame.sf_edx = rval[1]; frame.sf_eflags &= ~PSL_C; /* carry bit */ } } /* else if (error == EJUSTRETURN) */ /* nothing to do */ done: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; while (i = CURSIG(p)) psig(i); p->p_pri = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrq ourselves but before we * swtch()'ed, we might not be on the queue indicated by * our priority. 
*/ (void) splclock(); setrq(p); p->p_stats->p_ru.ru_nivcsw++; swtch(); (void) splnone(); while (i = CURSIG(p)) psig(i); } if (p->p_stats->p_prof.pr_scale) { int ticks; struct timeval *tv = &p->p_stime; ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); if (ticks) { #ifdef PROFTIMER extern int profscale; addupc(frame.sf_eip, &p->p_stats->p_prof, ticks * profscale); #else addupc(frame.sf_eip, &p->p_stats->p_prof, ticks); #endif } } curpri = p->p_pri; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, code, error, rval[0]); #endif #ifdef DIAGNOSTICx { extern int _udatasel, _ucodesel; if (frame.sf_ss != _udatasel) printf("ss %x call %d\n", frame.sf_ss, code); if ((frame.sf_cs&0xffff) != _ucodesel) printf("cs %x call %d\n", frame.sf_cs, code); if (frame.sf_eip > VM_MAXUSER_ADDRESS) { printf("eip %x call %d\n", frame.sf_eip, code); frame.sf_eip = 0; } } #endif } Index: head/sys/amd64/amd64/vm_machdep.c =================================================================== --- head/sys/amd64/amd64/vm_machdep.c (revision 607) +++ head/sys/amd64/amd64/vm_machdep.c (revision 608) @@ -1,425 +1,415 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 - * - * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE - * -------------------- ----- ---------------------- - * CURRENT PATCH LEVEL: 1 00154 - * -------------------- ----- ---------------------- - * - * 20 Apr 93 Bruce Evans New npx-0.5 code - * - */ - -/* + * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ + * $Id$ */ -static char rcsid[] = "$Header: /usr/chroot/CVS/386BSD/src/sys/i386/i386/vm_machdep.c,v 1.3 1993/07/27 10:52:21 davidg Exp $"; +#include "npx.h" #include "param.h" #include "systm.h" #include "proc.h" #include "malloc.h" #include "buf.h" #include "user.h" #include "../include/cpu.h" #include "vm/vm.h" #include "vm/vm_kern.h" /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the kernel stack and pcb, making the child * ready to run, and marking it so that it can return differently * than the parent. Returns 1 in the child process, 0 in the parent. * We currently double-map the user area so that the stack is at the same * address in each process; in the future we will probably relocate * the frame pointers on the stack after copying. */ cpu_fork(p1, p2) register struct proc *p1, *p2; { register struct user *up = p2->p_addr; int foo, offset, addr, i; extern char kstack[]; extern int mvesp(); /* * Copy pcb and stack from proc p1 to p2. * We do this as cheaply as possible, copying only the active * part of the stack. The stack and pcb need to agree; * this is tricky, as the final pcb is constructed by savectx, * but its frame isn't yet on the stack when the stack is copied. * swtch compensates for this when the child eventually runs. * This should be done differently, with a single call * that copies and updates the pcb+stack, * replacing the bcopy and savectx. */ p2->p_addr->u_pcb = p1->p_addr->u_pcb; offset = mvesp() - (int)kstack; bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset, (unsigned) ctob(UPAGES) - offset); p2->p_regs = p1->p_regs; /* * Wire top of address space of child to it's kstack. * First, fault in a page of pte's to map it. */ addr = trunc_page((u_int)vtopte(kstack)); vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE); for (i=0; i < UPAGES; i++) pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG, pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG), /* * The user area has to be mapped writable because * it contains the kernel stack (when CR0_WP is on * on a 486 there is no user-read/kernel-write * mode). It is protected from user mode access * by the segment limits. */ VM_PROT_READ|VM_PROT_WRITE, TRUE); pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb); /* * * Arrange for a non-local goto when the new process * is started, to resume here, returning nonzero from setjmp. */ if (savectx(up, 1)) { /* * Return 1 in child. */ return (1); } return (0); } #ifdef notyet /* * cpu_exit is called as the last action during exit. * * We change to an inactive address space and a "safe" stack, * passing thru an argument to the new stack. Now, safely isolated * from the resources we're shedding, we release the address space * and any remaining machine-dependent resources, including the * memory for the user structure and kernel stack. * * Next, we assign a dummy context to be written over by swtch, * calling it to send this process off to oblivion. * [The nullpcb allows us to minimize cost in swtch() by not having * a special case]. 
*/ struct proc *swtch_to_inactive(); volatile void cpu_exit(p) register struct proc *p; { static struct pcb nullpcb; /* pcb to overwrite on last swtch */ -#ifdef NPX +#if NNPX > 0 npxexit(p); -#endif +#endif /* NNPX */ /* move to inactive space and stack, passing arg accross */ p = swtch_to_inactive(p); /* drop per-process resources */ vmspace_free(p->p_vmspace); kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); p->p_addr = (struct user *) &nullpcb; splclock(); swtch(); /* NOTREACHED */ } #else void cpu_exit(p) register struct proc *p; { -#ifdef NPX +#if NNPX > 0 npxexit(p); -#endif +#endif /* NNPX */ splclock(); swtch(); /* * This is to shutup the compiler, and if swtch() failed I suppose * this would be a good thing. This keeps gcc happy because panic * is a volatile void function as well. */ panic("cpu_exit"); } cpu_wait(p) struct proc *p; { /* drop per-process resources */ vmspace_free(p->p_vmspace); kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); } #endif /* * Set a red zone in the kernel stack after the u. area. */ setredzone(pte, vaddr) u_short *pte; caddr_t vaddr; { /* eventually do this by setting up an expand-down stack segment for ss0: selector, allowing stack access down to top of u. this means though that protection violations need to be handled thru a double fault exception that must do an integral task switch to a known good context, within which a dump can be taken. a sensible scheme might be to save the initial context used by sched (that has physical memory mapped 1:1 at bottom) and take the dump while still in mapped mode */ } /* * Move pages from one kernel virtual address to another. * Both addresses are assumed to reside in the Sysmap, * and size must be a multiple of CLSIZE. */ pagemove(from, to, size) register caddr_t from, to; int size; { register struct pte *fpte, *tpte; if (size % CLBYTES) panic("pagemove"); fpte = kvtopte(from); tpte = kvtopte(to); while (size > 0) { *tpte++ = *fpte; *(int *)fpte++ = 0; from += NBPG; to += NBPG; size -= NBPG; } tlbflush(); } /* * Convert kernel VA to physical address */ kvtop(addr) register caddr_t addr; { vm_offset_t va; va = pmap_extract(kernel_pmap, (vm_offset_t)addr); if (va == 0) panic("kvtop: zero page frame"); return((int)va); } #ifdef notdef /* * The probe[rw] routines should probably be redone in assembler * for efficiency. */ prober(addr) register u_int addr; { register int page; register struct proc *p; if (addr >= USRSTACK) return(0); p = u.u_procp; page = btop(addr); if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize)) return(1); return(0); } probew(addr) register u_int addr; { register int page; register struct proc *p; if (addr >= USRSTACK) return(0); p = u.u_procp; page = btop(addr); if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize)) return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW); return(0); } /* * NB: assumes a physically contiguous kernel page table * (makes life a LOT simpler). 
*/ kernacc(addr, count, rw) register u_int addr; int count, rw; { register struct pde *pde; register struct pte *pte; register int ix, cnt; extern long Syssize; if (count <= 0) return(0); pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG); ix = (addr & PD_MASK) >> PD_SHIFT; cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT; cnt -= ix; for (pde += ix; cnt; cnt--, pde++) if (pde->pd_v == 0) return(0); - ix = btop(addr-0xfe000000); - cnt = btop(addr-0xfe000000+count+NBPG-1); + ix = btop(addr-KERNBASE); + cnt = btop(addr-KERNBASE+count+NBPG-1); if (cnt > (int)&Syssize) return(0); cnt -= ix; for (pte = &Sysmap[ix]; cnt; cnt--, pte++) if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/) return(0); return(1); } useracc(addr, count, rw) register u_int addr; int count, rw; { register int (*func)(); register u_int addr2; extern int prober(), probew(); if (count <= 0) return(0); addr2 = addr; addr += count; func = (rw == B_READ) ? prober : probew; do { if ((*func)(addr2) == 0) return(0); addr2 = (addr2 + NBPG) & ~PGOFSET; } while (addr2 < addr); return(1); } #endif extern vm_map_t phys_map; /* * Map an IO request into kernel virtual address space. Requests fall into * one of five catagories: * * B_PHYS|B_UAREA: User u-area swap. * Address is relative to start of u-area (p_addr). * B_PHYS|B_PAGET: User page table swap. * Address is a kernel VA in usrpt (Usrptmap). * B_PHYS|B_DIRTY: Dirty page push. * Address is a VA in proc2's address space. * B_PHYS|B_PGIN: Kernel pagein of user pages. * Address is VA in user's address space. * B_PHYS: User "raw" IO request. * Address is VA in user's address space. * * All requests are (re)mapped into kernel VA space via the useriomap * (a name with only slightly more meaning than "kernelmap") */ vmapbuf(bp) register struct buf *bp; { register int npf; register caddr_t addr; register long flags = bp->b_flags; struct proc *p; int off; vm_offset_t kva; register vm_offset_t pa; if ((flags & B_PHYS) == 0) panic("vmapbuf"); addr = bp->b_saveaddr = bp->b_un.b_addr; off = (int)addr & PGOFSET; p = bp->b_proc; npf = btoc(round_page(bp->b_bcount + off)); kva = kmem_alloc_wait(phys_map, ctob(npf)); bp->b_un.b_addr = (caddr_t) (kva + off); while (npf--) { pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr); if (pa == 0) panic("vmapbuf: null page frame"); pmap_enter(vm_map_pmap(phys_map), kva, trunc_page(pa), VM_PROT_READ|VM_PROT_WRITE, TRUE); addr += PAGE_SIZE; kva += PAGE_SIZE; } } /* * Free the io map PTEs associated with this IO operation. * We also invalidate the TLB entries and restore the original b_addr. */ vunmapbuf(bp) register struct buf *bp; { register int npf; register caddr_t addr = bp->b_un.b_addr; vm_offset_t kva; if ((bp->b_flags & B_PHYS) == 0) panic("vunmapbuf"); npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET))); kva = (vm_offset_t)((int)addr & ~PGOFSET); kmem_free_wakeup(phys_map, kva, ctob(npf)); bp->b_un.b_addr = bp->b_saveaddr; bp->b_saveaddr = NULL; } /* * Force reset the processor by invalidating the entire address space! */ cpu_reset() { /* force a shutdown by unmapping entire address space ! */ bzero((caddr_t) PTD, NBPG); /* "good night, sweet prince .... " */ tlbflush(); /* NOTREACHED */ } Index: head/sys/i386/i386/genassym.c =================================================================== --- head/sys/i386/i386/genassym.c (revision 607) +++ head/sys/i386/i386/genassym.c (revision 608) @@ -1,189 +1,188 @@ /*- * Copyright (c) 1982, 1990 The Regents of the University of California. 
* All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 - * $Id: genassym.c,v 1.3 1993/10/10 02:09:44 rgrimes Exp $ + * $Id: genassym.c,v 1.4 1993/10/12 15:33:18 rgrimes Exp $ */ #include "sys/param.h" #include "sys/buf.h" #include "sys/vmmeter.h" #include "sys/proc.h" #include "sys/user.h" #include "sys/mbuf.h" #include "sys/msgbuf.h" #include "sys/resourcevar.h" #include "machine/cpu.h" #include "machine/trap.h" #include "machine/psl.h" #include "sys/syscall.h" #include "vm/vm_param.h" #include "vm/vm_map.h" #include "machine/pmap.h" main() { struct proc *p = (struct proc *)0; struct vmmeter *vm = (struct vmmeter *)0; struct user *up = (struct user *)0; struct rusage *rup = (struct rusage *)0; struct uprof *uprof = (struct uprof *)0; struct vmspace *vms = (struct vmspace *)0; vm_map_t map = (vm_map_t)0; pmap_t pmap = (pmap_t)0; struct pcb *pcb = (struct pcb *)0; struct trapframe *tf = (struct trapframe *)0; struct sigframe *sigf = (struct sigframe *)0; register unsigned i; printf("#define\tI386_CR3PAT %d\n", I386_CR3PAT); printf("#define\tUDOT_SZ %d\n", sizeof(struct user)); printf("#define\tP_LINK %d\n", &p->p_link); printf("#define\tP_RLINK %d\n", &p->p_rlink); printf("#define\tP_VMSPACE %d\n", &p->p_vmspace); printf("#define\tVM_PMAP %d\n", &vms->vm_pmap); printf("#define\tP_ADDR %d\n", &p->p_addr); printf("#define\tP_PRI %d\n", &p->p_pri); printf("#define\tP_STAT %d\n", &p->p_stat); printf("#define\tP_WCHAN %d\n", &p->p_wchan); printf("#define\tP_FLAG %d\n", &p->p_flag); printf("#define\tP_PID %d\n", &p->p_pid); printf("#define\tSSLEEP %d\n", SSLEEP); printf("#define\tSRUN %d\n", SRUN); printf("#define\tV_SWTCH %d\n", &vm->v_swtch); printf("#define\tV_TRAP %d\n", &vm->v_trap); printf("#define\tV_SYSCALL %d\n", &vm->v_syscall); printf("#define\tV_INTR %d\n", &vm->v_intr); printf("#define\tV_SOFT %d\n", &vm->v_soft); printf("#define\tV_PDMA %d\n", &vm->v_pdma); printf("#define\tV_FAULTS %d\n", &vm->v_faults); printf("#define\tV_PGREC %d\n", &vm->v_pgrec); printf("#define\tV_FASTPGREC %d\n", &vm->v_fastpgrec); printf("#define\tUPAGES %d\n", UPAGES); printf("#define\tHIGHPAGES %d\n", HIGHPAGES); printf("#define\tCLSIZE %d\n", CLSIZE); printf("#define\tNBPG %d\n", NBPG); printf("#define\tNPTEPG %d\n", NPTEPG); printf("#define\tNKPDE %d\n", NKPDE); printf("#define\tKPTDI %d\n", KPTDI); printf("#define\tPTDPTDI %d\n", PTDPTDI); printf("#define\tAPTDPTDI %d\n", APTDPTDI); printf("#define\tPGSHIFT %d\n", PGSHIFT); printf("#define\tPDRSHIFT %d\n", PDRSHIFT); printf("#define\tSYSPTSIZE %d\n", SYSPTSIZE); printf("#define\tUSRPTSIZE %d\n", USRPTSIZE); printf("#define\tUSRIOSIZE %d\n", USRIOSIZE); #ifdef SYSVSHM printf("#define\tSHMMAXPGS %d\n", SHMMAXPGS); #endif printf("#define\tUSRSTACK %d\n", USRSTACK); printf("#define\tKERNBASE %d\n", KERNBASE); printf("#define\tKERNSIZE %d\n", KERNSIZE); printf("#define\tMSGBUFPTECNT %d\n", btoc(sizeof (struct msgbuf))); printf("#define\tNMBCLUSTERS %d\n", NMBCLUSTERS); printf("#define\tMCLBYTES %d\n", MCLBYTES); printf("#define\tPCB_LINK %d\n", &pcb->pcb_tss.tss_link); printf("#define\tPCB_ESP0 %d\n", &pcb->pcb_tss.tss_esp0); printf("#define\tPCB_SS0 %d\n", &pcb->pcb_tss.tss_ss0); printf("#define\tPCB_ESP1 %d\n", &pcb->pcb_tss.tss_esp1); printf("#define\tPCB_SS1 %d\n", &pcb->pcb_tss.tss_ss1); printf("#define\tPCB_ESP2 %d\n", &pcb->pcb_tss.tss_esp2); printf("#define\tPCB_SS2 %d\n", &pcb->pcb_tss.tss_ss2); printf("#define\tPCB_CR3 %d\n", &pcb->pcb_tss.tss_cr3); printf("#define\tPCB_EIP %d\n", &pcb->pcb_tss.tss_eip); printf("#define\tPCB_EFLAGS %d\n", 
&pcb->pcb_tss.tss_eflags); printf("#define\tPCB_EAX %d\n", &pcb->pcb_tss.tss_eax); printf("#define\tPCB_ECX %d\n", &pcb->pcb_tss.tss_ecx); printf("#define\tPCB_EDX %d\n", &pcb->pcb_tss.tss_edx); printf("#define\tPCB_EBX %d\n", &pcb->pcb_tss.tss_ebx); printf("#define\tPCB_ESP %d\n", &pcb->pcb_tss.tss_esp); printf("#define\tPCB_EBP %d\n", &pcb->pcb_tss.tss_ebp); printf("#define\tPCB_ESI %d\n", &pcb->pcb_tss.tss_esi); printf("#define\tPCB_EDI %d\n", &pcb->pcb_tss.tss_edi); printf("#define\tPCB_ES %d\n", &pcb->pcb_tss.tss_es); printf("#define\tPCB_CS %d\n", &pcb->pcb_tss.tss_cs); printf("#define\tPCB_SS %d\n", &pcb->pcb_tss.tss_ss); printf("#define\tPCB_DS %d\n", &pcb->pcb_tss.tss_ds); printf("#define\tPCB_FS %d\n", &pcb->pcb_tss.tss_fs); printf("#define\tPCB_GS %d\n", &pcb->pcb_tss.tss_gs); printf("#define\tPCB_LDT %d\n", &pcb->pcb_tss.tss_ldt); printf("#define\tPCB_USERLDT %d\n", &pcb->pcb_ldt); printf("#define\tPCB_IOOPT %d\n", &pcb->pcb_tss.tss_ioopt); - printf("#define\tNKMEMCLUSTERS %d\n", NKMEMCLUSTERS); printf("#define\tU_PROF %d\n", &up->u_stats.p_prof); printf("#define\tU_PROFSCALE %d\n", &up->u_stats.p_prof.pr_scale); printf("#define\tPR_BASE %d\n", &uprof->pr_base); printf("#define\tPR_SIZE %d\n", &uprof->pr_size); printf("#define\tPR_OFF %d\n", &uprof->pr_off); printf("#define\tPR_SCALE %d\n", &uprof->pr_scale); printf("#define\tRU_MINFLT %d\n", &rup->ru_minflt); printf("#define\tPCB_FLAGS %d\n", &pcb->pcb_flags); printf("#define\tPCB_SAVEFPU %d\n", &pcb->pcb_savefpu); printf("#define\tFP_USESEMC %d\n", FP_USESEMC); printf("#define\tPCB_SAVEEMC %d\n", &pcb->pcb_saveemc); printf("#define\tPCB_CMAP2 %d\n", &pcb->pcb_cmap2); printf("#define\tPCB_IML %d\n", &pcb->pcb_iml); printf("#define\tPCB_ONFAULT %d\n", &pcb->pcb_onfault); printf("#define\tTF_ES %d\n", &tf->tf_es); printf("#define\tTF_DS %d\n", &tf->tf_ds); printf("#define\tTF_EDI %d\n", &tf->tf_edi); printf("#define\tTF_ESI %d\n", &tf->tf_esi); printf("#define\tTF_EBP %d\n", &tf->tf_ebp); printf("#define\tTF_ISP %d\n", &tf->tf_isp); printf("#define\tTF_EBX %d\n", &tf->tf_ebx); printf("#define\tTF_EDX %d\n", &tf->tf_edx); printf("#define\tTF_ECX %d\n", &tf->tf_ecx); printf("#define\tTF_EAX %d\n", &tf->tf_eax); printf("#define\tTF_TRAPNO %d\n", &tf->tf_trapno); printf("#define\tTF_ERR %d\n", &tf->tf_err); printf("#define\tTF_EIP %d\n", &tf->tf_eip); printf("#define\tTF_CS %d\n", &tf->tf_cs); printf("#define\tTF_EFLAGS %d\n", &tf->tf_eflags); printf("#define\tTF_ESP %d\n", &tf->tf_esp); printf("#define\tTF_SS %d\n", &tf->tf_ss); printf("#define\tSIGF_SIGNUM %d\n", &sigf->sf_signum); printf("#define\tSIGF_CODE %d\n", &sigf->sf_code); printf("#define\tSIGF_SCP %d\n", &sigf->sf_scp); printf("#define\tSIGF_HANDLER %d\n", &sigf->sf_handler); printf("#define\tSIGF_SC %d\n", &sigf->sf_sc); printf("#define\tB_READ %d\n", B_READ); printf("#define\tENOENT %d\n", ENOENT); printf("#define\tEFAULT %d\n", EFAULT); printf("#define\tENAMETOOLONG %d\n", ENAMETOOLONG); exit(0); } Index: head/sys/i386/i386/locore.s =================================================================== --- head/sys/i386/i386/locore.s (revision 607) +++ head/sys/i386/i386/locore.s (revision 608) @@ -1,2158 +1,2152 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $Id: locore.s,v 1.6 1993/10/10 06:07:57 rgrimes Exp $ + * $Id: locore.s,v 1.7 1993/10/13 07:11:11 rgrimes Exp $ */ /* * locore.s: 4BSD machine support for the Intel 386 * Preliminary version * Written by William F. Jolitz, 386BSD Project */ #include "npx.h" #include "assym.s" #include "machine/psl.h" #include "machine/pte.h" #include "errno.h" #include "machine/trap.h" #include "machine/specialreg.h" #include "i386/isa/debug.h" #include "machine/cputypes.h" #define KDSEL 0x10 #define SEL_RPL_MASK 0x0003 #define TRAPF_CS_OFF (13 * 4) /* * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. Perhaps newer versions * will have more pleasant appearance. */ .set IDXSHIFT,10 #define ALIGN_DATA .align 2 #define ALIGN_TEXT .align 2,0x90 /* 4-byte boundaries, NOP-filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte boundaries better for 486 */ #define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: #define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) #ifdef GPROF /* * ALTENTRY() must be before a corresponding ENTRY() so that it can jump * over the mcounting. */ #define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f #define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: /* * The call to mcount supports the usual (bad) conventions. We allocate * some data and pass a pointer to it although the 386BSD doesn't use * the data. We set up a frame before calling mcount because that is * the standard convention although it makes work for both mcount and * callers. */ #define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ pushl %ebp; movl %esp,%ebp; \ movl $1b,%eax; call mcount; popl %ebp #else /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align to because there may be no ALTENTRY() before it. * If there is a previous ALTENTRY() then the alignment code is empty. 
*/ #define ALTENTRY(name) GEN_ENTRY(_/**/name) #define ENTRY(name) GEN_ENTRY(_/**/name) #endif /* NB: NOP now preserves registers so NOPs can be inserted anywhere */ /* XXX: NOP and FASTER_NOP are misleadingly named */ #ifdef DUMMY_NOPS /* this will break some older machines */ #define FASTER_NOP #define NOP #else #define FASTER_NOP pushl %eax ; inb $0x84,%al ; popl %eax #define NOP pushl %eax ; inb $0x84,%al ; inb $0x84,%al ; popl %eax #endif /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). */ .globl _PTmap,_PTD,_PTDpde,_Sysmap .set _PTmap,PTDPTDI << PDRSHIFT .set _PTD,_PTmap + (PTDPTDI * NBPG) - .set _PTDpde,_PTD + (PTDPTDI * 4) /* XXX 4=sizeof pte */ + .set _PTDpde,_PTD + (PTDPTDI * 4) /* XXX 4=sizeof pde */ - .set _Sysmap,0xFDFF8000 + .set _Sysmap,_PTmap + (KPTDI * NBPG) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. */ .globl _APTmap,_APTD,_APTDpde .set _APTmap,APTDPTDI << PDRSHIFT .set _APTD,_APTmap + (APTDPTDI * NBPG) - .set _APTDpde,_PTD + (APTDPTDI * 4) /* XXX 4=sizeof pte */ + .set _APTDpde,_PTD + (APTDPTDI * 4) /* XXX 4=sizeof pde */ /* * Access to each processes kernel stack is via a region of * per-process address space (at the beginning), immediatly above * the user process stack. */ .set _kstack,USRSTACK .globl _kstack .set PPDROFF,0x3F6 .set PPTEOFF,0x400-UPAGES /* 0x3FE */ /* * Globals */ .data .globl _esym _esym: .long 0 /* ptr to end of syms */ .globl _boothowto,_bootdev,_curpcb .globl _cpu,_cold,_atdevbase _cpu: .long 0 /* are we 386, 386sx, or 486 */ _cold: .long 1 /* cold till we are not */ _atdevbase: .long 0 /* location of start of iomem in virtual */ _atdevphys: .long 0 /* location of device mapping ptes (phys) */ .globl _IdlePTD,_KPTphys _IdlePTD: .long 0 _KPTphys: .long 0 .globl _cyloffset,_proc0paddr _cyloffset: .long 0 _proc0paddr: .long 0 .space 512 tmpstk: /* * System Initialization */ .text /* * btext: beginning of text section. * Also the entry point (jumped to directly from the boot blocks). */ ENTRY(btext) movw $0x1234,0x472 /* warm boot */ jmp 1f .space 0x500 /* skip over warm boot shit */ /* * pass parameters on stack (howto, bootdev, unit, cyloffset, esym) * note: (%esp) is return address of boot * ( if we want to hold onto /boot, it's physical %esp up to _end) */ 1: movl 4(%esp),%eax movl %eax,_boothowto-KERNBASE movl 8(%esp),%eax movl %eax,_bootdev-KERNBASE movl 12(%esp),%eax movl %eax,_cyloffset-KERNBASE movl 16(%esp),%eax addl $KERNBASE,%eax movl %eax,_esym-KERNBASE /* find out our CPU type. */ pushfl popl %eax movl %eax,%ecx xorl $0x40000,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax shrl $18,%eax andl $1,%eax push %ecx popfl cmpl $0,%eax jne 1f movl $CPU_386,_cpu-KERNBASE jmp 2f 1: movl $CPU_486,_cpu-KERNBASE 2: /* * Finished with old stack; load new %esp now instead of later so * we can trace this code without having to worry about the trace * trap clobbering the memory test or the zeroing of the bss+bootstrap * page tables. * * XXX - wdboot clears the bss after testing that this is safe. * This is too wasteful - memory below 640K is scarce. The boot * program should check: * text+data <= &stack_variable - more_space_for_stack * text+data+bss+pad+space_for_page_tables <= end_of_memory * Oops, the gdt is in the carcass of the boot program so clearing * the rest of memory is still not possible. 
*/ movl $tmpstk-KERNBASE,%esp /* bootstrap stack end location */ -#ifdef garbage - /* count up memory */ - - xorl %eax,%eax /* start with base memory at 0x0 */ - #movl $0xA0000/NBPG,%ecx /* look every 4K up to 640K */ - movl $0xA0,%ecx /* look every 4K up to 640K */ -1: movl (%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,(%eax) /* write test pattern */ - /* flush stupid cache here! (with bcopy(0,0,512*1024) ) */ - cmpl $0xa55a5aa5,(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,(%eax) /* restore memory */ - addl $NBPG,%eax - loop 1b -2: shrl $12,%eax - movl %eax,_Maxmem-KERNBASE - - movl $0x100000,%eax /* next, talley remaining memory */ - #movl $((0xFFF000-0x100000)/NBPG),%ecx - movl $(0xFFF-0x100),%ecx -1: movl (%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,(%eax) /* write test pattern */ - cmpl $0xa55a5aa5,(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,(%eax) /* restore memory */ - addl $NBPG,%eax - loop 1b -2: shrl $12,%eax - movl %eax,_Maxmem-KERNBASE -#endif - /* * Virtual address space of kernel: * * text | data | bss | [syms] | page dir | proc0 kernel stack | usr stk map | Sysmap * 0 1 2 3 4 */ /* find end of kernel image */ movl $_end-KERNBASE,%ecx addl $NBPG-1,%ecx /* page align up */ andl $~(NBPG-1),%ecx movl %ecx,%esi /* esi=start of tables */ /* clear bss and memory for bootstrap pagetables. */ movl $_edata-KERNBASE,%edi subl %edi,%ecx addl $(UPAGES+5)*NBPG,%ecx /* size of tables */ xorl %eax,%eax /* pattern */ cld rep stosb +/* + * If we are loaded at 0x0 check to see if we have space for the + * page tables pages after the kernel and before the 640K ISA memory + * hole. If we do not have space relocate the page table pages and + * the kernel stack to start at 1MB. The value that ends up in esi + * is used by the rest of locore to build the tables. Locore adjusts + * esi each time it allocates a structure and then passes the final + * value to init386(first) as the value first. esi should ALWAYS + * be page aligned!! + */ + movl %esi,%ecx /* Get current first availiable address */ + cmpl $0x100000,%ecx /* Lets see if we are already above 1MB */ + jge 1f /* yep, don't need to check for room */ + addl $(NKPDE + 4) * NBPG,%ecx /* XXX the 4 is for kstack */ + /* space for kstack, PTD and PTE's */ + cmpl $(640*1024),%ecx + /* see if it fits in low memory */ + jle 1f /* yep, don't need to relocate it */ + movl $0x100000,%esi /* won't fit, so start it at 1MB */ +1: + /* physical address of Idle Address space */ movl %esi,_IdlePTD-KERNBASE /* * fillkpt * eax = (page frame address | control | status) == pte * ebx = address of page table * ecx = how many pages to map */ #define fillkpt \ 1: movl %eax,(%ebx) ; \ addl $NBPG,%eax ; /* increment physical address */ \ addl $4,%ebx ; /* next pte */ \ loop 1b ; /* * Map Kernel * N.B. don't bother with making kernel text RO, as 386 * ignores R/W AND U/S bits on kernel access (only v works) ! * * First step - build page tables */ movl %esi,%ecx /* this much memory, */ shrl $PGSHIFT,%ecx /* for this many pte s */ addl $UPAGES+4,%ecx /* including our early context */ cmpl $0xa0,%ecx /* XXX - cover debugger pages */ jae 1f movl $0xa0,%ecx 1: movl $PG_V|PG_KW,%eax /* having these bits set, */ lea (4*NBPG)(%esi),%ebx /* physical address of KPT in proc 0, */ movl %ebx,_KPTphys-KERNBASE /* in the kernel page table, */ fillkpt /* map I/O memory map */ movl $0x100-0xa0,%ecx /* for this many pte s, */ movl $(0xa0000|PG_V|PG_UW),%eax /* having these bits set,(perhaps URW?) 
XXX 06 Aug 92 */ movl %ebx,_atdevphys-KERNBASE /* remember phys addr of ptes */ fillkpt /* map proc 0's kernel stack into user page table page */ movl $UPAGES,%ecx /* for this many pte s, */ lea (1*NBPG)(%esi),%eax /* physical address in proc 0 */ lea (KERNBASE)(%eax),%edx movl %edx,_proc0paddr-KERNBASE /* remember VA for 0th process init */ orl $PG_V|PG_KW,%eax /* having these bits set, */ lea (3*NBPG)(%esi),%ebx /* physical address of stack pt in proc 0 */ addl $(PPTEOFF*4),%ebx fillkpt /* * Construct a page table directory * (of page directory elements - pde's) */ /* install a pde for temporary double map of bottom of VA */ lea (4*NBPG)(%esi),%eax /* physical address of kernel page table */ orl $PG_V|PG_UW,%eax /* pde entry is valid XXX 06 Aug 92 */ movl %eax,(%esi) /* which is where temp maps! */ /* kernel pde's */ movl $(NKPDE),%ecx /* for this many pde s, */ lea (KPTDI*4)(%esi),%ebx /* offset of pde for kernel */ fillkpt /* install a pde recursively mapping page directory as a page table! */ movl %esi,%eax /* phys address of ptd in proc 0 */ orl $PG_V|PG_UW,%eax /* pde entry is valid XXX 06 Aug 92 */ movl %eax,PTDPTDI*4(%esi) /* which is where PTmap maps! */ /* install a pde to map kernel stack for proc 0 */ lea (3*NBPG)(%esi),%eax /* physical address of pt in proc 0 */ orl $PG_V|PG_KW,%eax /* pde entry is valid */ movl %eax,PPDROFF*4(%esi) /* which is where kernel stack maps! */ /* copy and convert stuff from old gdt and idt for debugger */ cmpl $0x0375c339,0x96104 /* XXX - debugger signature */ jne 1f movb $1,_bdb_exists-KERNBASE 1: pushal subl $2*6,%esp sgdt (%esp) movl 2(%esp),%esi /* base address of current gdt */ movl $_gdt-KERNBASE,%edi movl %edi,2(%esp) movl $8*18/4,%ecx rep /* copy gdt */ movsl movl $_gdt-KERNBASE,-8+2(%edi) /* adjust gdt self-ptr */ movb $0x92,-8+5(%edi) sidt 6(%esp) movl 6+2(%esp),%esi /* base address of current idt */ movl 8+4(%esi),%eax /* convert dbg descriptor to ... */ movw 8(%esi),%ax movl %eax,bdb_dbg_ljmp+1-KERNBASE /* ... immediate offset ... */ movl 8+2(%esi),%eax movw %ax,bdb_dbg_ljmp+5-KERNBASE /* ... and selector for ljmp */ movl 24+4(%esi),%eax /* same for bpt descriptor */ movw 24(%esi),%ax movl %eax,bdb_bpt_ljmp+1-KERNBASE movl 24+2(%esi),%eax movw %ax,bdb_bpt_ljmp+5-KERNBASE movl $_idt-KERNBASE,%edi movl %edi,6+2(%esp) movl $8*4/4,%ecx rep /* copy idt */ movsl lgdt (%esp) lidt 6(%esp) addl $2*6,%esp popal /* load base of page directory and enable mapping */ movl %esi,%eax /* phys address of ptd in proc 0 */ orl $I386_CR3PAT,%eax movl %eax,%cr3 /* load ptd addr into mmu */ movl %cr0,%eax /* get control word */ /* * XXX it is now safe to always (attempt to) set CR0_WP and to set up * the page tables assuming it works, so USE_486_WRITE_PROTECT will go * away. The special 386 PTE checking needs to be conditional on * whatever distingiushes 486-only kernels from 386-486 kernels. */ #ifdef USE_486_WRITE_PROTECT orl $CR0_PE|CR0_PG|CR0_WP,%eax /* enable paging */ #else orl $CR0_PE|CR0_PG,%eax /* enable paging */ #endif movl %eax,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high mem */ ret begin: /* now running relocated at KERNBASE where the system is linked to run */ .globl _Crtat /* XXX - locore should not know about */ movl _Crtat,%eax /* variables of device drivers (pccons)! 
*/ subl $(KERNBASE+0xA0000),%eax movl _atdevphys,%edx /* get pte PA */ subl _KPTphys,%edx /* remove base of ptes, now have phys offset */ shll $PGSHIFT-2,%edx /* corresponding to virt offset */ addl $KERNBASE,%edx /* add virtual base */ movl %edx,_atdevbase addl %eax,%edx movl %edx,_Crtat /* set up bootstrap stack */ movl $_kstack+UPAGES*NBPG-4*12,%esp /* bootstrap stack end location */ xorl %eax,%eax /* mark end of frames */ movl %eax,%ebp movl _proc0paddr,%eax movl %esi,PCB_CR3(%eax) - lea 7*NBPG(%esi),%esi /* skip past stack. */ - pushl %esi - /* relocate debugger gdt entries */ movl $_gdt+8*9,%eax /* adjust slots 9-17 */ movl $9,%ecx reloc_gdt: movb $0xfe,7(%eax) /* top byte of base addresses, was 0, */ addl $8,%eax /* now KERNBASE>>24 */ loop reloc_gdt cmpl $0,_bdb_exists je 1f int $3 1: + /* + * Skip over the page tables and the kernel stack + * XXX 4 is kstack size + */ + lea (NKPDE + 4) * NBPG(%esi),%esi + + pushl %esi /* value of first for init386(first) */ call _init386 /* wire 386 chip for unix operation */ movl $0,_PTD call _main /* autoconfiguration, mountroot etc */ popl %esi /* * now we've run main() and determined what cpu-type we are, we can * enable WP mode on i486 cpus and above. * on return from main(), we are process 1 * set up address space and stack so that we can 'return' to user mode */ .globl __ucodesel,__udatasel movl __ucodesel,%eax movl __udatasel,%ecx /* build outer stack frame */ pushl %ecx /* user ss */ pushl $USRSTACK /* user esp */ pushl %eax /* user cs */ pushl $0 /* user ip */ movl %cx,%ds movl %cx,%es movl %ax,%fs /* double map cs to fs */ movl %cx,%gs /* and ds to gs */ lret /* goto user! */ pushl $lretmsg1 /* "should never get here!" */ call _panic lretmsg1: .asciz "lret: toinit\n" .set exec,59 .set exit,1 #define LCALL(x,y) .byte 0x9a ; .long y; .word x /* * Icode is copied out to process 1 and executed in user mode: * execve("/sbin/init", argv, envp); exit(0); * If the execve fails, process 1 exits and the system panics. 
*/ NON_GPROF_ENTRY(icode) pushl $0 /* envp for execve() */ # pushl $argv-_icode /* can't do this 'cos gas 1.38 is broken */ movl $argv,%eax subl $_icode,%eax pushl %eax /* argp for execve() */ # pushl $init-_icode movl $init,%eax subl $_icode,%eax pushl %eax /* fname for execve() */ pushl %eax /* dummy return address */ movl $exec,%eax LCALL(0x7,0x0) /* exit if something botches up in the above execve() */ pushl %eax /* execve failed, the errno will do for an */ /* exit code because errnos are < 128 */ pushl %eax /* dummy return address */ movl $exit,%eax LCALL(0x7,0x0) init: .asciz "/sbin/init" ALIGN_DATA argv: .long init+6-_icode /* argv[0] = "init" ("/sbin/init" + 6) */ .long eicode-_icode /* argv[1] follows icode after copyout */ .long 0 eicode: .globl _szicode _szicode: .long _szicode-_icode NON_GPROF_ENTRY(sigcode) call SIGF_HANDLER(%esp) lea SIGF_SC(%esp),%eax /* scp (the call may have clobbered the */ /* copy at 8(%esp)) */ pushl %eax pushl %eax /* junk to fake return address */ movl $103,%eax /* XXX sigreturn() */ LCALL(0x7,0) /* enter kernel with args on stack */ hlt /* never gets here */ .globl _szsigcode _szsigcode: .long _szsigcode-_sigcode /* * Support routines for GCC, general C-callable functions */ ENTRY(__udivsi3) movl 4(%esp),%eax xorl %edx,%edx divl 8(%esp) ret ENTRY(__divsi3) movl 4(%esp),%eax cltd idivl 8(%esp) ret /* * I/O bus instructions via C */ ENTRY(inb) /* val = inb(port) */ movl 4(%esp),%edx subl %eax,%eax NOP inb %dx,%al ret ENTRY(inw) /* val = inw(port) */ movl 4(%esp),%edx subl %eax,%eax NOP inw %dx,%ax ret ENTRY(insb) /* insb(port, addr, cnt) */ pushl %edi movw 8(%esp),%dx movl 12(%esp),%edi movl 16(%esp),%ecx cld NOP rep insb NOP movl %edi,%eax popl %edi ret ENTRY(insw) /* insw(port, addr, cnt) */ pushl %edi movw 8(%esp),%dx movl 12(%esp),%edi movl 16(%esp),%ecx cld NOP rep insw NOP movl %edi,%eax popl %edi ret ENTRY(rtcin) /* rtcin(val) */ movl 4(%esp),%eax outb %al,$0x70 subl %eax,%eax inb $0x71,%al ret ENTRY(outb) /* outb(port, val) */ movl 4(%esp),%edx NOP movl 8(%esp),%eax outb %al,%dx NOP ret ENTRY(outw) /* outw(port, val) */ movl 4(%esp),%edx NOP movl 8(%esp),%eax outw %ax,%dx NOP ret ENTRY(outsb) /* outsb(port, addr, cnt) */ pushl %esi movw 8(%esp),%dx movl 12(%esp),%esi movl 16(%esp),%ecx cld NOP rep outsb NOP movl %esi,%eax popl %esi ret ENTRY(outsw) /* outsw(port, addr, cnt) */ pushl %esi movw 8(%esp),%dx movl 12(%esp),%esi movl 16(%esp),%ecx cld NOP rep outsw NOP movl %esi,%eax popl %esi ret /* * bcopy family */ ENTRY(bzero) /* void bzero(void *base, u_int cnt) */ pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx cld rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret ENTRY(fillw) /* fillw(pat, base, cnt) */ pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx cld rep stosw popl %edi ret ENTRY(bcopyb) bcopyb: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards. */ addl %ecx,%esi std decl %edi decl %esi rep movsb popl %edi popl %esi cld ret ENTRY(bcopyw) bcopyw: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ shrl $1,%ecx /* copy by 16-bit words */ rep movsw adc %ecx,%ecx /* any bytes left? 
*/ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi std andl $1,%ecx /* any fractional bytes? */ decl %edi decl %esi rep movsb movl 20(%esp),%ecx /* copy remainder by 16-bit words */ shrl $1,%ecx decl %esi decl %edi rep movsw popl %edi popl %esi cld ret ENTRY(bcopyx) movl 16(%esp),%eax cmpl $2,%eax je bcopyw /* not _bcopyw, to avoid multiple mcounts */ cmpl $4,%eax je bcopy jmp bcopyb /* * (ov)bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ALTENTRY(ovbcopy) ENTRY(bcopy) bcopy: pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ shrl $2,%ecx /* copy by 32-bit words */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi std andl $3,%ecx /* any fractional bytes? */ decl %edi decl %esi rep movsb movl 20(%esp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld ret ALTENTRY(ntohl) ENTRY(htonl) movl 4(%esp),%eax #ifdef i486 /* XXX */ /* Since Gas 1.38 does not grok bswap this has been coded as the * equivalent bytes. This can be changed back to bswap when we * upgrade to a newer version of Gas */ /* bswap %eax */ .byte 0x0f .byte 0xc8 #else xchgb %al,%ah roll $16,%eax xchgb %al,%ah #endif ret ALTENTRY(ntohs) ENTRY(htons) movzwl 4(%esp),%eax xchgb %al,%ah ret /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines and possibly * the math- and DOS emulators should be the only places that do this. * * We have to access the memory with user's permissions, so use a segment * selector with RPL 3. For writes to user space we have to additionally * check the PTE for write permission, because the 386 does not check * write permissions when we are executing with EPL 0. The 486 does check * this if the WP bit is set in CR0, so we can use a simpler version here. * * These routines set curpcb->onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->onfault instead of the function. */ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ movl _curpcb,%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi pushl %ebx movl 16(%esp),%esi movl 20(%esp),%edi movl 24(%esp),%ebx orl %ebx,%ebx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. * * Otherwise, it saves having to load and restore %es to get the * usual segment-based protection (the destination segment for movs * is always %es). The other explicit checks for user-writablility * are not quite sufficient. They fail for the user area because * we mapped the user area read/write to avoid having an #ifdef in * vm_machdep.c. They fail for user PTEs and/or PTDs! (107 * addresses including 0xff800000 and 0xfc000000). I'm not sure if * this can be fixed. Marking the PTEs supervisor mode and the * PDE's user mode would almost work, but there may be a problem * with the self-referential PDE. 
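Before the actual copy, the non-486 copyout path below walks the destination's page table entries and requires each page to be valid, user-accessible and writable (the 0x07 pattern tested against _PTmap), calling trapwrite() to simulate a write fault when a page does not qualify. A hedged C sketch of that per-page walk; PG_SIZE, pte_for() and trapwrite() are stand-ins for the kernel's recursive page-table map and helpers:

#include <stddef.h>
#include <stdint.h>

#define PG_SIZE 4096u                   /* stands in for NBPG */
#define PTE_OK  0x07u                   /* valid + user + writable, as tested below */

extern uint32_t pte_for(uintptr_t va);  /* hypothetical PTE lookup */
extern int      trapwrite(uintptr_t va);/* 0 if the simulated fault fixes things up */

/* Return 0 if [uaddr, uaddr+len) is user-writable, nonzero for an EFAULT. */
static int
user_writable(uintptr_t uaddr, size_t len)
{
        uintptr_t va;

        for (va = uaddr & ~(uintptr_t)(PG_SIZE - 1); va < uaddr + len; va += PG_SIZE)
                if ((pte_for(va) & PTE_OK) != PTE_OK && trapwrite(va) != 0)
                        return (1);
        return (0);
}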
*/ movl %edi,%eax addl %ebx,%eax jc copyout_fault #define VM_END_USER_ADDRESS 0xFDBFE000 /* XXX */ cmpl $VM_END_USER_ADDRESS,%eax ja copyout_fault #ifndef USE_486_WRITE_PROTECT /* * We have to check each PTE for user write permission. * The checking may cause a page fault, so it is important to set * up everything for return via copyout_fault before here. */ /* compute number of pages */ movl %edi,%ecx andl $NBPG-1,%ecx addl %ebx,%ecx decl %ecx shrl $IDXSHIFT+2,%ecx incl %ecx /* compute PTE offset for start address */ movl %edi,%edx shrl $IDXSHIFT,%edx andb $0xfc,%dl 1: /* check PTE for each page */ movb _PTmap(%edx),%al andb $0x07,%al /* Pages must be VALID + USERACC + WRITABLE */ cmpb $0x07,%al je 2f /* simulate a trap */ pushl %edx pushl %ecx shll $IDXSHIFT,%edx pushl %edx call _trapwrite /* trapwrite(addr) */ popl %edx popl %ecx popl %edx orl %eax,%eax /* if not ok, return EFAULT */ jnz copyout_fault 2: addl $4,%edx decl %ecx jnz 1b /* check next page */ #endif /* ndef USE_486_WRITE_PROTECT */ /* bcopy(%esi, %edi, %ebx) */ cld movl %ebx,%ecx shrl $2,%ecx rep movsl movb %bl,%cl andb $3,%cl /* XXX can we trust the rest of %ecx on clones? */ rep movsb done_copyout: popl %ebx popl %edi popl %esi xorl %eax,%eax movl _curpcb,%edx movl %eax,PCB_ONFAULT(%edx) ret ALIGN_TEXT copyout_fault: popl %ebx popl %edi popl %esi movl _curpcb,%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret ENTRY(copyin) /* copyin(from_user, to_kernel, len) */ movl _curpcb,%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi movl 12(%esp),%esi /* caddr_t from */ movl 16(%esp),%edi /* caddr_t to */ movl 20(%esp),%ecx /* size_t len */ movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld gs rep movsl movb %al,%cl andb $3,%cl /* copy remaining bytes */ gs rep movsb popl %edi popl %esi xorl %eax,%eax movl _curpcb,%edx movl %eax,PCB_ONFAULT(%edx) ret ALIGN_TEXT copyin_fault: popl %edi popl %esi movl _curpcb,%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * fu{byte,sword,word} : fetch a byte(sword, word) from user memory */ ALTENTRY(fuiword) ENTRY(fuword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ENTRY(fusword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ALTENTRY(fuibyte) ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx gs movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret ALIGN_TEXT fusufault: movl _curpcb,%ecx xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) decl %eax ret /* * su{byte,sword,word}: write a byte(word, longword) to user memory */ #ifdef USE_486_WRITE_PROTECT /* * we only have to set the right segment selector. */ ALTENTRY(suiword) ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret ENTRY(susword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movw 8(%esp),%ax gs movw %ax,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret ALTENTRY(suibyte) ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movb 8(%esp),%al gs movb %al,(%edx) xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) ret #else /* USE_486_WRITE_PROTECT */ /* * here starts the trouble again: check PTE, twice if word crosses * a page boundary. 
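The fu*/su* routines above and below all follow the same recovery protocol: store a handler address in curpcb->pcb_onfault before touching user memory, let the trap handler resume at that address if the access faults, and return -1 from there. In effect it is a one-shot setjmp/longjmp. A hedged user-space model using setjmp (illustrative only; the kernel uses pcb_onfault and the trap handler, not setjmp):

#include <setjmp.h>

static jmp_buf onfault;                 /* plays the role of curpcb->pcb_onfault */

/* In this model the 'trap handler' would call this when an access faults. */
void
fault_recover(void)
{
        longjmp(onfault, 1);
}

/* Model of fuword(): fetch a word, or return -1 if the access faulted. */
long
fuword_model(const long *uaddr)
{
        if (setjmp(onfault))
                return (-1);            /* arrived here via fault_recover() */
        return (*uaddr);                /* real code clears pcb_onfault afterwards */
}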
*/ /* XXX - page boundary crossing is not handled yet */ ALTENTRY(suibyte) ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movb %al,(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret ENTRY(susword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movw %ax,(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret ALTENTRY(suiword) ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl %edx,%eax shrl $IDXSHIFT,%edx andb $0xfc,%dl movb _PTmap(%edx),%dl andb $0x7,%dl /* must be VALID + USERACC + WRITE */ cmpb $0x7,%dl je 1f /* simulate a trap */ pushl %eax call _trapwrite popl %edx orl %eax,%eax jnz fusufault 1: movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,0(%edx) xorl %eax,%eax movl _curpcb,%ecx movl %eax,PCB_ONFAULT(%ecx) ret #endif /* USE_486_WRITE_PROTECT */ /* * copyoutstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ #ifdef USE_486_WRITE_PROTECT ENTRY(copyoutstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f /* * gs override doesn't work for stosb. Use the same explicit check * as in copyout(). It's much slower now because it is per-char. * XXX - however, it would be faster to rewrite this function to use * strlen() and copyout(). */ cmpl $VM_END_USER_ADDRESS,%edi jae cpystrflt lodsb gs stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f #else /* ndef USE_486_WRITE_PROTECT */ ENTRY(copyoutstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ 1: /* * It suffices to check that the first byte is in user space, because * we look at a page at a time and the end address is on a page * boundary. 
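The non-486 copyoutstr that follows checks write permission once per destination page and then copies at most up to the next page boundary, which is why checking only the first byte of each chunk is sufficient. A hedged sketch of the chunk-size computation it performs (NBPG and the function name are illustrative):

#include <stddef.h>
#include <stdint.h>

#define NBPG 4096u

/* Bytes that may be written before the destination crosses a page boundary. */
static size_t
chunk_to_page_end(uintptr_t dst, size_t maxlen)
{
        size_t room = NBPG - (dst & (NBPG - 1));        /* NBPG - (dst % NBPG) */

        return (maxlen < room ? maxlen : room);         /* min(maxlen, room) */
}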
*/ cmpl $VM_END_USER_ADDRESS,%edi jae cpystrflt movl %edi,%eax shrl $IDXSHIFT,%eax andb $0xfc,%al movb _PTmap(%eax),%al andb $7,%al cmpb $7,%al je 2f /* simulate trap */ pushl %edx pushl %edi call _trapwrite popl %edi popl %edx orl %eax,%eax jnz cpystrflt 2: /* copy up to end of this page */ movl %edi,%eax andl $NBPG-1,%eax movl $NBPG,%ecx subl %eax,%ecx /* ecx = NBPG - (src % NBPG) */ cmpl %ecx,%edx jge 3f movl %edx,%ecx /* ecx = min(ecx, edx) */ 3: orl %ecx,%ecx jz 4f decl %ecx decl %edx lodsb stosb orb %al,%al jnz 3b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* next page */ orl %edx,%edx jnz 1b /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f #endif /* USE_486_WRITE_PROTECT */ /* * copyinstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) pushl %esi pushl %edi movl _curpcb,%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f gs lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 6f cpystrflt: movl $EFAULT,%eax 6: /* set *lencopied and return %eax */ movl _curpcb,%ecx movl $0,PCB_ONFAULT(%ecx) movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx orl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret /* * copystr(from, to, maxlen, int *lencopied) */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx orl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret /* * Handling of special 386 registers and descriptor tables etc */ ENTRY(lgdt) /* void lgdt(struct region_descriptor *rdp); */ /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %ax,%ds movl %ax,%es movl %ax,%ss /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax # movl $KCSEL,4(%esp) movl $8,4(%esp) lret /* * void lidt(struct region_descriptor *rdp); */ ENTRY(lidt) movl 4(%esp),%eax lidt (%eax) ret /* * void lldt(u_short sel) */ ENTRY(lldt) lldt 4(%esp) ret /* * void ltr(u_short sel) */ ENTRY(ltr) ltr 4(%esp) ret ENTRY(ssdtosd) /* ssdtosd(*ssdp,*sdp) */ pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll $16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret ENTRY(tlbflush) /* tlbflush() */ movl %cr3,%eax orl $I386_CR3PAT,%eax movl %eax,%cr3 ret ENTRY(load_cr0) /* load_cr0(cr0) */ movl 4(%esp),%eax movl %eax,%cr0 ret ENTRY(rcr0) /* rcr0() */ movl %cr0,%eax ret ENTRY(rcr2) /* rcr2() */ movl %cr2,%eax ret ENTRY(rcr3) /* rcr3() */ movl %cr3,%eax ret ENTRY(load_cr3) /* void load_cr3(caddr_t cr3) */ movl 4(%esp),%eax orl 
$I386_CR3PAT,%eax movl %eax,%cr3 ret /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* restore edi */ movl 20(%eax),%edx /* get rta */ movl %edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret /*****************************************************************************/ /* Scheduling */ /*****************************************************************************/ /* * The following primitives manipulate the run queues. * _whichqs tells which of the 32 queues _qs * have processes in them. Setrq puts processes into queues, Remrq * removes them from queues. The running process is on no queue, * other processes are on a queue related to p->p_pri, divided by 4 * actually to shrink the 0-127 range of priorities into the 32 available * queues. */ .globl _whichqs,_qs,_cnt,_panic .comm _noproc,4 .comm _runrun,4 /* * Setrq(p) * * Call should be made at spl6(), and p->p_stat should be SRUN */ ENTRY(setrq) movl 4(%esp),%eax cmpl $0,P_RLINK(%eax) /* should not be on q already */ je set1 pushl $set2 call _panic set1: movzbl P_PRI(%eax),%edx shrl $2,%edx btsl %edx,_whichqs /* set q full bit */ shll $3,%edx addl $_qs,%edx /* locate q hdr */ movl %edx,P_LINK(%eax) /* link process on tail of q */ movl P_RLINK(%edx),%ecx movl %ecx,P_RLINK(%eax) movl %eax,P_RLINK(%edx) movl %eax,P_LINK(%ecx) ret set2: .asciz "setrq" /* * Remrq(p) * * Call should be made at spl6(). */ ENTRY(remrq) movl 4(%esp),%eax movzbl P_PRI(%eax),%edx shrl $2,%edx btrl %edx,_whichqs /* clear full bit, panic if clear already */ jb rem1 pushl $rem3 call _panic rem1: pushl %edx movl P_LINK(%eax),%ecx /* unlink process */ movl P_RLINK(%eax),%edx movl %edx,P_RLINK(%ecx) movl P_RLINK(%eax),%ecx movl P_LINK(%eax),%edx movl %edx,P_LINK(%ecx) popl %edx movl $_qs,%ecx shll $3,%edx addl %edx,%ecx cmpl P_LINK(%ecx),%ecx /* q still has something? */ je rem2 shrl $3,%edx /* yes, set bit as still full */ btsl %edx,_whichqs rem2: movl $0,P_RLINK(%eax) /* zap reverse link to indicate off list */ ret rem3: .asciz "remrq" sw0: .asciz "swtch" /* * When no processes are on the runq, Swtch branches to idle * to wait for something to come ready. */ ALIGN_TEXT Idle: sti SHOW_STI ALIGN_TEXT idle_loop: call _spl0 cmpl $0,_whichqs jne sw1 hlt /* wait for interrupt */ jmp idle_loop badsw: pushl $sw0 call _panic /*NOTREACHED*/ /* * Swtch() */ SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ ENTRY(swtch) incl _cnt+V_SWTCH /* switch to new process. first, save context as needed */ movl _curproc,%ecx /* if no process to save, don't bother */ testl %ecx,%ecx je sw1 movl P_ADDR(%ecx),%ecx movl (%esp),%eax /* Hardware registers */ movl %eax,PCB_EIP(%ecx) movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) -#ifdef NPX +#if NNPX > 0 /* have we used fp, and need a save? 
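The run-queue primitives above and the queue scan in swtch() below work off a single 32-bit mask: the queue index is p_pri >> 2 (squeezing priorities 0-127 into 32 queues), whichqs has bit n set while queue n is non-empty, and the scheduler picks the lowest set bit (best priority) with bsf. A hedged C sketch of that bookkeeping; the names are stand-ins and a portable loop replaces the bsfl instruction:

#include <stdint.h>

#define NQS 32                          /* number of run queues */

static uint32_t whichqs_model;          /* bit n set => queue n non-empty */

static unsigned
runq_index(unsigned p_pri)              /* as setrq computes it */
{
        return ((p_pri >> 2) & (NQS - 1));
}

static void
runq_mark(unsigned idx, int nonempty)   /* setrq sets the bit, remrq may clear it */
{
        if (nonempty)
                whichqs_model |= 1u << idx;
        else
                whichqs_model &= ~(1u << idx);
}

/* Lowest set bit, i.e. the best non-empty queue; -1 means idle. */
static int
runq_pick(void)
{
        uint32_t qs = whichqs_model;
        int idx;

        if (qs == 0)
                return (-1);
        for (idx = 0; (qs & 1u) == 0; idx++)
                qs >>= 1;               /* models the bsfl instruction */
        return (idx);
}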
*/ mov _curproc,%eax cmp %eax,_npxproc jne 1f pushl %ecx /* h/w bugs make saving complicated */ leal PCB_SAVEFPU(%ecx),%eax pushl %eax call _npxsave /* do it in a big C function */ popl %eax popl %ecx 1: -#endif +#endif /* NNPX > 0 */ movl _CMAP2,%eax /* save temporary map PTE */ movl %eax,PCB_CMAP2(%ecx) /* in our context */ movl $0,_curproc /* out of process */ # movw _cpl,%ax # movw %ax,PCB_IML(%ecx) /* save ipl */ /* save is done, now choose a new process or idle */ sw1: cli SHOW_CLI movl _whichqs,%edi 2: /* XXX - bsf is sloow */ bsfl %edi,%eax /* find a full q */ je Idle /* if none, idle */ /* XX update whichqs? */ swfnd: btrl %eax,%edi /* clear q full status */ jnb 2b /* if it was clear, look for another */ movl %eax,%ebx /* save which one we are using */ shll $3,%eax addl $_qs,%eax /* select q */ movl %eax,%esi #ifdef DIAGNOSTIC cmpl P_LINK(%eax),%eax /* linked to self? (e.g. not on list) */ je badsw /* not possible */ #endif movl P_LINK(%eax),%ecx /* unlink from front of process q */ movl P_LINK(%ecx),%edx movl %edx,P_LINK(%eax) movl P_RLINK(%ecx),%eax movl %eax,P_RLINK(%edx) cmpl P_LINK(%ecx),%esi /* q empty */ je 3f btsl %ebx,%edi /* nope, set to indicate full */ 3: movl %edi,_whichqs /* update q status */ movl $0,%eax movl %eax,_want_resched #ifdef DIAGNOSTIC cmpl %eax,P_WCHAN(%ecx) jne badsw cmpb $SRUN,P_STAT(%ecx) jne badsw #endif movl %eax,P_RLINK(%ecx) /* isolate process to run */ movl P_ADDR(%ecx),%edx movl PCB_CR3(%edx),%ebx /* switch address space */ movl %ebx,%cr3 /* restore context */ movl PCB_EBX(%edx),%ebx movl PCB_ESP(%edx),%esp movl PCB_EBP(%edx),%ebp movl PCB_ESI(%edx),%esi movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) movl PCB_CMAP2(%edx),%eax /* get temporary map */ movl %eax,_CMAP2 /* reload temporary map PTE */ movl %ecx,_curproc /* into next process */ movl %edx,_curpcb pushl %edx /* save p to return */ /* * XXX - 0.0 forgot to save it - is that why this was commented out in 0.1? * I think restoring the cpl is unnecessary, but we must turn off the cli * now that spl*() don't do it as a side affect. */ pushl PCB_IML(%edx) sti SHOW_STI #if 0 call _splx #endif addl $4,%esp /* * XXX - 0.0 gets here via swtch_to_inactive(). I think 0.1 gets here in the * same way. Better return a value. */ popl %eax /* return(p); */ ret ENTRY(mvesp) movl %esp,%eax ret /* * struct proc *swtch_to_inactive(p) ; struct proc *p; * * At exit of a process, move off the address space of the * process and onto a "safe" one. Then, on a temporary stack * return and run code that disposes of the old state. * Since this code requires a parameter from the "old" stack, * pass it back as a return value. */ ENTRY(swtch_to_inactive) popl %edx /* old pc */ popl %eax /* arg, our return value */ movl _IdlePTD,%ecx movl %ecx,%cr3 /* good bye address space */ #write buffer? movl $tmpstk-4,%esp /* temporary stack, compensated for call */ jmp %edx /* return, execute remainder of cleanup */ /* * savectx(pcb, altreturn) * Update pcb, saving current processor state and arranging * for alternate return ala longjmp in swtch if altreturn is true. */ ENTRY(savectx) movl 4(%esp),%ecx movw _cpl,%ax movw %ax,PCB_IML(%ecx) movl (%esp),%eax movl %eax,PCB_EIP(%ecx) movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) -#ifdef NPX +#if NNPX > 0 /* * If npxproc == NULL, then the npx h/w state is irrelevant and the * state had better already be in the pcb. 
This is true for forks * but not for dumps (the old book-keeping with FP flags in the pcb * always lost for dumps because the dump pcb has 0 flags). * * If npxproc != NULL, then we have to save the npx h/w state to * npxproc's pcb and copy it to the requested pcb, or save to the * requested pcb and reload. Copying is easier because we would * have to handle h/w bugs for reloading. We used to lose the * parent's npx state for forks by forgetting to reload. */ mov _npxproc,%eax testl %eax,%eax je 1f pushl %ecx movl P_ADDR(%eax),%eax leal PCB_SAVEFPU(%eax),%eax pushl %eax pushl %eax call _npxsave popl %eax popl %eax popl %ecx pushl %ecx pushl $108+8*2 /* XXX h/w state size + padding */ leal PCB_SAVEFPU(%ecx),%ecx pushl %ecx pushl %eax call _bcopy addl $12,%esp popl %ecx 1: -#endif +#endif /* NNPX > 0 */ movl _CMAP2,%edx /* save temporary map PTE */ movl %edx,PCB_CMAP2(%ecx) /* in our context */ cmpl $0,8(%esp) je 1f movl %esp,%edx /* relocate current sp relative to pcb */ subl $_kstack,%edx /* (sp is relative to kstack): */ addl %edx,%ecx /* pcb += sp - kstack; */ movl %eax,(%ecx) /* write return pc at (relocated) sp@ */ /* this mess deals with replicating register state gcc hides */ movl 12(%esp),%eax movl %eax,12(%ecx) movl 16(%esp),%eax movl %eax,16(%ecx) movl 20(%esp),%eax movl %eax,20(%ecx) movl 24(%esp),%eax movl %eax,24(%ecx) 1: xorl %eax,%eax /* return 0 */ ret /* * addupc(int pc, struct uprof *up, int ticks): * update profiling information for the user process. */ ENTRY(addupc) pushl %ebp movl %esp,%ebp movl 12(%ebp),%edx /* up */ movl 8(%ebp),%eax /* pc */ subl PR_OFF(%edx),%eax /* pc -= up->pr_off */ jl L1 /* if (pc < 0) return */ shrl $1,%eax /* praddr = pc >> 1 */ imull PR_SCALE(%edx),%eax /* praddr *= up->pr_scale */ shrl $15,%eax /* praddr = praddr << 15 */ andl $-2,%eax /* praddr &= ~1 */ cmpl PR_SIZE(%edx),%eax /* if (praddr > up->pr_size) return */ ja L1 /* addl %eax,%eax /* praddr -> word offset */ addl PR_BASE(%edx),%eax /* praddr += up-> pr_base */ movl 16(%ebp),%ecx /* ticks */ movl _curpcb,%edx movl $proffault,PCB_ONFAULT(%edx) addl %ecx,(%eax) /* storage location += ticks */ movl $0,PCB_ONFAULT(%edx) L1: leave ret ALIGN_TEXT proffault: /* if we get a fault, then kill profiling all together */ movl $0,PCB_ONFAULT(%edx) /* squish the fault handler */ movl 12(%ebp),%ecx movl $0,PR_SCALE(%ecx) /* up->pr_scale = 0 */ leave ret /* To be done: */ ENTRY(astoff) ret /*****************************************************************************/ /* Trap handling */ /*****************************************************************************/ /* * Trap and fault vector routines * * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose * control. The sti's give the standard losing behaviour for ddb and kgdb. 
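addupc() above converts a user pc into a byte offset in the profiling buffer: subtract pr_off, halve, multiply by the fixed-point pr_scale, shift the product right by 15 and clear the low bit, then bounds-check against pr_size before adding the ticks to the counter at pr_base + offset. A hedged C restatement of that arithmetic (struct and function names are illustrative; the assembly does the final add as a 32-bit add):

#include <stdint.h>

struct uprof_model {
        char            *pr_base;       /* profiling buffer */
        uint32_t        pr_size;        /* buffer limit, in bytes */
        uint32_t        pr_off;         /* lowest profiled pc */
        uint32_t        pr_scale;       /* fixed-point scale factor */
};

/* Add 'ticks' to the bucket for 'pc'; out-of-range pcs are simply ignored. */
static void
addupc_model(uint32_t pc, struct uprof_model *up, uint32_t ticks)
{
        uint32_t off;

        if (pc < up->pr_off)
                return;                                 /* below the profiled range */
        off = (((pc - up->pr_off) >> 1) * up->pr_scale) >> 15;
        off &= ~1u;                                     /* clear the low bit */
        if (off > up->pr_size)
                return;                                 /* past the buffer */
        *(uint32_t *)(up->pr_base + off) += ticks;      /* illustrative 32-bit add */
}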
*/ #define IDTVEC(name) ALIGN_TEXT; .globl _X/**/name; _X/**/name: #define TRAP(a) pushl $(a) ; jmp alltraps #ifdef KGDB # define BPTTRAP(a) sti; pushl $(a) ; jmp bpttraps #else # define BPTTRAP(a) sti; TRAP(a) #endif IDTVEC(div) pushl $0; TRAP(T_DIVIDE) IDTVEC(dbg) #ifdef BDBTRAP BDBTRAP(dbg) #endif pushl $0; BPTTRAP(T_TRCTRAP) IDTVEC(nmi) pushl $0; TRAP(T_NMI) IDTVEC(bpt) #ifdef BDBTRAP BDBTRAP(bpt) #endif pushl $0; BPTTRAP(T_BPTFLT) IDTVEC(ofl) pushl $0; TRAP(T_OFLOW) IDTVEC(bnd) pushl $0; TRAP(T_BOUND) IDTVEC(ill) pushl $0; TRAP(T_PRIVINFLT) IDTVEC(dna) pushl $0; TRAP(T_DNA) IDTVEC(dble) TRAP(T_DOUBLEFLT) /*PANIC("Double Fault");*/ IDTVEC(fpusegm) pushl $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) /*PANIC("TSS not valid");*/ IDTVEC(missing) TRAP(T_SEGNPFLT) IDTVEC(stk) TRAP(T_STKFLT) IDTVEC(prot) TRAP(T_PROTFLT) IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(rsvd) pushl $0; TRAP(T_RESERVED) IDTVEC(fpu) -#ifdef NPX +#if NNPX > 0 /* * Handle like an interrupt so that we can call npxintr to clear the * error. It would be better to handle npx interrupts as traps but * this is difficult for nested interrupts. */ pushl $0 /* dummy error code */ pushl $T_ASTFLT pushal nop /* silly, the bug is for popal and it only * bites when the next instruction has a * complicated address mode */ pushl %ds pushl %es /* now the stack frame is a trap frame */ movl $KDSEL,%eax movl %ax,%ds movl %ax,%es pushl _cpl pushl $0 /* dummy unit to finish building intr frame */ incl _cnt+V_TRAP call _npxintr jmp doreti -#else +#else /* NNPX > 0 */ pushl $0; TRAP(T_ARITHTRAP) -#endif +#endif /* NNPX > 0 */ /* 17 - 31 reserved for future exp */ IDTVEC(rsvd0) pushl $0; TRAP(17) IDTVEC(rsvd1) pushl $0; TRAP(18) IDTVEC(rsvd2) pushl $0; TRAP(19) IDTVEC(rsvd3) pushl $0; TRAP(20) IDTVEC(rsvd4) pushl $0; TRAP(21) IDTVEC(rsvd5) pushl $0; TRAP(22) IDTVEC(rsvd6) pushl $0; TRAP(23) IDTVEC(rsvd7) pushl $0; TRAP(24) IDTVEC(rsvd8) pushl $0; TRAP(25) IDTVEC(rsvd9) pushl $0; TRAP(26) IDTVEC(rsvd10) pushl $0; TRAP(27) IDTVEC(rsvd11) pushl $0; TRAP(28) IDTVEC(rsvd12) pushl $0; TRAP(29) IDTVEC(rsvd13) pushl $0; TRAP(30) IDTVEC(rsvd14) pushl $0; TRAP(31) SUPERALIGN_TEXT alltraps: pushal nop pushl %ds pushl %es movl $KDSEL,%eax movl %ax,%ds movl %ax,%es calltrap: incl _cnt+V_TRAP call _trap /* * Return through doreti to handle ASTs. Have to change trap frame * to interrupt frame. */ movl $T_ASTFLT,4+4+32(%esp) /* new trap type (err code not used) */ pushl _cpl pushl $0 /* dummy unit */ jmp doreti #ifdef KGDB /* * This code checks for a kgdb trap, then falls through * to the regular trap code. */ SUPERALIGN_TEXT bpttraps: pushal nop pushl %es pushl %ds movl $KDSEL,%eax movl %ax,%ds movl %ax,%es testb $SEL_RPL_MASK,TRAPF_CS_OFF(%esp) /* non-kernel mode? */ jne calltrap /* yes */ call _kgdb_trap_glue jmp calltrap #endif /* * Call gate entry for syscall */ SUPERALIGN_TEXT IDTVEC(syscall) pushfl /* only for stupid carry bit and more stupid wait3 cc kludge */ /* XXX - also for direction flag (bzero, etc. clear it) */ pushal /* only need eax,ecx,edx - trap resaves others */ nop movl $KDSEL,%eax /* switch to kernel segments */ movl %ax,%ds movl %ax,%es incl _cnt+V_SYSCALL /* kml 3/25/93 */ call _syscall /* * Return through doreti to handle ASTs. Have to change syscall frame * to interrupt frame. * * XXX - we should have set up the frame earlier to avoid the * following popal/pushal (not much can be done to avoid shuffling * the flags). Consistent frames would simplify things all over. 
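At the end of the syscall path below, three stack slots above the pushal area are rotated so that the frame left by the user's lcall plus pushfl (eflags, eip, cs in memory order) ends up looking like an interrupt frame (eip, cs, eflags) that doreti can unwind. A hedged sketch of that rotation only, as one reading of the three movl pairs there; slot contents are as labelled in the comments:

/* Rotate (eflags, eip, cs) into (eip, cs, eflags), as the syscall exit does. */
static void
reorder_syscall_frame(unsigned slot[3])
{
        unsigned eflags = slot[0];

        slot[0] = slot[1];              /* eip moves down */
        slot[1] = slot[2];              /* cs moves down */
        slot[2] = eflags;               /* eflags goes on top */
}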
*/ movl 32+0(%esp),%eax /* old flags, shuffle to above cs:eip */ movl 32+4(%esp),%ebx /* `int' frame should have been ef, eip, cs */ movl 32+8(%esp),%ecx movl %ebx,32+0(%esp) movl %ecx,32+4(%esp) movl %eax,32+8(%esp) popal nop pushl $0 /* dummy error code */ pushl $T_ASTFLT pushal nop movl __udatasel,%eax /* switch back to user segments */ pushl %eax /* XXX - better to preserve originals? */ pushl %eax pushl _cpl pushl $0 jmp doreti #ifdef SHOW_A_LOT /* * 'show_bits' was too big when defined as a macro. The line length for some * enclosing macro was too big for gas. Perhaps the code would have blown * the cache anyway. */ ALIGN_TEXT show_bits: pushl %eax SHOW_BIT(0) SHOW_BIT(1) SHOW_BIT(2) SHOW_BIT(3) SHOW_BIT(4) SHOW_BIT(5) SHOW_BIT(6) SHOW_BIT(7) SHOW_BIT(8) SHOW_BIT(9) SHOW_BIT(10) SHOW_BIT(11) SHOW_BIT(12) SHOW_BIT(13) SHOW_BIT(14) SHOW_BIT(15) popl %eax ret .data bit_colors: .byte GREEN,RED,0,0 .text #endif /* SHOW_A_LOT */ /* * include generated interrupt vectors and ISA intr code */ #include "i386/isa/vector.s" #include "i386/isa/icu.s" Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 607) +++ head/sys/i386/i386/machdep.c (revision 608) @@ -1,1277 +1,1285 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.10 1993/10/10 06:01:44 rgrimes Exp $ + * $Id: machdep.c,v 1.11 1993/10/14 18:15:35 rgrimes Exp $ */ #include "npx.h" #include "isa.h" #include #include "param.h" #include "systm.h" #include "signalvar.h" #include "kernel.h" #include "map.h" #include "proc.h" #include "user.h" #include "exec.h" /* for PS_STRINGS */ #include "buf.h" #include "reboot.h" #include "conf.h" #include "file.h" #include "callout.h" #include "malloc.h" #include "mbuf.h" #include "msgbuf.h" #include "net/netisr.h" #ifdef SYSVSHM #include "sys/shm.h" #endif #include "vm/vm.h" #include "vm/vm_kern.h" #include "vm/vm_page.h" #include "sys/exec.h" #include "sys/vnode.h" #ifndef MACHINE_NONCONTIG extern vm_offset_t avail_end; #else extern vm_offset_t avail_start, avail_end; static vm_offset_t hole_start, hole_end; static vm_offset_t avail_next; static unsigned int avail_remaining; #endif /* MACHINE_NONCONTIG */ #include "machine/cpu.h" #include "machine/reg.h" #include "machine/psl.h" #include "machine/specialreg.h" #include "machine/sysarch.h" #include "i386/isa/isa.h" #include "i386/isa/rtc.h" #define EXPECT_BASEMEM 640 /* The expected base memory*/ #define INFORM_WAIT 1 /* Set to pause berfore crash in weird cases*/ /* * Declare these as initialized data so we can patch them. */ int nswbuf = 0; #ifdef NBUF int nbuf = NBUF; #else int nbuf = 0; #endif #ifdef BUFPAGES int bufpages = BUFPAGES; #else int bufpages = 0; #endif extern int freebufspace; int _udatasel, _ucodesel; /* * Machine-dependent startup code */ int boothowto = 0, Maxmem = 0; long dumplo; int physmem, maxmem; extern int bootdev; #ifdef SMALL extern int forcemaxmem; #endif int biosmem; extern cyloffset; int cpu_class; void dumpsys __P((void)); void cpu_startup() { register int unixsize; register unsigned i; register struct pte *pte; int mapaddr, j; register caddr_t v; int maxbufs, base, residual; extern long Usrptsize; vm_offset_t minaddr, maxaddr; vm_size_t size; int firstaddr; /* * Initialize error message buffer (at end of core). */ /* avail_end was pre-decremented in pmap_bootstrap to compensate */ for (i = 0; i < btoc(sizeof (struct msgbuf)); i++) #ifndef MACHINE_NONCONTIG pmap_enter(pmap_kernel(), msgbufp, avail_end + i * NBPG, VM_PROT_ALL, TRUE); #else pmap_enter(pmap_kernel(), (caddr_t)msgbufp + i * NBPG, avail_end + i * NBPG, VM_PROT_ALL, TRUE); #endif msgbufmapped = 1; /* * Good {morning,afternoon,evening,night}. */ printf(version); identifycpu(); printf("real mem = %d\n", ctob(physmem)); /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) /* valloc(cfree, struct cblock, nclist); no clists any more!!! 
- cgd */ valloc(callout, struct callout, ncallout); #ifdef NetBSD valloc(swapmap, struct map, nswapmap = maxproc * 2); #endif #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif /* * Determine how many buffers to allocate. - * Use 10% of memory for the first 2 Meg, 5% of the remaining - * memory. Insure a minimum of 16 buffers. + * Use 20% of memory of memory beyond the first 2MB + * Insure a minimum of 16 fs buffers. * We allocate 1/2 as many swap buffer headers as file i/o buffers. */ if (bufpages == 0) - if (physmem < btoc(2 * 1024 * 1024)) - bufpages = physmem / 10 / CLSIZE; - else - bufpages = (btoc(2 * 1024 * 1024) + physmem) / 20 / CLSIZE; + bufpages = (ctob(physmem) - 2048*1024) / NBPG / 5; + if (bufpages < 32) + bufpages = 32; + /* + * We must still limit the maximum number of buffers to be no + * more than 2/5's of the size of the kernal malloc region, this + * will only take effect for machines with lots of memory + */ + bufpages = min(bufpages, (VM_KMEM_SIZE / NBPG) * 2 / 5); if (nbuf == 0) { nbuf = bufpages / 2; if (nbuf < 16) nbuf = 16; } freebufspace = bufpages * NBPG; if (nswbuf == 0) { nswbuf = (nbuf / 2) &~ 1; /* force even */ if (nswbuf > 256) nswbuf = 256; /* sanity */ } valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); /* * Allocate a submap for buffer space allocations. + * XXX we are NOT using buffer_map, but due to + * the references to it we will just allocate 1 page of + * vm (not real memory) to make things happy... */ buffer_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, - bufpages*NBPG, TRUE); + /* bufpages * */NBPG, TRUE); /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. */ /* exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, * 16*NCARGS, TRUE); * NOT CURRENTLY USED -- cgd */ /* * Allocate a submap for physio */ phys_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, VM_PHYS_SIZE, TRUE); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ mclrefcnt = (char *)malloc(NMBCLUSTERS+CLBYTES/MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES); mb_map = kmem_suballoc(kernel_map, (vm_offset_t)&mbutl, &maxaddr, VM_MBUF_SIZE, FALSE); /* * Initialize callouts */ callfree = callout; for (i = 1; i < ncallout; i++) callout[i-1].c_next = &callout[i]; printf("avail mem = %d\n", ptoa(vm_page_free_count)); printf("using %d buffers containing %d bytes of memory\n", nbuf, bufpages * CLBYTES); /* * Set up CPU-specific registers, cache, etc. */ initcpu(); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); /* * Configure the system. 
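The revised buffer sizing above reads as: take one fifth (20%) of the physical memory beyond the first 2MB for buffer pages, never fewer than 32, and never more than two fifths of the kernel malloc region. A hedged C restatement of that calculation; NBPG and VM_KMEM_SIZE are given placeholder values here, not the kernel's:

#define NBPG            4096
#define VM_KMEM_SIZE    (16 * 1024 * 1024)      /* placeholder, not the kernel's value */

/* Restatement of the bufpages heuristic above (illustrative only). */
static int
size_bufpages(long physbytes)
{
        int bufpages = (physbytes - 2048 * 1024) / NBPG / 5;    /* 20% beyond 2MB */
        int kmemcap = (VM_KMEM_SIZE / NBPG) * 2 / 5;            /* 2/5 of kmem pages */

        if (bufpages < 32)
                bufpages = 32;
        if (bufpages > kmemcap)
                bufpages = kmemcap;
        return (bufpages);
}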
*/ configure(); } struct cpu_nameclass i386_cpus[] = { { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ { "i386DX", CPUCLASS_386 }, /* CPU_386 */ { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ { "i486DX", CPUCLASS_486 }, /* CPU_486 */ { "i586", CPUCLASS_586 }, /* CPU_586 */ }; identifycpu() /* translated from hp300 -- cgd */ { printf("CPU: "); if (cpu >= 0 && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) { printf("%s", i386_cpus[cpu].cpu_name); cpu_class = i386_cpus[cpu].cpu_class; } else { printf("unknown cpu type %d\n", cpu); panic("startup: bad cpu id"); } printf(" ("); switch(cpu_class) { case CPUCLASS_286: printf("286"); break; case CPUCLASS_386: printf("386"); break; case CPUCLASS_486: printf("486"); break; case CPUCLASS_586: printf("586"); break; default: printf("unknown"); /* will panic below... */ } printf("-class CPU)"); printf("\n"); /* cpu speed would be nice, but how? */ /* * Now that we have told the user what they have, * let them know if that machine type isn't configured. */ switch (cpu_class) { case CPUCLASS_286: /* a 286 should not make it this far, anyway */ #if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) #error This kernel is not configured for one of the supported CPUs #endif #if !defined(I386_CPU) case CPUCLASS_386: #endif #if !defined(I486_CPU) case CPUCLASS_486: #endif #if !defined(I586_CPU) case CPUCLASS_586: #endif panic("CPU class not configured"); default: break; } } #ifdef PGINPROF /* * Return the difference (in microseconds) * between the current time and a previous * time as represented by the arguments. * If there is a pending clock interrupt * which has not been serviced due to high * ipl, return error code. */ /*ARGSUSED*/ vmtime(otime, olbolt, oicr) register int otime, olbolt, oicr; { return (((time.tv_sec-otime)*60 + lbolt-olbolt)*16667); } #endif extern int kstack[]; /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig, mask; unsigned code; { register struct proc *p = curproc; register int *regs; register struct sigframe *fp; struct sigacts *ps = p->p_sigacts; int oonstack, frmtrap; regs = p->p_regs; oonstack = ps->ps_onstack; frmtrap = curpcb->pcb_flags & FM_TRAP; /* * Allocate and validate space for the signal handler * context. Note that if the stack is in P0 space, the * call to grow() is a nop, and the useracc() check * will fail if the process has not already allocated * the space with a `brk'. */ if (!ps->ps_onstack && (ps->ps_sigonstack & sigmask(sig))) { fp = (struct sigframe *)(ps->ps_sigsp - sizeof(struct sigframe)); ps->ps_onstack = 1; } else { if (frmtrap) fp = (struct sigframe *)(regs[tESP] - sizeof(struct sigframe)); else fp = (struct sigframe *)(regs[sESP] - sizeof(struct sigframe)); } if ((unsigned)fp <= (unsigned)p->p_vmspace->vm_maxsaddr + MAXSSIZ - ctob(p->p_vmspace->vm_ssize)) (void)grow(p, (unsigned)fp); if (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; sig = sigmask(SIGILL); p->p_sigignore &= ~sig; p->p_sigcatch &= ~sig; p->p_sigmask &= ~sig; psignal(p, SIGILL); return; } /* * Build the argument list for the signal handler. 
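sendsig() below fills in a sigframe on the chosen stack so that the sigcode trampoline can call the handler with (signum, code, scp), and so that sigreturn() can later restore the scratch registers and the saved context. A hedged sketch of the layout implied by the assignments that follow; the field order and the struct name are illustrative, the real struct sigframe is defined in a header:

/* Approximate shape of the frame sendsig() builds (model only). */
struct sigframe_model {
        int      sf_signum;                     /* handler argument 1 */
        int      sf_code;                       /* handler argument 2 */
        void    *sf_scp;                        /* handler argument 3: &sf_sc */
        void   (*sf_handler)();                 /* catcher, called by sigcode */
        int      sf_eax, sf_edx, sf_ecx;        /* scratch regs for sigreturn */
        struct {
                int     sc_onstack;
                int     sc_mask;
                int     sc_sp, sc_fp, sc_pc, sc_ps;     /* restored by sigreturn */
        } sf_sc;
};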
*/ fp->sf_signum = sig; fp->sf_code = code; fp->sf_scp = &fp->sf_sc; fp->sf_handler = catcher; /* save scratch registers */ if(frmtrap) { fp->sf_eax = regs[tEAX]; fp->sf_edx = regs[tEDX]; fp->sf_ecx = regs[tECX]; } else { fp->sf_eax = regs[sEAX]; fp->sf_edx = regs[sEDX]; fp->sf_ecx = regs[sECX]; } /* * Build the signal context to be used by sigreturn. */ fp->sf_sc.sc_onstack = oonstack; fp->sf_sc.sc_mask = mask; if(frmtrap) { fp->sf_sc.sc_sp = regs[tESP]; fp->sf_sc.sc_fp = regs[tEBP]; fp->sf_sc.sc_pc = regs[tEIP]; fp->sf_sc.sc_ps = regs[tEFLAGS]; regs[tESP] = (int)fp; regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc; } else { fp->sf_sc.sc_sp = regs[sESP]; fp->sf_sc.sc_fp = regs[sEBP]; fp->sf_sc.sc_pc = regs[sEIP]; fp->sf_sc.sc_ps = regs[sEFLAGS]; regs[sESP] = (int)fp; regs[sEIP] = (int)((struct pcb *)kstack)->pcb_sigc; } } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper priviledges or to cause * a machine fault. */ struct sigreturn_args { struct sigcontext *sigcntxp; }; sigreturn(p, uap, retval) struct proc *p; struct sigreturn_args *uap; int *retval; { register struct sigcontext *scp; register struct sigframe *fp; register int *regs = p->p_regs; /* * (XXX old comment) regs[sESP] points to the return address. * The user scp pointer is above that. * The return address is faked in the signal trampoline code * for consistency. */ scp = uap->sigcntxp; fp = (struct sigframe *) ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0) return(EINVAL); /* restore scratch registers */ regs[sEAX] = fp->sf_eax ; regs[sEDX] = fp->sf_edx ; regs[sECX] = fp->sf_ecx ; if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0) return(EINVAL); #ifdef notyet if ((scp->sc_ps & PSL_MBZ) != 0 || (scp->sc_ps & PSL_MBO) != PSL_MBO) { return(EINVAL); } #endif p->p_sigacts->ps_onstack = scp->sc_onstack & 01; p->p_sigmask = scp->sc_mask &~ (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP)); regs[sEBP] = scp->sc_fp; regs[sESP] = scp->sc_sp; regs[sEIP] = scp->sc_pc; regs[sEFLAGS] = scp->sc_ps; return(EJUSTRETURN); } /* * a simple function to make the system panic (and dump a vmcore) * in a predictable fashion */ void diediedie() { panic("because you said to!"); } int waittime = -1; struct pcb dumppcb; void boot(arghowto) int arghowto; { register long dummy; /* r12 is reserved */ register int howto; /* r11 == how to boot */ register int devtype; /* r10 == major of root dev */ extern int cold; int nomsg = 1; if(cold) { printf("hit reset please"); for(;;); } howto = arghowto; if ((howto&RB_NOSYNC) == 0 && waittime < 0 && bfreelist[0].b_forw) { register struct buf *bp; int iter, nbusy; waittime = 0; (void) splnet(); printf("syncing disks... "); /* * Release inodes held by texts before update. */ if (panicstr == 0) vnode_pager_umount(NULL); sync((struct sigcontext *)0); /* * Unmount filesystems */ #if 0 if (panicstr == 0) vfs_unmountall(); #endif for (iter = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) nbusy++; if (nbusy == 0) break; if (nomsg) { printf("updating disks before rebooting... 
"); nomsg = 0; } printf("%d ", nbusy); DELAY(40000 * iter); } if (nbusy) printf("giving up\n"); else printf("done\n"); DELAY(10000); /* wait for printf to finish */ } splhigh(); devtype = major(rootdev); if (howto&RB_HALT) { printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); cngetc(); } else { if (howto & RB_DUMP) { savectx(&dumppcb, 0); dumppcb.pcb_ptd = rcr3(); dumpsys(); /*NOTREACHED*/ } } #ifdef lint dummy = 0; dummy = dummy; printf("howto %d, devtype %d\n", arghowto, devtype); #endif cpu_reset(); for(;;) ; /*NOTREACHED*/ } unsigned dumpmag = 0x8fca0101; /* magic number for savecore */ int dumpsize = 0; /* also for savecore */ /* * Doadump comes here after turning off memory management and * getting on the dump stack, either when called above, or by * the auto-restart code. */ void dumpsys() { if (dumpdev == NODEV) return; if ((minor(dumpdev)&07) != 1) return; dumpsize = physmem; printf("\ndumping to dev %x, offset %d\n", dumpdev, dumplo); printf("dump "); switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) { case ENXIO: printf("device bad\n"); break; case EFAULT: printf("device not ready\n"); break; case EINVAL: printf("area improper\n"); break; case EIO: printf("i/o error\n"); break; case EINTR: printf("aborted from console\n"); break; default: printf("succeeded\n"); break; } printf("\n\n"); DELAY(1000); } #ifdef HZ /* * If HZ is defined we use this code, otherwise the code in * /sys/i386/i386/microtime.s is used. The othercode only works * for HZ=100. */ microtime(tvp) register struct timeval *tvp; { int s = splhigh(); *tvp = time; tvp->tv_usec += tick; while (tvp->tv_usec > 1000000) { tvp->tv_sec++; tvp->tv_usec -= 1000000; } splx(s); } #endif /* HZ */ physstrat(bp, strat, prio) struct buf *bp; int (*strat)(), prio; { register int s; caddr_t baddr; /* * vmapbuf clobbers b_addr so we must remember it so that it * can be restored after vunmapbuf. This is truely rude, we * should really be storing this in a field in the buf struct * but none are available and I didn't want to add one at * this time. Note that b_addr for dirty page pushes is * restored in vunmapbuf. (ugh!) 
*/ baddr = bp->b_un.b_addr; vmapbuf(bp); (*strat)(bp); /* pageout daemon doesn't wait for pushed pages */ if (bp->b_flags & B_DIRTY) return; s = splbio(); while ((bp->b_flags & B_DONE) == 0) sleep((caddr_t)bp, prio); splx(s); vunmapbuf(bp); bp->b_un.b_addr = baddr; } initcpu() { } /* * Clear registers on exec */ void setregs(p, entry) struct proc *p; u_long entry; { p->p_regs[sEBP] = 0; /* bottom of the fp chain */ p->p_regs[sEIP] = entry; p->p_addr->u_pcb.pcb_flags = 0; /* no fp at all */ load_cr0(rcr0() | CR0_TS); /* start emulating */ #if NNPX > 0 npxinit(__INITIAL_NPXCW__); -#endif +#endif /* NNPX > 0 */ } /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ #define DESCRIPTOR_SIZE 8 #define GNULL_SEL 0 /* Null Descriptor */ #define GCODE_SEL 1 /* Kernel Code Descriptor */ #define GDATA_SEL 2 /* Kernel Data Descriptor */ #define GLDT_SEL 3 /* LDT - eventually one per process */ #define GTGATE_SEL 4 /* Process task switch gate */ #define GPANIC_SEL 5 /* Task state to consider panic from */ #define GPROC0_SEL 6 /* Task state process slot zero and up */ #define NGDT GPROC0_SEL+1 unsigned char gdt[GPROC0_SEL+1][DESCRIPTOR_SIZE]; /* interrupt descriptor table */ struct gate_descriptor idt[NIDT]; /* local descriptor table */ unsigned char ldt[5][DESCRIPTOR_SIZE]; #define LSYS5CALLS_SEL 0 /* forced by intel BCS */ #define LSYS5SIGR_SEL 1 #define L43BSDCALLS_SEL 2 /* notyet */ #define LUCODE_SEL 3 #define LUDATA_SEL 4 /* seperate stack, es,fs,gs sels ? */ /* #define LPOSIXCALLS_SEL 5 /* notyet */ struct i386tss tss, panic_tss; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Panic Tss Descriptor */ { (int) &panic_tss, /* segment base address */ sizeof(tss)-1, /* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Proc 0 Tss 
Descriptor */ { (int) kstack, /* segment base address */ sizeof(tss)-1, /* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }}; struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ } }; setidt(idx, func, typ, dpl) char *func; { struct gate_descriptor *ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = 8; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X, name) extern IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(rsvd0), IDTVEC(rsvd1), IDTVEC(rsvd2), IDTVEC(rsvd3), IDTVEC(rsvd4), IDTVEC(rsvd5), IDTVEC(rsvd6), IDTVEC(rsvd7), IDTVEC(rsvd8), IDTVEC(rsvd9), IDTVEC(rsvd10), IDTVEC(rsvd11), IDTVEC(rsvd12), IDTVEC(rsvd13), IDTVEC(rsvd14), IDTVEC(rsvd14), IDTVEC(syscall); int lcr0(), lcr3(), rcr0(), rcr2(); int _gsel_tss; init386(first) { extern ssdtosd(), lgdt(), lidt(), lldt(), etext; int x, *pi; unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; extern int sigcode,szsigcode; /* table descriptors - used to load tables by microp */ unsigned short r_gdt[3], r_idt[3]; int pagesinbase, pagesinext; proc0.p_addr = proc0paddr; /* * Initialize the console before we print anything out. */ cninit (); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(i386_round_page(&etext)) - 1; gdt_segs[GDATA_SEL].ssd_limit = 0xffffffff; /* XXX constant? 
*/ for (x=0; x < NGDT; x++) ssdtosd(gdt_segs+x, gdt+x); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. */ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * NBPG) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; /* Note. eventually want private ldts per process */ for (x=0; x < 5; x++) ssdtosd(ldt_segs+x, ldt+x); /* exceptions */ setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_KPL); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL); setidt(8, &IDTVEC(dble), SDT_SYS386TGT, SEL_KPL); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL); setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL); setidt(17, &IDTVEC(rsvd0), SDT_SYS386TGT, SEL_KPL); setidt(18, &IDTVEC(rsvd1), SDT_SYS386TGT, SEL_KPL); setidt(19, &IDTVEC(rsvd2), SDT_SYS386TGT, SEL_KPL); setidt(20, &IDTVEC(rsvd3), SDT_SYS386TGT, SEL_KPL); setidt(21, &IDTVEC(rsvd4), SDT_SYS386TGT, SEL_KPL); setidt(22, &IDTVEC(rsvd5), SDT_SYS386TGT, SEL_KPL); setidt(23, &IDTVEC(rsvd6), SDT_SYS386TGT, SEL_KPL); setidt(24, &IDTVEC(rsvd7), SDT_SYS386TGT, SEL_KPL); setidt(25, &IDTVEC(rsvd8), SDT_SYS386TGT, SEL_KPL); setidt(26, &IDTVEC(rsvd9), SDT_SYS386TGT, SEL_KPL); setidt(27, &IDTVEC(rsvd10), SDT_SYS386TGT, SEL_KPL); setidt(28, &IDTVEC(rsvd11), SDT_SYS386TGT, SEL_KPL); setidt(29, &IDTVEC(rsvd12), SDT_SYS386TGT, SEL_KPL); setidt(30, &IDTVEC(rsvd13), SDT_SYS386TGT, SEL_KPL); setidt(31, &IDTVEC(rsvd14), SDT_SYS386TGT, SEL_KPL); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif r_gdt[0] = (unsigned short) (sizeof(gdt) - 1); r_gdt[1] = (unsigned short) ((int) gdt & 0xffff); r_gdt[2] = (unsigned short) ((int) gdt >> 16); lgdt(&r_gdt); r_idt[0] = (unsigned short) (sizeof(idt) - 1); r_idt[1] = (unsigned short) ((int) idt & 0xfffff); r_idt[2] = (unsigned short) ((int) idt >> 16); lidt(&r_idt); lldt(GSEL(GLDT_SEL, SEL_KPL)); #include "ddb.h" #if NDDB > 0 kdb_init(); if (boothowto & RB_KDB) Debugger(); #endif /* Use BIOS values stored in RTC CMOS RAM, since probing * breaks certain 386 AT relics. 
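Just above, init386() hand-packs the pseudo-descriptors handed to lgdt and lidt into three 16-bit words: word 0 holds the table limit (size - 1) and words 1-2 hold the low and high halves of the 32-bit base address. A hedged C sketch of that packing (the function name is illustrative):

#include <stdint.h>

/* Pack a limit/base pair the way r_gdt[] and r_idt[] are filled in above. */
static void
make_region_descriptor(uint16_t rd[3], uint32_t base, uint32_t size)
{
        rd[0] = (uint16_t)(size - 1);           /* limit = table size - 1 */
        rd[1] = (uint16_t)(base & 0xffff);      /* base, low 16 bits */
        rd[2] = (uint16_t)(base >> 16);         /* base, high 16 bits */
}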
*/ biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); /*printf("bios base %d ext %d ", biosbasemem, biosextmem);*/ /* * 15 Aug 92 Terry Lambert The real fix for the CMOS bug */ if( biosbasemem != EXPECT_BASEMEM) { printf( "Warning: Base memory %dK, assuming %dK\n", biosbasemem, EXPECT_BASEMEM); biosbasemem = EXPECT_BASEMEM; /* assume base*/ } if( biosextmem > 65536) { printf( "Warning: Extended memory %dK(>64M), assuming 0K\n", biosextmem); biosextmem = 0; /* assume none*/ } /* * Go into normal calculation; Note that we try to run in 640K, and * that invalid CMOS values of non 0xffff are no longer a cause of * ptdi problems. I have found a gutted kernel can run in 640K. */ pagesinbase = 640/4 - first/NBPG; pagesinext = biosextmem/4; /* use greater of either base or extended memory. do this * until I reinstitue discontiguous allocation of vm_page * array. */ if (pagesinbase > pagesinext) Maxmem = 640/4; else { Maxmem = pagesinext + 0x100000/NBPG; if (first < 0x100000) first = 0x100000; /* skip hole */ } /* This used to explode, since Maxmem used to be 0 for bas CMOS*/ maxmem = Maxmem - 1; /* highest page of usable memory */ physmem = maxmem; /* number of pages of physmem addr space */ /*printf("using first 0x%x to 0x%x\n ", first, maxmem*NBPG);*/ if (maxmem < 2048/4) { printf("Too little RAM memory. Warning, running in degraded mode.\n"); #ifdef INFORM_WAIT /* * People with less than 2 Meg have to hit return; this way * we see the messages and can tell them why they blow up later. * If they get working well enough to recompile, they can unset * the flag; otherwise, it's a toy and they have to lump it. */ cngetc(); #endif /* !INFORM_WAIT*/ } /* call pmap initialization to make new kernel address space */ #ifndef MACHINCE_NONCONTIG pmap_bootstrap (first, 0); #else pmap_bootstrap ((vm_offset_t)atdevbase + IOM_SIZE); #endif /* MACHINE_NONCONTIG */ /* now running on new page tables, configured,and u/iom is accessible */ /* make a initial tss so microp can get interrupt stack on syscall! 
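The memory sizing above works in 4K pages: base memory contributes 640K minus the pages already taken below `first`, extended memory contributes biosextmem (in kilobytes) of pages starting at 1MB, and whichever region is larger wins until discontiguous vm_page allocation returns. A hedged C restatement of that arithmetic (the function is illustrative; variable names follow the code above):

#define NBPG 4096

/* Pick Maxmem from the CMOS sizes, adjusting 'first' past the ISA hole if needed. */
static int
choose_maxmem(int *firstp, int biosextmem)      /* biosextmem is in kilobytes */
{
        int pagesinbase = 640 / 4 - *firstp / NBPG;
        int pagesinext = biosextmem / 4;

        if (pagesinbase > pagesinext)
                return (640 / 4);               /* run entirely in base memory */
        if (*firstp < 0x100000)
                *firstp = 0x100000;             /* skip the hole below 1MB */
        return (pagesinext + 0x100000 / NBPG);  /* pages up to top of extended mem */
}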
*/ proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG; proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; _gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = (sizeof(tss))<<16; ltr(_gsel_tss); /* make a call gate to reenter kernel with */ gdp = (struct gate_descriptor *) &ldt[LSYS5CALLS_SEL][0]; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 0; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode); proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_ptd = IdlePTD; } extern struct pte *CMAP1, *CMAP2; extern caddr_t CADDR1, CADDR2; /* * zero out physical memory * specified in relocation units (NBPG bytes) */ clearseg(n) { *(int *)CMAP2 = PG_V | PG_KW | ctob(n); load_cr3(rcr3()); bzero(CADDR2,NBPG); #ifndef MACHINE_NONCONTIG *(int *) CADDR2 = 0; #endif /* MACHINE_NONCONTIG */ } /* * copy a page of physical memory * specified in relocation units (NBPG bytes) */ void copyseg(frm, n) { *(int *)CMAP2 = PG_V | PG_KW | ctob(n); load_cr3(rcr3()); bcopy((void *)frm, (void *)CADDR2, NBPG); } /* * copy a page of physical memory * specified in relocation units (NBPG bytes) */ void physcopyseg(frm, to) { *(int *)CMAP1 = PG_V | PG_KW | ctob(frm); *(int *)CMAP2 = PG_V | PG_KW | ctob(to); load_cr3(rcr3()); bcopy(CADDR1, CADDR2, NBPG); } /*aston() { schednetisr(NETISR_AST); }*/ void setsoftclock() { schednetisr(NETISR_SCLK); } /* * insert an element into a queue */ #undef insque _insque(element, head) register struct prochd *element, *head; { element->ph_link = head->ph_link; head->ph_link = (struct proc *)element; element->ph_rlink = (struct proc *)head; ((struct prochd *)(element->ph_link))->ph_rlink=(struct proc *)element; } /* * remove an element from a queue */ #undef remque _remque(element) register struct prochd *element; { ((struct prochd *)(element->ph_link))->ph_rlink = element->ph_rlink; ((struct prochd *)(element->ph_rlink))->ph_link = element->ph_link; element->ph_rlink = (struct proc *)0; } #ifdef SLOW_OLD_COPYSTRS vmunaccess() {} #if 0 /* assembler versions now in locore.s */ /* * Below written in C to allow access to debugging code */ copyinstr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; void *toaddr, *fromaddr; { int c,tally; tally = 0; while (maxlength--) { c = fubyte(fromaddr++); if (c == -1) { if(lencopied) *lencopied = tally; return(EFAULT); } tally++; *(char *)toaddr++ = (char) c; if (c == 0){ if(lencopied) *lencopied = (u_int)tally; return(0); } } if(lencopied) *lencopied = (u_int)tally; return(ENAMETOOLONG); } copyoutstr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; void *fromaddr, *toaddr; { int c; int tally; tally = 0; while (maxlength--) { c = subyte(toaddr++, *(char *)fromaddr); if (c == -1) return(EFAULT); tally++; if (*(char *)fromaddr++ == 0){ if(lencopied) *lencopied = tally; return(0); } } if(lencopied) *lencopied = tally; return(ENAMETOOLONG); } #endif /* SLOW_OLD_COPYSTRS */ copystr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; void *fromaddr, *toaddr; { u_int tally; tally = 0; while (maxlength--) { *(u_char *)toaddr = *(u_char *)fromaddr++; tally++; if (*(u_char *)toaddr++ == 0) { if(lencopied) *lencopied = 
tally; return(0); } } if(lencopied) *lencopied = tally; return(ENAMETOOLONG); } #endif Index: head/sys/i386/i386/pmap.c =================================================================== --- head/sys/i386/i386/pmap.c (revision 607) +++ head/sys/i386/i386/pmap.c (revision 608) @@ -1,1725 +1,1722 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.5 1993/10/12 13:53:25 rgrimes Exp $ + * $Id: pmap.c,v 1.6 1993/10/12 15:09:37 rgrimes Exp $ */ -static char rcsid[] = "$Id: pmap.c,v 1.5 1993/10/12 13:53:25 rgrimes Exp $"; /* * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * Derived from: hp300/@(#)pmap.c 7.1 (Berkeley) 12/5/90 */ /* * Reno i386 version, from Mike Hibler's hp300 version. */ /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. 
However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "param.h" #include "proc.h" #include "malloc.h" #include "user.h" #include "vm/vm.h" #include "vm/vm_kern.h" #include "vm/vm_page.h" /*#include "vm/vm_pageout.h"*/ #include "i386/isa/isa.h" /* * Allocate various and sundry SYSMAPs used in the days of old VM * and not yet converted. XXX. */ #define BSDVM_COMPAT 1 #ifdef DEBUG struct { int kernel; /* entering kernel mapping */ int user; /* entering user mapping */ int ptpneeded; /* needed to allocate a PT page */ int pwchange; /* no mapping change, just wiring or protection */ int wchange; /* no mapping change, just wiring */ int mchange; /* was mapped but mapping to different page */ int managed; /* a managed page */ int firstpv; /* first mapping for this PA */ int secondpv; /* second mapping for this PA */ int ci; /* cache inhibited */ int unmanaged; /* not a managed page */ int flushes; /* cache flushes */ } enter_stats; struct { int calls; int removes; int pvfirst; int pvsearch; int ptinvalid; int uflushes; int sflushes; } remove_stats; int debugmap = 0; int pmapdebug = 0 /* 0xffff */; #define PDB_FOLLOW 0x0001 #define PDB_INIT 0x0002 #define PDB_ENTER 0x0004 #define PDB_REMOVE 0x0008 #define PDB_CREATE 0x0010 #define PDB_PTPAGE 0x0020 #define PDB_CACHE 0x0040 #define PDB_BITS 0x0080 #define PDB_COLLECT 0x0100 #define PDB_PROTECT 0x0200 #define PDB_PDRTAB 0x0400 #define PDB_PARANOIA 0x2000 #define PDB_WIRING 0x4000 #define PDB_PVDUMP 0x8000 int pmapvacflush = 0; #define PVF_ENTER 0x01 #define PVF_REMOVE 0x02 #define PVF_PROTECT 0x04 #define PVF_TOTAL 0x80 #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023])) #define pmap_pte_pa(pte) (*(int *)(pte) & PG_FRAME) #define pmap_pde_v(pte) ((pte)->pd_v) #define pmap_pte_w(pte) ((pte)->pg_w) /* #define pmap_pte_ci(pte) ((pte)->pg_ci) */ #define pmap_pte_m(pte) ((pte)->pg_m) #define pmap_pte_u(pte) ((pte)->pg_u) #define pmap_pte_v(pte) ((pte)->pg_v) #define pmap_pte_set_w(pte, v) ((pte)->pg_w = (v)) #define pmap_pte_set_prot(pte, v) ((pte)->pg_prot = (v)) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) int protection_codes[8]; struct pmap kernel_pmap_store; pmap_t kernel_pmap; vm_offset_t avail_start; /* PA of first available physical page */ vm_offset_t avail_end; /* PA of last available physical page */ vm_size_t mem_size; /* memory size in bytes */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss)*/ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t vm_first_phys; /* PA of first managed page */ vm_offset_t vm_last_phys; /* PA just past last managed page */ int i386pagesperpage; /* PAGE_SIZE / NBPG */ boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
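A minimal sketch of the recursive (self-referential) page-table mapping that the header comment and the pmap_pde() style macros above depend on: because one page-directory slot points back at the page directory itself, every PTE in the system becomes visible at a fixed virtual window, indexed by the page number of the address it maps. This is not part of the original source; the PTDPTDI value and the sketch_* names are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define PG_SHIFT	12		/* 4 KB pages */
#define PD_SHIFT	22		/* 4 MB covered per page-directory entry */
#define PTDPTDI		0x3F6		/* recursive page-directory slot (illustrative value) */

/* Virtual base of the recursive PTE window (what the kernel calls PTmap). */
#define SKETCH_PTMAP	((uint32_t)PTDPTDI << PD_SHIFT)

/* VA of the PTE that maps 'va'; this is the arithmetic behind vtopte(va). */
static uint32_t
sketch_vtopte(uint32_t va)
{
	return (SKETCH_PTMAP + (va >> PG_SHIFT) * sizeof(uint32_t));
}

/* VA of the PDE that maps 'va': the same trick applied one level up. */
static uint32_t
sketch_vtopde(uint32_t va)
{
	return (SKETCH_PTMAP + ((uint32_t)PTDPTDI << PG_SHIFT) +
	    (va >> PD_SHIFT) * sizeof(uint32_t));
}

int
main(void)
{
	uint32_t va = 0xFE000000;	/* an address in the kernel's range */

	printf("pte for %#x at %#x, pde at %#x\n", (unsigned)va,
	    (unsigned)sketch_vtopte(va), (unsigned)sketch_vtopde(va));
	return (0);
}

The alternate-space variant (APTmap/APTDpde, used throughout the functions below) is the same construction from a second reserved directory slot, which is why switching to another pmap only requires rewriting that slot and flushing the TLB.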
*/ char *pmap_attributes; /* reference and modify bits */ boolean_t pmap_testbit(); void pmap_clear_modify(); #if BSDVM_COMPAT #include "msgbuf.h" /* * All those kernel PT submaps that BSD is so fond of */ struct pte *CMAP1, *CMAP2, *mmap; caddr_t CADDR1, CADDR2, vmmap; struct pte *msgbufmap; struct msgbuf *msgbufp; #endif /* * Bootstrap the system enough to run with virtual memory. * Map the kernel's code and data, and allocate the system page table. * * On the I386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address KERNBASE to the actual * (physical) address starting relative to 0] */ struct pte *pmap_pte(); void pmap_bootstrap(firstaddr, loadaddr) vm_offset_t firstaddr; vm_offset_t loadaddr; { #if BSDVM_COMPAT vm_offset_t va; struct pte *pte; #endif extern vm_offset_t maxmem, physmem; extern int IdlePTD; avail_start = firstaddr + 8 * NBPG; avail_end = maxmem << PG_SHIFT; /* XXX: allow for msgbuf */ avail_end -= i386_round_page(sizeof(struct msgbuf)); mem_size = physmem << PG_SHIFT; virtual_avail = (vm_offset_t)atdevbase + 0x100000 - 0xa0000 + 10*NBPG; virtual_end = VM_MAX_KERNEL_ADDRESS; i386pagesperpage = PAGE_SIZE / NBPG; /* * Initialize protection array. */ i386_protection_init(); /* * The kernel's pmap is statically allocated so we don't * have to use pmap_create, which is unlikely to work * correctly at this part of the boot sequence. */ kernel_pmap = &kernel_pmap_store; #ifdef notdef /* * Create Kernel page directory table and page maps. * [ currently done in locore. i have wild and crazy ideas -wfj ] * XXX IF THIS IS EVER USED, IT MUST BE MOVED TO THE TOP * OF THIS ROUTINE -- cgd */ bzero(firstaddr, 4*NBPG); kernel_pmap->pm_pdir = firstaddr + VM_MIN_KERNEL_ADDRESS; kernel_pmap->pm_ptab = firstaddr + VM_MIN_KERNEL_ADDRESS + NBPG; firstaddr += NBPG; for (x = i386_btod(VM_MIN_KERNEL_ADDRESS); x < i386_btod(VM_MIN_KERNEL_ADDRESS)+3; x++) { struct pde *pde; pde = kernel_pmap->pm_pdir + x; *(int *)pde = firstaddr + x*NBPG | PG_V | PG_KW; } #else kernel_pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD); #endif simple_lock_init(&kernel_pmap->pm_lock); kernel_pmap->pm_count = 1; #if BSDVM_COMPAT /* * Allocate all the submaps we need */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*NBPG); p = pte; pte += (n); va = virtual_avail; pte = pmap_pte(kernel_pmap, va); SYSMAP(caddr_t ,CMAP1 ,CADDR1 ,1 ) SYSMAP(caddr_t ,CMAP2 ,CADDR2 ,1 ) SYSMAP(caddr_t ,mmap ,vmmap ,1 ) SYSMAP(struct msgbuf * ,msgbufmap ,msgbufp ,1 ) virtual_avail = va; #endif /* * reserve special hunk of memory for use by bus dma as a bounce * buffer (contiguous virtual *and* physical memory). * do it from firstaddr -> firstaddr+8 pages. note that * avail_start was bumped up 8 pages, above, to accomodate this. */ { extern vm_offset_t isaphysmem; isaphysmem = va; virtual_avail = pmap_map(va, firstaddr, firstaddr + 8*NBPG, VM_PROT_ALL); } *(int *)PTD = 0; load_cr3(rcr3()); } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. 
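The SYSMAP() macro above carves the CMAP1/CADDR1 style submaps out of virtual_avail by handing out consecutive pages of kernel VA together with the PTEs that map them. A small standalone restatement of that cursor logic follows; it is not part of the original source, and the names and starting address are hypothetical.

#include <stdio.h>

#define NBPG	4096

/* Walking pair of cursors: next free kernel VA and the index of its PTE. */
struct sysmap_cursor {
	unsigned va;
	unsigned pte_index;
};

/* Hand out 'npages' pages of VA, report the first PTE index, advance both. */
static unsigned
sketch_sysmap(struct sysmap_cursor *c, unsigned npages, unsigned *pte_index)
{
	unsigned va = c->va;

	*pte_index = c->pte_index;
	c->va += npages * NBPG;
	c->pte_index += npages;
	return (va);
}

int
main(void)
{
	struct sysmap_cursor c = { 0xFE400000, 0 };	/* illustrative starting VA */
	unsigned caddr1_pte, caddr2_pte;
	unsigned caddr1 = sketch_sysmap(&c, 1, &caddr1_pte);
	unsigned caddr2 = sketch_sysmap(&c, 1, &caddr2_pte);

	printf("CADDR1=%#x (pte %u)  CADDR2=%#x (pte %u)\n",
	    caddr1, caddr1_pte, caddr2, caddr2_pte);
	return (0);
}

The kernel version does the same thing with a macro over the local 'va' and 'pte' variables, which is why the SYSMAP() invocations above must appear back to back before virtual_avail is updated.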
*/ void pmap_init(phys_start, phys_end) vm_offset_t phys_start, phys_end; { vm_offset_t addr, addr2; vm_size_t npg, s; int rv; extern int KPTphys; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_init(%x, %x)\n", phys_start, phys_end); #endif /* * Now that kernel map has been allocated, we can mark as * unavailable regions which we have mapped in locore. */ addr = atdevbase; (void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0, &addr, (0x100000-0xa0000), FALSE); addr = (vm_offset_t) KERNBASE + KPTphys/* *NBPG */; vm_object_reference(kernel_object); (void) vm_map_find(kernel_map, kernel_object, addr, &addr, 2*NBPG, FALSE); /* * Allocate memory for random pmap data structures. Includes the * pv_head_table and pmap_attributes. */ npg = atop(phys_end - phys_start); s = (vm_size_t) (sizeof(struct pv_entry) * npg + npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); pv_table = (pv_entry_t) addr; addr += sizeof(struct pv_entry) * npg; pmap_attributes = (char *) addr; #ifdef DEBUG if (pmapdebug & PDB_INIT) printf("pmap_init: %x bytes (%x pgs): tbl %x attr %x\n", s, npg, pv_table, pmap_attributes); #endif /* * Now it is safe to enable pv_table recording. */ vm_first_phys = phys_start; vm_last_phys = phys_end; pmap_initialized = TRUE; } /* * Used to map a range of physical addresses into kernel * virtual address space. * * For now, VM is already on, we only need to map the * specified memory. */ vm_offset_t pmap_map(virt, start, end, prot) vm_offset_t virt; vm_offset_t start; vm_offset_t end; int prot; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_map(%x, %x, %x, %x)\n", virt, start, end, prot); #endif while (start < end) { pmap_enter(kernel_pmap, virt, start, prot, FALSE); virt += PAGE_SIZE; start += PAGE_SIZE; } return(virt); } /* * Create and return a physical map. * * If the size specified for the map * is zero, the map is an actual physical * map, and may be referenced by the * hardware. * * If the size specified is non-zero, * the map will be used in software only, and * is bounded by that size. * * [ just allocate a ptd and mark it uninitialize -- should we track * with a table which process has which ptd? -wfj ] */ pmap_t pmap_create(size) vm_size_t size; { register pmap_t pmap; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) printf("pmap_create(%x)\n", size); #endif /* * Software use map does not need a pmap */ if (size) return(NULL); /* XXX: is it ok to wait here? */ pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK); #ifdef notifwewait if (pmap == NULL) panic("pmap_create: cannot allocate a pmap"); #endif bzero(pmap, sizeof(*pmap)); pmap_pinit(pmap); return (pmap); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) pg("pmap_pinit(%x)\n", pmap); #endif /* * No need to allocate page table space yet but we do need a * valid page directory table. */ pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, NBPG); /* wire in kernel global address entries */ bcopy(PTD+KPTDI, pmap->pm_pdir+KPTDI, NKPDE*4); /* install self-referential address mapping entry */ *(int *)(pmap->pm_pdir+PTDPTDI) = (int)pmap_extract(kernel_pmap, pmap->pm_pdir) | PG_V | PG_KW; pmap->pm_count = 1; simple_lock_init(&pmap->pm_lock); } /* * Retire the given physical map from service. * Should only be called if the map contains * no valid mappings. 
*/ void pmap_destroy(pmap) register pmap_t pmap; { int count; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_destroy(%x)\n", pmap); #endif if (pmap == NULL) return; simple_lock(&pmap->pm_lock); count = --pmap->pm_count; simple_unlock(&pmap->pm_lock); if (count == 0) { pmap_release(pmap); free((caddr_t)pmap, M_VMPMAP); } } /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap) register struct pmap *pmap; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) pg("pmap_release(%x)\n", pmap); #endif #ifdef notdef /* DIAGNOSTIC */ /* count would be 0 from pmap_destroy... */ simple_lock(&pmap->pm_lock); if (pmap->pm_count != 1) panic("pmap_release count"); #endif kmem_free(kernel_map, (vm_offset_t)pmap->pm_pdir, NBPG); } /* * Add a reference to the specified pmap. */ void pmap_reference(pmap) pmap_t pmap; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_reference(%x)", pmap); #endif if (pmap != NULL) { simple_lock(&pmap->pm_lock); pmap->pm_count++; simple_unlock(&pmap->pm_lock); } } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap, sva, eva) struct pmap *pmap; register vm_offset_t sva; register vm_offset_t eva; { register pt_entry_t *ptp,*ptq; vm_offset_t va; vm_offset_t pa; pt_entry_t *pte; pv_entry_t pv, npv; int ix; int s, bits; #ifdef DEBUG pt_entry_t opte; if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) pg("pmap_remove(%x, %x, %x)", pmap, sva, eva); #endif if (pmap == NULL) return; /* are we current address space or kernel? */ if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum || pmap == kernel_pmap) ptp=PTmap; /* otherwise, we are alternate address space */ else { if (pmap->pm_pdir[PTDPTDI].pd_pfnum != APTDpde.pd_pfnum) { APTDpde = pmap->pm_pdir[PTDPTDI]; tlbflush(); } ptp=APTmap; } #ifdef DEBUG remove_stats.calls++; #endif /* this is essential since we must check the PDE(sva) for precense */ while (sva <= eva && !pmap_pde_v(pmap_pde(pmap, sva))) sva = (sva & PD_MASK) + (1<= eva) return; ptq++; } if(!(sva & 0x3ff)) /* Only check once in a while */ { if (!pmap_pde_v(pmap_pde(pmap, i386_ptob(sva)))) { /* We can race ahead here, straight to next pde.. */ sva = (sva & 0xffc00) + (1<<10) -1 ; continue; } } if(!pmap_pte_pa(ptp+sva)) continue; pte = ptp + sva; pa = pmap_pte_pa(pte); va = i386_ptob(sva); #ifdef DEBUG opte = *pte; remove_stats.removes++; #endif /* * Update statistics */ if (pmap_pte_w(pte)) pmap->pm_stats.wired_count--; pmap->pm_stats.resident_count--; /* * Invalidate the PTEs. * XXX: should cluster them up and invalidate as many * as possible at once. */ #ifdef DEBUG if (pmapdebug & PDB_REMOVE) printf("remove: inv %x ptes at %x(%x) ", i386pagesperpage, pte, *(int *)pte); #endif bits = ix = 0; do { bits |= *(int *)pte & (PG_U|PG_M); *(int *)pte++ = 0; /*TBIS(va + ix * NBPG);*/ } while (++ix != i386pagesperpage); if (curproc && pmap == &curproc->p_vmspace->vm_pmap) pmap_activate(pmap, (struct pcb *)curproc->p_addr); /* are we current address space or kernel? */ /*if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum || pmap == kernel_pmap) load_cr3(curpcb->pcb_ptd);*/ tlbflush(); #ifdef needednotdone reduce wiring count on page table pages as references drop #endif /* * Remove from the PV table (raise IPL since we * may be called at interrupt time). 
*/ if (pa < vm_first_phys || pa >= vm_last_phys) continue; pv = pa_to_pvh(pa); s = splimp(); /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ if (pmap == pv->pv_pmap && va == pv->pv_va) { npv = pv->pv_next; if (npv) { *pv = *npv; free((caddr_t)npv, M_VMPVENT); } else pv->pv_pmap = NULL; #ifdef DEBUG remove_stats.pvfirst++; #endif } else { for (npv = pv->pv_next; npv; npv = npv->pv_next) { #ifdef DEBUG remove_stats.pvsearch++; #endif if (pmap == npv->pv_pmap && va == npv->pv_va) break; pv = npv; } #ifdef DEBUG if (npv == NULL) panic("pmap_remove: PA not in pv_tab"); #endif pv->pv_next = npv->pv_next; free((caddr_t)npv, M_VMPVENT); pv = pa_to_pvh(pa); } #ifdef notdef [tally number of pagetable pages, if sharing of ptpages adjust here] #endif /* * Update saved attributes for managed page */ pmap_attributes[pa_index(pa)] |= bits; splx(s); } #ifdef notdef [cache and tlb flushing, if needed] #endif } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. */ void pmap_remove_all(pa) vm_offset_t pa; { register pv_entry_t pv; int s; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) printf("pmap_remove_all(%x)", pa); /*pmap_pvdump(pa);*/ #endif /* * Not one of ours */ if (pa < vm_first_phys || pa >= vm_last_phys) return; pv = pa_to_pvh(pa); s = splimp(); /* * Do it the easy way for now */ while (pv->pv_pmap != NULL) { #ifdef DEBUG if (!pmap_pde_v(pmap_pde(pv->pv_pmap, pv->pv_va)) || pmap_pte_pa(pmap_pte(pv->pv_pmap, pv->pv_va)) != pa) panic("pmap_remove_all: bad mapping"); #endif pmap_remove(pv->pv_pmap, pv->pv_va, pv->pv_va + PAGE_SIZE); } splx(s); } /* * Routine: pmap_copy_on_write * Function: * Remove write privileges from all * physical maps for this physical page. */ void pmap_copy_on_write(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) printf("pmap_copy_on_write(%x)", pa); #endif pmap_changebit(pa, PG_RO, TRUE); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap, sva, eva, prot) register pmap_t pmap; vm_offset_t sva, eva; vm_prot_t prot; { register pt_entry_t *pte; register vm_offset_t va; register int ix; int i386prot; boolean_t firstpage = TRUE; register pt_entry_t *ptp; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) printf("pmap_protect(%x, %x, %x, %x)", pmap, sva, eva, prot); #endif if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; /* are we current address space or kernel? */ if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum || pmap == kernel_pmap) ptp=PTmap; /* otherwise, we are alternate address space */ else { if (pmap->pm_pdir[PTDPTDI].pd_pfnum != APTDpde.pd_pfnum) { APTDpde = pmap->pm_pdir[PTDPTDI]; tlbflush(); } ptp=APTmap; } for (va = sva; va < eva; va += PAGE_SIZE) { /* * Page table page is not allocated. * Skip it, we don't want to force allocation * of unnecessary PTE pages just to set the protection. */ if (!pmap_pde_v(pmap_pde(pmap, va))) { /* XXX: avoid address wrap around */ if (va >= i386_trunc_pdr((vm_offset_t)-1)) break; va = i386_round_pdr(va + PAGE_SIZE) - PAGE_SIZE; continue; } pte = ptp + i386_btop(va); /* * Page not valid. Again, skip it. * Should we do this? 
Or set protection anyway? */ if (!pmap_pte_v(pte)) continue; ix = 0; i386prot = pte_prot(pmap, prot); if(va < UPT_MAX_ADDRESS) i386prot |= 2 /*PG_u*/; do { /* clear VAC here if PG_RO? */ pmap_pte_set_prot(pte++, i386prot); /*TBIS(va + ix * NBPG);*/ } while (++ix != i386pagesperpage); } if (curproc && pmap == &curproc->p_vmspace->vm_pmap) pmap_activate(pmap, (struct pcb *)curproc->p_addr); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap, va, pa, prot, wired) register pmap_t pmap; vm_offset_t va; register vm_offset_t pa; vm_prot_t prot; boolean_t wired; { register pt_entry_t *pte; register int npte, ix; vm_offset_t opa; boolean_t cacheable = TRUE; boolean_t checkpv = TRUE; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) printf("pmap_enter(%x, %x, %x, %x, %x)", pmap, va, pa, prot, wired); #endif if (pmap == NULL) return; if(va > VM_MAX_KERNEL_ADDRESS)panic("pmap_enter: toobig"); /* also, should not muck with PTD va! */ #ifdef DEBUG if (pmap == kernel_pmap) enter_stats.kernel++; else enter_stats.user++; #endif /* * Page Directory table entry not valid, we need a new PT page */ if (!pmap_pde_v(pmap_pde(pmap, va))) { printf("ptdi %x\n", pmap->pm_pdir[PTDPTDI]); panic("Page Table Directory Invalid (ptdi)"); } pte = pmap_pte(pmap, va); opa = pmap_pte_pa(pte); #ifdef DEBUG if (pmapdebug & PDB_ENTER) printf("enter: pte %x, *pte %x ", pte, *(int *)pte); #endif /* * Mapping has not changed, must be protection or wiring change. */ if (opa == pa) { #ifdef DEBUG enter_stats.pwchange++; #endif /* * Wiring change, just update stats. * We don't worry about wiring PT pages as they remain * resident as long as there are valid mappings in them. * Hence, if a user page is wired, the PT page will be also. */ if (wired && !pmap_pte_w(pte) || !wired && pmap_pte_w(pte)) { #ifdef DEBUG if (pmapdebug & PDB_ENTER) pg("enter: wiring change -> %x ", wired); #endif if (wired) pmap->pm_stats.wired_count++; else pmap->pm_stats.wired_count--; #ifdef DEBUG enter_stats.wchange++; #endif } goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { #ifdef DEBUG if (pmapdebug & PDB_ENTER) printf("enter: removing old mapping %x pa %x ", va, opa); #endif pmap_remove(pmap, va, va + PAGE_SIZE); #ifdef DEBUG enter_stats.mchange++; #endif } /* * Enter on the PV list if part of our managed memory * Note that we raise IPL while manipulating pv_table * since pmap_enter can be called at interrupt time. */ if (pa >= vm_first_phys && pa < vm_last_phys) { register pv_entry_t pv, npv; int s; #ifdef DEBUG enter_stats.managed++; #endif pv = pa_to_pvh(pa); s = splimp(); #ifdef DEBUG if (pmapdebug & PDB_ENTER) printf("enter: pv at %x: %x/%x/%x ", pv, pv->pv_va, pv->pv_pmap, pv->pv_next); #endif /* * No entries yet, use header as the first entry */ if (pv->pv_pmap == NULL) { #ifdef DEBUG enter_stats.firstpv++; #endif pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_next = NULL; pv->pv_flags = 0; } /* * There is at least one other VA mapping this page. * Place this entry after the header. 
*/ else { /*printf("second time: ");*/ #ifdef DEBUG for (npv = pv; npv; npv = npv->pv_next) if (pmap == npv->pv_pmap && va == npv->pv_va) panic("pmap_enter: already in pv_tab"); #endif npv = (pv_entry_t) malloc(sizeof *npv, M_VMPVENT, M_NOWAIT); npv->pv_va = va; npv->pv_pmap = pmap; npv->pv_next = pv->pv_next; pv->pv_next = npv; #ifdef DEBUG if (!npv->pv_next) enter_stats.secondpv++; #endif } splx(s); } /* * Assumption: if it is not part of our managed memory * then it must be device memory which may be volitile. */ if (pmap_initialized) { checkpv = cacheable = FALSE; #ifdef DEBUG enter_stats.unmanaged++; #endif } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. * Assume uniform modified and referenced status for all * I386 pages in a MACH page. */ npte = (pa & PG_FRAME) | pte_prot(pmap, prot) | PG_V; npte |= (*(int *)pte & (PG_M|PG_U)); if (wired) npte |= PG_W; if(va < UPT_MIN_ADDRESS) npte |= PG_u; else if(va < UPT_MAX_ADDRESS) npte |= PG_u | PG_RW; #ifdef DEBUG if (pmapdebug & PDB_ENTER) printf("enter: new pte value %x ", npte); #endif ix = 0; do { *(int *)pte++ = npte; /*TBIS(va);*/ npte += NBPG; va += NBPG; } while (++ix != i386pagesperpage); pte--; -#ifdef DEBUGx -cache, tlb flushes -#endif /*pads(pmap);*/ /*load_cr3(((struct pcb *)curproc->p_addr)->pcb_ptd);*/ tlbflush(); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(phys, prot) vm_offset_t phys; vm_prot_t prot; { switch (prot) { case VM_PROT_READ: case VM_PROT_READ|VM_PROT_EXECUTE: pmap_copy_on_write(phys); break; case VM_PROT_ALL: break; default: pmap_remove_all(phys); break; } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register pt_entry_t *pte; register int ix; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_change_wiring(%x, %x, %x)", pmap, va, wired); #endif if (pmap == NULL) return; pte = pmap_pte(pmap, va); #ifdef DEBUG /* * Page table page is not allocated. * Should this ever happen? Ignore it for now, * we don't want to force allocation of unnecessary PTE pages. */ if (!pmap_pde_v(pmap_pde(pmap, va))) { if (pmapdebug & PDB_PARANOIA) pg("pmap_change_wiring: invalid PDE for %x ", va); return; } /* * Page not valid. Should this ever happen? * Just continue and change wiring anyway. */ if (!pmap_pte_v(pte)) { if (pmapdebug & PDB_PARANOIA) pg("pmap_change_wiring: invalid PTE for %x ", va); } #endif if (wired && !pmap_pte_w(pte) || !wired && pmap_pte_w(pte)) { if (wired) pmap->pm_stats.wired_count++; else pmap->pm_stats.wired_count--; } /* * Wiring is not a hardware characteristic so there is no need * to invalidate TLB. */ ix = 0; do { pmap_pte_set_w(pte++, wired); } while (++ix != i386pagesperpage); } /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. * [ what about induced faults -wfj] */ struct pte *pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { #ifdef DEBUGx if (pmapdebug & PDB_FOLLOW) printf("pmap_pte(%x, %x) ->\n", pmap, va); #endif + if (pmap && pmap_pde_v(pmap_pde(pmap, va))) { /* are we current address space or kernel? 
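For reference, the validate: step of pmap_enter() above composes the new PTE by or-ing the page frame with the protection and bookkeeping bits. The standalone sketch below is not part of the original source; the hardware-defined bits are the architectural i386 ones, but the PG_W (wired) value in particular is an assumption of this example.

#include <stdint.h>
#include <stdio.h>

#define PG_V		0x001		/* valid (present) */
#define PG_RW		0x002		/* writable */
#define PG_u		0x004		/* user accessible */
#define PG_U		0x020		/* accessed ("used") */
#define PG_M		0x040		/* modified */
#define PG_W		0x200		/* software: wired; a CPU-ignored bit (value assumed) */
#define PG_FRAME	0xfffff000	/* page-frame portion */

/*
 * Hypothetical helper mirroring the validate: step above: build the new
 * PTE from the physical address, the protection bits, the wiring flag and
 * whether the VA is a user-mode address.
 */
static uint32_t
sketch_mkpte(uint32_t pa, uint32_t protbits, int wired, int useraddr)
{
	uint32_t npte = (pa & PG_FRAME) | protbits | PG_V;

	if (wired)
		npte |= PG_W;
	if (useraddr)
		npte |= PG_u;
	return (npte);
}

int
main(void)
{
	printf("pte = %#x\n",
	    (unsigned)sketch_mkpte(0x00345000, PG_RW, 1, 1));
	return (0);
}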
*/ if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum || pmap == kernel_pmap) return ((struct pte *) vtopte(va)); /* otherwise, we are alternate address space */ else { if (pmap->pm_pdir[PTDPTDI].pd_pfnum != APTDpde.pd_pfnum) { APTDpde = pmap->pm_pdir[PTDPTDI]; tlbflush(); } return((struct pte *) avtopte(va)); } } return(0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_offset_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { register vm_offset_t pa; #ifdef DEBUGx if (pmapdebug & PDB_FOLLOW) pg("pmap_extract(%x, %x) -> ", pmap, va); #endif pa = 0; if (pmap && pmap_pde_v(pmap_pde(pmap, va))) { pa = *(int *) pmap_pte(pmap, va); } if (pa) pa = (pa & PG_FRAME) | (va & ~PG_FRAME); #ifdef DEBUGx if (pmapdebug & PDB_FOLLOW) printf("%x\n", pa); #endif return(pa); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) pmap_t dst_pmap; pmap_t src_pmap; vm_offset_t dst_addr; vm_size_t len; vm_offset_t src_addr; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_copy(%x, %x, %x, %x, %x)", dst_pmap, src_pmap, dst_addr, len, src_addr); #endif } /* * Require that all active physical maps contain no * incorrect entries NOW. [This update includes * forcing updates of any address map caching.] * * Generally used to insure that a thread about * to run will see a semantically correct world. */ void pmap_update() { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_update()"); #endif tlbflush(); } /* * Routine: pmap_collect * Function: * Garbage collects the physical map system for * pages which are no longer used. * Success need not be guaranteed -- that is, there * may well be pages which are not referenced, but * others may be collected. * Usage: * Called by the pageout daemon when pages are scarce. * [ needs to be written -wfj ] */ void pmap_collect(pmap) pmap_t pmap; { register vm_offset_t pa; register pv_entry_t pv; register int *pte; vm_offset_t kpa; int s; #ifdef DEBUG int *pde; int opmapdebug; printf("pmap_collect(%x) ", pmap); #endif if (pmap != kernel_pmap) return; } /* [ macro again?, should I force kstack into user map here? -wfj ] */ void pmap_activate(pmap, pcbp) register pmap_t pmap; struct pcb *pcbp; { int x; #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_PDRTAB)) pg("pmap_activate(%x, %x) ", pmap, pcbp); #endif PMAP_ACTIVATE(pmap, pcbp); /*printf("pde "); for(x=0x3f6; x < 0x3fA; x++) printf("%x ", pmap->pm_pdir[x]);*/ /*pads(pmap);*/ /*pg(" pcb_cr3 %x", pcbp->pcb_cr3);*/ } /* * Routine: pmap_kernel * Function: * Returns the physical map handle for the kernel. */ pmap_t pmap_kernel() { return (kernel_pmap); } /* * pmap_zero_page zeros the specified (machine independent) * page by mapping the page into virtual memory and using * bzero to clear its contents, one machine dependent page * at a time. */ pmap_zero_page(phys) register vm_offset_t phys; { register int ix; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_zero_page(%x)", phys); #endif phys >>= PG_SHIFT; ix = 0; do { clearseg(phys++); } while (++ix != i386pagesperpage); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
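pmap_extract() above recovers a physical address by combining the frame number stored in the PTE with the page offset taken from the virtual address. A tiny standalone restatement of that last step (not part of the original source; the names are hypothetical):

#include <stdint.h>
#include <stdio.h>

#define PG_FRAME	0xfffff000u	/* frame portion of a PTE or physical address */

/* Return the physical address mapped by 'pte' for virtual address 'va'. */
static uint32_t
sketch_pte_to_pa(uint32_t pte, uint32_t va)
{
	if ((pte & 1) == 0)		/* PG_V clear: nothing mapped */
		return (0);
	return ((pte & PG_FRAME) | (va & ~PG_FRAME));
}

int
main(void)
{
	/* frame 0x345, offset 0xABC within the page */
	printf("pa = %#x\n", (unsigned)sketch_pte_to_pa(0x00345067, 0xFE123ABC));
	return (0);
}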
*/ pmap_copy_page(src, dst) register vm_offset_t src, dst; { register int ix; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_copy_page(%x, %x)", src, dst); #endif src >>= PG_SHIFT; dst >>= PG_SHIFT; ix = 0; do { physcopyseg(src++, dst++); } while (++ix != i386pagesperpage); } /* * Routine: pmap_pageable * Function: * Make the specified pages (by pmap, offset) * pageable (or not) as requested. * * A page which is not pageable may not take * a fault; therefore, its page table entry * must remain valid for the duration. * * This routine is merely advisory; pmap_enter * will specify that these pages are to be wired * down (or not) as appropriate. */ pmap_pageable(pmap, sva, eva, pageable) pmap_t pmap; vm_offset_t sva, eva; boolean_t pageable; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_pageable(%x, %x, %x, %x)", pmap, sva, eva, pageable); #endif /* * If we are making a PT page pageable then all valid * mappings must be gone from that page. Hence it should * be all zeros and there is no need to clean it. * Assumptions: * - we are called with only one page at a time * - PT pages have only one pv_table entry */ if (pmap == kernel_pmap && pageable && sva + PAGE_SIZE == eva) { register pv_entry_t pv; register vm_offset_t pa; #ifdef DEBUG if ((pmapdebug & (PDB_FOLLOW|PDB_PTPAGE)) == PDB_PTPAGE) printf("pmap_pageable(%x, %x, %x, %x)", pmap, sva, eva, pageable); #endif /*if (!pmap_pde_v(pmap_pde(pmap, sva))) return;*/ if(pmap_pte(pmap, sva) == 0) return; pa = pmap_pte_pa(pmap_pte(pmap, sva)); if (pa < vm_first_phys || pa >= vm_last_phys) return; pv = pa_to_pvh(pa); /*if (!ispt(pv->pv_va)) return;*/ #ifdef DEBUG if (pv->pv_va != sva || pv->pv_next) { pg("pmap_pageable: bad PT page va %x next %x\n", pv->pv_va, pv->pv_next); return; } #endif /* * Mark it unmodified to avoid pageout */ pmap_clear_modify(pa); #ifdef needsomethinglikethis if (pmapdebug & PDB_PTPAGE) pg("pmap_pageable: PT page %x(%x) unmodified\n", sva, *(int *)pmap_pte(pmap, sva)); if (pmapdebug & PDB_WIRING) pmap_check_wiring("pageable", sva); #endif } } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_clear_modify(%x)", pa); #endif pmap_changebit(pa, PG_M, FALSE); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_clear_reference(%x)", pa); #endif pmap_changebit(pa, PG_U, FALSE); } /* * pmap_is_referenced: * * Return whether or not the specified physical page is referenced * by any physical maps. */ boolean_t pmap_is_referenced(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) { boolean_t rv = pmap_testbit(pa, PG_U); printf("pmap_is_referenced(%x) -> %c", pa, "FT"[rv]); return(rv); } #endif return(pmap_testbit(pa, PG_U)); } /* * pmap_is_modified: * * Return whether or not the specified physical page is modified * by any physical maps. 
*/ boolean_t pmap_is_modified(pa) vm_offset_t pa; { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) { boolean_t rv = pmap_testbit(pa, PG_M); printf("pmap_is_modified(%x) -> %c", pa, "FT"[rv]); return(rv); } #endif return(pmap_testbit(pa, PG_M)); } vm_offset_t pmap_phys_address(ppn) int ppn; { return(i386_ptob(ppn)); } /* * Miscellaneous support routines follow */ i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: *kp++ = 0; break; case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = PG_RO; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } boolean_t pmap_testbit(pa, bit) register vm_offset_t pa; int bit; { register pv_entry_t pv; register int *pte, ix; int s; if (pa < vm_first_phys || pa >= vm_last_phys) return(FALSE); pv = pa_to_pvh(pa); s = splimp(); /* * Check saved info first */ if (pmap_attributes[pa_index(pa)] & bit) { splx(s); return(TRUE); } /* * Not found, check current mappings returning * immediately if found. */ if (pv->pv_pmap != NULL) { for (; pv; pv = pv->pv_next) { pte = (int *) pmap_pte(pv->pv_pmap, pv->pv_va); ix = 0; do { if (*pte++ & bit) { splx(s); return(TRUE); } } while (++ix != i386pagesperpage); } } splx(s); return(FALSE); } pmap_changebit(pa, bit, setem) register vm_offset_t pa; int bit; boolean_t setem; { register pv_entry_t pv; register int *pte, npte, ix; vm_offset_t va; int s; boolean_t firstpage = TRUE; #ifdef DEBUG if (pmapdebug & PDB_BITS) printf("pmap_changebit(%x, %x, %s)", pa, bit, setem ? "set" : "clear"); #endif if (pa < vm_first_phys || pa >= vm_last_phys) return; pv = pa_to_pvh(pa); s = splimp(); /* * Clear saved attributes (modify, reference) */ if (!setem) pmap_attributes[pa_index(pa)] &= ~bit; /* * Loop over all current mappings setting/clearing as appropos * If setting RO do we need to clear the VAC? */ if (pv->pv_pmap != NULL) { #ifdef DEBUG int toflush = 0; #endif for (; pv; pv = pv->pv_next) { #ifdef DEBUG toflush |= (pv->pv_pmap == kernel_pmap) ? 
2 : 1; #endif va = pv->pv_va; /* * XXX don't write protect pager mappings */ if (bit == PG_RO) { extern vm_offset_t pager_sva, pager_eva; if (va >= pager_sva && va < pager_eva) continue; } pte = (int *) pmap_pte(pv->pv_pmap, va); ix = 0; do { if (setem) npte = *pte | bit; else npte = *pte & ~bit; if (*pte != npte) { *pte = npte; /*TBIS(va);*/ } va += NBPG; pte++; } while (++ix != i386pagesperpage); if (curproc && pv->pv_pmap == &curproc->p_vmspace->vm_pmap) pmap_activate(pv->pv_pmap, (struct pcb *)curproc->p_addr); } #ifdef somethinglikethis if (setem && bit == PG_RO && (pmapvacflush & PVF_PROTECT)) { if ((pmapvacflush & PVF_TOTAL) || toflush == 3) DCIA(); else if (toflush == 2) DCIS(); else DCIU(); } #endif } splx(s); } #ifdef DEBUG pmap_pvdump(pa) vm_offset_t pa; { register pv_entry_t pv; printf("pa %x", pa); for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) { printf(" -> pmap %x, va %x, flags %x", pv->pv_pmap, pv->pv_va, pv->pv_flags); pads(pv->pv_pmap); } printf(" "); } #ifdef notyet pmap_check_wiring(str, va) char *str; vm_offset_t va; { vm_map_entry_t entry; register int count, *pte; va = trunc_page(va); if (!pmap_pde_v(pmap_pde(kernel_pmap, va)) || !pmap_pte_v(pmap_pte(kernel_pmap, va))) return; if (!vm_map_lookup_entry(pt_map, va, &entry)) { pg("wired_check: entry for %x not found\n", va); return; } count = 0; for (pte = (int *)va; pte < (int *)(va+PAGE_SIZE); pte++) if (*pte) count++; if (entry->wired_count != count) pg("*%s*: %x: w%d/a%d\n", str, va, entry->wired_count, count); } #endif /* print address space of pmap*/ pads(pm) pmap_t pm; { unsigned va, i, j; struct pte *ptep; if(pm == kernel_pmap) return; for (i = 0; i < 1024; i++) if(pm->pm_pdir[i].pd_v) for (j = 0; j < 1024 ; j++) { - va = (i<<22)+(j<<12); + va = (i< UPT_MAX_ADDRESS) continue; ptep = pmap_pte(pm, va); if(pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *)ptep); } ; } #endif Index: head/sys/i386/i386/trap.c =================================================================== --- head/sys/i386/i386/trap.c (revision 607) +++ head/sys/i386/i386/trap.c (revision 608) @@ -1,613 +1,585 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)trap.c 7.4 (Berkeley) 5/13/91 - * - * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE - * -------------------- ----- ---------------------- - * CURRENT PATCH LEVEL: 1 00137 - * -------------------- ----- ---------------------- - * - * 08 Apr 93 Bruce Evans Several VM system fixes - * Paul Kranenburg Add counter for vmstat + * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 + * $Id$ */ -static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/trap.c,v 1.2 1993/07/27 10:52:20 davidg Exp $"; /* * 386 Trap and System call handleing */ +#include "npx.h" #include "machine/cpu.h" #include "machine/psl.h" #include "machine/reg.h" #include "param.h" #include "systm.h" #include "proc.h" #include "user.h" #include "acct.h" #include "kernel.h" #ifdef KTRACE #include "ktrace.h" #endif #include "vm/vm_param.h" #include "vm/pmap.h" #include "vm/vm_map.h" #include "sys/vmmeter.h" #include "machine/trap.h" #ifdef __GNUC__ /* * The "r" contraint could be "rm" except for fatal bugs in gas. As usual, * we omit the size from the mov instruction to avoid nonfatal bugs in gas. */ #define read_gs() ({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; }) #define write_gs(gs) __asm("mov %0,%%gs" : : "r" ((u_short) gs)) #else /* not __GNUC__ */ u_short read_gs __P((void)); void write_gs __P((/* promoted u_short */ int gs)); #endif /* __GNUC__ */ struct sysent sysent[]; int nsysent; int dostacklimits; unsigned rcr2(); extern short cpl; /* * trap(frame): * Exception, fault, and trap interface to BSD kernel. This * common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. Note that the * effect is as if the arguments were passed call by reference. */ /*ARGSUSED*/ trap(frame) struct trapframe frame; { register int i; register struct proc *p = curproc; struct timeval syst; int ucode, type, code, eva; frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */ type = frame.tf_trapno; #include "ddb.h" #if NDDB > 0 if (curpcb && curpcb->pcb_onfault) { if (frame.tf_trapno == T_BPTFLT || frame.tf_trapno == T_TRCTRAP) if (kdb_trap (type, 0, &frame)) return; } #endif /*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x", frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs, rcr2(), frame.tf_esp);*/ if(curpcb == 0 || curproc == 0) goto we_re_toast; if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) { extern int _udatasel; if (read_gs() != (u_short) _udatasel) /* * Some user has corrupted %gs but we depend on it in * copyout() etc. Fix it up and retry. * * (We don't preserve %fs or %gs, so users can change * them to either _ucodesel, _udatasel or a not-present * selector, possibly ORed with 0 to 3, making them * volatile for other users. Not preserving them saves * time and doesn't lose functionality or open security * holes.) 
*/ write_gs(_udatasel); else copyfault: frame.tf_eip = (int)curpcb->pcb_onfault; return; } syst = p->p_stime; if (ISPL(frame.tf_cs) == SEL_UPL) { type |= T_USER; p->p_regs = (int *)&frame; curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ } ucode=0; eva = rcr2(); code = frame.tf_err; switch (type) { default: we_re_toast: #ifdef KDB if (kdb_trap(&psl)) return; #endif #if NDDB > 0 if (kdb_trap (type, 0, &frame)) return; #endif printf("trap type %d code = %x eip = %x cs = %x eflags = %x ", frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags); eva = rcr2(); printf("cr2 %x cpl %x\n", eva, cpl); /* type &= ~T_USER; */ /* XXX what the hell is this */ panic("trap"); /*NOTREACHED*/ case T_SEGNPFLT|T_USER: case T_STKFLT|T_USER: case T_PROTFLT|T_USER: /* protection fault */ ucode = code + BUS_SEGM_FAULT ; i = SIGBUS; break; case T_PRIVINFLT|T_USER: /* privileged instruction fault */ case T_RESADFLT|T_USER: /* reserved addressing fault */ case T_RESOPFLT|T_USER: /* reserved operand fault */ case T_FPOPFLT|T_USER: /* coprocessor operand fault */ ucode = type &~ T_USER; i = SIGILL; break; case T_ASTFLT|T_USER: /* Allow process switch */ astoff(); cnt.v_soft++; if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) { addupc(frame.tf_eip, &p->p_stats->p_prof, 1); p->p_flag &= ~SOWEUPC; } goto out; case T_DNA|T_USER: -#ifdef NPX +#if NNPX > 0 /* if a transparent fault (due to context switch "late") */ if (npxdna()) return; -#endif +#endif /* NNPX > 0 */ #ifdef MATH_EMULATE i = math_emulate(&frame); if (i == 0) return; #else /* MATH_EMULTATE */ panic("trap: math emulation necessary!"); #endif /* MATH_EMULTATE */ ucode = FPE_FPU_NP_TRAP; break; case T_BOUND|T_USER: ucode = FPE_SUBRNG_TRAP; i = SIGFPE; break; case T_OFLOW|T_USER: ucode = FPE_INTOVF_TRAP; i = SIGFPE; break; case T_DIVIDE|T_USER: ucode = FPE_INTDIV_TRAP; i = SIGFPE; break; case T_ARITHTRAP|T_USER: ucode = code; i = SIGFPE; break; case T_PAGEFLT: /* allow page faults in kernel mode */ #if 0 /* XXX - check only applies to 386's and 486's with WP off */ if (code & PGEX_P) goto we_re_toast; #endif /* fall into */ case T_PAGEFLT|T_USER: /* page fault */ { register vm_offset_t va; register struct vmspace *vm = p->p_vmspace; register vm_map_t map; int rv; vm_prot_t ftype; extern vm_map_t kernel_map; unsigned nss,v; va = trunc_page((vm_offset_t)eva); - /* - * Avoid even looking at pde_v(va) for high va's. va's - * above VM_MAX_KERNEL_ADDRESS don't correspond to normal - * PDE's (half of them correspond to APDEpde and half to - * an unmapped kernel PDE). va's betweeen 0xFEC00000 and - * VM_MAX_KERNEL_ADDRESS correspond to unmapped kernel PDE's - * (XXX - why are only 3 initialized when 6 are required to - * reach VM_MAX_KERNEL_ADDRESS?). Faulting in an unmapped - * kernel page table would give inconsistent PTD's. - * - * XXX - faulting in unmapped page tables wastes a page if - * va turns out to be invalid. - * - * XXX - should "kernel address space" cover the kernel page - * tables? Might have same problem with PDEpde as with - * APDEpde (or there may be no problem with APDEpde). - */ - if (va > 0xFEBFF000) { - rv = KERN_FAILURE; /* becomes SIGBUS */ - goto nogo; - } /* * It is only a kernel address space fault iff: * 1. (type & T_USER) == 0 and * 2. pcb_onfault not set or * 3. pcb_onfault set but supervisor space fault * The last can occur during an exec() copyin where the * argument space is lazy-allocated. 
*/ if (type == T_PAGEFLT && va >= KERNBASE) map = kernel_map; else map = &vm->vm_map; if (code & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; #ifdef DEBUG if (map == kernel_map && va == 0) { printf("trap: bad kernel access at %x\n", va); goto we_re_toast; } #endif /* * XXX: rude hack to make stack limits "work" */ nss = 0; if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map && dostacklimits) { nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ - (unsigned)va)); if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) { /*pg("trap rlimit %d, maxsaddr %x va %x ", nss, vm->vm_maxsaddr, va);*/ rv = KERN_FAILURE; goto nogo; } } /* check if page table is mapped, if not, fault it first */ #define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v) if (!pde_v(va)) { v = trunc_page(vtopte(va)); rv = vm_fault(map, v, ftype, FALSE); if (rv != KERN_SUCCESS) goto nogo; /* check if page table fault, increment wiring */ vm_map_pageable(map, v, round_page(v+1), FALSE); } else v=0; rv = vm_fault(map, va, ftype, FALSE); if (rv == KERN_SUCCESS) { /* * XXX: continuation of rude stack hack */ if (nss > vm->vm_ssize) vm->vm_ssize = nss; va = trunc_page(vtopte(va)); /* for page table, increment wiring as long as not a page table fault as well */ if (!v && type != T_PAGEFLT) vm_map_pageable(map, va, round_page(va+1), FALSE); if (type == T_PAGEFLT) return; goto out; } nogo: if (type == T_PAGEFLT) { if (curpcb->pcb_onfault) goto copyfault; printf("vm_fault(%x, %x, %x, 0) -> %x\n", map, va, ftype, rv); printf(" type %x, code %x\n", type, code); goto we_re_toast; } i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; break; } #if NDDB == 0 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ frame.tf_eflags &= ~PSL_T; /* Q: how do we turn it on again? */ return; #endif case T_BPTFLT|T_USER: /* bpt instruction fault */ case T_TRCTRAP|T_USER: /* trace trap */ frame.tf_eflags &= ~PSL_T; i = SIGTRAP; break; #include "isa.h" #if NISA > 0 case T_NMI: case T_NMI|T_USER: #if NDDB > 0 /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* machine/parity/power fail/"kitchen sink" faults */ if(isa_nmi(code) == 0) return; else goto we_re_toast; #endif } trapsignal(p, i, ucode); if ((type & T_USER) == 0) return; out: while (i = CURSIG(p)) psig(i); p->p_pri = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrq ourselves but before we * swtch()'ed, we might not be on the queue indicated by * our priority. */ (void) splclock(); setrq(p); p->p_stats->p_ru.ru_nivcsw++; swtch(); (void) splnone(); while (i = CURSIG(p)) psig(i); } if (p->p_stats->p_prof.pr_scale) { int ticks; struct timeval *tv = &p->p_stime; ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); if (ticks) { #ifdef PROFTIMER extern int profscale; addupc(frame.tf_eip, &p->p_stats->p_prof, ticks * profscale); #else addupc(frame.tf_eip, &p->p_stats->p_prof, ticks); #endif } } curpri = p->p_pri; curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ } /* * Compensate for 386 brain damage (missing URKR). * This is a little simpler than the pagefault handler in trap() because * it the page tables have already been faulted in and high addresses * are thrown out early for other reasons. 
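The "rude stack hack" in the page-fault path above (repeated in trapwrite() below) estimates how far the stack must grow to cover the faulting address and compares that against RLIMIT_STACK. A standalone sketch of just that arithmetic, leaving out the clrnd() cluster rounding; it is not part of the original source, and the MAXSSIZ and address values are assumptions of the example.

#include <stdio.h>

#define PAGE_SHIFT	12
#define MAXSSIZ		(8*1024*1024)	/* maximum stack size (value assumed) */

/* bytes-to-pages, rounding up, as btoc() does */
#define btoc(x)		(((unsigned)(x) + ((1 << PAGE_SHIFT) - 1)) >> PAGE_SHIFT)

/*
 * A fault at 'va' inside the stack region implies the stack must reach
 * down to it, i.e. cover (maxsaddr + MAXSSIZ - va) bytes; return that in
 * pages so the caller can compare it against the stack rlimit.
 */
static unsigned
sketch_stack_pages_needed(unsigned maxsaddr, unsigned va)
{
	return (btoc(maxsaddr + MAXSSIZ - va));
}

int
main(void)
{
	unsigned stacktop = 0xFDBFE000;			/* illustrative top of user stack */
	unsigned maxsaddr = stacktop - MAXSSIZ;		/* lowest possible stack address */
	unsigned va = stacktop - 0x3000;		/* fault three pages below the top */

	printf("stack needs %u pages\n", sketch_stack_pages_needed(maxsaddr, va));
	return (0);
}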
*/ int trapwrite(addr) unsigned addr; { unsigned nss; struct proc *p; vm_offset_t va; struct vmspace *vm; va = trunc_page((vm_offset_t)addr); /* * XXX - MAX is END. Changed > to >= for temp. fix. */ if (va >= VM_MAXUSER_ADDRESS) return (1); /* * XXX: rude stack hack adapted from trap(). */ nss = 0; p = curproc; vm = p->p_vmspace; if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) { nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ - (unsigned)va)); if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) return (1); } if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE) != KERN_SUCCESS) return (1); /* * XXX: continuation of rude stack hack */ if (nss > vm->vm_ssize) vm->vm_ssize = nss; return (0); } /* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ /*ARGSUSED*/ syscall(frame) volatile struct syscframe frame; { register int *locr0 = ((int *)&frame); register caddr_t params; register int i; register struct sysent *callp; register struct proc *p = curproc; struct timeval syst; int error, opc; int args[8], rval[2]; int code; #ifdef lint r0 = 0; r0 = r0; r1 = 0; r1 = r1; #endif syst = p->p_stime; if (ISPL(frame.sf_cs) != SEL_UPL) panic("syscall"); code = frame.sf_eax; curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ p->p_regs = (int *)&frame; params = (caddr_t)frame.sf_esp + sizeof (int) ; /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. */ opc = frame.sf_eip - 7; callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; if (callp == sysent) { i = fuword(params); params += sizeof (int); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; } if ((i = callp->sy_narg * sizeof (int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { frame.sf_eax = error; frame.sf_eflags |= PSL_C; /* carry bit */ #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); #endif goto done; } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); #endif rval[0] = 0; rval[1] = frame.sf_edx; /*pg("%d. s %d\n", p->p_pid, code);*/ error = (*callp->sy_call)(p, args, rval); if (error == ERESTART) frame.sf_eip = opc; else if (error != EJUSTRETURN) { if (error) { /*pg("error %d", error);*/ frame.sf_eax = error; frame.sf_eflags |= PSL_C; /* carry bit */ } else { frame.sf_eax = rval[0]; frame.sf_edx = rval[1]; frame.sf_eflags &= ~PSL_C; /* carry bit */ } } /* else if (error == EJUSTRETURN) */ /* nothing to do */ done: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; while (i = CURSIG(p)) psig(i); p->p_pri = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrq ourselves but before we * swtch()'ed, we might not be on the queue indicated by * our priority. 
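syscall() above reports results to user mode through a small register convention: errors go back in %eax with the carry flag set, success clears carry and returns rval[0]/rval[1] in %eax/%edx, and ERESTART rewinds %eip over the 7-byte lcall so the call is retried. A standalone restatement of that convention follows; it is not part of the original source, and the ERESTART/EJUSTRETURN values and the frame layout are assumptions of this sketch.

#include <stdio.h>

#define PSL_C		0x00000001	/* carry flag in eflags */
#define ERESTART	(-1)		/* kernel-internal: restart the call (value assumed) */
#define EJUSTRETURN	(-2)		/* kernel-internal: leave frame untouched (value assumed) */
#define LCALL_LEN	7		/* length of the lcall $X,$y instruction, per the comment above */

struct sketch_frame {			/* only the fields the convention touches */
	unsigned eip, eflags, eax, edx;
};

/* Apply the user-mode return convention described above to a saved frame. */
static void
sketch_syscall_return(struct sketch_frame *f, int error, int rval0, int rval1)
{
	if (error == ERESTART) {
		f->eip -= LCALL_LEN;		/* re-execute the lcall */
	} else if (error != EJUSTRETURN) {
		if (error) {
			f->eax = error;		/* errno back to user mode */
			f->eflags |= PSL_C;
		} else {
			f->eax = rval0;
			f->edx = rval1;
			f->eflags &= ~PSL_C;
		}
	}
}

int
main(void)
{
	struct sketch_frame f = { 0x1007, 0, 0, 0 };	/* as if the lcall sat at 0x1000 */

	sketch_syscall_return(&f, 0, 42, 0);
	printf("eax=%u carry=%u\n", f.eax, f.eflags & PSL_C);
	return (0);
}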
*/ (void) splclock(); setrq(p); p->p_stats->p_ru.ru_nivcsw++; swtch(); (void) splnone(); while (i = CURSIG(p)) psig(i); } if (p->p_stats->p_prof.pr_scale) { int ticks; struct timeval *tv = &p->p_stime; ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); if (ticks) { #ifdef PROFTIMER extern int profscale; addupc(frame.sf_eip, &p->p_stats->p_prof, ticks * profscale); #else addupc(frame.sf_eip, &p->p_stats->p_prof, ticks); #endif } } curpri = p->p_pri; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, code, error, rval[0]); #endif #ifdef DIAGNOSTICx { extern int _udatasel, _ucodesel; if (frame.sf_ss != _udatasel) printf("ss %x call %d\n", frame.sf_ss, code); if ((frame.sf_cs&0xffff) != _ucodesel) printf("cs %x call %d\n", frame.sf_cs, code); if (frame.sf_eip > VM_MAXUSER_ADDRESS) { printf("eip %x call %d\n", frame.sf_eip, code); frame.sf_eip = 0; } } #endif } Index: head/sys/i386/i386/vm_machdep.c =================================================================== --- head/sys/i386/i386/vm_machdep.c (revision 607) +++ head/sys/i386/i386/vm_machdep.c (revision 608) @@ -1,425 +1,415 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 - * - * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE - * -------------------- ----- ---------------------- - * CURRENT PATCH LEVEL: 1 00154 - * -------------------- ----- ---------------------- - * - * 20 Apr 93 Bruce Evans New npx-0.5 code - * - */ - -/* + * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ + * $Id$ */ -static char rcsid[] = "$Header: /usr/chroot/CVS/386BSD/src/sys/i386/i386/vm_machdep.c,v 1.3 1993/07/27 10:52:21 davidg Exp $"; +#include "npx.h" #include "param.h" #include "systm.h" #include "proc.h" #include "malloc.h" #include "buf.h" #include "user.h" #include "../include/cpu.h" #include "vm/vm.h" #include "vm/vm_kern.h" /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the kernel stack and pcb, making the child * ready to run, and marking it so that it can return differently * than the parent. Returns 1 in the child process, 0 in the parent. * We currently double-map the user area so that the stack is at the same * address in each process; in the future we will probably relocate * the frame pointers on the stack after copying. */ cpu_fork(p1, p2) register struct proc *p1, *p2; { register struct user *up = p2->p_addr; int foo, offset, addr, i; extern char kstack[]; extern int mvesp(); /* * Copy pcb and stack from proc p1 to p2. * We do this as cheaply as possible, copying only the active * part of the stack. The stack and pcb need to agree; * this is tricky, as the final pcb is constructed by savectx, * but its frame isn't yet on the stack when the stack is copied. * swtch compensates for this when the child eventually runs. * This should be done differently, with a single call * that copies and updates the pcb+stack, * replacing the bcopy and savectx. */ p2->p_addr->u_pcb = p1->p_addr->u_pcb; offset = mvesp() - (int)kstack; bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset, (unsigned) ctob(UPAGES) - offset); p2->p_regs = p1->p_regs; /* * Wire top of address space of child to it's kstack. * First, fault in a page of pte's to map it. */ addr = trunc_page((u_int)vtopte(kstack)); vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE); for (i=0; i < UPAGES; i++) pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG, pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG), /* * The user area has to be mapped writable because * it contains the kernel stack (when CR0_WP is on * on a 486 there is no user-read/kernel-write * mode). It is protected from user mode access * by the segment limits. */ VM_PROT_READ|VM_PROT_WRITE, TRUE); pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb); /* * * Arrange for a non-local goto when the new process * is started, to resume here, returning nonzero from setjmp. */ if (savectx(up, 1)) { /* * Return 1 in child. */ return (1); } return (0); } #ifdef notyet /* * cpu_exit is called as the last action during exit. * * We change to an inactive address space and a "safe" stack, * passing thru an argument to the new stack. Now, safely isolated * from the resources we're shedding, we release the address space * and any remaining machine-dependent resources, including the * memory for the user structure and kernel stack. * * Next, we assign a dummy context to be written over by swtch, * calling it to send this process off to oblivion. * [The nullpcb allows us to minimize cost in swtch() by not having * a special case]. 
*/ struct proc *swtch_to_inactive(); volatile void cpu_exit(p) register struct proc *p; { static struct pcb nullpcb; /* pcb to overwrite on last swtch */ -#ifdef NPX +#if NNPX > 0 npxexit(p); -#endif +#endif /* NNPX */ /* move to inactive space and stack, passing arg accross */ p = swtch_to_inactive(p); /* drop per-process resources */ vmspace_free(p->p_vmspace); kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); p->p_addr = (struct user *) &nullpcb; splclock(); swtch(); /* NOTREACHED */ } #else void cpu_exit(p) register struct proc *p; { -#ifdef NPX +#if NNPX > 0 npxexit(p); -#endif +#endif /* NNPX */ splclock(); swtch(); /* * This is to shutup the compiler, and if swtch() failed I suppose * this would be a good thing. This keeps gcc happy because panic * is a volatile void function as well. */ panic("cpu_exit"); } cpu_wait(p) struct proc *p; { /* drop per-process resources */ vmspace_free(p->p_vmspace); kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); } #endif /* * Set a red zone in the kernel stack after the u. area. */ setredzone(pte, vaddr) u_short *pte; caddr_t vaddr; { /* eventually do this by setting up an expand-down stack segment for ss0: selector, allowing stack access down to top of u. this means though that protection violations need to be handled thru a double fault exception that must do an integral task switch to a known good context, within which a dump can be taken. a sensible scheme might be to save the initial context used by sched (that has physical memory mapped 1:1 at bottom) and take the dump while still in mapped mode */ } /* * Move pages from one kernel virtual address to another. * Both addresses are assumed to reside in the Sysmap, * and size must be a multiple of CLSIZE. */ pagemove(from, to, size) register caddr_t from, to; int size; { register struct pte *fpte, *tpte; if (size % CLBYTES) panic("pagemove"); fpte = kvtopte(from); tpte = kvtopte(to); while (size > 0) { *tpte++ = *fpte; *(int *)fpte++ = 0; from += NBPG; to += NBPG; size -= NBPG; } tlbflush(); } /* * Convert kernel VA to physical address */ kvtop(addr) register caddr_t addr; { vm_offset_t va; va = pmap_extract(kernel_pmap, (vm_offset_t)addr); if (va == 0) panic("kvtop: zero page frame"); return((int)va); } #ifdef notdef /* * The probe[rw] routines should probably be redone in assembler * for efficiency. */ prober(addr) register u_int addr; { register int page; register struct proc *p; if (addr >= USRSTACK) return(0); p = u.u_procp; page = btop(addr); if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize)) return(1); return(0); } probew(addr) register u_int addr; { register int page; register struct proc *p; if (addr >= USRSTACK) return(0); p = u.u_procp; page = btop(addr); if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize)) return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW); return(0); } /* * NB: assumes a physically contiguous kernel page table * (makes life a LOT simpler). 
*/ kernacc(addr, count, rw) register u_int addr; int count, rw; { register struct pde *pde; register struct pte *pte; register int ix, cnt; extern long Syssize; if (count <= 0) return(0); pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG); ix = (addr & PD_MASK) >> PD_SHIFT; cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT; cnt -= ix; for (pde += ix; cnt; cnt--, pde++) if (pde->pd_v == 0) return(0); - ix = btop(addr-0xfe000000); - cnt = btop(addr-0xfe000000+count+NBPG-1); + ix = btop(addr-KERNBASE); + cnt = btop(addr-KERNBASE+count+NBPG-1); if (cnt > (int)&Syssize) return(0); cnt -= ix; for (pte = &Sysmap[ix]; cnt; cnt--, pte++) if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/) return(0); return(1); } useracc(addr, count, rw) register u_int addr; int count, rw; { register int (*func)(); register u_int addr2; extern int prober(), probew(); if (count <= 0) return(0); addr2 = addr; addr += count; func = (rw == B_READ) ? prober : probew; do { if ((*func)(addr2) == 0) return(0); addr2 = (addr2 + NBPG) & ~PGOFSET; } while (addr2 < addr); return(1); } #endif extern vm_map_t phys_map; /* * Map an IO request into kernel virtual address space. Requests fall into * one of five catagories: * * B_PHYS|B_UAREA: User u-area swap. * Address is relative to start of u-area (p_addr). * B_PHYS|B_PAGET: User page table swap. * Address is a kernel VA in usrpt (Usrptmap). * B_PHYS|B_DIRTY: Dirty page push. * Address is a VA in proc2's address space. * B_PHYS|B_PGIN: Kernel pagein of user pages. * Address is VA in user's address space. * B_PHYS: User "raw" IO request. * Address is VA in user's address space. * * All requests are (re)mapped into kernel VA space via the useriomap * (a name with only slightly more meaning than "kernelmap") */ vmapbuf(bp) register struct buf *bp; { register int npf; register caddr_t addr; register long flags = bp->b_flags; struct proc *p; int off; vm_offset_t kva; register vm_offset_t pa; if ((flags & B_PHYS) == 0) panic("vmapbuf"); addr = bp->b_saveaddr = bp->b_un.b_addr; off = (int)addr & PGOFSET; p = bp->b_proc; npf = btoc(round_page(bp->b_bcount + off)); kva = kmem_alloc_wait(phys_map, ctob(npf)); bp->b_un.b_addr = (caddr_t) (kva + off); while (npf--) { pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr); if (pa == 0) panic("vmapbuf: null page frame"); pmap_enter(vm_map_pmap(phys_map), kva, trunc_page(pa), VM_PROT_READ|VM_PROT_WRITE, TRUE); addr += PAGE_SIZE; kva += PAGE_SIZE; } } /* * Free the io map PTEs associated with this IO operation. * We also invalidate the TLB entries and restore the original b_addr. */ vunmapbuf(bp) register struct buf *bp; { register int npf; register caddr_t addr = bp->b_un.b_addr; vm_offset_t kva; if ((bp->b_flags & B_PHYS) == 0) panic("vunmapbuf"); npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET))); kva = (vm_offset_t)((int)addr & ~PGOFSET); kmem_free_wakeup(phys_map, kva, ctob(npf)); bp->b_un.b_addr = bp->b_saveaddr; bp->b_saveaddr = NULL; } /* * Force reset the processor by invalidating the entire address space! */ cpu_reset() { /* force a shutdown by unmapping entire address space ! */ bzero((caddr_t) PTD, NBPG); /* "good night, sweet prince .... " */ tlbflush(); /* NOTREACHED */ } Index: head/sys/kern/subr_trap.c =================================================================== --- head/sys/kern/subr_trap.c (revision 607) +++ head/sys/kern/subr_trap.c (revision 608) @@ -1,613 +1,585 @@ /*- * Copyright (c) 1990 The Regents of the University of California. 
* All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)trap.c 7.4 (Berkeley) 5/13/91 - * - * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE - * -------------------- ----- ---------------------- - * CURRENT PATCH LEVEL: 1 00137 - * -------------------- ----- ---------------------- - * - * 08 Apr 93 Bruce Evans Several VM system fixes - * Paul Kranenburg Add counter for vmstat + * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 + * $Id$ */ -static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/trap.c,v 1.2 1993/07/27 10:52:20 davidg Exp $"; /* * 386 Trap and System call handleing */ +#include "npx.h" #include "machine/cpu.h" #include "machine/psl.h" #include "machine/reg.h" #include "param.h" #include "systm.h" #include "proc.h" #include "user.h" #include "acct.h" #include "kernel.h" #ifdef KTRACE #include "ktrace.h" #endif #include "vm/vm_param.h" #include "vm/pmap.h" #include "vm/vm_map.h" #include "sys/vmmeter.h" #include "machine/trap.h" #ifdef __GNUC__ /* * The "r" contraint could be "rm" except for fatal bugs in gas. As usual, * we omit the size from the mov instruction to avoid nonfatal bugs in gas. */ #define read_gs() ({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; }) #define write_gs(gs) __asm("mov %0,%%gs" : : "r" ((u_short) gs)) #else /* not __GNUC__ */ u_short read_gs __P((void)); void write_gs __P((/* promoted u_short */ int gs)); #endif /* __GNUC__ */ struct sysent sysent[]; int nsysent; int dostacklimits; unsigned rcr2(); extern short cpl; /* * trap(frame): * Exception, fault, and trap interface to BSD kernel. This * common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. 
Note that the * effect is as if the arguments were passed call by reference. */ /*ARGSUSED*/ trap(frame) struct trapframe frame; { register int i; register struct proc *p = curproc; struct timeval syst; int ucode, type, code, eva; frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */ type = frame.tf_trapno; #include "ddb.h" #if NDDB > 0 if (curpcb && curpcb->pcb_onfault) { if (frame.tf_trapno == T_BPTFLT || frame.tf_trapno == T_TRCTRAP) if (kdb_trap (type, 0, &frame)) return; } #endif /*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x", frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs, rcr2(), frame.tf_esp);*/ if(curpcb == 0 || curproc == 0) goto we_re_toast; if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) { extern int _udatasel; if (read_gs() != (u_short) _udatasel) /* * Some user has corrupted %gs but we depend on it in * copyout() etc. Fix it up and retry. * * (We don't preserve %fs or %gs, so users can change * them to either _ucodesel, _udatasel or a not-present * selector, possibly ORed with 0 to 3, making them * volatile for other users. Not preserving them saves * time and doesn't lose functionality or open security * holes.) */ write_gs(_udatasel); else copyfault: frame.tf_eip = (int)curpcb->pcb_onfault; return; } syst = p->p_stime; if (ISPL(frame.tf_cs) == SEL_UPL) { type |= T_USER; p->p_regs = (int *)&frame; curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ } ucode=0; eva = rcr2(); code = frame.tf_err; switch (type) { default: we_re_toast: #ifdef KDB if (kdb_trap(&psl)) return; #endif #if NDDB > 0 if (kdb_trap (type, 0, &frame)) return; #endif printf("trap type %d code = %x eip = %x cs = %x eflags = %x ", frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags); eva = rcr2(); printf("cr2 %x cpl %x\n", eva, cpl); /* type &= ~T_USER; */ /* XXX what the hell is this */ panic("trap"); /*NOTREACHED*/ case T_SEGNPFLT|T_USER: case T_STKFLT|T_USER: case T_PROTFLT|T_USER: /* protection fault */ ucode = code + BUS_SEGM_FAULT ; i = SIGBUS; break; case T_PRIVINFLT|T_USER: /* privileged instruction fault */ case T_RESADFLT|T_USER: /* reserved addressing fault */ case T_RESOPFLT|T_USER: /* reserved operand fault */ case T_FPOPFLT|T_USER: /* coprocessor operand fault */ ucode = type &~ T_USER; i = SIGILL; break; case T_ASTFLT|T_USER: /* Allow process switch */ astoff(); cnt.v_soft++; if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) { addupc(frame.tf_eip, &p->p_stats->p_prof, 1); p->p_flag &= ~SOWEUPC; } goto out; case T_DNA|T_USER: -#ifdef NPX +#if NNPX > 0 /* if a transparent fault (due to context switch "late") */ if (npxdna()) return; -#endif +#endif /* NNPX > 0 */ #ifdef MATH_EMULATE i = math_emulate(&frame); if (i == 0) return; #else /* MATH_EMULTATE */ panic("trap: math emulation necessary!"); #endif /* MATH_EMULTATE */ ucode = FPE_FPU_NP_TRAP; break; case T_BOUND|T_USER: ucode = FPE_SUBRNG_TRAP; i = SIGFPE; break; case T_OFLOW|T_USER: ucode = FPE_INTOVF_TRAP; i = SIGFPE; break; case T_DIVIDE|T_USER: ucode = FPE_INTDIV_TRAP; i = SIGFPE; break; case T_ARITHTRAP|T_USER: ucode = code; i = SIGFPE; break; case T_PAGEFLT: /* allow page faults in kernel mode */ #if 0 /* XXX - check only applies to 386's and 486's with WP off */ if (code & PGEX_P) goto we_re_toast; #endif /* fall into */ case T_PAGEFLT|T_USER: /* page fault */ { register vm_offset_t va; register struct vmspace *vm = p->p_vmspace; register vm_map_t map; int rv; vm_prot_t ftype; extern vm_map_t kernel_map; unsigned nss,v; va = trunc_page((vm_offset_t)eva); - /* - * Avoid 
even looking at pde_v(va) for high va's. va's - * above VM_MAX_KERNEL_ADDRESS don't correspond to normal - * PDE's (half of them correspond to APDEpde and half to - * an unmapped kernel PDE). va's betweeen 0xFEC00000 and - * VM_MAX_KERNEL_ADDRESS correspond to unmapped kernel PDE's - * (XXX - why are only 3 initialized when 6 are required to - * reach VM_MAX_KERNEL_ADDRESS?). Faulting in an unmapped - * kernel page table would give inconsistent PTD's. - * - * XXX - faulting in unmapped page tables wastes a page if - * va turns out to be invalid. - * - * XXX - should "kernel address space" cover the kernel page - * tables? Might have same problem with PDEpde as with - * APDEpde (or there may be no problem with APDEpde). - */ - if (va > 0xFEBFF000) { - rv = KERN_FAILURE; /* becomes SIGBUS */ - goto nogo; - } /* * It is only a kernel address space fault iff: * 1. (type & T_USER) == 0 and * 2. pcb_onfault not set or * 3. pcb_onfault set but supervisor space fault * The last can occur during an exec() copyin where the * argument space is lazy-allocated. */ if (type == T_PAGEFLT && va >= KERNBASE) map = kernel_map; else map = &vm->vm_map; if (code & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; #ifdef DEBUG if (map == kernel_map && va == 0) { printf("trap: bad kernel access at %x\n", va); goto we_re_toast; } #endif /* * XXX: rude hack to make stack limits "work" */ nss = 0; if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map && dostacklimits) { nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ - (unsigned)va)); if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) { /*pg("trap rlimit %d, maxsaddr %x va %x ", nss, vm->vm_maxsaddr, va);*/ rv = KERN_FAILURE; goto nogo; } } /* check if page table is mapped, if not, fault it first */ #define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v) if (!pde_v(va)) { v = trunc_page(vtopte(va)); rv = vm_fault(map, v, ftype, FALSE); if (rv != KERN_SUCCESS) goto nogo; /* check if page table fault, increment wiring */ vm_map_pageable(map, v, round_page(v+1), FALSE); } else v=0; rv = vm_fault(map, va, ftype, FALSE); if (rv == KERN_SUCCESS) { /* * XXX: continuation of rude stack hack */ if (nss > vm->vm_ssize) vm->vm_ssize = nss; va = trunc_page(vtopte(va)); /* for page table, increment wiring as long as not a page table fault as well */ if (!v && type != T_PAGEFLT) vm_map_pageable(map, va, round_page(va+1), FALSE); if (type == T_PAGEFLT) return; goto out; } nogo: if (type == T_PAGEFLT) { if (curpcb->pcb_onfault) goto copyfault; printf("vm_fault(%x, %x, %x, 0) -> %x\n", map, va, ftype, rv); printf(" type %x, code %x\n", type, code); goto we_re_toast; } i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; break; } #if NDDB == 0 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ frame.tf_eflags &= ~PSL_T; /* Q: how do we turn it on again? */ return; #endif case T_BPTFLT|T_USER: /* bpt instruction fault */ case T_TRCTRAP|T_USER: /* trace trap */ frame.tf_eflags &= ~PSL_T; i = SIGTRAP; break; #include "isa.h" #if NISA > 0 case T_NMI: case T_NMI|T_USER: #if NDDB > 0 /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... 
going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* machine/parity/power fail/"kitchen sink" faults */ if(isa_nmi(code) == 0) return; else goto we_re_toast; #endif } trapsignal(p, i, ucode); if ((type & T_USER) == 0) return; out: while (i = CURSIG(p)) psig(i); p->p_pri = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrq ourselves but before we * swtch()'ed, we might not be on the queue indicated by * our priority. */ (void) splclock(); setrq(p); p->p_stats->p_ru.ru_nivcsw++; swtch(); (void) splnone(); while (i = CURSIG(p)) psig(i); } if (p->p_stats->p_prof.pr_scale) { int ticks; struct timeval *tv = &p->p_stime; ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); if (ticks) { #ifdef PROFTIMER extern int profscale; addupc(frame.tf_eip, &p->p_stats->p_prof, ticks * profscale); #else addupc(frame.tf_eip, &p->p_stats->p_prof, ticks); #endif } } curpri = p->p_pri; curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ } /* * Compensate for 386 brain damage (missing URKR). * This is a little simpler than the pagefault handler in trap() because * it the page tables have already been faulted in and high addresses * are thrown out early for other reasons. */ int trapwrite(addr) unsigned addr; { unsigned nss; struct proc *p; vm_offset_t va; struct vmspace *vm; va = trunc_page((vm_offset_t)addr); /* * XXX - MAX is END. Changed > to >= for temp. fix. */ if (va >= VM_MAXUSER_ADDRESS) return (1); /* * XXX: rude stack hack adapted from trap(). */ nss = 0; p = curproc; vm = p->p_vmspace; if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) { nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ - (unsigned)va)); if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) return (1); } if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE) != KERN_SUCCESS) return (1); /* * XXX: continuation of rude stack hack */ if (nss > vm->vm_ssize) vm->vm_ssize = nss; return (0); } /* * syscall(frame): * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ /*ARGSUSED*/ syscall(frame) volatile struct syscframe frame; { register int *locr0 = ((int *)&frame); register caddr_t params; register int i; register struct sysent *callp; register struct proc *p = curproc; struct timeval syst; int error, opc; int args[8], rval[2]; int code; #ifdef lint r0 = 0; r0 = r0; r1 = 0; r1 = r1; #endif syst = p->p_stime; if (ISPL(frame.sf_cs) != SEL_UPL) panic("syscall"); code = frame.sf_eax; curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ p->p_regs = (int *)&frame; params = (caddr_t)frame.sf_esp + sizeof (int) ; /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. */ opc = frame.sf_eip - 7; callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; if (callp == sysent) { i = fuword(params); params += sizeof (int); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; } if ((i = callp->sy_narg * sizeof (int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { frame.sf_eax = error; frame.sf_eflags |= PSL_C; /* carry bit */ #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); #endif goto done; } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); #endif rval[0] = 0; rval[1] = frame.sf_edx; /*pg("%d. 
s %d\n", p->p_pid, code);*/ error = (*callp->sy_call)(p, args, rval); if (error == ERESTART) frame.sf_eip = opc; else if (error != EJUSTRETURN) { if (error) { /*pg("error %d", error);*/ frame.sf_eax = error; frame.sf_eflags |= PSL_C; /* carry bit */ } else { frame.sf_eax = rval[0]; frame.sf_edx = rval[1]; frame.sf_eflags &= ~PSL_C; /* carry bit */ } } /* else if (error == EJUSTRETURN) */ /* nothing to do */ done: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; while (i = CURSIG(p)) psig(i); p->p_pri = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrq ourselves but before we * swtch()'ed, we might not be on the queue indicated by * our priority. */ (void) splclock(); setrq(p); p->p_stats->p_ru.ru_nivcsw++; swtch(); (void) splnone(); while (i = CURSIG(p)) psig(i); } if (p->p_stats->p_prof.pr_scale) { int ticks; struct timeval *tv = &p->p_stime; ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); if (ticks) { #ifdef PROFTIMER extern int profscale; addupc(frame.sf_eip, &p->p_stats->p_prof, ticks * profscale); #else addupc(frame.sf_eip, &p->p_stats->p_prof, ticks); #endif } } curpri = p->p_pri; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, code, error, rval[0]); #endif #ifdef DIAGNOSTICx { extern int _udatasel, _ucodesel; if (frame.sf_ss != _udatasel) printf("ss %x call %d\n", frame.sf_ss, code); if ((frame.sf_cs&0xffff) != _ucodesel) printf("cs %x call %d\n", frame.sf_cs, code); if (frame.sf_eip > VM_MAXUSER_ADDRESS) { printf("eip %x call %d\n", frame.sf_eip, code); frame.sf_eip = 0; } } #endif }