Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c (revision 24436) +++ head/sys/amd64/amd64/machdep.c (revision 24437) @@ -1,1577 +1,1575 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.232 1997/03/25 23:43:01 mpp Exp $ + * $Id: machdep.c,v 1.233 1997/03/28 12:37:44 joerg Exp $ */ #include "npx.h" #include "opt_sysvipc.h" #include "opt_ddb.h" #include "opt_bounce.h" #include "opt_machdep.h" #include "opt_perfmon.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SYSVSHM #include #endif #ifdef SYSVMSG #include #endif #ifdef SYSVSEM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #include #include extern void init386 __P((int first)); extern int ptrace_set_pc __P((struct proc *p, unsigned int addr)); extern int ptrace_single_step __P((struct proc *p)); extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) #ifdef 
BOUNCE_BUFFERS extern char *bouncememory; extern int maxbkva; #ifdef BOUNCEPAGES int bouncepages = BOUNCEPAGES; #else int bouncepages = 0; #endif #endif /* BOUNCE_BUFFERS */ extern int freebufspace; int msgbufmapped = 0; /* set when safe to use msgbuf */ int _udatasel, _ucodesel; u_int atdevbase; int physmem = 0; int cold = 1; static int sysctl_hw_physmem SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int sysctl_hw_usermem SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); int boothowto = 0, bootverbose = 0, Maxmem = 0; static int badpages = 0; long dumplo; extern int bootdev; vm_offset_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */ static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; extern struct linker_set netisr_set; #define offsetof(type, member) ((size_t)(&((type *)0)->member)) static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); earlysetcpuclass(); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. 
*/ if (badpages != 0) { int indx = 1; /* * XXX skip reporting ISA hole & unmanaged kernel memory */ if (phys_avail[0] == PAGE_SIZE) indx += 2; printf("Physical memory hole(s):\n"); for (; phys_avail[indx + 1] != 0; indx += 2) { int size = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size, size / PAGE_SIZE); } } /* * Quickly wire in netisrs. */ setup_netisrs(&netisr_set); /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif #ifdef SYSVSEM valloc(sema, struct semid_ds, seminfo.semmni); valloc(sem, struct sem, seminfo.semmns); /* This is pretty disgusting! 
*/ valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int)); #endif #ifdef SYSVMSG valloc(msgpool, char, msginfo.msgmax); valloc(msgmaps, struct msgmap, msginfo.msgseg); valloc(msghdrs, struct msg, msginfo.msgtql); valloc(msqids, struct msqid_ds, msginfo.msgmni); #endif if (nbuf == 0) { nbuf = 30; if( physmem > 1024) nbuf += min((physmem - 1024) / 8, 2048); } nswbuf = max(min(nbuf/4, 128), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); #ifdef BOUNCE_BUFFERS /* * If there is more than 16MB of memory, allocate some bounce buffers */ if (Maxmem > 4096) { if (bouncepages == 0) { bouncepages = 64; bouncepages += ((Maxmem - 4096) / 2048) * 32; } v = (caddr_t)((vm_offset_t)round_page(v)); valloc(bouncememory, char, bouncepages * PAGE_SIZE); } #endif /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); #ifdef BOUNCE_BUFFERS clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + maxbkva + pager_map_size, TRUE); io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE); #else clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE); #endif buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE), TRUE); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size, TRUE); exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*ARG_MAX), TRUE); - exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, - (16*PAGE_SIZE), TRUE); u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (maxproc*UPAGES*PAGE_SIZE), FALSE); /* * 
Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size, FALSE); } /* * Initialize callouts */ callfree = callout; for (i = 1; i < ncallout; i++) callout[i-1].c_next = &callout[i]; #if defined(USERCONFIG) #if defined(USERCONFIG_BOOT) if (1) { #else if (boothowto & RB_CONFIG) { #endif userconfig(); cninit(); /* the preferred console may have changed */ } #endif #ifdef BOUNCE_BUFFERS /* * init bounce buffers */ vm_bounce_init(); #endif printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } static void setup_netisrs(ls) struct linker_set *ls; { int i; const struct netisrtab *nit; for(i = 0; ls->ls_items[i]; i++) { nit = (const struct netisrtab *)ls->ls_items[i]; register_netisr(nit->nit_num, nit->nit_isr); } } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. 
*/
void
sendsig(catcher, sig, mask, code)
	sig_t catcher;		/* handler address; stored in sf_handler */
	int sig, mask;		/* signal number; mask saved in sc_mask */
	u_long code;		/* stored in sf_code */
{
	register struct proc *p = curproc;
	register int *regs;
	register struct sigframe *fp;	/* address the frame is copied out to */
	struct sigframe sf;		/* frame is assembled here first */
	struct sigacts *psp = p->p_sigacts;
	int oonstack;			/* SS_ONSTACK state on entry */

	regs = p->p_md.md_regs;
	oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
	/*
	 * Allocate and validate space for the signal handler context.
	 *
	 * The frame is placed at the end of the alternate signal stack when
	 * SAS_ALTSTACK is set, that stack is not already in use, and this
	 * signal is in ps_sigonstack; otherwise it goes immediately below
	 * the saved regs[tESP].
	 */
	if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
	    (psp->ps_sigonstack & sigmask(sig))) {
		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
	} else {
		fp = (struct sigframe *)regs[tESP] - 1;
	}

	/*
	 * grow() will return FALSE if the fp will not fit inside the stack
	 *	and the stack can not be grown. useracc will return FALSE
	 *	if access is denied.
	 */
	if ((grow(p, (int)fp) == FALSE) ||
	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		SIGACTION(p, SIGILL) = SIG_DFL;
		/* From here on `sig' holds the SIGILL mask bit, not a signal number. */
		sig = sigmask(SIGILL);
		p->p_sigignore &= ~sig;
		p->p_sigcatch &= ~sig;
		p->p_sigmask &= ~sig;
		psignal(p, SIGILL);
		return;
	}

	/*
	 * Build the argument list for the signal handler.
	 */
	/*
	 * If the process' sysent supplies a signal table, translate the
	 * number through it; numbers at or beyond sv_sigsize become
	 * sv_sigsize + 1.
	 */
	if (p->p_sysent->sv_sigtbl) {
		if (sig < p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[sig];
		else
			sig = p->p_sysent->sv_sigsize + 1;
	}
	sf.sf_signum = sig;
	sf.sf_code = code;
	sf.sf_scp = &fp->sf_sc;		/* address of the context inside the copied-out frame */
	sf.sf_addr = (char *) regs[tERR];
	sf.sf_handler = catcher;

	/* save scratch registers */
	sf.sf_sc.sc_eax = regs[tEAX];
	sf.sf_sc.sc_ebx = regs[tEBX];
	sf.sf_sc.sc_ecx = regs[tECX];
	sf.sf_sc.sc_edx = regs[tEDX];
	sf.sf_sc.sc_esi = regs[tESI];
	sf.sf_sc.sc_edi = regs[tEDI];
	sf.sf_sc.sc_cs = regs[tCS];
	sf.sf_sc.sc_ds = regs[tDS];
	sf.sf_sc.sc_ss = regs[tSS];
	sf.sf_sc.sc_es = regs[tES];
	sf.sf_sc.sc_isp = regs[tISP];

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	sf.sf_sc.sc_onstack = oonstack;
	sf.sf_sc.sc_mask = mask;
	sf.sf_sc.sc_sp = regs[tESP];
	sf.sf_sc.sc_fp = regs[tEBP];
	sf.sf_sc.sc_pc = regs[tEIP];
	sf.sf_sc.sc_ps = regs[tEFLAGS];

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		sigexit(p, SIGILL);
	};

	/*
	 * Redirect the saved register set: stack pointer at the new frame,
	 * pc at PS_STRINGS minus the size read through sv_szsigcode, PSL_VM
	 * cleared, and the segment registers set to _ucodesel/_udatasel.
	 */
	regs[tESP] = (int)fp;
	regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
	regs[tEFLAGS] &= ~PSL_VM;
	regs[tCS] = _ucodesel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tSS] = _udatasel;
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 */
int
sigreturn(p, uap, retval)
	struct proc *p;
	struct sigreturn_args /* {
		struct sigcontext *sigcntxp;
	} */ *uap;
	int *retval;
{
	register struct sigcontext *scp;
	register struct sigframe *fp;
	register int *regs = p->p_md.md_regs;
	int eflags;

	/*
	 * (XXX old comment) regs[tESP] points to the return address.
	 * The user scp pointer is above that.
	 * The return address is faked in the signal trampoline code
	 * for consistency.
	 */
	scp = uap->sigcntxp;
	/* Step back from the sf_sc member to the start of the enclosing sigframe. */
	fp = (struct sigframe *)
	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));

	/* useracc() must accept the whole frame with B_WRITE before scp is read. */
	if (useracc((caddr_t)fp, sizeof (*fp), B_WRITE) == 0)
		return(EFAULT);

	/*
	 * Don't allow users to change privileged or reserved flags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = scp->sc_ps;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF. The
	 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
	 * sometimes set it there too.
tf_eflags is kept in the signal * context during signal handling and there is no other place * to remember it, so the PSL_RF bit may be corrupted by the * signal handler without us knowing. Corruption of the PSL_RF * bit at worst causes one more or one less debugger trap, so * allowing it is fairly harmless. */ if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) { #ifdef DEBUG printf("sigreturn: eflags = 0x%x\n", eflags); #endif return(EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(scp->sc_cs)) { #ifdef DEBUG printf("sigreturn: cs = 0x%x\n", scp->sc_cs); #endif trapsignal(p, SIGBUS, T_PROTFLT); return(EINVAL); } /* restore scratch registers */ regs[tEAX] = scp->sc_eax; regs[tEBX] = scp->sc_ebx; regs[tECX] = scp->sc_ecx; regs[tEDX] = scp->sc_edx; regs[tESI] = scp->sc_esi; regs[tEDI] = scp->sc_edi; regs[tCS] = scp->sc_cs; regs[tDS] = scp->sc_ds; regs[tES] = scp->sc_es; regs[tSS] = scp->sc_ss; regs[tISP] = scp->sc_isp; if (useracc((caddr_t)scp, sizeof (*scp), B_WRITE) == 0) return(EINVAL); if (scp->sc_onstack & 01) p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = scp->sc_mask & ~sigcantmask; regs[tEBP] = scp->sc_fp; regs[tESP] = scp->sc_sp; regs[tEIP] = scp->sc_pc; regs[tEFLAGS] = eflags; return(EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Clear registers on exec */ void setregs(p, entry, stack) struct proc *p; u_long entry; u_long stack; { int *regs = p->p_md.md_regs; #ifdef USER_LDT struct pcb *pcb = &p->p_addr->u_pcb; /* was i386_user_cleanup() in 
NetBSD */
	if (pcb->pcb_ldt) {
		/*
		 * NOTE(review): when the pcb being reset is the current one,
		 * the LDT register is reloaded just before the private table
		 * is freed.  The selector used is GUSERLDT_SEL -- confirm
		 * that this, rather than _default_ldt, is what is intended.
		 */
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		/* Release the private LDT, sized by pcb_ldt_len descriptors. */
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		/* Chained assignment through a cast: clears both pcb_ldt and pcb_ldt_len. */
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
 	}
#endif

	/* Start from an all-zero trapframe, then fill in the new user state. */
	bzero(regs, sizeof(struct trapframe));
	regs[tEIP] = entry;
	regs[tESP] = stack;
	/*
	 * NOTE(review): regs was cleared by the bzero() just above, so the
	 * (regs[tEFLAGS] & PSL_T) term looks like it always evaluates to 0
	 * here -- confirm whether PSL_T was meant to be sampled before the
	 * clear.
	 */
	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
	regs[tSS] = _udatasel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tCS] = _ucodesel;

	/*
	 * Initialize the math emulator (if any) for the current process.
	 * Actually, just clear the bit that says that the emulator has
	 * been initialized.  Initialization is delayed until the process
	 * traps to the emulator (if it is done at all) mainly because
	 * emulators don't provide an entry point for initialization.
	 */
	p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;

	/*
	 * Arrange to trap the next npx or `fwait' instruction (see npx.c
	 * for why fwait must be trapped at least if there is an npx or an
	 * emulator).  This is mainly to handle the case where npx0 is not
	 * configured, since the npx routines normally set up the trap
	 * otherwise.  It should be done only at boot time, but doing it
	 * here allows modifying `npx_exists' for testing the emulator on
	 * systems with an npx.
	 */
	load_cr0(rcr0() | CR0_MP | CR0_TS);

#if NNPX > 0
	/* Initialize the npx (if any) for the current process.
*/ npxinit(__INITIAL_NPXCW__); #endif } static int sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int currentldt; int _default_ldt; union descriptor gdt[NGDT]; /* global descriptor table */ struct gate_descriptor idt[NIDT]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 
bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 3 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 4 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 5 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 6 Proc 0 Tss Descriptor */ { (int) kstack, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 7 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 
0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GAPMDATA_SEL 10 APM BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* 
limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void init386(first) int first; { int x; unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; int gsel_tss; struct isa_device *idp; /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; int pagesinbase, pagesinext; int target_page, pa_indx; int off; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; /* * Initialize the console before we print anything out. */ cninit(); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) 
disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. */ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; /* Note. 
eventually want private ldts per process */ for (x = 0; x < NLDT; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif rand_initialize(); r_gdt.rd_limit = sizeof(gdt) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); r_idt.rd_limit = sizeof(idt) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); _default_ldt = 
GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); currentldt = _default_ldt; #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* Use BIOS values stored in RTC CMOS RAM, since probing * breaks certain 386 AT relics. */ biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); /* * If BIOS tells us that it has more than 640k in the basemem, * don't believe it - set it to 640k. */ if (biosbasemem > 640) { printf("Preposterous RTC basemem of %dK, truncating to 640K\n", biosbasemem); biosbasemem = 640; } if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) { printf("Preposterous BIOS basemem of %dK, truncating to 640K\n", bootinfo.bi_basemem); bootinfo.bi_basemem = 640; } /* * Warn if the official BIOS interface disagrees with the RTC * interface used above about the amount of base memory or the * amount of extended memory. Prefer the BIOS value for the base * memory. This is necessary for machines that `steal' base * memory for use as BIOS memory, at least if we are going to use * the BIOS for apm. Prefer the RTC value for extended memory. * Eventually the hackish interface shouldn't even be looked at. */ if (bootinfo.bi_memsizes_valid) { if (bootinfo.bi_basemem != biosbasemem) { vm_offset_t pa; printf( "BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n", bootinfo.bi_basemem, biosbasemem); biosbasemem = bootinfo.bi_basemem; /* * XXX if biosbasemem is now < 640, there is `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. 
* The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from 0 to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(biosbasemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { unsigned *pte; pte = (unsigned *)vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } } if (bootinfo.bi_extmem != biosextmem) printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", bootinfo.bi_extmem, biosextmem); } pagesinbase = biosbasemem * 1024 / PAGE_SIZE; pagesinext = biosextmem * 1024 / PAGE_SIZE; /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. */ /* * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((pagesinext > 3840) && (pagesinext < 4096)) pagesinext = 3840; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = pagesinext + 0x100000/PAGE_SIZE; #ifdef MAXMEM Maxmem = MAXMEM/4; #endif #if NNPX > 0 idp = find_isadev(isa_devtab_null, &npxdriver, 0); if (idp != NULL && idp->id_msize != 0) Maxmem = idp->id_msize / 4; #endif /* call pmap initialization to make new kernel address space */ pmap_bootstrap (first, 0); /* * Size up each available chunk of physical memory. */ /* * We currently don't bother testing base memory. * XXX ...but we probably should. 
*/ pa_indx = 0; badpages = 0; if (pagesinbase > 1) { phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ physmem = pagesinbase - 1; } else { /* point at first chunk end */ pa_indx++; } for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { int tmp, page_bad = FALSE; /* * map page into kernel: valid, read/write, non-cacheable */ *(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page; invltlb(); tmp = *(int *)CADDR1; /* * Test for alternating 1's and 0's */ *(volatile int *)CADDR1 = 0xaaaaaaaa; if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)CADDR1 = 0x55555555; if (*(volatile int *)CADDR1 != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)CADDR1 = 0xffffffff; if (*(volatile int *)CADDR1 != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)CADDR1 = 0x0; if (*(volatile int *)CADDR1 != 0x0) { /* * test of page failed */ page_bad = TRUE; } /* * Restore original value. */ *(int *)CADDR1 = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == FALSE) { /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. 
*/ if (phys_avail[pa_indx] == target_page) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = target_page; /* start */ phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ } physmem++; } else { badpages++; page_bad = FALSE; } } *(int *)CMAP1 = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); avail_end = phys_avail[pa_indx]; /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(sizeof(struct msgbuf)); off += PAGE_SIZE) pmap_enter(kernel_pmap, (vm_offset_t)msgbufp + off, avail_end + off, VM_PROT_ALL, TRUE); msgbufmapped = 1; /* make a initial tss so microp can get interrupt stack on syscall! 
*/ proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE; proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = (sizeof(struct i386tss))<<16; ltr(gsel_tss); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD; } /* * The registers are in the frame; the frame is in the user area of * the process in question; when the process is active, the registers * are in "the kernel stack"; when it's not, they're still there, but * things get flipped around. So, since p->p_md.md_regs is the whole address * of the register set, take its offset from the kernel stack, and * index into the user block. Don't you just *love* virtual memory? * (I'm starting to think seymour is right...) 
*/ #define TF_REGP(p) ((struct trapframe *) \ ((char *)(p)->p_addr \ + ((char *)(p)->p_md.md_regs - kstack))) int ptrace_set_pc(p, addr) struct proc *p; unsigned int addr; { TF_REGP(p)->tf_eip = addr; return (0); } int ptrace_single_step(p) struct proc *p; { TF_REGP(p)->tf_eflags |= PSL_T; return (0); } int ptrace_write_u(p, off, data) struct proc *p; vm_offset_t off; int data; { struct trapframe frame_copy; vm_offset_t min; struct trapframe *tp; /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_regs - kstack; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = TF_REGP(p); frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct trapframe *tp; tp = TF_REGP(p); regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct trapframe *tp; tp = TF_REGP(p); if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; 
tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; return (0); } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #include /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) { struct partition *p = lp->d_partitions + dkpart(bp->b_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->b_blkno == maxsz) { bp->b_resid = bp->b_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->b_blkno; if (sz <= 0) { bp->b_error = EINVAL; goto bad; } bp->b_bcount = sz << DEV_BSHIFT; } bp->b_pblkno = bp->b_blkno + p->p_offset; return(1); bad: bp->b_flags |= B_ERROR; return(-1); } #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. 
*/

#undef inb
#undef outb

/* silence compiler warnings */
u_char inb(u_int);
void outb(u_int, u_char);

/*
 * Read one byte from I/O port `port' and return it.
 * Real-function replacement for the inb name undefined above.
 */
u_char
inb(u_int port)
{
	u_char	data;
	/*
	 * We use %%dx and not %1 here because i/o is done at %dx and not at
	 * %edx, while gcc generates inferior code (movw instead of movl)
	 * if we tell it to load (u_short) port.
	 */
	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}

/*
 * Write the byte `data' to I/O port `port'.
 * Real-function replacement for the outb name undefined above.
 */
void
outb(u_int port, u_char data)
{
	u_char	al;
	/*
	 * Use an unnecessary assignment to help gcc's register allocator.
	 * This makes a large difference for gcc-1.40 and a tiny difference
	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
	 * best results.  gcc-2.6.0 can't handle this.
	 */
	al = data;
	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}

#endif /* DDB */
Index: head/sys/i386/i386/machdep.c
===================================================================
--- head/sys/i386/i386/machdep.c (revision 24436)
+++ head/sys/i386/i386/machdep.c (revision 24437)
@@ -1,1577 +1,1575 @@
/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 * This product includes software developed by the University of
 * California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.232 1997/03/25 23:43:01 mpp Exp $ + * $Id: machdep.c,v 1.233 1997/03/28 12:37:44 joerg Exp $ */ #include "npx.h" #include "opt_sysvipc.h" #include "opt_ddb.h" #include "opt_bounce.h" #include "opt_machdep.h" #include "opt_perfmon.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SYSVSHM #include #endif #ifdef SYSVMSG #include #endif #ifdef SYSVSEM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #include #include extern void init386 __P((int first)); extern int ptrace_set_pc __P((struct proc *p, unsigned int addr)); extern int ptrace_single_step __P((struct proc *p)); extern int 
ptrace_write_u __P((struct proc *p, vm_offset_t off, int data)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) #ifdef BOUNCE_BUFFERS extern char *bouncememory; extern int maxbkva; #ifdef BOUNCEPAGES int bouncepages = BOUNCEPAGES; #else int bouncepages = 0; #endif #endif /* BOUNCE_BUFFERS */ extern int freebufspace; int msgbufmapped = 0; /* set when safe to use msgbuf */ int _udatasel, _ucodesel; u_int atdevbase; int physmem = 0; int cold = 1; static int sysctl_hw_physmem SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int sysctl_hw_usermem SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); int boothowto = 0, bootverbose = 0, Maxmem = 0; static int badpages = 0; long dumplo; extern int bootdev; vm_offset_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */ static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; extern struct linker_set netisr_set; #define offsetof(type, member) ((size_t)(&((type *)0)->member)) static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) 
bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); earlysetcpuclass(); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (badpages != 0) { int indx = 1; /* * XXX skip reporting ISA hole & unmanaged kernel memory */ if (phys_avail[0] == PAGE_SIZE) indx += 2; printf("Physical memory hole(s):\n"); for (; phys_avail[indx + 1] != 0; indx += 2) { int size = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size, size / PAGE_SIZE); } } /* * Quickly wire in netisrs. */ setup_netisrs(&netisr_set); /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif #ifdef SYSVSEM valloc(sema, struct semid_ds, seminfo.semmni); valloc(sem, struct sem, seminfo.semmns); /* This is pretty disgusting! 
*/ valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int)); #endif #ifdef SYSVMSG valloc(msgpool, char, msginfo.msgmax); valloc(msgmaps, struct msgmap, msginfo.msgseg); valloc(msghdrs, struct msg, msginfo.msgtql); valloc(msqids, struct msqid_ds, msginfo.msgmni); #endif if (nbuf == 0) { nbuf = 30; if( physmem > 1024) nbuf += min((physmem - 1024) / 8, 2048); } nswbuf = max(min(nbuf/4, 128), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); #ifdef BOUNCE_BUFFERS /* * If there is more than 16MB of memory, allocate some bounce buffers */ if (Maxmem > 4096) { if (bouncepages == 0) { bouncepages = 64; bouncepages += ((Maxmem - 4096) / 2048) * 32; } v = (caddr_t)((vm_offset_t)round_page(v)); valloc(bouncememory, char, bouncepages * PAGE_SIZE); } #endif /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); #ifdef BOUNCE_BUFFERS clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + maxbkva + pager_map_size, TRUE); io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE); #else clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE); #endif buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE), TRUE); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size, TRUE); exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*ARG_MAX), TRUE); - exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, - (16*PAGE_SIZE), TRUE); u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (maxproc*UPAGES*PAGE_SIZE), FALSE); /* * 
Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size, FALSE); } /* * Initialize callouts */ callfree = callout; for (i = 1; i < ncallout; i++) callout[i-1].c_next = &callout[i]; #if defined(USERCONFIG) #if defined(USERCONFIG_BOOT) if (1) { #else if (boothowto & RB_CONFIG) { #endif userconfig(); cninit(); /* the preferred console may have changed */ } #endif #ifdef BOUNCE_BUFFERS /* * init bounce buffers */ vm_bounce_init(); #endif printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } static void setup_netisrs(ls) struct linker_set *ls; { int i; const struct netisrtab *nit; for(i = 0; ls->ls_items[i]; i++) { nit = (const struct netisrtab *)ls->ls_items[i]; register_netisr(nit->nit_num, nit->nit_isr); } } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. 
*/ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig, mask; u_long code; { register struct proc *p = curproc; register int *regs; register struct sigframe *fp; struct sigframe sf; struct sigacts *psp = p->p_sigacts; int oonstack; regs = p->p_md.md_regs; oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; /* * Allocate and validate space for the signal handler context. */ if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack && (psp->ps_sigonstack & sigmask(sig))) { fp = (struct sigframe *)(psp->ps_sigstk.ss_sp + psp->ps_sigstk.ss_size - sizeof(struct sigframe)); psp->ps_sigstk.ss_flags |= SS_ONSTACK; } else { fp = (struct sigframe *)regs[tESP] - 1; } /* * grow() will return FALSE if the fp will not fit inside the stack * and the stack can not be grown. useracc will return FALSE * if access is denied. */ if ((grow(p, (int)fp) == FALSE) || (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; sig = sigmask(SIGILL); p->p_sigignore &= ~sig; p->p_sigcatch &= ~sig; p->p_sigmask &= ~sig; psignal(p, SIGILL); return; } /* * Build the argument list for the signal handler. */ if (p->p_sysent->sv_sigtbl) { if (sig < p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[sig]; else sig = p->p_sysent->sv_sigsize + 1; } sf.sf_signum = sig; sf.sf_code = code; sf.sf_scp = &fp->sf_sc; sf.sf_addr = (char *) regs[tERR]; sf.sf_handler = catcher; /* save scratch registers */ sf.sf_sc.sc_eax = regs[tEAX]; sf.sf_sc.sc_ebx = regs[tEBX]; sf.sf_sc.sc_ecx = regs[tECX]; sf.sf_sc.sc_edx = regs[tEDX]; sf.sf_sc.sc_esi = regs[tESI]; sf.sf_sc.sc_edi = regs[tEDI]; sf.sf_sc.sc_cs = regs[tCS]; sf.sf_sc.sc_ds = regs[tDS]; sf.sf_sc.sc_ss = regs[tSS]; sf.sf_sc.sc_es = regs[tES]; sf.sf_sc.sc_isp = regs[tISP]; /* * Build the signal context to be used by sigreturn. 
*/ sf.sf_sc.sc_onstack = oonstack; sf.sf_sc.sc_mask = mask; sf.sf_sc.sc_sp = regs[tESP]; sf.sf_sc.sc_fp = regs[tEBP]; sf.sf_sc.sc_pc = regs[tEIP]; sf.sf_sc.sc_ps = regs[tEFLAGS]; /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); }; regs[tESP] = (int)fp; regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode)); regs[tEFLAGS] &= ~PSL_VM; regs[tCS] = _ucodesel; regs[tDS] = _udatasel; regs[tES] = _udatasel; regs[tSS] = _udatasel; } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ int sigreturn(p, uap, retval) struct proc *p; struct sigreturn_args /* { struct sigcontext *sigcntxp; } */ *uap; int *retval; { register struct sigcontext *scp; register struct sigframe *fp; register int *regs = p->p_md.md_regs; int eflags; /* * (XXX old comment) regs[tESP] points to the return address. * The user scp pointer is above that. * The return address is faked in the signal trampoline code * for consistency. */ scp = uap->sigcntxp; fp = (struct sigframe *) ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); if (useracc((caddr_t)fp, sizeof (*fp), B_WRITE) == 0) return(EFAULT); /* * Don't allow users to change privileged or reserved flags. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = scp->sc_ps; /* * XXX do allow users to change the privileged flag PSL_RF. The * cpu sets PSL_RF in tf_eflags for faults. Debuggers should * sometimes set it there too. 
tf_eflags is kept in the signal * context during signal handling and there is no other place * to remember it, so the PSL_RF bit may be corrupted by the * signal handler without us knowing. Corruption of the PSL_RF * bit at worst causes one more or one less debugger trap, so * allowing it is fairly harmless. */ if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) { #ifdef DEBUG printf("sigreturn: eflags = 0x%x\n", eflags); #endif return(EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(scp->sc_cs)) { #ifdef DEBUG printf("sigreturn: cs = 0x%x\n", scp->sc_cs); #endif trapsignal(p, SIGBUS, T_PROTFLT); return(EINVAL); } /* restore scratch registers */ regs[tEAX] = scp->sc_eax; regs[tEBX] = scp->sc_ebx; regs[tECX] = scp->sc_ecx; regs[tEDX] = scp->sc_edx; regs[tESI] = scp->sc_esi; regs[tEDI] = scp->sc_edi; regs[tCS] = scp->sc_cs; regs[tDS] = scp->sc_ds; regs[tES] = scp->sc_es; regs[tSS] = scp->sc_ss; regs[tISP] = scp->sc_isp; if (useracc((caddr_t)scp, sizeof (*scp), B_WRITE) == 0) return(EINVAL); if (scp->sc_onstack & 01) p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = scp->sc_mask & ~sigcantmask; regs[tEBP] = scp->sc_fp; regs[tESP] = scp->sc_sp; regs[tEIP] = scp->sc_pc; regs[tEFLAGS] = eflags; return(EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Clear registers on exec */ void setregs(p, entry, stack) struct proc *p; u_long entry; u_long stack; { int *regs = p->p_md.md_regs; #ifdef USER_LDT struct pcb *pcb = &p->p_addr->u_pcb; /* was i386_user_cleanup() in 
NetBSD */ if (pcb->pcb_ldt) { if (pcb == curpcb) lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt, pcb->pcb_ldt_len * sizeof(union descriptor)); pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0; } #endif bzero(regs, sizeof(struct trapframe)); regs[tEIP] = entry; regs[tESP] = stack; regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T); regs[tSS] = _udatasel; regs[tDS] = _udatasel; regs[tES] = _udatasel; regs[tCS] = _ucodesel; /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); #if NNPX > 0 /* Initialize the npx (if any) for the current process. 
*/ npxinit(__INITIAL_NPXCW__); #endif } static int sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int currentldt; int _default_ldt; union descriptor gdt[NGDT]; /* global descriptor table */ struct gate_descriptor idt[NIDT]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 
bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 3 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 4 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 5 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 6 Proc 0 Tss Descriptor */ { (int) kstack, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 7 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 
0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GAPMDATA_SEL 10 APM BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* 
limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void init386(first) int first; { int x; unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; int gsel_tss; struct isa_device *idp; /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; int pagesinbase, pagesinext; int target_page, pa_indx; int off; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; /* * Initialize the console before we print anything out. */ cninit(); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) 
disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. */ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; /* Note. 
eventually want private ldts per process */ for (x = 0; x < NLDT; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif rand_initialize(); r_gdt.rd_limit = sizeof(gdt) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); r_idt.rd_limit = sizeof(idt) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); _default_ldt = 
GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); currentldt = _default_ldt; #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* Use BIOS values stored in RTC CMOS RAM, since probing * breaks certain 386 AT relics. */ biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); /* * If BIOS tells us that it has more than 640k in the basemem, * don't believe it - set it to 640k. */ if (biosbasemem > 640) { printf("Preposterous RTC basemem of %dK, truncating to 640K\n", biosbasemem); biosbasemem = 640; } if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) { printf("Preposterous BIOS basemem of %dK, truncating to 640K\n", bootinfo.bi_basemem); bootinfo.bi_basemem = 640; } /* * Warn if the official BIOS interface disagrees with the RTC * interface used above about the amount of base memory or the * amount of extended memory. Prefer the BIOS value for the base * memory. This is necessary for machines that `steal' base * memory for use as BIOS memory, at least if we are going to use * the BIOS for apm. Prefer the RTC value for extended memory. * Eventually the hackish interface shouldn't even be looked at. */ if (bootinfo.bi_memsizes_valid) { if (bootinfo.bi_basemem != biosbasemem) { vm_offset_t pa; printf( "BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n", bootinfo.bi_basemem, biosbasemem); biosbasemem = bootinfo.bi_basemem; /* * XXX if biosbasemem is now < 640, there is `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. 
* The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from 0 to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(biosbasemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { unsigned *pte; pte = (unsigned *)vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } } if (bootinfo.bi_extmem != biosextmem) printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", bootinfo.bi_extmem, biosextmem); } pagesinbase = biosbasemem * 1024 / PAGE_SIZE; pagesinext = biosextmem * 1024 / PAGE_SIZE; /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. */ /* * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((pagesinext > 3840) && (pagesinext < 4096)) pagesinext = 3840; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = pagesinext + 0x100000/PAGE_SIZE; #ifdef MAXMEM Maxmem = MAXMEM/4; #endif #if NNPX > 0 idp = find_isadev(isa_devtab_null, &npxdriver, 0); if (idp != NULL && idp->id_msize != 0) Maxmem = idp->id_msize / 4; #endif /* call pmap initialization to make new kernel address space */ pmap_bootstrap (first, 0); /* * Size up each available chunk of physical memory. */ /* * We currently don't bother testing base memory. * XXX ...but we probably should. 
*/ pa_indx = 0; badpages = 0; if (pagesinbase > 1) { phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ physmem = pagesinbase - 1; } else { /* point at first chunk end */ pa_indx++; } for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { int tmp, page_bad = FALSE; /* * map page into kernel: valid, read/write, non-cacheable */ *(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page; invltlb(); tmp = *(int *)CADDR1; /* * Test for alternating 1's and 0's */ *(volatile int *)CADDR1 = 0xaaaaaaaa; if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)CADDR1 = 0x55555555; if (*(volatile int *)CADDR1 != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)CADDR1 = 0xffffffff; if (*(volatile int *)CADDR1 != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)CADDR1 = 0x0; if (*(volatile int *)CADDR1 != 0x0) { /* * test of page failed */ page_bad = TRUE; } /* * Restore original value. */ *(int *)CADDR1 = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == FALSE) { /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. 
*/ if (phys_avail[pa_indx] == target_page) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = target_page; /* start */ phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ } physmem++; } else { badpages++; page_bad = FALSE; } } *(int *)CMAP1 = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); avail_end = phys_avail[pa_indx]; /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(sizeof(struct msgbuf)); off += PAGE_SIZE) pmap_enter(kernel_pmap, (vm_offset_t)msgbufp + off, avail_end + off, VM_PROT_ALL, TRUE); msgbufmapped = 1; /* make a initial tss so microp can get interrupt stack on syscall! 
*/ proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE; proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = (sizeof(struct i386tss))<<16; ltr(gsel_tss); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD; } /* * The registers are in the frame; the frame is in the user area of * the process in question; when the process is active, the registers * are in "the kernel stack"; when it's not, they're still there, but * things get flipped around. So, since p->p_md.md_regs is the whole address * of the register set, take its offset from the kernel stack, and * index into the user block. Don't you just *love* virtual memory? * (I'm starting to think seymour is right...) 
*/ #define TF_REGP(p) ((struct trapframe *) \ ((char *)(p)->p_addr \ + ((char *)(p)->p_md.md_regs - kstack))) int ptrace_set_pc(p, addr) struct proc *p; unsigned int addr; { TF_REGP(p)->tf_eip = addr; return (0); } int ptrace_single_step(p) struct proc *p; { TF_REGP(p)->tf_eflags |= PSL_T; return (0); } int ptrace_write_u(p, off, data) struct proc *p; vm_offset_t off; int data; { struct trapframe frame_copy; vm_offset_t min; struct trapframe *tp; /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_regs - kstack; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = TF_REGP(p); frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct trapframe *tp; tp = TF_REGP(p); regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct trapframe *tp; tp = TF_REGP(p); if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; 
tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; return (0); } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #include /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) { struct partition *p = lp->d_partitions + dkpart(bp->b_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->b_blkno == maxsz) { bp->b_resid = bp->b_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->b_blkno; if (sz <= 0) { bp->b_error = EINVAL; goto bad; } bp->b_bcount = sz << DEV_BSHIFT; } bp->b_pblkno = bp->b_blkno + p->p_offset; return(1); bad: bp->b_flags |= B_ERROR; return(-1); } #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. 
*/

#undef inb
#undef outb

/* silence compiler warnings */
u_char inb(u_int);
void outb(u_int, u_char);

/*
 * Read one byte from I/O port `port' and return it.
 */
u_char
inb(u_int port)
{
	u_char	data;
	/*
	 * We use %%dx and not %1 here because i/o is done at %dx and not at
	 * %edx, while gcc generates inferior code (movw instead of movl)
	 * if we tell it to load (u_short) port.
	 */
	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}

/*
 * Write the byte `data' to I/O port `port'.
 */
void
outb(u_int port, u_char data)
{
	u_char	al;
	/*
	 * Use an unnecessary assignment to help gcc's register allocator.
	 * This makes a large difference for gcc-1.40 and a tiny difference
	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
	 * best results.  gcc-2.6.0 can't handle this.
	 */
	al = data;
	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}

#endif /* DDB */
Index: head/sys/kern/kern_exec.c
===================================================================
--- head/sys/kern/kern_exec.c (revision 24436)
+++ head/sys/kern/kern_exec.c (revision 24437)
@@ -1,619 +1,628 @@
/*
 * Copyright (c) 1993, David Greenman
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id$ + * $Id: kern_exec.c,v 1.52 1997/02/22 09:39:04 peter Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include static int *exec_copyout_strings __P((struct image_params *)); static int exec_check_permissions(struct image_params *); /* * XXX trouble here if sizeof(caddr_t) != sizeof(int), other parts * of the sysctl code also assumes this, and sizeof(int) == sizeof(long). */ static struct ps_strings *ps_strings = PS_STRINGS; SYSCTL_INT(_kern, KERN_PS_STRINGS, ps_strings, 0, &ps_strings, 0, ""); static caddr_t usrstack = (caddr_t)USRSTACK; SYSCTL_INT(_kern, KERN_USRSTACK, usrstack, 0, &usrstack, 0, ""); /* * execsw_set is constructed for us by the linker. Each of the items * is a pointer to a `const struct execsw', hence the double pointer here. */ static const struct execsw **execsw = (const struct execsw **)&execsw_set.ls_items[0]; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif /* * execve() system call. 
*/ int execve(p, uap, retval) struct proc *p; register struct execve_args *uap; int *retval; { struct nameidata nd, *ndp; int *stack_base; int error, len, i; struct image_params image_params, *imgp; struct vattr attr; + struct buf *bp = NULL; imgp = &image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->uap = uap; imgp->attr = &attr; imgp->image_header = NULL; imgp->argc = imgp->envc = 0; imgp->entry_addr = 0; imgp->vmspace_destroyed = 0; imgp->interpreted = 0; imgp->interpreter_name[0] = '\0'; imgp->auxargs = NULL; /* * Allocate temporary demand zeroed space for argument and * environment strings */ imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX); if (imgp->stringbase == NULL) { error = ENOMEM; goto exec_fail; } imgp->stringp = imgp->stringbase; imgp->stringspace = ARG_MAX; /* * Translate the file name. namei() returns a vnode pointer * in ni_vp amoung other things. */ ndp = &nd; NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_USERSPACE, uap->fname, p); interpret: error = namei(ndp); if (error) { kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); goto exec_fail; } imgp->vp = ndp->ni_vp; if (imgp->vp == NULL) { error = ENOEXEC; goto exec_fail_dealloc; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); - /* - * Lose the lock on the vnode. It's no longer needed, and must not - * exist for the pagefault paging to work below. - */ - VOP_UNLOCK(imgp->vp, 0, p); - if (error) goto exec_fail_dealloc; /* - * Map the image header (first page) of the file into - * kernel address space + * Get the image header, which we define here as meaning the first + * page of the executable. 
*/ - error = vm_mmap(exech_map, /* map */ - (vm_offset_t *)&imgp->image_header, /* address */ - PAGE_SIZE, /* size */ - VM_PROT_READ, /* protection */ - VM_PROT_READ, /* max protection */ - 0, /* flags */ - (caddr_t)imgp->vp, /* vnode */ - 0); /* offset */ + if (imgp->vp->v_mount && imgp->vp->v_mount->mnt_stat.f_iosize >= PAGE_SIZE) { + /* + * Get a buffer with (at least) the first page. + */ + error = bread(imgp->vp, 0, imgp->vp->v_mount->mnt_stat.f_iosize, + p->p_ucred, &bp); + imgp->image_header = bp->b_data; + } else { + /* + * The filesystem block size is too small, so do this the hard + * way. Malloc some space and read PAGE_SIZE worth of the image + * header into it. + */ + imgp->image_header = malloc(PAGE_SIZE, M_TEMP, M_WAITOK); + error = vn_rdwr(UIO_READ, imgp->vp, (void *)imgp->image_header, PAGE_SIZE, 0, + UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, NULL, p); + } + VOP_UNLOCK(imgp->vp, 0, p); if (error) { - uprintf("mmap failed: %d\n",error); goto exec_fail_dealloc; } /* * Loop through list of image activators, calling each one. * If there is no match, the activator returns -1. If there * is a match, but there was an error during the activation, * the error is returned. Otherwise 0 means success. If the * image is interpreted, loop back up and try activating * the interpreter. 
*/ for (i = 0; execsw[i]; ++i) { if (execsw[i]->ex_imgact) error = (*execsw[i]->ex_imgact)(imgp); else continue; if (error == -1) continue; if (error) goto exec_fail_dealloc; if (imgp->interpreted) { + /* free old bp/image_header */ + if (bp != NULL) { + brelse(bp); + bp = NULL; + } else { + free((void *)imgp->image_header, M_TEMP); + } /* free old vnode and name buffer */ vrele(ndp->ni_vp); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); - if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, - (vm_offset_t)imgp->image_header + PAGE_SIZE)) - panic("execve: header dealloc failed (1)"); - /* set new name to that of the interpreter */ NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, p); goto interpret; } break; } /* If we made it through all the activators and none matched, exit. */ if (error == -1) { error = ENOEXEC; goto exec_fail_dealloc; } /* * Copy out strings (args and env) and initialize stack base */ stack_base = exec_copyout_strings(imgp); p->p_vmspace->vm_minsaddr = (char *)stack_base; /* * If custom stack fixup routine present for this process * let it do the stack setup. * Else stuff argument count as first item on stack */ if (p->p_sysent->sv_fixup) (*p->p_sysent->sv_fixup)(&stack_base, imgp); else suword(--stack_base, imgp->argc); /* close files on exec */ fdcloseexec(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN); bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len); p->p_comm[len] = 0; /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has it's own resources back */ p->p_flag |= P_EXEC; if (p->p_pptr && (p->p_flag & P_PPWAIT)) { p->p_flag &= ~P_PPWAIT; wakeup((caddr_t)p->p_pptr); } /* * Implement image setuid/setgid. Disallow if the process is * being traced. */ if ((attr.va_mode & (VSUID | VSGID)) && (p->p_flag & P_TRACED) == 0) { /* * Turn off syscall tracing for set-id programs, except for * root. 
*/ if (p->p_tracep && suser(p->p_ucred, &p->p_acflag)) { p->p_traceflag = 0; vrele(p->p_tracep); p->p_tracep = NULL; } /* * Set the new credentials. */ p->p_ucred = crcopy(p->p_ucred); if (attr.va_mode & VSUID) p->p_ucred->cr_uid = attr.va_uid; if (attr.va_mode & VSGID) p->p_ucred->cr_groups[0] = attr.va_gid; p->p_flag |= P_SUGID; } else { if (p->p_ucred->cr_uid == p->p_cred->p_ruid && p->p_ucred->cr_gid == p->p_cred->p_rgid) p->p_flag &= ~P_SUGID; } /* * Implement correct POSIX saved-id behavior. */ p->p_cred->p_svuid = p->p_ucred->cr_uid; p->p_cred->p_svgid = p->p_ucred->cr_gid; /* * Store the vp for use in procfs */ if (p->p_textvp) /* release old reference */ vrele(p->p_textvp); VREF(ndp->ni_vp); p->p_textvp = ndp->ni_vp; /* * If tracing the process, trap to debugger so breakpoints * can be set before the program executes. */ if (p->p_flag & P_TRACED) psignal(p, SIGTRAP); /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* Set entry address */ setregs(p, imgp->entry_addr, (u_long)stack_base); /* * free various allocated resources */ kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); - if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, - (vm_offset_t)imgp->image_header + PAGE_SIZE)) - panic("execve: header dealloc failed (2)"); + if (bp != NULL) + brelse(bp); + else if (imgp->image_header != NULL) + free((void *)imgp->image_header, M_TEMP); vrele(ndp->ni_vp); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); return (0); exec_fail_dealloc: if (imgp->stringbase != NULL) kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); - if (imgp->image_header && imgp->image_header != (char *)-1) - if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, - (vm_offset_t)imgp->image_header + PAGE_SIZE)) - panic("execve: header dealloc failed (3)"); + if (bp != NULL) + brelse(bp); + else if (imgp->image_header != NULL) + free((void *)imgp->image_header, M_TEMP); if (ndp->ni_vp) vrele(ndp->ni_vp); 
FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); exec_fail: if (imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exit1(p, W_EXITCODE(0, SIGABRT)); /* NOT REACHED */ return(0); } else { return(error); } } /* * Destroy old address space, and allocate a new stack * The new stack is only SGROWSIZ large because it is grown * automatically in trap.c. */ int exec_new_vmspace(imgp) struct image_params *imgp; { int error; struct vmspace *vmspace = imgp->proc->p_vmspace; caddr_t stack_addr = (caddr_t) (USRSTACK - SGROWSIZ); imgp->vmspace_destroyed = 1; /* Blow away entire process VM */ if (vmspace->vm_shm) shmexit(imgp->proc); pmap_remove_pages(&vmspace->vm_pmap, 0, USRSTACK); vm_map_remove(&vmspace->vm_map, 0, USRSTACK); /* Allocate a new stack */ error = vm_map_find(&vmspace->vm_map, NULL, 0, (vm_offset_t *)&stack_addr, SGROWSIZ, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) return(error); vmspace->vm_ssize = SGROWSIZ >> PAGE_SHIFT; /* Initialize maximum stack address */ vmspace->vm_maxsaddr = (char *)USRSTACK - MAXSSIZ; return(0); } /* * Copy out argument and environment strings from the old process * address space into the temporary string buffer. 
*/
/*
 * Walks the user-supplied argv and envv vectors (imgp->uap), appending each
 * string to the buffer at imgp->stringp and counting it in imgp->argc or
 * imgp->envc.  imgp->stringspace is reduced by the length of every string
 * copied.
 *
 * Returns 0 on success, EFAULT when fuword() yields -1 for a vector slot,
 * E2BIG when copyinstr() reports ENAMETOOLONG, and any other copyinstr()
 * error unchanged.
 */
int
exec_extract_strings(imgp)
	struct image_params *imgp;
{
	char	**argv, **envv;
	char	*argp, *envp;
	int	error, length;

	/*
	 * extract arguments first
	 */

	argv = imgp->uap->argv;

	if (argv) {
		/* a zero word terminates the vector */
		while ((argp = (caddr_t) fuword(argv++))) {
			if (argp == (caddr_t) -1)
				return (EFAULT);
			if ((error = copyinstr(argp, imgp->stringp,
			    imgp->stringspace, &length))) {
				if (error == ENAMETOOLONG)
					return(E2BIG);
				return (error);
			}
			imgp->stringspace -= length;
			imgp->stringp += length;
			imgp->argc++;
		}
	}

	/*
	 * extract environment strings
	 */

	envv = imgp->uap->envv;

	if (envv) {
		/* a zero word terminates the vector */
		while ((envp = (caddr_t) fuword(envv++))) {
			if (envp == (caddr_t) -1)
				return (EFAULT);
			if ((error = copyinstr(envp, imgp->stringp,
			    imgp->stringspace, &length))) {
				if (error == ENAMETOOLONG)
					return(E2BIG);
				return (error);
			}
			imgp->stringspace -= length;
			imgp->stringp += length;
			imgp->envc++;
		}
	}

	return (0);
}

/*
 * Copy strings out to the new process address space, constructing
 * new arg and env vector tables. Return a pointer to the base
 * so that it can be used as the initial stack pointer.
 *
 * The return values of copyout() and suword() are not checked here.
 */
int *
exec_copyout_strings(imgp)
	struct image_params *imgp;
{
	int argc, envc;
	char **vectp;
	char *stringp, *destp;
	int *stack_base;
	struct ps_strings *arginfo;
	int szsigcode;

	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 */
	arginfo = PS_STRINGS;
	szsigcode = *(imgp->proc->p_sysent->sv_szsigcode);
	destp =	(caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
		roundup((ARG_MAX - imgp->stringspace), sizeof(char *));

	/*
	 * install sigcode
	 */
	if (szsigcode)
		copyout(imgp->proc->p_sysent->sv_sigcode,
			((caddr_t)arginfo - szsigcode), szsigcode);

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs)
	/*
	 * The '+ 2' is for the null pointers at the end of each of the
	 * arg and env vector sets, and 'AT_COUNT*2' is room for the
	 * ELF Auxargs data.
	 */
		vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 +
				  AT_COUNT*2) * sizeof(char*));
	else
	/*
	 * The '+ 2' is for the null pointers at the end of each of the
	 * arg and env vector sets
	 */
		vectp = (char **)
			(destp - (imgp->argc + imgp->envc + 2) * sizeof(char*));

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = (int *)vectp;

	stringp = imgp->stringbase;
	argc = imgp->argc;
	envc = imgp->envc;

	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword(&arginfo->ps_argvstr, (int)vectp);
	suword(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.
	 */
	for (; argc > 0; --argc) {
		suword(vectp++, (int)destp);
		/* step destp past this string and its terminating NUL */
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword(vectp++, 0);

	suword(&arginfo->ps_envstr, (int)vectp);
	suword(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword(vectp++, (int)destp);
		/* step destp past this string and its terminating NUL */
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword(vectp, 0);

	return (stack_base);
}

/*
 * Check permissions of file to execute.
 * Return 0 for success or error code on failure.
 *
 * As a side effect the file attributes are fetched into imgp->attr, the
 * set-id bits in that copy may be cleared, and the vnode is opened for
 * reading with VOP_OPEN().
 */
static int
exec_check_permissions(imgp)
	struct image_params *imgp;
{
	struct proc *p = imgp->proc;
	struct vnode *vp = imgp->vp;
	struct vattr *attr = imgp->attr;
	int error;

	/*
	 * Check number of open-for-writes on the file and deny execution
	 * if there are any.
	 */
	if (vp->v_writecount) {
		return (ETXTBSY);
	}

	/* Get file attributes */
	error = VOP_GETATTR(vp, attr, p->p_ucred, p);
	if (error)
		return (error);

	/*
	 * 1) Check if file execution is disabled for the filesystem that this
	 * file resides on.
	 * 2) Ensure that at least one execute bit is on - otherwise root
	 * will always succeed, and we don't want that to happen unless the
	 * file really is executable.
	 * 3) Ensure that the file is a regular file.
	 *
	 * NOTE(review): vp->v_mount is dereferenced unconditionally here,
	 * while execve() tests it for NULL before use — confirm which of the
	 * two assumptions is the intended one.
	 */
	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr->va_mode & 0111) == 0) ||
	    (attr->va_type != VREG)) {
		return (EACCES);
	}

	/*
	 * Zero length files can't be exec'd
	 */
	if (attr->va_size == 0)
		return (ENOEXEC);

	/*
	 * Disable setuid/setgid if the filesystem prohibits it or if
	 * the process is being traced.
	 */
	if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED))
		attr->va_mode &= ~(VSUID | VSGID);

	/*
	 * Check for execute permission to file based on current credentials.
	 * Then call filesystem specific open routine (which does nothing
	 * in the general case).
	 */
	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
	if (error)
		return (error);

	error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
	if (error)
		return (error);

	return (0);
}
Index: head/sys/pc98/i386/machdep.c
===================================================================
--- head/sys/pc98/i386/machdep.c (revision 24436)
+++ head/sys/pc98/i386/machdep.c (revision 24437)
@@ -1,1613 +1,1611 @@
/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 * This product includes software developed by the University of
 * California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.32 1997/03/26 07:03:30 kato Exp $ + * $Id: machdep.c,v 1.33 1997/03/29 02:48:49 kato Exp $ */ #include "npx.h" #include "opt_sysvipc.h" #include "opt_ddb.h" #include "opt_bounce.h" #include "opt_machdep.h" #include "opt_perfmon.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SYSVSHM #include #endif #ifdef SYSVMSG #include #endif #ifdef SYSVSEM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef PC98 #include #else #include #endif #include extern void init386 __P((int first)); extern int ptrace_set_pc __P((struct proc *p, unsigned int addr)); extern int ptrace_single_step 
__P((struct proc *p)); extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ #endif #ifdef BOUNCE_BUFFERS extern char *bouncememory; extern int maxbkva; #ifdef BOUNCEPAGES int bouncepages = BOUNCEPAGES; #else int bouncepages = 0; #endif #endif /* BOUNCE_BUFFERS */ extern int freebufspace; int msgbufmapped = 0; /* set when safe to use msgbuf */ int _udatasel, _ucodesel; u_int atdevbase; int physmem = 0; int cold = 1; static int sysctl_hw_physmem SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int sysctl_hw_usermem SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); int boothowto = 0, bootverbose = 0, Maxmem = 0; static int badpages = 0; #ifdef PC98 int Maxmem_under16M = 0; #endif long dumplo; extern int bootdev; vm_offset_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */ static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; extern struct linker_set netisr_set; #define offsetof(type, member) 
((size_t)(&((type *)0)->member)) static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); earlysetcpuclass(); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (badpages != 0) { int indx = 1; /* * XXX skip reporting ISA hole & unmanaged kernel memory */ if (phys_avail[0] == PAGE_SIZE) indx += 2; printf("Physical memory hole(s):\n"); for (; phys_avail[indx + 1] != 0; indx += 2) { int size = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size, size / PAGE_SIZE); } } /* * Quickly wire in netisrs. */ setup_netisrs(&netisr_set); /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. 
*/ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif #ifdef SYSVSEM valloc(sema, struct semid_ds, seminfo.semmni); valloc(sem, struct sem, seminfo.semmns); /* This is pretty disgusting! */ valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int)); #endif #ifdef SYSVMSG valloc(msgpool, char, msginfo.msgmax); valloc(msgmaps, struct msgmap, msginfo.msgseg); valloc(msghdrs, struct msg, msginfo.msgtql); valloc(msqids, struct msqid_ds, msginfo.msgmni); #endif if (nbuf == 0) { nbuf = 30; if( physmem > 1024) nbuf += min((physmem - 1024) / 8, 2048); } nswbuf = max(min(nbuf/4, 128), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); #ifdef BOUNCE_BUFFERS /* * If there is more than 16MB of memory, allocate some bounce buffers */ if (Maxmem > 4096) { if (bouncepages == 0) { bouncepages = 64; bouncepages += ((Maxmem - 4096) / 2048) * 32; } v = (caddr_t)((vm_offset_t)round_page(v)); valloc(bouncememory, char, bouncepages * PAGE_SIZE); } #endif /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); #ifdef BOUNCE_BUFFERS clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + maxbkva + pager_map_size, TRUE); io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE); #else clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + 
pager_map_size, TRUE); #endif buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE), TRUE); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size, TRUE); exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*ARG_MAX), TRUE); - exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, - (16*PAGE_SIZE), TRUE); u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (maxproc*UPAGES*PAGE_SIZE), FALSE); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size, FALSE); } /* * Initialize callouts */ callfree = callout; for (i = 1; i < ncallout; i++) callout[i-1].c_next = &callout[i]; #if defined(USERCONFIG) #if defined(USERCONFIG_BOOT) if (1) { #else if (boothowto & RB_CONFIG) { #endif userconfig(); cninit(); /* the preferred console may have changed */ } #endif #ifdef BOUNCE_BUFFERS /* * init bounce buffers */ vm_bounce_init(); #endif printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. 
*/ bufinit(); vm_pager_bufferinit(); } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } static void setup_netisrs(ls) struct linker_set *ls; { int i; const struct netisrtab *nit; for(i = 0; ls->ls_items[i]; i++) { nit = (const struct netisrtab *)ls->ls_items[i]; register_netisr(nit->nit_num, nit->nit_isr); } } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig, mask; u_long code; { register struct proc *p = curproc; register int *regs; register struct sigframe *fp; struct sigframe sf; struct sigacts *psp = p->p_sigacts; int oonstack; regs = p->p_md.md_regs; oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; /* * Allocate and validate space for the signal handler context. */ if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack && (psp->ps_sigonstack & sigmask(sig))) { fp = (struct sigframe *)(psp->ps_sigstk.ss_sp + psp->ps_sigstk.ss_size - sizeof(struct sigframe)); psp->ps_sigstk.ss_flags |= SS_ONSTACK; } else { fp = (struct sigframe *)regs[tESP] - 1; } /* * grow() will return FALSE if the fp will not fit inside the stack * and the stack can not be grown. useracc will return FALSE * if access is denied. */ if ((grow(p, (int)fp) == FALSE) || (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. 
*/ SIGACTION(p, SIGILL) = SIG_DFL; sig = sigmask(SIGILL); p->p_sigignore &= ~sig; p->p_sigcatch &= ~sig; p->p_sigmask &= ~sig; psignal(p, SIGILL); return; } /* * Build the argument list for the signal handler. */ if (p->p_sysent->sv_sigtbl) { if (sig < p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[sig]; else sig = p->p_sysent->sv_sigsize + 1; } sf.sf_signum = sig; sf.sf_code = code; sf.sf_scp = &fp->sf_sc; sf.sf_addr = (char *) regs[tERR]; sf.sf_handler = catcher; /* save scratch registers */ sf.sf_sc.sc_eax = regs[tEAX]; sf.sf_sc.sc_ebx = regs[tEBX]; sf.sf_sc.sc_ecx = regs[tECX]; sf.sf_sc.sc_edx = regs[tEDX]; sf.sf_sc.sc_esi = regs[tESI]; sf.sf_sc.sc_edi = regs[tEDI]; sf.sf_sc.sc_cs = regs[tCS]; sf.sf_sc.sc_ds = regs[tDS]; sf.sf_sc.sc_ss = regs[tSS]; sf.sf_sc.sc_es = regs[tES]; sf.sf_sc.sc_isp = regs[tISP]; /* * Build the signal context to be used by sigreturn. */ sf.sf_sc.sc_onstack = oonstack; sf.sf_sc.sc_mask = mask; sf.sf_sc.sc_sp = regs[tESP]; sf.sf_sc.sc_fp = regs[tEBP]; sf.sf_sc.sc_pc = regs[tEIP]; sf.sf_sc.sc_ps = regs[tEFLAGS]; /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); }; regs[tESP] = (int)fp; regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode)); regs[tEFLAGS] &= ~PSL_VM; regs[tCS] = _ucodesel; regs[tDS] = _udatasel; regs[tES] = _udatasel; regs[tSS] = _udatasel; } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. 
*/ int sigreturn(p, uap, retval) struct proc *p; struct sigreturn_args /* { struct sigcontext *sigcntxp; } */ *uap; int *retval; { register struct sigcontext *scp; register struct sigframe *fp; register int *regs = p->p_md.md_regs; int eflags; /* * (XXX old comment) regs[tESP] points to the return address. * The user scp pointer is above that. * The return address is faked in the signal trampoline code * for consistency. */ scp = uap->sigcntxp; fp = (struct sigframe *) ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); if (useracc((caddr_t)fp, sizeof (*fp), B_WRITE) == 0) return(EFAULT); /* * Don't allow users to change privileged or reserved flags. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = scp->sc_ps; /* * XXX do allow users to change the privileged flag PSL_RF. The * cpu sets PSL_RF in tf_eflags for faults. Debuggers should * sometimes set it there too. tf_eflags is kept in the signal * context during signal handling and there is no other place * to remember it, so the PSL_RF bit may be corrupted by the * signal handler without us knowing. Corruption of the PSL_RF * bit at worst causes one more or one less debugger trap, so * allowing it is fairly harmless. */ if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) { #ifdef DEBUG printf("sigreturn: eflags = 0x%x\n", eflags); #endif return(EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(scp->sc_cs)) { #ifdef DEBUG printf("sigreturn: cs = 0x%x\n", scp->sc_cs); #endif trapsignal(p, SIGBUS, T_PROTFLT); return(EINVAL); } /* restore scratch registers */ regs[tEAX] = scp->sc_eax; regs[tEBX] = scp->sc_ebx; regs[tECX] = scp->sc_ecx; regs[tEDX] = scp->sc_edx; regs[tESI] = scp->sc_esi; regs[tEDI] = scp->sc_edi; regs[tCS] = scp->sc_cs; regs[tDS] = scp->sc_ds; regs[tES] = scp->sc_es; regs[tSS] = scp->sc_ss; regs[tISP] = scp->sc_isp; if (useracc((caddr_t)scp, sizeof (*scp), B_WRITE) == 0) return(EINVAL); if (scp->sc_onstack & 01) p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = scp->sc_mask & ~sigcantmask; regs[tEBP] = scp->sc_fp; regs[tESP] = scp->sc_sp; regs[tEIP] = scp->sc_pc; regs[tEFLAGS] = eflags; return(EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Clear registers on exec */ void setregs(p, entry, stack) struct proc *p; u_long entry; u_long stack; { int *regs = p->p_md.md_regs; #ifdef USER_LDT struct pcb *pcb = &p->p_addr->u_pcb; /* was i386_user_cleanup() in NetBSD */ if (pcb->pcb_ldt) { if (pcb == curpcb) lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt, pcb->pcb_ldt_len * sizeof(union descriptor)); pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0; } #endif bzero(regs, sizeof(struct trapframe)); regs[tEIP] = entry; regs[tESP] = stack; regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T); regs[tSS] = _udatasel; regs[tDS] = _udatasel; regs[tES] = _udatasel; regs[tCS] = _ucodesel; /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. 
Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); #if NNPX > 0 /* Initialize the npx (if any) for the current process. */ npxinit(__INITIAL_NPXCW__); #endif } static int sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int currentldt; int _default_ldt; union descriptor gdt[NGDT]; /* global descriptor table */ struct gate_descriptor idt[NIDT]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment 
descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 3 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 4 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 5 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 6 Proc 0 Tss Descriptor */ { (int) kstack, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 
1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 7 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GAPMDATA_SEL 10 APM BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment 
descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = 
sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void init386(first) int first; { int x; unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; int gsel_tss; struct isa_device *idp; /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; int pagesinbase, pagesinext; int target_page, pa_indx; int off; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; /* * Initialize the console before we print anything out. */ cninit(); #ifdef PC98 /* * Initialize DMAC */ pc98_init_dmac(); #endif /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. 
*/ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; /* Note. eventually want private ldts per process */ for (x = 0; x < NLDT; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef CPU_BUGGY_CYRIX setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #else setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, 
SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif rand_initialize(); r_gdt.rd_limit = sizeof(gdt) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); r_idt.rd_limit = sizeof(idt) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); currentldt = _default_ldt; #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ #ifdef PC98 pc98_getmemsize(); biosbasemem = 640; /* 640KB */ biosextmem = (Maxmem * PAGE_SIZE - 0x100000)/1024; /* extent memory */ #else /* IBM-PC */ /* Use BIOS values stored in RTC CMOS RAM, since probing * breaks certain 386 AT relics. */ biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); /* * If BIOS tells us that it has more than 640k in the basemem, * don't believe it - set it to 640k. */ if (biosbasemem > 640) { printf("Preposterous RTC basemem of %dK, truncating to 640K\n", biosbasemem); biosbasemem = 640; } if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) { printf("Preposterous BIOS basemem of %dK, truncating to 640K\n", bootinfo.bi_basemem); bootinfo.bi_basemem = 640; } /* * Warn if the official BIOS interface disagrees with the RTC * interface used above about the amount of base memory or the * amount of extended memory. Prefer the BIOS value for the base * memory. This is necessary for machines that `steal' base * memory for use as BIOS memory, at least if we are going to use * the BIOS for apm. Prefer the RTC value for extended memory. * Eventually the hackish interface shouldn't even be looked at. 
*/ if (bootinfo.bi_memsizes_valid) { if (bootinfo.bi_basemem != biosbasemem) { vm_offset_t pa; printf( "BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n", bootinfo.bi_basemem, biosbasemem); biosbasemem = bootinfo.bi_basemem; /* * XXX if biosbasemem is now < 640, there is `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from 0 to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(biosbasemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { unsigned *pte; pte = (unsigned *)vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } } if (bootinfo.bi_extmem != biosextmem) printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", bootinfo.bi_extmem, biosextmem); } #endif pagesinbase = biosbasemem * 1024 / PAGE_SIZE; pagesinext = biosextmem * 1024 / PAGE_SIZE; /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. */ #ifndef PC98 /* * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. 
*/ if ((pagesinext > 3840) && (pagesinext < 4096)) pagesinext = 3840; #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = pagesinext + 0x100000/PAGE_SIZE; #ifdef MAXMEM Maxmem = MAXMEM/4; #endif #if NNPX > 0 idp = find_isadev(isa_devtab_null, &npxdriver, 0); if (idp != NULL && idp->id_msize != 0) Maxmem = idp->id_msize / 4; #endif /* call pmap initialization to make new kernel address space */ pmap_bootstrap (first, 0); /* * Size up each available chunk of physical memory. */ /* * We currently don't bother testing base memory. * XXX ...but we probably should. */ pa_indx = 0; badpages = 0; if (pagesinbase > 1) { phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ physmem = pagesinbase - 1; } else { /* point at first chunk end */ pa_indx++; } for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { int tmp, page_bad = FALSE; #ifdef PC98 /* skip system area */ if (target_page>=ptoa(Maxmem_under16M) && target_page < ptoa(4096)) page_bad = TRUE; #endif /* * map page into kernel: valid, read/write, non-cacheable */ *(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page; invltlb(); tmp = *(int *)CADDR1; /* * Test for alternating 1's and 0's */ *(volatile int *)CADDR1 = 0xaaaaaaaa; if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)CADDR1 = 0x55555555; if (*(volatile int *)CADDR1 != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)CADDR1 = 0xffffffff; if (*(volatile int *)CADDR1 != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)CADDR1 = 0x0; if (*(volatile int *)CADDR1 != 0x0) { /* * test of page failed */ page_bad = TRUE; } /* * Restore original value. 
*/ *(int *)CADDR1 = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == FALSE) { /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. */ if (phys_avail[pa_indx] == target_page) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = target_page; /* start */ phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ } physmem++; } else { badpages++; page_bad = FALSE; } } *(int *)CMAP1 = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); avail_end = phys_avail[pa_indx]; /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(sizeof(struct msgbuf)); off += PAGE_SIZE) pmap_enter(kernel_pmap, (vm_offset_t)msgbufp + off, avail_end + off, VM_PROT_ALL, TRUE); msgbufmapped = 1; /* make a initial tss so microp can get interrupt stack on syscall! 
*/ proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE; proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = (sizeof(struct i386tss))<<16; ltr(gsel_tss); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD; } /* * The registers are in the frame; the frame is in the user area of * the process in question; when the process is active, the registers * are in "the kernel stack"; when it's not, they're still there, but * things get flipped around. So, since p->p_md.md_regs is the whole address * of the register set, take its offset from the kernel stack, and * index into the user block. Don't you just *love* virtual memory? * (I'm starting to think seymour is right...) 
 */
/*
 * TF_REGP(p) yields a pointer to process p's saved trap frame as seen
 * through its user area: the byte offset of p->p_md.md_regs from kstack
 * is added to p->p_addr and the result is viewed as a struct trapframe.
 */
#define TF_REGP(p)	((struct trapframe *) \
			 ((char *)(p)->p_addr \
			  + ((char *)(p)->p_md.md_regs - kstack)))

/*
 * Store addr into the saved tf_eip of process p.
 * Always returns 0.
 */
int
ptrace_set_pc(p, addr)
	struct proc *p;
	unsigned int addr;
{
	TF_REGP(p)->tf_eip = addr;
	return (0);
}

/*
 * Set the PSL_T bit in the saved tf_eflags of process p (presumably the
 * single-step trap flag, going by the function name).
 * Always returns 0.
 */
int
ptrace_single_step(p)
	struct proc *p;
{
	TF_REGP(p)->tf_eflags |= PSL_T;
	return (0);
}

/*
 * Write the int `data' at byte offset `off' into the user area of
 * process p, but only if the whole int lies inside one of two windows:
 * the saved trap frame, or the pcb_savefpu member of u_pcb.
 *
 * Returns 0 on success, EINVAL if a trap-frame write would produce an
 * eflags or %cs value rejected by EFLAGS_SECURE/CS_SECURE (macros
 * defined elsewhere in the file), and EFAULT if `off' is in neither
 * window.
 */
int
ptrace_write_u(p, off, data)
	struct proc *p;
	vm_offset_t off;
	int data;
{
	struct trapframe frame_copy;
	vm_offset_t min;
	struct trapframe *tp;

	/*
	 * Privileged kernel state is scattered all over the user area.
	 * Only allow write access to parts of regs and to fpregs.
	 */
	/* Window 1: the trap frame, at the same offset TF_REGP() uses. */
	min = (char *)p->p_md.md_regs - kstack;
	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
		tp = TF_REGP(p);
		/*
		 * Apply the write to a scratch copy first so the resulting
		 * eflags and %cs can be vetted before the real frame is
		 * touched.
		 */
		frame_copy = *tp;
		*(int *)((char *)&frame_copy + (off - min)) = data;
		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
		    !CS_SECURE(frame_copy.tf_cs))
			return (EINVAL);
		*(int*)((char *)p->p_addr + off) = data;
		return (0);
	}
	/* Window 2: the saved FPU state; no value checks are applied. */
	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
		*(int*)((char *)p->p_addr + off) = data;
		return (0);
	}
	return (EFAULT);
}

/*
 * Copy the register values held in process p's saved trap frame into
 * *regs (segment registers, general registers, eip, eflags, esp, ss).
 * Always returns 0.
 */
int
fill_regs(p, regs)
	struct proc *p;
	struct reg *regs;
{
	struct trapframe *tp;

	tp = TF_REGP(p);
	regs->r_es = tp->tf_es;
	regs->r_ds = tp->tf_ds;
	regs->r_edi = tp->tf_edi;
	regs->r_esi = tp->tf_esi;
	regs->r_ebp = tp->tf_ebp;
	regs->r_ebx = tp->tf_ebx;
	regs->r_edx = tp->tf_edx;
	regs->r_ecx = tp->tf_ecx;
	regs->r_eax = tp->tf_eax;
	regs->r_eip = tp->tf_eip;
	regs->r_cs = tp->tf_cs;
	regs->r_eflags = tp->tf_eflags;
	regs->r_esp = tp->tf_esp;
	regs->r_ss = tp->tf_ss;
	return (0);
}

/*
 * Inverse of fill_regs(): load process p's saved trap frame from *regs.
 * The requested eflags and %cs are checked with EFLAGS_SECURE and
 * CS_SECURE before anything is stored; on rejection EINVAL is returned
 * and the frame is left unmodified.  Returns 0 on success.
 */
int
set_regs(p, regs)
	struct proc *p;
	struct reg *regs;
{
	struct trapframe *tp;

	tp = TF_REGP(p);
	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
	    !CS_SECURE(regs->r_cs))
		return (EINVAL);
	tp->tf_es = regs->r_es;
	tp->tf_ds = regs->r_ds;
	tp->tf_edi = regs->r_edi;
	tp->tf_esi = regs->r_esi;
	tp->tf_ebp = regs->r_ebp;
	tp->tf_ebx = regs->r_ebx;
	tp->tf_edx = regs->r_edx;
	tp->tf_ecx = regs->r_ecx;
	tp->tf_eax = regs->r_eax;
	tp->tf_eip = regs->r_eip;
	tp->tf_cs = regs->r_cs;
	tp->tf_eflags = regs->r_eflags;
	tp->tf_esp = regs->r_esp;
	tp->tf_ss = regs->r_ss;
	return (0);
}

#ifndef DDB
/*
 * Fallback Debugger() compiled only when DDB is not defined: it just
 * prints the message and returns to the caller.
 */
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */

/* NOTE(review): the header name after #include is missing in this text. */
#include

/*
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the partition. Adjust transfer
 * if needed, and signal errors or early completion.
 *
 * Returns 1 when the transfer may proceed (b_pblkno has been set, and
 * b_bcount possibly shortened), 0 when the request starts exactly at
 * the end of the partition (b_resid is set to b_bcount), and -1 on
 * error with B_ERROR set in b_flags and b_error set to EROFS or EINVAL.
 * A non-zero wlabel permits writes that cover the label sector.
 */
int
bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
{
	struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
	int labelsect = lp->d_partitions[0].p_offset;
	/* sz is the request length in DEV_BSIZE sectors, rounded up. */
	int maxsz = p->p_size,
		sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;

	/* overwriting disk label ? */
	/* XXX should also protect bootstrap in first 8K */
	if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
#if LABELSECTOR != 0
	    bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
#endif
	    (bp->b_flags & B_READ) == 0 && wlabel == 0) {
		bp->b_error = EROFS;
		goto bad;
	}

#if defined(DOSBBSECTOR) && defined(notyet)
	/* overwriting master boot record? */
	if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
	    (bp->b_flags & B_READ) == 0 && wlabel == 0) {
		bp->b_error = EROFS;
		goto bad;
	}
#endif

	/* beyond partition? */
	/*
	 * NOTE(review): a negative b_blkno enters this branch, but then
	 * sz = maxsz - b_blkno exceeds maxsz, so the sz <= 0 rejection
	 * below does not fire for it - confirm whether callers can pass
	 * a negative block number.
	 */
	if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
		/* if exactly at end of disk, return an EOF */
		if (bp->b_blkno == maxsz) {
			bp->b_resid = bp->b_bcount;
			return(0);
		}
		/* or truncate if part of it fits */
		sz = maxsz - bp->b_blkno;
		if (sz <= 0) {
			bp->b_error = EINVAL;
			goto bad;
		}
		bp->b_bcount = sz << DEV_BSHIFT;
	}

	/* Convert the partition-relative block to an absolute one. */
	bp->b_pblkno = bp->b_blkno + p->p_offset;
	return(1);

bad:
	bp->b_flags |= B_ERROR;
	return(-1);
}

#ifdef DDB
/*
 * Provide inb() and outb() as functions.  They are normally only
 * available as macros calling inlined functions, thus cannot be
 * called inside DDB.
 *
 * The actual code is stolen from , and de-inlined.
 */
/* Drop the macro versions so real functions of the same name can exist. */
#undef inb
#undef outb

/* silence compiler warnings */
u_char inb(u_int);
void outb(u_int, u_char);

/*
 * Read one byte from I/O port `port' and return it.
 * The asm places `port' in %edx ("d") and takes the result from %al
 * ("=a").
 */
u_char
inb(u_int port)
{
	u_char	data;
	/*
	 * We use %%dx and not %1 here because i/o is done at %dx and not at
	 * %edx, while gcc generates inferior code (movw instead of movl)
	 * if we tell it to load (u_short) port.
	 */
	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}

/*
 * Write the byte `data' to I/O port `port'.
 * The asm places the byte in %al ("a") and `port' in %edx ("d").
 */
void
outb(u_int port, u_char data)
{
	u_char	al;
	/*
	 * Use an unnecessary assignment to help gcc's register allocator.
	 * This makes a large difference for gcc-1.40 and a tiny difference
	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
	 * best results.  gcc-2.6.0 can't handle this.
	 */
	al = data;
	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}
#endif /* DDB */
Index: head/sys/pc98/pc98/machdep.c
===================================================================
--- head/sys/pc98/pc98/machdep.c (revision 24436)
+++ head/sys/pc98/pc98/machdep.c (revision 24437)
@@ -1,1613 +1,1611 @@
/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.32 1997/03/26 07:03:30 kato Exp $ + * $Id: machdep.c,v 1.33 1997/03/29 02:48:49 kato Exp $ */ #include "npx.h" #include "opt_sysvipc.h" #include "opt_ddb.h" #include "opt_bounce.h" #include "opt_machdep.h" #include "opt_perfmon.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SYSVSHM #include #endif #ifdef SYSVMSG #include #endif #ifdef SYSVSEM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef PC98 #include #else #include #endif #include extern void init386 __P((int first)); extern int ptrace_set_pc __P((struct proc *p, unsigned int addr)); extern int ptrace_single_step 
__P((struct proc *p)); extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ #endif #ifdef BOUNCE_BUFFERS extern char *bouncememory; extern int maxbkva; #ifdef BOUNCEPAGES int bouncepages = BOUNCEPAGES; #else int bouncepages = 0; #endif #endif /* BOUNCE_BUFFERS */ extern int freebufspace; int msgbufmapped = 0; /* set when safe to use msgbuf */ int _udatasel, _ucodesel; u_int atdevbase; int physmem = 0; int cold = 1; static int sysctl_hw_physmem SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int sysctl_hw_usermem SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); int boothowto = 0, bootverbose = 0, Maxmem = 0; static int badpages = 0; #ifdef PC98 int Maxmem_under16M = 0; #endif long dumplo; extern int bootdev; vm_offset_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */ static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; extern struct linker_set netisr_set; #define offsetof(type, member) 
((size_t)(&((type *)0)->member)) static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); earlysetcpuclass(); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (badpages != 0) { int indx = 1; /* * XXX skip reporting ISA hole & unmanaged kernel memory */ if (phys_avail[0] == PAGE_SIZE) indx += 2; printf("Physical memory hole(s):\n"); for (; phys_avail[indx + 1] != 0; indx += 2) { int size = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size, size / PAGE_SIZE); } } /* * Quickly wire in netisrs. */ setup_netisrs(&netisr_set); /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. 
*/ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif #ifdef SYSVSEM valloc(sema, struct semid_ds, seminfo.semmni); valloc(sem, struct sem, seminfo.semmns); /* This is pretty disgusting! */ valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int)); #endif #ifdef SYSVMSG valloc(msgpool, char, msginfo.msgmax); valloc(msgmaps, struct msgmap, msginfo.msgseg); valloc(msghdrs, struct msg, msginfo.msgtql); valloc(msqids, struct msqid_ds, msginfo.msgmni); #endif if (nbuf == 0) { nbuf = 30; if( physmem > 1024) nbuf += min((physmem - 1024) / 8, 2048); } nswbuf = max(min(nbuf/4, 128), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); #ifdef BOUNCE_BUFFERS /* * If there is more than 16MB of memory, allocate some bounce buffers */ if (Maxmem > 4096) { if (bouncepages == 0) { bouncepages = 64; bouncepages += ((Maxmem - 4096) / 2048) * 32; } v = (caddr_t)((vm_offset_t)round_page(v)); valloc(bouncememory, char, bouncepages * PAGE_SIZE); } #endif /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); #ifdef BOUNCE_BUFFERS clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + maxbkva + pager_map_size, TRUE); io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE); #else clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + 
pager_map_size, TRUE); #endif buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE), TRUE); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size, TRUE); exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*ARG_MAX), TRUE); - exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, - (16*PAGE_SIZE), TRUE); u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (maxproc*UPAGES*PAGE_SIZE), FALSE); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size, FALSE); } /* * Initialize callouts */ callfree = callout; for (i = 1; i < ncallout; i++) callout[i-1].c_next = &callout[i]; #if defined(USERCONFIG) #if defined(USERCONFIG_BOOT) if (1) { #else if (boothowto & RB_CONFIG) { #endif userconfig(); cninit(); /* the preferred console may have changed */ } #endif #ifdef BOUNCE_BUFFERS /* * init bounce buffers */ vm_bounce_init(); #endif printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. 
*/ bufinit(); vm_pager_bufferinit(); } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } static void setup_netisrs(ls) struct linker_set *ls; { int i; const struct netisrtab *nit; for(i = 0; ls->ls_items[i]; i++) { nit = (const struct netisrtab *)ls->ls_items[i]; register_netisr(nit->nit_num, nit->nit_isr); } } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig, mask; u_long code; { register struct proc *p = curproc; register int *regs; register struct sigframe *fp; struct sigframe sf; struct sigacts *psp = p->p_sigacts; int oonstack; regs = p->p_md.md_regs; oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; /* * Allocate and validate space for the signal handler context. */ if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack && (psp->ps_sigonstack & sigmask(sig))) { fp = (struct sigframe *)(psp->ps_sigstk.ss_sp + psp->ps_sigstk.ss_size - sizeof(struct sigframe)); psp->ps_sigstk.ss_flags |= SS_ONSTACK; } else { fp = (struct sigframe *)regs[tESP] - 1; } /* * grow() will return FALSE if the fp will not fit inside the stack * and the stack can not be grown. useracc will return FALSE * if access is denied. */ if ((grow(p, (int)fp) == FALSE) || (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. 
*/ SIGACTION(p, SIGILL) = SIG_DFL; sig = sigmask(SIGILL); p->p_sigignore &= ~sig; p->p_sigcatch &= ~sig; p->p_sigmask &= ~sig; psignal(p, SIGILL); return; } /* * Build the argument list for the signal handler. */ if (p->p_sysent->sv_sigtbl) { if (sig < p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[sig]; else sig = p->p_sysent->sv_sigsize + 1; } sf.sf_signum = sig; sf.sf_code = code; sf.sf_scp = &fp->sf_sc; sf.sf_addr = (char *) regs[tERR]; sf.sf_handler = catcher; /* save scratch registers */ sf.sf_sc.sc_eax = regs[tEAX]; sf.sf_sc.sc_ebx = regs[tEBX]; sf.sf_sc.sc_ecx = regs[tECX]; sf.sf_sc.sc_edx = regs[tEDX]; sf.sf_sc.sc_esi = regs[tESI]; sf.sf_sc.sc_edi = regs[tEDI]; sf.sf_sc.sc_cs = regs[tCS]; sf.sf_sc.sc_ds = regs[tDS]; sf.sf_sc.sc_ss = regs[tSS]; sf.sf_sc.sc_es = regs[tES]; sf.sf_sc.sc_isp = regs[tISP]; /* * Build the signal context to be used by sigreturn. */ sf.sf_sc.sc_onstack = oonstack; sf.sf_sc.sc_mask = mask; sf.sf_sc.sc_sp = regs[tESP]; sf.sf_sc.sc_fp = regs[tEBP]; sf.sf_sc.sc_pc = regs[tEIP]; sf.sf_sc.sc_ps = regs[tEFLAGS]; /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); }; regs[tESP] = (int)fp; regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode)); regs[tEFLAGS] &= ~PSL_VM; regs[tCS] = _ucodesel; regs[tDS] = _udatasel; regs[tES] = _udatasel; regs[tSS] = _udatasel; } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. 
*/ int sigreturn(p, uap, retval) struct proc *p; struct sigreturn_args /* { struct sigcontext *sigcntxp; } */ *uap; int *retval; { register struct sigcontext *scp; register struct sigframe *fp; register int *regs = p->p_md.md_regs; int eflags; /* * (XXX old comment) regs[tESP] points to the return address. * The user scp pointer is above that. * The return address is faked in the signal trampoline code * for consistency. */ scp = uap->sigcntxp; fp = (struct sigframe *) ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); if (useracc((caddr_t)fp, sizeof (*fp), B_WRITE) == 0) return(EFAULT); /* * Don't allow users to change privileged or reserved flags. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = scp->sc_ps; /* * XXX do allow users to change the privileged flag PSL_RF. The * cpu sets PSL_RF in tf_eflags for faults. Debuggers should * sometimes set it there too. tf_eflags is kept in the signal * context during signal handling and there is no other place * to remember it, so the PSL_RF bit may be corrupted by the * signal handler without us knowing. Corruption of the PSL_RF * bit at worst causes one more or one less debugger trap, so * allowing it is fairly harmless. */ if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) { #ifdef DEBUG printf("sigreturn: eflags = 0x%x\n", eflags); #endif return(EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(scp->sc_cs)) { #ifdef DEBUG printf("sigreturn: cs = 0x%x\n", scp->sc_cs); #endif trapsignal(p, SIGBUS, T_PROTFLT); return(EINVAL); } /* restore scratch registers */ regs[tEAX] = scp->sc_eax; regs[tEBX] = scp->sc_ebx; regs[tECX] = scp->sc_ecx; regs[tEDX] = scp->sc_edx; regs[tESI] = scp->sc_esi; regs[tEDI] = scp->sc_edi; regs[tCS] = scp->sc_cs; regs[tDS] = scp->sc_ds; regs[tES] = scp->sc_es; regs[tSS] = scp->sc_ss; regs[tISP] = scp->sc_isp; if (useracc((caddr_t)scp, sizeof (*scp), B_WRITE) == 0) return(EINVAL); if (scp->sc_onstack & 01) p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = scp->sc_mask & ~sigcantmask; regs[tEBP] = scp->sc_fp; regs[tESP] = scp->sc_sp; regs[tEIP] = scp->sc_pc; regs[tEFLAGS] = eflags; return(EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Clear registers on exec */ void setregs(p, entry, stack) struct proc *p; u_long entry; u_long stack; { int *regs = p->p_md.md_regs; #ifdef USER_LDT struct pcb *pcb = &p->p_addr->u_pcb; /* was i386_user_cleanup() in NetBSD */ if (pcb->pcb_ldt) { if (pcb == curpcb) lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt, pcb->pcb_ldt_len * sizeof(union descriptor)); pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0; } #endif bzero(regs, sizeof(struct trapframe)); regs[tEIP] = entry; regs[tESP] = stack; regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T); regs[tSS] = _udatasel; regs[tDS] = _udatasel; regs[tES] = _udatasel; regs[tCS] = _ucodesel; /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. 
Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); #if NNPX > 0 /* Initialize the npx (if any) for the current process. */ npxinit(__INITIAL_NPXCW__); #endif } static int sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int currentldt; int _default_ldt; union descriptor gdt[NGDT]; /* global descriptor table */ struct gate_descriptor idt[NIDT]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment 
descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 3 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 4 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 5 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 6 Proc 0 Tss Descriptor */ { (int) kstack, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 
1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 7 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GAPMDATA_SEL 10 APM BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten by APM) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment 
descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = 
sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void init386(first) int first; { int x; unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; int gsel_tss; struct isa_device *idp; /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; int pagesinbase, pagesinext; int target_page, pa_indx; int off; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; /* * Initialize the console before we print anything out. */ cninit(); #ifdef PC98 /* * Initialize DMAC */ pc98_init_dmac(); #endif /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. 
*/ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; /* Note. eventually want private ldts per process */ for (x = 0; x < NLDT; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef CPU_BUGGY_CYRIX setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #else setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, 
SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif rand_initialize(); r_gdt.rd_limit = sizeof(gdt) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); r_idt.rd_limit = sizeof(idt) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); currentldt = _default_ldt; #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ #ifdef PC98 pc98_getmemsize(); biosbasemem = 640; /* 640KB */ biosextmem = (Maxmem * PAGE_SIZE - 0x100000)/1024; /* extent memory */ #else /* IBM-PC */ /* Use BIOS values stored in RTC CMOS RAM, since probing * breaks certain 386 AT relics. */ biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); /* * If BIOS tells us that it has more than 640k in the basemem, * don't believe it - set it to 640k. */ if (biosbasemem > 640) { printf("Preposterous RTC basemem of %dK, truncating to 640K\n", biosbasemem); biosbasemem = 640; } if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) { printf("Preposterous BIOS basemem of %dK, truncating to 640K\n", bootinfo.bi_basemem); bootinfo.bi_basemem = 640; } /* * Warn if the official BIOS interface disagrees with the RTC * interface used above about the amount of base memory or the * amount of extended memory. Prefer the BIOS value for the base * memory. This is necessary for machines that `steal' base * memory for use as BIOS memory, at least if we are going to use * the BIOS for apm. Prefer the RTC value for extended memory. * Eventually the hackish interface shouldn't even be looked at. 
*/ if (bootinfo.bi_memsizes_valid) { if (bootinfo.bi_basemem != biosbasemem) { vm_offset_t pa; printf( "BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n", bootinfo.bi_basemem, biosbasemem); biosbasemem = bootinfo.bi_basemem; /* * XXX if biosbasemem is now < 640, there is `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from 0 to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(biosbasemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { unsigned *pte; pte = (unsigned *)vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } } if (bootinfo.bi_extmem != biosextmem) printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", bootinfo.bi_extmem, biosextmem); } #endif pagesinbase = biosbasemem * 1024 / PAGE_SIZE; pagesinext = biosextmem * 1024 / PAGE_SIZE; /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. */ #ifndef PC98 /* * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. 
*/ if ((pagesinext > 3840) && (pagesinext < 4096)) pagesinext = 3840; #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = pagesinext + 0x100000/PAGE_SIZE; #ifdef MAXMEM Maxmem = MAXMEM/4; #endif #if NNPX > 0 idp = find_isadev(isa_devtab_null, &npxdriver, 0); if (idp != NULL && idp->id_msize != 0) Maxmem = idp->id_msize / 4; #endif /* call pmap initialization to make new kernel address space */ pmap_bootstrap (first, 0); /* * Size up each available chunk of physical memory. */ /* * We currently don't bother testing base memory. * XXX ...but we probably should. */ pa_indx = 0; badpages = 0; if (pagesinbase > 1) { phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ physmem = pagesinbase - 1; } else { /* point at first chunk end */ pa_indx++; } for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { int tmp, page_bad = FALSE; #ifdef PC98 /* skip system area */ if (target_page>=ptoa(Maxmem_under16M) && target_page < ptoa(4096)) page_bad = TRUE; #endif /* * map page into kernel: valid, read/write, non-cacheable */ *(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page; invltlb(); tmp = *(int *)CADDR1; /* * Test for alternating 1's and 0's */ *(volatile int *)CADDR1 = 0xaaaaaaaa; if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)CADDR1 = 0x55555555; if (*(volatile int *)CADDR1 != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)CADDR1 = 0xffffffff; if (*(volatile int *)CADDR1 != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)CADDR1 = 0x0; if (*(volatile int *)CADDR1 != 0x0) { /* * test of page failed */ page_bad = TRUE; } /* * Restore original value. 
*/ *(int *)CADDR1 = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == FALSE) { /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. */ if (phys_avail[pa_indx] == target_page) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = target_page; /* start */ phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ } physmem++; } else { badpages++; page_bad = FALSE; } } *(int *)CMAP1 = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); avail_end = phys_avail[pa_indx]; /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(sizeof(struct msgbuf)); off += PAGE_SIZE) pmap_enter(kernel_pmap, (vm_offset_t)msgbufp + off, avail_end + off, VM_PROT_ALL, TRUE); msgbufmapped = 1; /* make a initial tss so microp can get interrupt stack on syscall! 
*/ proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE; proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = (sizeof(struct i386tss))<<16; ltr(gsel_tss); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD; } /* * The registers are in the frame; the frame is in the user area of * the process in question; when the process is active, the registers * are in "the kernel stack"; when it's not, they're still there, but * things get flipped around. So, since p->p_md.md_regs is the whole address * of the register set, take its offset from the kernel stack, and * index into the user block. Don't you just *love* virtual memory? * (I'm starting to think seymour is right...) 
*/ #define TF_REGP(p) ((struct trapframe *) \ ((char *)(p)->p_addr \ + ((char *)(p)->p_md.md_regs - kstack))) int ptrace_set_pc(p, addr) struct proc *p; unsigned int addr; { TF_REGP(p)->tf_eip = addr; return (0); } int ptrace_single_step(p) struct proc *p; { TF_REGP(p)->tf_eflags |= PSL_T; return (0); } int ptrace_write_u(p, off, data) struct proc *p; vm_offset_t off; int data; { struct trapframe frame_copy; vm_offset_t min; struct trapframe *tp; /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_regs - kstack; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = TF_REGP(p); frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct trapframe *tp; tp = TF_REGP(p); regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct trapframe *tp; tp = TF_REGP(p); if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; 
tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; return (0); } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #include /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) { struct partition *p = lp->d_partitions + dkpart(bp->b_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->b_blkno == maxsz) { bp->b_resid = bp->b_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->b_blkno; if (sz <= 0) { bp->b_error = EINVAL; goto bad; } bp->b_bcount = sz << DEV_BSHIFT; } bp->b_pblkno = bp->b_blkno + p->p_offset; return(1); bad: bp->b_flags |= B_ERROR; return(-1); } #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. 
/*
 * inb:
 *
 *	Out-of-line version of inb(): read one byte from I/O port `port'
 *	and return it.  The port number is passed in %dx and the result
 *	comes back in %al, as the asm constraints below require.
 */
u_char
inb(u_int port)
{
	u_char	data;
	/*
	 * We use %%dx and not %1 here because i/o is done at %dx and not at
	 * %edx, while gcc generates inferior code (movw instead of movl)
	 * if we tell it to load (u_short) port.
	 */
	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
* * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id$ + * $Id: vm_kern.c,v 1.33 1997/02/22 09:48:21 peter Exp $ */ /* * Kernel memory management. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include vm_map_t kernel_map=0; vm_map_t kmem_map=0; vm_map_t exec_map=0; -vm_map_t exech_map=0; vm_map_t clean_map=0; vm_map_t u_map=0; vm_map_t buffer_map=0; vm_map_t mb_map=0; int mb_map_full=0; vm_map_t io_map=0; vm_map_t phys_map=0; /* * kmem_alloc_pageable: * * Allocate pageable memory to the kernel's address map. * "map" must be kernel_map or a submap of kernel_map. */ vm_offset_t kmem_alloc_pageable(map, size) vm_map_t map; register vm_size_t size; { vm_offset_t addr; register int result; size = round_page(size); addr = vm_map_min(map); result = vm_map_find(map, NULL, (vm_offset_t) 0, &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0); if (result != KERN_SUCCESS) { return (0); } return (addr); } /* * Allocate wired-down memory in the kernel's address map * or a submap. */ vm_offset_t kmem_alloc(map, size) register vm_map_t map; register vm_size_t size; { vm_offset_t addr; register vm_offset_t offset; vm_offset_t i; size = round_page(size); /* * Use the kernel object for wired-down kernel pages. Assume that no * region of the kernel object is referenced more than once. */ /* * Locate sufficient space in the map. This will give us the final * virtual address for the new memory, and thus will tell us the * offset within the kernel map. 
*/ vm_map_lock(map); if (vm_map_findspace(map, 0, size, &addr)) { vm_map_unlock(map); return (0); } offset = addr - VM_MIN_KERNEL_ADDRESS; vm_object_reference(kernel_object); vm_map_insert(map, kernel_object, offset, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); /* * Guarantee that there are pages already in this object before * calling vm_map_pageable. This is to prevent the following * scenario: * * 1) Threads have swapped out, so that there is a pager for the * kernel_object. 2) The kmsg zone is empty, and so we are * kmem_allocing a new page for it. 3) vm_map_pageable calls vm_fault; * there is no page, but there is a pager, so we call * pager_data_request. But the kmsg zone is empty, so we must * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when * we get the data back from the pager, it will be (very stale) * non-zero data. kmem_alloc is defined to return zero-filled memory. * * We're intentionally not activating the pages we allocate to prevent a * race with page-out. vm_map_pageable will wire the pages. */ for (i = 0; i < size; i += PAGE_SIZE) { vm_page_t mem; while ((mem = vm_page_alloc(kernel_object, OFF_TO_IDX(offset + i), VM_ALLOC_ZERO)) == NULL) { VM_WAIT; } if ((mem->flags & PG_ZERO) == 0) vm_page_zero_fill(mem); mem->flags &= ~(PG_BUSY|PG_ZERO); mem->valid = VM_PAGE_BITS_ALL; } /* * And finally, mark the data as non-pageable. */ (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE); return (addr); } /* * kmem_free: * * Release a region of kernel virtual memory allocated * with kmem_alloc, and return the physical pages * associated with that region. */ void kmem_free(map, addr, size) vm_map_t map; register vm_offset_t addr; vm_size_t size; { (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size)); } /* * kmem_suballoc: * * Allocates a map to manage a subrange * of the kernel virtual address space. 
* * Arguments are as follows: * * parent Map to take range from * size Size of range to find * min, max Returned endpoints of map * pageable Can the region be paged */ vm_map_t kmem_suballoc(parent, min, max, size, pageable) register vm_map_t parent; vm_offset_t *min, *max; register vm_size_t size; boolean_t pageable; { register int ret; vm_map_t result; size = round_page(size); *min = (vm_offset_t) vm_map_min(parent); ret = vm_map_find(parent, NULL, (vm_offset_t) 0, min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0); if (ret != KERN_SUCCESS) { printf("kmem_suballoc: bad status return of %d.\n", ret); panic("kmem_suballoc"); } *max = *min + size; pmap_reference(vm_map_pmap(parent)); result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable); if (result == NULL) panic("kmem_suballoc: cannot create submap"); if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) panic("kmem_suballoc: unable to change range to submap"); return (result); } /* * Allocate wired-down memory in the kernel's address map for the higher * level kernel memory allocator (kern/kern_malloc.c). We cannot use * kmem_alloc() because we may need to allocate memory at interrupt * level where we cannot block (canwait == FALSE). * * This routine has its own private kernel submap (kmem_map) and object * (kmem_object). This, combined with the fact that only malloc uses * this routine, ensures that we will never block in map or object waits. * * Note that this still only works in a uni-processor environment and * when called at splhigh(). * * We don't worry about expanding the map (adding entries) since entries * for wired maps are statically allocated. 
*/ vm_offset_t kmem_malloc(map, size, waitflag) register vm_map_t map; register vm_size_t size; boolean_t waitflag; { register vm_offset_t offset, i; vm_map_entry_t entry; vm_offset_t addr; vm_page_t m; if (map != kmem_map && map != mb_map) panic("kmem_malloc: map != {kmem,mb}_map"); size = round_page(size); addr = vm_map_min(map); /* * Locate sufficient space in the map. This will give us the final * virtual address for the new memory, and thus will tell us the * offset within the kernel map. */ vm_map_lock(map); if (vm_map_findspace(map, 0, size, &addr)) { vm_map_unlock(map); if (map == mb_map) { mb_map_full = TRUE; log(LOG_ERR, "Out of mbuf clusters - increase maxusers!\n"); return (0); } if (waitflag == M_WAITOK) panic("kmem_malloc: kmem_map too small"); return (0); } offset = addr - VM_MIN_KERNEL_ADDRESS; vm_object_reference(kmem_object); vm_map_insert(map, kmem_object, offset, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); for (i = 0; i < size; i += PAGE_SIZE) { retry: m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), (waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM); /* * Ran out of space, free everything up and return. Don't need * to lock page queues here as we know that the pages we got * aren't on any queues. */ if (m == NULL) { if (waitflag == M_WAITOK) { VM_WAIT; goto retry; } while (i != 0) { i -= PAGE_SIZE; m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); PAGE_WAKEUP(m); vm_page_free(m); } vm_map_delete(map, addr, addr + size); vm_map_unlock(map); return (0); } m->flags &= ~PG_ZERO; m->valid = VM_PAGE_BITS_ALL; } /* * Mark map entry as non-pageable. Assert: vm_map_insert() will never * be able to extend the previous entry so there will be a new entry * exactly corresponding to this address range and it will have * wired_count == 0. 
*/ if (!vm_map_lookup_entry(map, addr, &entry) || entry->start != addr || entry->end != addr + size || entry->wired_count) panic("kmem_malloc: entry not found or misaligned"); entry->wired_count++; vm_map_simplify_entry(map, entry); /* * Loop thru pages, entering them in the pmap. (We cannot add them to * the wired count without wrapping the vm_page_queue_lock in * splimp...) */ for (i = 0; i < size; i += PAGE_SIZE) { m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); vm_page_wire(m); PAGE_WAKEUP(m); pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m), VM_PROT_ALL, 1); m->flags |= PG_MAPPED|PG_WRITEABLE; } vm_map_unlock(map); return (addr); } /* * kmem_alloc_wait * * Allocates pageable memory from a sub-map of the kernel. If the submap * has no room, the caller sleeps waiting for more memory in the submap. * */ vm_offset_t kmem_alloc_wait(map, size) vm_map_t map; vm_size_t size; { vm_offset_t addr; size = round_page(size); for (;;) { /* * To make this work for more than one map, use the map's lock * to lock out sleepers/wakers. */ vm_map_lock(map); if (vm_map_findspace(map, 0, size, &addr) == 0) break; /* no space now; see if we can ever get space */ if (vm_map_max(map) - vm_map_min(map) < size) { vm_map_unlock(map); return (0); } vm_map_unlock(map); tsleep(map, PVM, "kmaw", 0); } vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); return (addr); } /* * kmem_free_wakeup * * Returns memory to a submap of the kernel, and wakes up any processes * waiting for memory in that map. */ void kmem_free_wakeup(map, addr, size) vm_map_t map; vm_offset_t addr; vm_size_t size; { vm_map_lock(map); (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); wakeup(map); vm_map_unlock(map); } /* * Create the kernel map; insert a mapping covering kernel text, data, bss, * and all space allocated thus far (`boostrap' data). 
The new map will thus * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and * the range between `start' and `end' as free. */ void kmem_init(start, end) vm_offset_t start, end; { register vm_map_t m; m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE); vm_map_lock(m); /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ kernel_map = m; (void) vm_map_insert(m, NULL, (vm_offset_t) 0, VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0); /* ... and ending with the completion of the above `insert' */ vm_map_unlock(m); } Index: head/sys/vm/vm_kern.h =================================================================== --- head/sys/vm/vm_kern.h (revision 24436) +++ head/sys/vm/vm_kern.h (revision 24437) @@ -1,88 +1,87 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_kern.h 8.1 (Berkeley) 6/11/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id$ + * $Id: vm_kern.h,v 1.12 1997/02/22 09:48:21 peter Exp $ */ #ifndef _VM_VM_KERN_H_ #define _VM_VM_KERN_H_ 1 /* Kernel memory management definitions. 
*/
/*
 * Everything below is an `extern' declaration: this header only announces
 * these names, and the storage for them is defined in other files.
 */
extern vm_map_t buffer_map;
extern vm_map_t kernel_map;
extern vm_map_t kmem_map;
extern vm_map_t mb_map;
extern int mb_map_full;
extern vm_map_t io_map;
extern vm_map_t clean_map;
extern vm_map_t phys_map;
extern vm_map_t exec_map;
-extern vm_map_t exech_map;
extern vm_map_t u_map;

extern vm_offset_t kernel_vm_end;

/* XXX - elsewhere? */
/*
 * Prototype only, with unnamed parameters; consult the definition of
 * contigmalloc1() for what each argument means.
 */
extern void *contigmalloc1(u_long, int, int, u_long, u_long, u_long, u_long,
    vm_map_t);

#endif /* _VM_VM_KERN_H_ */