Index: head/sys/amd64/amd64/amd64-gdbstub.c
===================================================================
--- head/sys/amd64/amd64/amd64-gdbstub.c	(revision 123179)
+++ head/sys/amd64/amd64/amd64-gdbstub.c	(revision 123180)
@@ -1,614 +1,614 @@
/****************************************************************************

		THIS SOFTWARE IS NOT COPYRIGHTED

   HP offers the following for use in the public domain.  HP makes no
   warranty with regard to the software or its performance and the
   user accepts the software "AS IS" with all faults.

   HP DISCLAIMS ANY WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD
   TO THIS SOFTWARE INCLUDING BUT NOT LIMITED TO THE WARRANTIES
   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

****************************************************************************/

+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
/****************************************************************************
 *  Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $
 *
 *  Module name: remcom.c $
 *  Revision: 1.34 $
 *  Date: 91/03/09 12:29:49 $
 *  Contributor:     Lake Stevens Instrument Division$
 *
 *  Description:     low level support for gdb debugger. $
 *
 *  Considerations:  only works on target hardware $
 *
 *  Written by:      Glenn Engel $
 *  ModuleState:     Experimental $
 *
 *  NOTES:           See Below $
 *
 *  Modified for FreeBSD by Stu Grossman.
 *
 *  To enable debugger support, two things need to happen.  One, a
 *  call to set_debug_traps() is necessary in order to allow any breakpoints
 *  or error conditions to be properly intercepted and reported to gdb.
 *  Two, a breakpoint needs to be generated to begin communication.  This
 *  is most easily accomplished by a call to breakpoint().  Breakpoint()
 *  simulates a breakpoint by executing a trap #1.
 *
 *  The external function exceptionHandler() is
 *  used to attach a specific handler to a specific 386 vector number.
 *  It should use the same privilege level it runs at.  It should
 *  install it as an interrupt gate so that interrupts are masked
 *  while the handler runs.
 *  Also, need to assign exceptionHook and oldExceptionHook.
 *
 *  Because gdb will sometimes write to the stack area to execute function
 *  calls, this program cannot rely on using the supervisor stack so it
 *  uses its own stack area reserved in the int array remcomStack.
 *
 *************
 *
 *    The following gdb commands are supported:
 *
 * command          function                                Return value
 *
 *    g             return the value of the CPU registers   hex data or ENN
 *    G             set the value of the CPU registers      OK or ENN
 *
 *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA       hex data or ENN
 *    MAA..AA,LLLL: Write LLLL bytes at address AA..AA      OK or ENN
 *
 *    c             Resume at current address               SNN   (signal NN)
 *    cAA..AA       Continue at address AA..AA              SNN
 *
 *    s             Step one instruction                    SNN
 *    sAA..AA       Step one instruction from AA..AA        SNN
 *
 *    k             kill
 *
 *    ?             What was the last sigval ?              SNN   (signal NN)
 *
 *    D             detach                                  OK
 *
 * All commands and responses are sent with a packet which includes a
 * checksum.  A packet consists of
 *
 * $<packet info>#<checksum>.
 *
 * where
 * <packet info> :: <characters representing the command or response>
 * <checksum>    :: <two hex digits computed as modulo 256 sum of <packet info>>
 *
 * When a packet is received, it is first acknowledged with either '+' or '-'.
 * '+' indicates a successful transfer.  '-' indicates a failed transfer.
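/*
 * The checksum arithmetic just described is easy to show concretely
 * before the Host/Reply example that follows: the checksum is the
 * modulo-256 sum of the payload bytes, appended as two hex digits.
 * A minimal stand-alone sketch -- frame_packet() is an illustrative
 * name, not a function from this stub:
 */
#include <stdio.h>

static void
frame_packet(const char *payload, char *out, size_t outlen)
{
	unsigned char sum = 0;
	const char *p;

	for (p = payload; *p != '\0'; p++)
		sum += (unsigned char)*p;	/* wraps modulo 256 */
	snprintf(out, outlen, "$%s#%02x", payload, sum);
}

int
main(void)
{
	char buf[64];

	frame_packet("m0,10", buf, sizeof(buf));
	printf("%s\n", buf);	/* prints "$m0,10#2a", as in the example below */
	return (0);
}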
* * Example: * * Host: Reply: * $m0,10#2a +$00010203040506070809101112131415#42 * ****************************************************************************/ - -#include -__FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "opt_ddb.h" int gdb_handle_exception (db_regs_t *, int, int); /************************************************************************/ extern jmp_buf db_jmpbuf; /************************************************************************/ /* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ /* at least NUMREGBYTES*2 are needed for register packets */ #define BUFMAX 400 /* Create private copies of common functions used by the stub. This prevents nasty interactions between app code and the stub (for instance if user steps into strlen, etc..) */ static int gdb_strlen (const char *s) { const char *s1 = s; while (*s1++ != '\000'); return s1 - s; } static char * gdb_strcpy (char *dst, const char *src) { char *retval = dst; while ((*dst++ = *src++) != '\000'); return retval; } static int putDebugChar (int c) /* write a single character */ { if (gdb_arg == NULL) return 0; (*gdb_putc)(gdb_arg, c); return 1; } static int getDebugChar (void) /* read and return a single char */ { if (gdb_arg == NULL) return -1; return (*gdb_getc)(gdb_arg); } static const char hexchars[]="0123456789abcdef"; static int hex(char ch) { if ((ch >= 'a') && (ch <= 'f')) return (ch-'a'+10); if ((ch >= '0') && (ch <= '9')) return (ch-'0'); if ((ch >= 'A') && (ch <= 'F')) return (ch-'A'+10); return (-1); } /* scan for the sequence $# */ static void getpacket (char *buffer) { unsigned char checksum; unsigned char xmitcsum; int i; int count; unsigned char ch; do { /* wait around for the start character, ignore all other characters */ while ((ch = (getDebugChar () & 0x7f)) != '$'); checksum = 0; xmitcsum = -1; count = 0; /* now, read until a # or end of buffer is found */ while (count < BUFMAX) { ch = getDebugChar () & 0x7f; if (ch == '#') break; checksum = checksum + ch; buffer[count] = ch; count = count + 1; } buffer[count] = 0; if (ch == '#') { xmitcsum = hex (getDebugChar () & 0x7f) << 4; xmitcsum += hex (getDebugChar () & 0x7f); if (checksum != xmitcsum) putDebugChar ('-'); /* failed checksum */ else { putDebugChar ('+'); /* successful transfer */ /* if a sequence char is present, reply the sequence ID */ if (buffer[2] == ':') { putDebugChar (buffer[0]); putDebugChar (buffer[1]); /* remove sequence chars from buffer */ count = gdb_strlen (buffer); for (i=3; i <= count; i++) buffer[i-3] = buffer[i]; } } } } while (checksum != xmitcsum); } /* send the packet in buffer. */ static void putpacket (char *buffer) { unsigned char checksum; int count; unsigned char ch; /* $#. */ do { /* * This is a non-standard hack to allow use of the serial console for * operation as well as debugging. Simply turn on 'remotechat' in gdb. * * This extension is not part of the Cygnus protocol, is kinda gross, * but gets the job done. 
*/ #ifdef GDB_REMOTE_CHAT putDebugChar ('|'); putDebugChar ('|'); putDebugChar ('|'); putDebugChar ('|'); #endif putDebugChar ('$'); checksum = 0; count = 0; while ((ch=buffer[count]) != 0) { putDebugChar (ch); checksum += ch; count += 1; } putDebugChar ('#'); putDebugChar (hexchars[checksum >> 4]); putDebugChar (hexchars[checksum & 0xf]); } while ((getDebugChar () & 0x7f) != '+'); } static char remcomInBuffer[BUFMAX]; static char remcomOutBuffer[BUFMAX]; static int get_char (vm_offset_t addr) { char data; if (setjmp (db_jmpbuf)) return -1; db_read_bytes (addr, 1, &data); return data & 0xff; } static int set_char (vm_offset_t addr, int val) { char data; if (setjmp (db_jmpbuf)) return -1; data = val; db_write_bytes (addr, 1, &data); return 0; } /* convert the memory pointed to by mem into hex, placing result in buf */ /* return a pointer to the last char put in buf (null) */ static char * mem2hex (vm_offset_t mem, char *buf, int count) { int i; int ch; for (i=0;i> 4]; *buf++ = hexchars[ch % 16]; } *buf = 0; return(buf); } /* convert the hex array pointed to by buf into binary to be placed in mem */ /* return a pointer to the character AFTER the last byte written */ static char * hex2mem (char *buf, vm_offset_t mem, int count) { int i; int ch; int rv; for (i=0;i=0) { *intValue = (*intValue <<4) | hexValue; numChars ++; } else break; (*ptr)++; } return (numChars); } #define NUMREGBYTES (sizeof registers) #define PC 8 #define SP 4 #define FP 5 #define NUM_REGS 14 /* * This function does all command procesing for interfacing to gdb. */ int gdb_handle_exception (db_regs_t *raw_regs, int type, int code) { int sigval; int addr, length; char * ptr; struct i386regs { unsigned int rax; unsigned int rcx; unsigned int rdx; unsigned int rbx; unsigned int rsp; unsigned int rbp; unsigned int rsi; unsigned int rdi; unsigned int rip; unsigned int rflags; unsigned int cs; unsigned int ss; unsigned int ds; unsigned int es; }; struct i386regs registers; registers.rax = raw_regs->tf_rax; registers.rbx = raw_regs->tf_rbx; registers.rcx = raw_regs->tf_rcx; registers.rdx = raw_regs->tf_rdx; registers.rsp = raw_regs->tf_rsp; registers.rbp = raw_regs->tf_rbp; registers.rsi = raw_regs->tf_rsi; registers.rdi = raw_regs->tf_rdi; registers.rip = raw_regs->tf_rip; registers.rflags = raw_regs->tf_rflags; registers.cs = raw_regs->tf_cs; registers.ss = raw_regs->tf_ss; registers.ds = 0; /* XXX rds() */ registers.es = 0; /* XXX res() */ /* reply to host that an exception has occurred */ sigval = computeSignal (type); ptr = remcomOutBuffer; *ptr++ = 'T'; *ptr++ = hexchars[sigval >> 4]; *ptr++ = hexchars[sigval & 0xf]; *ptr++ = hexchars[PC >> 4]; *ptr++ = hexchars[PC & 0xf]; *ptr++ = ':'; ptr = mem2hex ((vm_offset_t)®isters.rip, ptr, 4); *ptr++ = ';'; *ptr++ = hexchars[FP >> 4]; *ptr++ = hexchars[FP & 0xf]; *ptr++ = ':'; ptr = mem2hex ((vm_offset_t)®isters.rbp, ptr, 4); *ptr++ = ';'; *ptr++ = hexchars[SP >> 4]; *ptr++ = hexchars[SP & 0xf]; *ptr++ = ':'; ptr = mem2hex ((vm_offset_t)®isters.rsp, ptr, 4); *ptr++ = ';'; *ptr++ = 0; putpacket (remcomOutBuffer); while (1) { if (gdb_arg == NULL) return 1; /* somebody has removed the gdb device */ remcomOutBuffer[0] = 0; getpacket (remcomInBuffer); switch (remcomInBuffer[0]) { case '?': remcomOutBuffer[0] = 'S'; remcomOutBuffer[1] = hexchars[sigval >> 4]; remcomOutBuffer[2] = hexchars[sigval % 16]; remcomOutBuffer[3] = 0; break; case 'D': /* detach; say OK and turn off gdb */ putpacket(remcomOutBuffer); boothowto &= ~RB_GDB; return 0; case 'g': /* return the value of the CPU registers 
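/*
 * The loop bodies of mem2hex(), hex2mem() and hexToInt() above lost
 * their comparison operators and array indices in transcription.  The
 * conventional logic from the stock gdb stub, sketched against the
 * get_char()/set_char(), hex() and hexchars helpers defined earlier
 * (a reconstruction, not verbatim from this revision):
 */
static char *
mem2hex(vm_offset_t mem, char *buf, int count)
{
	int i, ch;

	for (i = 0; i < count; i++) {
		ch = get_char(mem++);
		if (ch == -1)
			return (NULL);		/* read faulted */
		*buf++ = hexchars[ch >> 4];
		*buf++ = hexchars[ch % 16];
	}
	*buf = 0;
	return (buf);
}

static char *
hex2mem(char *buf, vm_offset_t mem, int count)
{
	int i, ch;

	for (i = 0; i < count; i++) {
		ch = hex(*buf++) << 4;
		ch |= hex(*buf++);
		if (set_char(mem++, ch) == -1)
			return (NULL);		/* write faulted */
	}
	return (buf);
}

static int
hexToInt(char **ptr, int *intValue)
{
	int numChars = 0, hexValue;

	*intValue = 0;
	while (**ptr) {
		hexValue = hex(**ptr);
		if (hexValue < 0)
			break;
		*intValue = (*intValue << 4) | hexValue;
		numChars++;
		(*ptr)++;
	}
	return (numChars);
}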
*/ mem2hex ((vm_offset_t)®isters, remcomOutBuffer, NUMREGBYTES); break; case 'G': /* set the value of the CPU registers - return OK */ hex2mem (&remcomInBuffer[1], (vm_offset_t)®isters, NUMREGBYTES); gdb_strcpy (remcomOutBuffer, "OK"); break; case 'P': /* Set the value of one register */ { int regno; ptr = &remcomInBuffer[1]; if (hexToInt (&ptr, ®no) && *ptr++ == '=' && regno < NUM_REGS) { hex2mem (ptr, (vm_offset_t)®isters + regno * 4, 4); gdb_strcpy(remcomOutBuffer,"OK"); } else gdb_strcpy (remcomOutBuffer, "P01"); break; } case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ /* Try to read %x,%x. */ ptr = &remcomInBuffer[1]; if (hexToInt (&ptr, &addr) && *(ptr++) == ',' && hexToInt (&ptr, &length)) { if (mem2hex((vm_offset_t) addr, remcomOutBuffer, length) == NULL) gdb_strcpy (remcomOutBuffer, "E03"); break; } else gdb_strcpy (remcomOutBuffer, "E01"); break; case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ /* Try to read '%x,%x:'. */ ptr = &remcomInBuffer[1]; if (hexToInt(&ptr,&addr) && *(ptr++) == ',' && hexToInt(&ptr, &length) && *(ptr++) == ':') { if (hex2mem(ptr, (vm_offset_t) addr, length) == NULL) gdb_strcpy (remcomOutBuffer, "E03"); else gdb_strcpy (remcomOutBuffer, "OK"); } else gdb_strcpy (remcomOutBuffer, "E02"); break; /* cAA..AA Continue at address AA..AA(optional) */ /* sAA..AA Step one instruction from AA..AA(optional) */ case 'c' : case 's' : /* try to read optional parameter, pc unchanged if no parm */ ptr = &remcomInBuffer[1]; if (hexToInt(&ptr,&addr)) registers.rip = addr; /* set the trace bit if we're stepping */ if (remcomInBuffer[0] == 's') registers.rflags |= PSL_T; else registers.rflags &= ~PSL_T; raw_regs->tf_rax = registers.rax; raw_regs->tf_rbx = registers.rbx; raw_regs->tf_rcx = registers.rcx; raw_regs->tf_rdx = registers.rdx; raw_regs->tf_rsp = registers.rsp; raw_regs->tf_rbp = registers.rbp; raw_regs->tf_rsi = registers.rsi; raw_regs->tf_rdi = registers.rdi; raw_regs->tf_rip = registers.rip; raw_regs->tf_rflags = registers.rflags; raw_regs->tf_cs = registers.cs; raw_regs->tf_ss = registers.ss; #if 0 raw_regs->tf_ds = registers.ds; raw_regs->tf_es = registers.es; #endif return 0; } /* switch */ /* reply to the request */ putpacket (remcomOutBuffer); } return 0; } Index: head/sys/amd64/amd64/amd64_mem.c =================================================================== --- head/sys/amd64/amd64/amd64_mem.c (revision 123179) +++ head/sys/amd64/amd64/amd64_mem.c (revision 123180) @@ -1,626 +1,625 @@ /*- * Copyright (c) 1999 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include /* * amd64 memory range operations * * This code will probably be impenetrable without reference to the * Intel Pentium Pro documentation or x86-64 programmers manual vol 2. */ static char *mem_owner_bios = "BIOS"; #define MR686_FIXMTRR (1<<0) #define mrwithin(mr, a) \ (((a) >= (mr)->mr_base) && ((a) < ((mr)->mr_base + (mr)->mr_len))) #define mroverlap(mra, mrb) \ (mrwithin(mra, mrb->mr_base) || mrwithin(mrb, mra->mr_base)) #define mrvalid(base, len) \ ((!(base & ((1 << 12) - 1))) && /* base is multiple of 4k */ \ ((len) >= (1 << 12)) && /* length is >= 4k */ \ powerof2((len)) && /* ... and power of two */ \ !((base) & ((len) - 1))) /* range is not discontiuous */ #define mrcopyflags(curr, new) (((curr) & ~MDF_ATTRMASK) | ((new) & MDF_ATTRMASK)) static int mtrrs_disabled; TUNABLE_INT("machdep.disable_mtrrs", &mtrrs_disabled); SYSCTL_INT(_machdep, OID_AUTO, disable_mtrrs, CTLFLAG_RDTUN, &mtrrs_disabled, 0, "Disable amd64 MTRRs."); static void amd64_mrinit(struct mem_range_softc *sc); static int amd64_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg); static void amd64_mrAPinit(struct mem_range_softc *sc); static struct mem_range_ops amd64_mrops = { amd64_mrinit, amd64_mrset, amd64_mrAPinit }; /* XXX for AP startup hook */ static u_int64_t mtrrcap, mtrrdef; static struct mem_range_desc *mem_range_match(struct mem_range_softc *sc, struct mem_range_desc *mrd); static void amd64_mrfetch(struct mem_range_softc *sc); static int amd64_mtrrtype(int flags); static int amd64_mrt2mtrr(int flags, int oldval); static int amd64_mtrrconflict(int flag1, int flag2); static void amd64_mrstore(struct mem_range_softc *sc); static void amd64_mrstoreone(void *arg); static struct mem_range_desc *amd64_mtrrfixsearch(struct mem_range_softc *sc, u_int64_t addr); static int amd64_mrsetlow(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg); static int amd64_mrsetvariable(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg); /* amd64 MTRR type to memory range type conversion */ static int amd64_mtrrtomrt[] = { MDF_UNCACHEABLE, MDF_WRITECOMBINE, MDF_UNKNOWN, MDF_UNKNOWN, MDF_WRITETHROUGH, MDF_WRITEPROTECT, MDF_WRITEBACK }; #define MTRRTOMRTLEN (sizeof(amd64_mtrrtomrt) / sizeof(amd64_mtrrtomrt[0])) static int amd64_mtrr2mrt(int val) { if (val < 0 || val >= MTRRTOMRTLEN) return MDF_UNKNOWN; return amd64_mtrrtomrt[val]; } /* * amd64 MTRR conflicts. Writeback and uncachable may overlap. */ static int amd64_mtrrconflict(int flag1, int flag2) { flag1 &= MDF_ATTRMASK; flag2 &= MDF_ATTRMASK; if ((flag1 & MDF_UNKNOWN) || (flag2 & MDF_UNKNOWN)) return 1; if (flag1 == flag2 || (flag1 == MDF_WRITEBACK && flag2 == MDF_UNCACHEABLE) || (flag2 == MDF_WRITEBACK && flag1 == MDF_UNCACHEABLE)) return 0; return 1; } /* * Look for an exactly-matching range. 
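/*
 * The mrvalid() macro above encodes the hardware constraints on a
 * variable-range MTRR: 4K-aligned base, length at least 4K and a power
 * of two, and base aligned to the length.  A stand-alone illustration
 * with worked examples (user space, assumes 64-bit longs):
 */
#include <stdio.h>

#define my_powerof2(x)	((((x) - 1) & (x)) == 0)

static int
range_valid(unsigned long base, unsigned long len)
{
	return (!(base & ((1UL << 12) - 1)) &&	/* base is multiple of 4K */
	    len >= (1UL << 12) &&		/* length is >= 4K */
	    my_powerof2(len) &&			/* ... and a power of two */
	    !(base & (len - 1)));		/* range is not discontinuous */
}

int
main(void)
{
	printf("%d\n", range_valid(0xd0000000UL, 0x08000000UL)); /* 1: 128MB, aligned */
	printf("%d\n", range_valid(0xd0000000UL, 0x00300000UL)); /* 0: 3MB not a power of 2 */
	printf("%d\n", range_valid(0xd4000000UL, 0x08000000UL)); /* 0: base not aligned to length */
	return (0);
}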
*/ static struct mem_range_desc * mem_range_match(struct mem_range_softc *sc, struct mem_range_desc *mrd) { struct mem_range_desc *cand; int i; for (i = 0, cand = sc->mr_desc; i < sc->mr_ndesc; i++, cand++) if ((cand->mr_base == mrd->mr_base) && (cand->mr_len == mrd->mr_len)) return(cand); return(NULL); } /* * Fetch the current mtrr settings from the current CPU (assumed to all * be in sync in the SMP case). Note that if we are here, we assume * that MTRRs are enabled, and we may or may not have fixed MTRRs. */ static void amd64_mrfetch(struct mem_range_softc *sc) { struct mem_range_desc *mrd; u_int64_t msrv; int i, j, msr; mrd = sc->mr_desc; /* Get fixed-range MTRRs */ if (sc->mr_cap & MR686_FIXMTRR) { msr = MSR_MTRR64kBase; for (i = 0; i < (MTRR_N64K / 8); i++, msr++) { msrv = rdmsr(msr); for (j = 0; j < 8; j++, mrd++) { mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE; if (mrd->mr_owner[0] == 0) strcpy(mrd->mr_owner, mem_owner_bios); msrv = msrv >> 8; } } msr = MSR_MTRR16kBase; for (i = 0; i < (MTRR_N16K / 8); i++, msr++) { msrv = rdmsr(msr); for (j = 0; j < 8; j++, mrd++) { mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE; if (mrd->mr_owner[0] == 0) strcpy(mrd->mr_owner, mem_owner_bios); msrv = msrv >> 8; } } msr = MSR_MTRR4kBase; for (i = 0; i < (MTRR_N4K / 8); i++, msr++) { msrv = rdmsr(msr); for (j = 0; j < 8; j++, mrd++) { mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE; if (mrd->mr_owner[0] == 0) strcpy(mrd->mr_owner, mem_owner_bios); msrv = msrv >> 8; } } } /* Get remainder which must be variable MTRRs */ msr = MSR_MTRRVarBase; for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) { msrv = rdmsr(msr); mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | amd64_mtrr2mrt(msrv & 0xff); mrd->mr_base = msrv & 0x000000fffffff000L; msrv = rdmsr(msr + 1); mrd->mr_flags = (msrv & 0x800) ? (mrd->mr_flags | MDF_ACTIVE) : (mrd->mr_flags & ~MDF_ACTIVE); /* Compute the range from the mask. Ick. */ mrd->mr_len = (~(msrv & 0x000000fffffff000L) & 0x000000ffffffffffL) + 1; if (!mrvalid(mrd->mr_base, mrd->mr_len)) mrd->mr_flags |= MDF_BOGUS; /* If unclaimed and active, must be the BIOS */ if ((mrd->mr_flags & MDF_ACTIVE) && (mrd->mr_owner[0] == 0)) strcpy(mrd->mr_owner, mem_owner_bios); } } /* * Return the MTRR memory type matching a region's flags */ static int amd64_mtrrtype(int flags) { int i; flags &= MDF_ATTRMASK; for (i = 0; i < MTRRTOMRTLEN; i++) { if (amd64_mtrrtomrt[i] == MDF_UNKNOWN) continue; if (flags == amd64_mtrrtomrt[i]) return(i); } return(-1); } static int amd64_mrt2mtrr(int flags, int oldval) { int val; if ((val = amd64_mtrrtype(flags)) == -1) return oldval & 0xff; return val & 0xff; } /* * Update running CPU(s) MTRRs to match the ranges in the descriptor * list. * * XXX Must be called with interrupts enabled. */ static void amd64_mrstore(struct mem_range_softc *sc) { #ifdef SMP /* * We should use ipi_all_but_self() to call other CPUs into a * locking gate, then call a target function to do this work. * The "proper" solution involves a generalised locking gate * implementation, not ready yet. */ smp_rendezvous(NULL, amd64_mrstoreone, NULL, (void *)sc); #else disable_intr(); /* disable interrupts */ amd64_mrstoreone((void *)sc); enable_intr(); #endif } /* * Update the current CPU's MTRRs with those represented in the * descriptor list. Note that we do this wholesale rather than * just stuffing one entry; this is simpler (but slower, of course). 
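/*
 * The "compute the range from the mask" step in amd64_mrfetch() above
 * deserves a worked example: the variable MTRR mask register has 1-bits
 * over the address bits that must match the base, so inverting it
 * within the 40-bit physical address space and adding one yields the
 * range length.  Illustrative values, assuming the same 40-bit width:
 */
#include <stdio.h>

int
main(void)
{
	unsigned long mask_msr = 0x000000fff8000800UL; /* hypothetical: valid bit + mask */
	unsigned long len;

	len = (~(mask_msr & 0x000000fffffff000UL) & 0x000000ffffffffffUL) + 1;
	printf("0x%lx\n", len);		/* 0x8000000: a 128MB range */
	return (0);
}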
*/ static void amd64_mrstoreone(void *arg) { struct mem_range_softc *sc = (struct mem_range_softc *)arg; struct mem_range_desc *mrd; u_int64_t omsrv, msrv; int i, j, msr; u_int cr4save; mrd = sc->mr_desc; cr4save = rcr4(); /* save cr4 */ if (cr4save & CR4_PGE) load_cr4(cr4save & ~CR4_PGE); load_cr0((rcr0() & ~CR0_NW) | CR0_CD); /* disable caches (CD = 1, NW = 0) */ wbinvd(); /* flush caches, TLBs */ wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) & ~0x800); /* disable MTRRs (E = 0) */ /* Set fixed-range MTRRs */ if (sc->mr_cap & MR686_FIXMTRR) { msr = MSR_MTRR64kBase; for (i = 0; i < (MTRR_N64K / 8); i++, msr++) { msrv = 0; omsrv = rdmsr(msr); for (j = 7; j >= 0; j--) { msrv = msrv << 8; msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); } wrmsr(msr, msrv); mrd += 8; } msr = MSR_MTRR16kBase; for (i = 0; i < (MTRR_N16K / 8); i++, msr++) { msrv = 0; omsrv = rdmsr(msr); for (j = 7; j >= 0; j--) { msrv = msrv << 8; msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); } wrmsr(msr, msrv); mrd += 8; } msr = MSR_MTRR4kBase; for (i = 0; i < (MTRR_N4K / 8); i++, msr++) { msrv = 0; omsrv = rdmsr(msr); for (j = 7; j >= 0; j--) { msrv = msrv << 8; msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); } wrmsr(msr, msrv); mrd += 8; } } /* Set remainder which must be variable MTRRs */ msr = MSR_MTRRVarBase; for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) { /* base/type register */ omsrv = rdmsr(msr); if (mrd->mr_flags & MDF_ACTIVE) { msrv = mrd->mr_base & 0x000000fffffff000L; msrv |= amd64_mrt2mtrr(mrd->mr_flags, omsrv); } else { msrv = 0; } wrmsr(msr, msrv); /* mask/active register */ if (mrd->mr_flags & MDF_ACTIVE) { msrv = 0x800 | (~(mrd->mr_len - 1) & 0x000000fffffff000L); } else { msrv = 0; } wrmsr(msr + 1, msrv); } wbinvd(); /* flush caches, TLBs */ wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) | 0x800); /* restore MTRR state */ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* enable caches CD = 0 and NW = 0 */ load_cr4(cr4save); /* restore cr4 */ } /* * Hunt for the fixed MTRR referencing (addr) */ static struct mem_range_desc * amd64_mtrrfixsearch(struct mem_range_softc *sc, u_int64_t addr) { struct mem_range_desc *mrd; int i; for (i = 0, mrd = sc->mr_desc; i < (MTRR_N64K + MTRR_N16K + MTRR_N4K); i++, mrd++) if ((addr >= mrd->mr_base) && (addr < (mrd->mr_base + mrd->mr_len))) return(mrd); return(NULL); } /* * Try to satisfy the given range request by manipulating the fixed MTRRs that * cover low memory. * * Note that we try to be generous here; we'll bloat the range out to the * next higher/lower boundary to avoid the consumer having to know too much * about the mechanisms here. * * XXX note that this will have to be updated when we start supporting "busy" ranges. 
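/*
 * The store direction in amd64_mrstoreone() above inverts that fetch
 * computation: for an active range it writes PhysBase | type into the
 * base MSR and 0x800 (the valid bit) | ~(len - 1) into the mask MSR.
 * A worked example with illustrative values (type 6 is write-back,
 * matching index 6 of the amd64_mtrrtomrt[] table):
 */
#include <stdio.h>

int
main(void)
{
	unsigned long base = 0xd0000000UL, len = 0x08000000UL;
	unsigned long base_msr, mask_msr;

	base_msr = (base & 0x000000fffffff000UL) | 0x06;
	mask_msr = 0x800 | (~(len - 1) & 0x000000fffffff000UL);
	printf("base 0x%lx mask 0x%lx\n", base_msr, mask_msr);
	/* prints: base 0xd0000006 mask 0xfff8000800 */
	return (0);
}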
*/ static int amd64_mrsetlow(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) { struct mem_range_desc *first_md, *last_md, *curr_md; /* range check */ if (((first_md = amd64_mtrrfixsearch(sc, mrd->mr_base)) == NULL) || ((last_md = amd64_mtrrfixsearch(sc, mrd->mr_base + mrd->mr_len - 1)) == NULL)) return(EINVAL); /* check we aren't doing something risky */ if (!(mrd->mr_flags & MDF_FORCE)) for (curr_md = first_md; curr_md <= last_md; curr_md++) { if ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN) return (EACCES); } /* set flags, clear set-by-firmware flag */ for (curr_md = first_md; curr_md <= last_md; curr_md++) { curr_md->mr_flags = mrcopyflags(curr_md->mr_flags & ~MDF_FIRMWARE, mrd->mr_flags); bcopy(mrd->mr_owner, curr_md->mr_owner, sizeof(mrd->mr_owner)); } return(0); } /* * Modify/add a variable MTRR to satisfy the request. * * XXX needs to be updated to properly support "busy" ranges. */ static int amd64_mrsetvariable(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) { struct mem_range_desc *curr_md, *free_md; int i; /* * Scan the currently active variable descriptors, look for * one we exactly match (straight takeover) and for possible * accidental overlaps. * Keep track of the first empty variable descriptor in case we * can't perform a takeover. */ i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0; curr_md = sc->mr_desc + i; free_md = NULL; for (; i < sc->mr_ndesc; i++, curr_md++) { if (curr_md->mr_flags & MDF_ACTIVE) { /* exact match? */ if ((curr_md->mr_base == mrd->mr_base) && (curr_md->mr_len == mrd->mr_len)) { /* whoops, owned by someone */ if (curr_md->mr_flags & MDF_BUSY) return(EBUSY); /* check we aren't doing something risky */ if (!(mrd->mr_flags & MDF_FORCE) && ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN)) return (EACCES); /* Ok, just hijack this entry */ free_md = curr_md; break; } /* non-exact overlap ? */ if (mroverlap(curr_md, mrd)) { /* between conflicting region types? */ if (amd64_mtrrconflict(curr_md->mr_flags, mrd->mr_flags)) return(EINVAL); } } else if (free_md == NULL) { free_md = curr_md; } } /* got somewhere to put it? */ if (free_md == NULL) return(ENOSPC); /* Set up new descriptor */ free_md->mr_base = mrd->mr_base; free_md->mr_len = mrd->mr_len; free_md->mr_flags = mrcopyflags(MDF_ACTIVE, mrd->mr_flags); bcopy(mrd->mr_owner, free_md->mr_owner, sizeof(mrd->mr_owner)); return(0); } /* * Handle requests to set memory range attributes by manipulating MTRRs. * */ static int amd64_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) { struct mem_range_desc *targ; int error = 0; switch(*arg) { case MEMRANGE_SET_UPDATE: /* make sure that what's being asked for is even possible at all */ if (!mrvalid(mrd->mr_base, mrd->mr_len) || amd64_mtrrtype(mrd->mr_flags) == -1) return(EINVAL); #define FIXTOP ((MTRR_N64K * 0x10000) + (MTRR_N16K * 0x4000) + (MTRR_N4K * 0x1000)) /* are the "low memory" conditions applicable? 
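/*
 * With the standard fixed-range counts (eight 64K entries, sixteen 16K
 * entries, sixty-four 4K entries -- the usual values behind
 * MTRR_N64K/N16K/N4K), the FIXTOP cutoff defined above works out to
 * exactly 1MB, which is why amd64_mrinit() below seeds the fixed
 * descriptors at 0, 0x80000 and 0xc0000:
 */
#include <stdio.h>

#define MY_N64K	8	/* 8 x 64K  = 512K, covering 0x00000 - 0x7ffff */
#define MY_N16K	16	/* 16 x 16K = 256K, covering 0x80000 - 0xbffff */
#define MY_N4K	64	/* 64 x 4K  = 256K, covering 0xc0000 - 0xfffff */

int
main(void)
{
	printf("0x%x\n", (MY_N64K * 0x10000) + (MY_N16K * 0x4000) +
	    (MY_N4K * 0x1000));		/* 0x100000 = 1MB */
	return (0);
}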
*/ if ((sc->mr_cap & MR686_FIXMTRR) && ((mrd->mr_base + mrd->mr_len) <= FIXTOP)) { if ((error = amd64_mrsetlow(sc, mrd, arg)) != 0) return(error); } else { /* it's time to play with variable MTRRs */ if ((error = amd64_mrsetvariable(sc, mrd, arg)) != 0) return(error); } break; case MEMRANGE_SET_REMOVE: if ((targ = mem_range_match(sc, mrd)) == NULL) return(ENOENT); if (targ->mr_flags & MDF_FIXACTIVE) return(EPERM); if (targ->mr_flags & MDF_BUSY) return(EBUSY); targ->mr_flags &= ~MDF_ACTIVE; targ->mr_owner[0] = 0; break; default: return(EOPNOTSUPP); } /* update the hardware */ amd64_mrstore(sc); amd64_mrfetch(sc); /* refetch to see where we're at */ return(0); } /* * Work out how many ranges we support, initialise storage for them, * fetch the initial settings. */ static void amd64_mrinit(struct mem_range_softc *sc) { struct mem_range_desc *mrd; int nmdesc = 0; int i; mtrrcap = rdmsr(MSR_MTRRcap); mtrrdef = rdmsr(MSR_MTRRdefType); /* For now, bail out if MTRRs are not enabled */ if (!(mtrrdef & 0x800)) { if (bootverbose) printf("CPU supports MTRRs but not enabled\n"); return; } nmdesc = mtrrcap & 0xff; /* If fixed MTRRs supported and enabled */ if ((mtrrcap & 0x100) && (mtrrdef & 0x400)) { sc->mr_cap = MR686_FIXMTRR; nmdesc += MTRR_N64K + MTRR_N16K + MTRR_N4K; } sc->mr_desc = (struct mem_range_desc *)malloc(nmdesc * sizeof(struct mem_range_desc), M_MEMDESC, M_WAITOK | M_ZERO); sc->mr_ndesc = nmdesc; mrd = sc->mr_desc; /* Populate the fixed MTRR entries' base/length */ if (sc->mr_cap & MR686_FIXMTRR) { for (i = 0; i < MTRR_N64K; i++, mrd++) { mrd->mr_base = i * 0x10000; mrd->mr_len = 0x10000; mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; } for (i = 0; i < MTRR_N16K; i++, mrd++) { mrd->mr_base = i * 0x4000 + 0x80000; mrd->mr_len = 0x4000; mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; } for (i = 0; i < MTRR_N4K; i++, mrd++) { mrd->mr_base = i * 0x1000 + 0xc0000; mrd->mr_len = 0x1000; mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; } } /* * Get current settings, anything set now is considered to have * been set by the firmware. (XXX has something already played here?) */ amd64_mrfetch(sc); mrd = sc->mr_desc; for (i = 0; i < sc->mr_ndesc; i++, mrd++) { if (mrd->mr_flags & MDF_ACTIVE) mrd->mr_flags |= MDF_FIRMWARE; } } /* * Initialise MTRRs on an AP after the BSP has run the init code. */ static void amd64_mrAPinit(struct mem_range_softc *sc) { amd64_mrstoreone((void *)sc); /* set MTRRs to match BSP */ wrmsr(MSR_MTRRdefType, mtrrdef); /* set MTRR behaviour to match BSP */ } static void amd64_mem_drvinit(void *unused) { if (mtrrs_disabled) return; if (!(cpu_feature & CPUID_MTRR)) return; if ((cpu_id & 0xf00) != 0x600 && (cpu_id & 0xf00) != 0xf00) return; if ((strcmp(cpu_vendor, "GenuineIntel") != 0) && (strcmp(cpu_vendor, "AuthenticAMD") != 0)) return; mem_range_softc.mr_op = &amd64_mrops; } SYSINIT(amd64memdev,SI_SUB_DRIVERS,SI_ORDER_FIRST,amd64_mem_drvinit,NULL) Index: head/sys/amd64/amd64/apic_vector.S =================================================================== --- head/sys/amd64/amd64/apic_vector.S (revision 123179) +++ head/sys/amd64/amd64/apic_vector.S (revision 123180) @@ -1,343 +1,343 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD$ */ /* * Interrupt entry points for external interrupts triggered by I/O APICs * as well as IPI handlers. */ #include #include #include "assym.s" /* * Macros to create and destroy a trap frame. */ #define PUSH_FRAME \ subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ jz 1f ; /* Yes, dont swapgs again */ \ swapgs ; \ 1: movq %rdi,TF_RDI(%rsp) ; \ movq %rsi,TF_RSI(%rsp) ; \ movq %rdx,TF_RDX(%rsp) ; \ movq %rcx,TF_RCX(%rsp) ; \ movq %r8,TF_R8(%rsp) ; \ movq %r9,TF_R9(%rsp) ; \ movq %rax,TF_RAX(%rsp) ; \ movq %rbx,TF_RBX(%rsp) ; \ movq %rbp,TF_RBP(%rsp) ; \ movq %r10,TF_R10(%rsp) ; \ movq %r11,TF_R11(%rsp) ; \ movq %r12,TF_R12(%rsp) ; \ movq %r13,TF_R13(%rsp) ; \ movq %r14,TF_R14(%rsp) ; \ movq %r15,TF_R15(%rsp) #define POP_FRAME \ movq TF_RDI(%rsp),%rdi ; \ movq TF_RSI(%rsp),%rsi ; \ movq TF_RDX(%rsp),%rdx ; \ movq TF_RCX(%rsp),%rcx ; \ movq TF_R8(%rsp),%r8 ; \ movq TF_R9(%rsp),%r9 ; \ movq TF_RAX(%rsp),%rax ; \ movq TF_RBX(%rsp),%rbx ; \ movq TF_RBP(%rsp),%rbp ; \ movq TF_R10(%rsp),%r10 ; \ movq TF_R11(%rsp),%r11 ; \ movq TF_R12(%rsp),%r12 ; \ movq TF_R13(%rsp),%r13 ; \ movq TF_R14(%rsp),%r14 ; \ movq TF_R15(%rsp),%r15 ; \ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ jz 1f ; /* keep kernel GS.base */ \ cli ; \ swapgs ; \ 1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word * in the ISR. The handler determines the highest bit set in the ISR, * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. 
*/ #define ISR_VEC(index, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ + FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid double count */ \ movq lapic, %rdx ; /* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \ bsrl %eax, %eax ; /* index of highset set bit in ISR */ \ jz 2f ; \ addl $(32 * index),%eax ; \ 1: ; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid double count */ \ movq %rax, %rdi ; /* pass the IRQ */ \ call lapic_handle_intr ; \ MEXITCOUNT ; \ jmp doreti ; \ 2: movl $-1, %eax ; /* send a vector of -1 */ \ jmp 1b /* * Handle "spurious INTerrupts". * Notes: * This is different than the "spurious INTerrupt" generated by an * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ .text SUPERALIGN_TEXT IDTVEC(spuriousint) /* No EOI cycle used here */ iretq MCOUNT_LABEL(bintr2) ISR_VEC(1, apic_isr1) ISR_VEC(2, apic_isr2) ISR_VEC(3, apic_isr3) ISR_VEC(4, apic_isr4) ISR_VEC(5, apic_isr5) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) MCOUNT_LABEL(eintr2) #ifdef SMP /* * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invltlb) pushq %rax movq %cr3, %rax /* invalidate the TLB */ movq %rax, %cr3 movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rax iretq /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT IDTVEC(invlpg) pushq %rax movq smp_tlb_addr1, %rax invlpg (%rax) /* invalidate single page */ movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rax iretq /* * Page range TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invlrng) pushq %rax pushq %rdx movq smp_tlb_addr1, %rdx movq smp_tlb_addr2, %rax 1: invlpg (%rdx) /* invalidate single page */ addq $PAGE_SIZE, %rdx cmpq %rax, %rdx jb 1b movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rdx popq %rax iretq /* * Forward hardclock to another CPU. Pushes a clockframe and calls * forwarded_hardclock(). */ .text SUPERALIGN_TEXT IDTVEC(hardclock) PUSH_FRAME movq lapic, %rdx movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ call forwarded_hardclock MEXITCOUNT jmp doreti /* * Forward statclock to another CPU. Pushes a clockframe and calls * forwarded_statclock(). */ .text SUPERALIGN_TEXT IDTVEC(statclock) PUSH_FRAME movq lapic, %rdx movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) call forwarded_statclock MEXITCOUNT jmp doreti /* * Executed by a CPU when it receives an Xcpuast IPI from another CPU, * * The other CPU has already executed aston() or need_resched() on our * current process, so we simply need to ack the interrupt and return * via doreti to run ast(). */ .text SUPERALIGN_TEXT IDTVEC(cpuast) PUSH_FRAME movq lapic, %rdx movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) MEXITCOUNT jmp doreti /* * Executed by a CPU when it receives an Xcpustop IPI from another CPU, * * - Signals its receipt. * - Waits for permission to restart. * - Signals its restart. 
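/*
 * The bsrl/addl pair in the ISR_VEC macro above computes the interrupt
 * vector from the local APIC In-Service Register, which is exposed as
 * 32-bit words spaced 16 bytes apart.  The same computation in C, with
 * an illustrative name:
 */
#include <stdio.h>

static int
isr_to_vector(unsigned int isr_word, int index)
{
	int bit;

	if (isr_word == 0)
		return (-1);	/* no bit set: pass -1, as the macro does */
	for (bit = 31; (isr_word & (1U << bit)) == 0; bit--)
		;		/* software equivalent of bsrl */
	return (32 * index + bit);
}

int
main(void)
{
	/* ISR word 1 covers vectors 32-63; bit 17 set means vector 49. */
	printf("%d\n", isr_to_vector(1U << 17, 1));
	return (0);
}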
*/ .text SUPERALIGN_TEXT IDTVEC(cpustop) PUSH_FRAME movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ movl PCPU(CPUID), %eax imull $PCB_SIZE, %eax leaq stoppcbs(%rax), %rdi call savectx /* Save process context */ movl PCPU(CPUID), %eax lock btsl %eax, stopped_cpus /* stopped_cpus |= (1< __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Floating point support. */ #if defined(__GNUC__) && !defined(lint) #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) #define fnclex() __asm("fnclex") #define fninit() __asm("fninit") #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ : : "n" (CR0_TS) : "ax") #define stop_emulating() __asm("clts") #else /* not __GNUC__ */ void fldcw(caddr_t addr); void fnclex(void); void fninit(void); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void start_emulating(void); void stop_emulating(void); #endif /* __GNUC__ */ #define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) #define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) typedef u_char bool_t; int hw_float = 1; SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floatingpoint instructions executed in hardware"); static struct savefpu fpu_cleanstate; static bool_t fpu_cleanstate_ready; /* * Initialize floating point unit. */ void fpuinit() { register_t savecrit; u_short control; /* * fpusave() initializes the fpu and sets fpcurthread = NULL */ savecrit = intr_disable(); fpusave(&fpu_cleanstate); /* XXX borrow for now */ stop_emulating(); /* XXX fpusave() doesn't actually initialize the fpu in the SSE case. */ fninit(); control = __INITIAL_FPUCW__; fldcw(&control); start_emulating(); intr_restore(savecrit); savecrit = intr_disable(); stop_emulating(); fxsave(&fpu_cleanstate); start_emulating(); fpu_cleanstate_ready = 1; intr_restore(savecrit); } /* * Free coprocessor (if we have it). */ void fpuexit(struct thread *td) { register_t savecrit; savecrit = intr_disable(); if (curthread == PCPU_GET(fpcurthread)) fpusave(&PCPU_GET(curpcb)->pcb_save); intr_restore(savecrit); } int fpuformat() { return (_MC_FPFMT_XMM); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. 
The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 
41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. * * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now * depend on longjmp() restoring a usable state. Restoring the state * or examining it might fail if we didn't clear exceptions. * * The error code chosen will be one of the FPE_... macros. It will be * sent as the second argument to old BSD-style signal handlers and as * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. * * XXX the FP state is not preserved across signal handlers. So signal * handlers cannot afford to do FP unless they preserve the state or * longjmp() out. 
Both preserving the state and longjmp()ing may be * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable * solution for signals other than SIGFPE. */ int fputrap() { register_t savecrit; u_short control, status; savecrit = intr_disable(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. */ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } if (PCPU_GET(fpcurthread) == curthread) fnclex(); intr_restore(savecrit); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; int fpudna() { struct pcb *pcb; register_t s; u_short control; if (PCPU_GET(fpcurthread) == curthread) { printf("fpudna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); return (1); } if (PCPU_GET(fpcurthread) != NULL) { printf("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("fpudna"); } s = intr_disable(); stop_emulating(); /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); pcb = PCPU_GET(curpcb); if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * initialize the FPU and load the default control word. */ fninit(); control = __INITIAL_FPUCW__; fldcw(&control); pcb->pcb_flags |= PCB_FPUINITDONE; } else { /* * The following frstor may cause a trap when the state * being restored has a pending error. The error will * appear to have been triggered by the current (fpu) user * instruction even when that instruction is a no-wait * instruction that should not trigger an error (e.g., * instructions are broken the same as frstor, so our * treatment does not amplify the breakage. */ fxrstor(&pcb->pcb_save); } intr_restore(s); return (1); } /* * Wrapper for fnsave instruction. * * fpusave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * fpusave() atomically with checking fpcurthread. */ void fpusave(struct savefpu *addr) { stop_emulating(); fxsave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } /* * This should be called with interrupts disabled and only when the owning * FPU thread is non-null. */ void fpudrop() { struct thread *td; td = PCPU_GET(fpcurthread); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_FPUINITDONE; start_emulating(); } /* * Get the state of the FPU without dropping ownership (if possible). * It returns the FPU ownership status. 
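/*
 * The table index computed in fputrap() above follows the steps the
 * fpetable comment describes: keep only status bits whose exceptions
 * are unmasked in the control word (~control & 0x3f), and always keep
 * bit 6, the unmaskable stack fault.  A worked example with
 * illustrative register values:
 */
#include <stdio.h>

int
main(void)
{
	unsigned short control = 0x1272;	/* ZM clear: divide-by-zero unmasked */
	unsigned short status = 0x0004;		/* ZE set: a divide-by-zero occurred */

	printf("0x%02x\n", status & ((~control & 0x3f) | 0x40));
	/* 0x04 -> fpetable[4] == FPE_FLTDIV, per the "4 - DZ" entry above */
	return (0);
}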
*/ int fpugetregs(struct thread *td, struct savefpu *addr) { register_t s; if ((td->td_pcb->pcb_flags & PCB_FPUINITDONE) == 0) { if (fpu_cleanstate_ready) bcopy(&fpu_cleanstate, addr, sizeof(fpu_cleanstate)); else bzero(addr, sizeof(*addr)); return (_MC_FPOWNED_NONE); } s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { fxsave(addr); intr_restore(s); return (_MC_FPOWNED_FPU); } else { intr_restore(s); bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } /* * Set the state of the FPU. */ void fpusetregs(struct thread *td, struct savefpu *addr) { register_t s; s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { fxrstor(addr); intr_restore(s); } else { intr_restore(s); bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); } curthread->td_pcb->pcb_flags |= PCB_FPUINITDONE; } /* * This really sucks. We want the acpi version only, but it requires * the isa_if.h file in order to get the definitions. */ #include "opt_isa.h" #ifdef DEV_ISA #include /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id fpupnp_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int fpupnp_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); if (result <= 0) device_quiet(dev); return (result); } static int fpupnp_attach(device_t dev) { return (0); } static device_method_t fpupnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fpupnp_probe), DEVMETHOD(device_attach, fpupnp_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t fpupnp_driver = { "fpupnp", fpupnp_methods, 1, /* no softc */ }; static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c (revision 123179) +++ head/sys/amd64/amd64/machdep.c (revision 123180) @@ -1,1625 +1,1625 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)machdep.c 7.4 (Berkeley) 6/3/91 + * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atalk.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef SMP #include #endif #include #include #include #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern u_int64_t hammer_time(u_int64_t, u_int64_t); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void identify_cpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) int _udatasel, _ucodesel, _ucode32sel; u_long atdevbase; int cold = 1; long Maxmem = 0; vm_paddr_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); /* * Display any holes after the first chunk of extended memory. 
*/ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; regs->tf_rcx = regs->tf_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = code; /* arg 2 in %rsi */ regs->tf_rcx = regs->tf_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. 
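/*
 * The stack arithmetic in sendsig() above places the frame below the
 * interrupted %rsp: the extra 128 bytes skipped is the amd64 ABI red
 * zone, which signal delivery must not clobber, and the final mask
 * gives the 16-byte alignment the ABI requires.  A sketch with a
 * stand-in frame size:
 */
#include <stdio.h>

int
main(void)
{
	unsigned long tf_rsp = 0x7fffffffe468UL;	/* hypothetical user %rsp */
	unsigned long framesz = 0x4e0;			/* stand-in sigframe size */
	unsigned long sp;

	sp = tf_rsp - framesz - 128;	/* below the frame and the red zone */
	sp &= ~0xFUL;			/* align to 16 bytes */
	printf("sigframe at 0x%lx\n", sp);
	return (0);
}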
*/ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_rflags &= ~PSL_T; regs->tf_cs = _ucodesel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Build siginfo_t for SA thread */ void cpu_thread_siginfo(int sig, u_long code, siginfo_t *si) { struct proc *p; struct thread *td; struct trapframe *regs; td = curthread; p = td->td_proc; regs = td->td_frame; PROC_LOCK_ASSERT(p, MA_OWNED); bzero(si, sizeof(*si)); si->si_signo = sig; si->si_code = code; si->si_addr = (void *)regs->tf_addr; /* XXXKSE fill other fields */ } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; long rflags; int cs, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { printf("sigreturn: rflags = 0x%lx\n", rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(td, SIGBUS, T_PROTFLT); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); td->td_pcb->pcb_flags |= PCB_FULLCTX; return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. 
If turned on this will result in
 * approximately a 4.2% loss in real time performance in buildworld tests
 * (but improves user and sys times oddly enough), and saves approximately
 * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
 *
 * XXX we need to have a cpu mask of idle cpus and generate an IPI or
 * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
 * Then we can have our cake and eat it too.
 *
 * XXX I'm turning it on for SMP as well by default for now.  It seems to
 * help lock contention somewhat, and this is critical for HTT. -Peter
 */
static int	cpu_idle_hlt = 1;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");

static void
cpu_idle_default(void)
{
	/*
	 * we must absolutely guarantee that hlt is the
	 * absolute next instruction after sti or we
	 * introduce a timing window.
	 */
	__asm __volatile("sti; hlt");
}

/*
 * Note that we have to be careful here to avoid a race between checking
 * sched_runnable() and actually halting.  If we don't do this, we may waste
 * the time between calling hlt and the next interrupt even though there
 * is a runnable process.
 */
void
cpu_idle(void)
{
	if (cpu_idle_hlt) {
		disable_intr();
		if (sched_runnable())
			enable_intr();
		else
			(*cpu_idle_hook)();
	}
}

/* Other subsystems (e.g., ACPI) can hook this later. */
void (*cpu_idle_hook)(void) = cpu_idle_default;

/*
 * Clear registers on exec
 */
void
exec_setregs(td, entry, stack, ps_strings)
	struct thread *td;
	u_long entry;
	u_long stack;
	u_long ps_strings;
{
	struct trapframe *regs = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	wrmsr(MSR_FSBASE, 0);
	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
	pcb->pcb_fsbase = 0;
	pcb->pcb_gsbase = 0;
	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_udatasel);
	load_gs(_udatasel);
	pcb->pcb_ds = _udatasel;
	pcb->pcb_es = _udatasel;
	pcb->pcb_fs = _udatasel;
	pcb->pcb_gs = _udatasel;

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_rip = entry;
	regs->tf_rsp = ((stack - 8) & ~0xF) + 8;
	regs->tf_rdi = stack;		/* argv */
	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
	regs->tf_ss = _udatasel;
	regs->tf_cs = _ucodesel;

	/*
	 * Arrange to trap the next fpu or `fwait' instruction (see fpu.c
	 * for why fwait must be trapped at least if there is an fpu or an
	 * emulator).  This is mainly to handle the case where npx0 is not
	 * configured, since the fpu routines normally set up the trap
	 * otherwise.  It should be done only at boot time, but doing it
	 * here allows modifying `fpu_exists' for testing the emulator on
	 * systems with an fpu.
	 */
	load_cr0(rcr0() | CR0_MP | CR0_TS);

	/* Initialize the fpu (if any) for the current process. */
	/*
	 * XXX the above load_cr0() also initializes it and is a layering
	 * violation.  It drops the fpu state partially
	 * and this would be fatal if we were interrupted now, and decided
	 * to force the state to the pcb, and checked the invariant
	 * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL).
	 * ALL of this can happen except the check.  The check used to
	 * happen and be fatal later when we didn't complete the drop
	 * before returning to user mode.  This should be fixed properly
	 * soon.
	 */
	fpstate_drop(td);
}

void
cpu_setregs(void)
{
	register_t cr0;

	cr0 = rcr0();
	cr0 |= CR0_NE;			/* Done by fpuinit() */
	cr0 |= CR0_MP | CR0_TS;		/* Done at every execve() too.
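 *
 * (Aside on the exec path above: exec_setregs() sets
 *
 *	regs->tf_rsp = ((stack - 8) & ~0xF) + 8;
 *
 * which leaves %rsp congruent to 8 modulo 16 -- the state a function
 * body observes right after a call instruction has pushed its return
 * address -- so code compiled to the amd64 ABI's 16-byte stack alignment
 * rules works from the very first instruction.  Worked example:
 * stack = 0x7fffffffefff gives ((stack - 8) & ~0xF) + 8 = 0x7fffffffeff8,
 * and 0x7fffffffeff8 % 16 == 8.)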
*/ cr0 |= CR0_WP | CR0_AM; load_cr0(cr0); } static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); struct amd64tss common_tss[MAXCPU]; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, /* long */ 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 1, /* long */ 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 1, /* long */ 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUCODE32_SEL 3 32 bit Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, /* long */ 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUDATA_SEL 4 32/64 bit Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, /* long */ 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUCODE_SEL 5 64 bit Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 1, /* long */ 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 6 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct amd64tss)-1,/* length - all address space */ SDT_SYSTSS, /* segment type */ SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, /* long */ 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Actually, the TSS is a system descriptor which is double size */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor 
present */ 0, /* long */ 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, ist) int idx; inthand_t *func; int typ; int dpl; int ist; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((uintptr_t)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), IDTVEC(fast_syscall), IDTVEC(fast_syscall32); void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct soft_segment_descriptor *ssd; struct system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } #if !defined(DEV_ATPIC) && defined(DEV_ISA) #include u_int isa_irq_pending(void) { return (0); } #endif #define PHYSMAP_SIZE (2 * 8) struct bios_smap { u_int64_t base; u_int64_t length; u_int32_t type; } __packed; u_int basemem; /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; char *cp; struct bios_smap *smapbase, *smap, *smapend; u_int32_t smapsize; bzero(physmap, sizeof(physmap)); basemem = 0; physmap_idx = 0; /* * get memory map from INT 15:E820, kindly supplied by the loader. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. 
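 *
 * (A self-contained sketch of walking such a size-prefixed array; the
 * names here are illustrative, not from this file, and assume <stdint.h>:
 *
 *	struct entry { uint64_t base, length; uint32_t type; }
 *	    __attribute__((packed));
 *
 *	void
 *	walk(struct entry *first)
 *	{
 *		uint32_t size = *((uint32_t *)first - 1);  (bytes, not count)
 *		struct entry *end, *e;
 *		end = (struct entry *)((uintptr_t)first + size);
 *		for (e = first; e < end; e++)
 *			use(e->base, e->length, e->type);
 *	}
 *
 * with use() standing in for real work; this is exactly the
 * smapbase/smapsize/smapend computation that follows.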
 */
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		panic("No BIOS smap info from loader!");

	smapsize = *((u_int32_t *)smapbase - 1);
	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	for (smap = smapbase; smap < smapend; smap++) {
		if (boothowto & RB_VERBOSE)
			printf("SMAP type=%02x base=%016lx len=%016lx\n",
			    smap->type, smap->base, smap->length);

		if (smap->type != 0x01)
			continue;

		if (smap->length == 0)
			continue;

		for (i = 0; i <= physmap_idx; i += 2) {
			if (smap->base < physmap[i + 1]) {
				if (boothowto & RB_VERBOSE)
					printf(
	"Overlapping or non-monotonic memory region, ignoring second region\n");
				goto next_run;
			}
		}

		if (smap->base == physmap[physmap_idx + 1]) {
			physmap[physmap_idx + 1] += smap->length;
next_run:
			continue;
		}

		physmap_idx += 2;
		if (physmap_idx == PHYSMAP_SIZE) {
			printf(
		"Too many segments in the physical address map, giving up\n");
			break;
		}
		physmap[physmap_idx] = smap->base;
		physmap[physmap_idx + 1] = smap->base + smap->length;
	}

	/*
	 * Find the 'base memory' segment for SMP
	 */
	basemem = 0;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (physmap[i] == 0x00000000) {
			basemem = physmap[i + 1] / 1024;
			break;
		}
	}
	if (basemem == 0)
		panic("BIOS smap did not include a basemem segment!");

#ifdef SMP
	/* make hole for AP bootstrap code */
	physmap[1] = mp_bootaddress(physmap[1] / 1024);
#endif

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	Maxmem = MAXMEM / 4;
#endif

	/*
	 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
	 * for the appropriate modifiers.  This overrides MAXMEM.
	 */
	cp = getenv("hw.physmem");
	if (cp != NULL) {
		u_int64_t AllowMem, sanity;
		char *ep;

		sanity = AllowMem = strtouq(cp, &ep, 0);
		if ((ep != cp) && (*ep != 0)) {
			switch(*ep) {
			case 'g':
			case 'G':
				AllowMem <<= 10;
			case 'm':
			case 'M':
				AllowMem <<= 10;
			case 'k':
			case 'K':
				AllowMem <<= 10;
				break;
			default:
				AllowMem = sanity = 0;
			}
			if (AllowMem < sanity)
				AllowMem = 0;
		}
		if (AllowMem == 0)
			printf("Ignoring invalid memory size of '%s'\n", cp);
		else
			Maxmem = atop(AllowMem);
		freeenv(cp);
	}

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/*
	 * If Maxmem has been increased beyond what the system has detected,
	 * extend the last memory segment to the new limit.
	 */
	if (atop(physmap[physmap_idx + 1]) < Maxmem)
		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(&first);

	/*
	 * Size up each available chunk of physical memory.
	 */
	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
	pa_indx = 0;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	pte = CMAP1;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad;
			int *ptr = (int *)CADDR1;

			/*
			 * block out kernel memory as not available.
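			 *
			 * (Aside: the hw.physmem parsing above relies on
			 * deliberate switch fall-through -- 'g' shifts by 10
			 * three times, 'm' twice, 'k' once -- and keeps the
			 * pre-shift value to detect wraparound.  The same
			 * idiom in isolation, with illustrative names:
			 *
			 *	uint64_t
			 *	scale(uint64_t v, char suffix)
			 *	{
			 *		uint64_t sanity = v;
			 *		switch (suffix) {
			 *		case 'g': v <<= 10;	(falls through)
			 *		case 'm': v <<= 10;	(falls through)
			 *		case 'k': v <<= 10; break;
			 *		default: return (0);
			 *		}
			 *		return (v < sanity ? 0 : v);
			 *	}
			 *
			 * so "2g" becomes 2 << 30 bytes and a value that
			 * wraps past 2^64 comes back as 0 and is rejected,
			 * matching the AllowMem/sanity dance above.)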
*/ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, off, x; struct pcpu *pc; u_int64_t msr; char *env; #ifdef DEV_ISA /* Preemptively mask the atpics and leave them shut down */ outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); #else #error "have you forgotten the isa device?"; #endif /* Turn on PTE NX (no execute) bit */ msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); proc0.p_uarea = (struct user *)(physfree + KERNBASE); bzero(proc0.p_uarea, UAREA_PAGES * PAGE_SIZE); physfree += UAREA_PAGES * PAGE_SIZE; thread0.td_kstack = physfree + KERNBASE; bzero((void *)thread0.td_kstack, KSTACK_PAGES * PAGE_SIZE); physfree += KSTACK_PAGES * PAGE_SIZE; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; atdevbase = ISA_HOLE_START + KERNBASE; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. 
*/ proc_linkup(&proc0, &ksegrp0, &kse0, &thread0); preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments */ gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) ssdtosd(&gdt_segs[x], &gdt[x]); } ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(tssp, &common_tss[0]); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&clock_lock, "clk", NULL, MTX_SPIN); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); #ifdef DEV_ATPIC atpic_startup(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif identify_cpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
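 *
 * (The fast-syscall MSR setup that follows packs two selector bases into
 * MSR_STAR: per the AMD64 architecture, SYSCALL loads %cs from
 * STAR[47:32] and %ss from that value + 8, while SYSRET rebuilds the user
 * selectors from STAR[63:48] (+16 for the 64-bit %cs, +8 for %ss) -- which
 * is why GUCODE32_SEL, GUDATA_SEL and GUCODE_SEL must sit in exactly that
 * order in the GDT above.  Illustrative packing, mirroring the code below:
 *
 *	uint64_t star = ((uint64_t)kernel_cs << 32) |
 *	    ((uint64_t)user_cs32_base << 48);
 *
 * with kernel_cs and user_cs32_base standing in for the GSEL() values.)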
*/ common_tss[0].tss_rsp0 = thread0.td_kstack + \ KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); /* Ensure the stack is aligned to 16 bytes */ common_tss[0].tss_rsp0 &= ~0xF; PCPU_SET(rsp0, common_tss[0].tss_rsp0); /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); fpuinit(); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ thread0.td_pcb->pcb_cr3 = KPML4phys; thread0.td_frame = &proc0_tf; env = getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_rflags |= PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; pcb = td->td_pcb; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = regs->r_rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; pcb = td->td_pcb; return (0); } /* XXX check all this stuff! 
*/ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rflags = tp->tf_rflags; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
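 *
 * (The rflags handling below is the classic merge-under-mask idiom:
 * bits in PSL_USERCHANGE are taken from the user-supplied context,
 * everything else is preserved from the live trapframe.  In isolation:
 *
 *	uint64_t
 *	merge(uint64_t user, uint64_t cur, uint64_t mask)
 *	{
 *		return ((user & mask) | (cur & ~mask));
 *	}
 *
 * so a caller can flip, say, the direction or arithmetic flags, but a
 * privileged bit such as IOPL always keeps its current value.)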
*/ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; long rflags; int ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); ret = set_fpcontext(td, mcp); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; tp->tf_r14 = mcp->mc_r14; tp->tf_r13 = mcp->mc_r13; tp->tf_r12 = mcp->mc_r12; tp->tf_r11 = mcp->mc_r11; tp->tf_r10 = mcp->mc_r10; tp->tf_r9 = mcp->mc_r9; tp->tf_r8 = mcp->mc_r8; tp->tf_rdi = mcp->mc_rdi; tp->tf_rsi = mcp->mc_rsi; tp->tf_rbp = mcp->mc_rbp; tp->tf_rbx = mcp->mc_rbx; tp->tf_rdx = mcp->mc_rdx; tp->tf_rcx = mcp->mc_rcx; tp->tf_rax = mcp->mc_rax; tp->tf_rip = mcp->mc_rip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate); mcp->mc_fpformat = fpuformat(); } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* * XXX we violate the dubious requirement that fpusetregs() * be called with interrupts disabled. * XXX obsolete on trap-16 systems? */ fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate); } else return (EINVAL); return (0); } void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); if (PCPU_GET(fpcurthread) == td) fpudrop(); /* * XXX force a full drop of the fpu. The above only drops it if we * owned it. * * XXX I don't much like fpugetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of fpugetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (0); } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. 
*/ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/amd64/amd64/mem.c =================================================================== --- head/sys/amd64/amd64/mem.c (revision 123179) +++ head/sys/amd64/amd64/mem.c (revision 123180) @@ -1,368 +1,369 @@ /*- * Copyright (c) 1988 University of Utah. * Copyright (c) 1982, 1986, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and code derived from software contributed to * Berkeley by William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * Utah $Hdr: mem.c 1.13 89/10/08$ - * @(#)mem.c 7.2 (Berkeley) 5/9/91 + * from: Utah $Hdr: mem.c 1.13 89/10/08$ + * from: @(#)mem.c 7.2 (Berkeley) 5/9/91 */ #include __FBSDID("$FreeBSD$"); /* * Memory special file */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static dev_t memdev, kmemdev, iodev; static d_open_t mmopen; static d_close_t mmclose; static d_read_t mmrw; static d_ioctl_t mmioctl; static d_mmap_t memmmap; #define CDEV_MAJOR 2 static struct cdevsw mem_cdevsw = { .d_open = mmopen, .d_close = mmclose, .d_read = mmrw, .d_write = mmrw, .d_ioctl = mmioctl, .d_mmap = memmmap, .d_name = "mem", .d_maj = CDEV_MAJOR, .d_flags = D_MEM, }; MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors"); struct mem_range_softc mem_range_softc; static int mmclose(dev_t dev, int flags, int fmt, struct thread *td) { switch (minor(dev)) { case 14: td->td_frame->tf_rflags &= ~PSL_IOPL; } return (0); } static int mmopen(dev_t dev, int flags, int fmt, struct thread *td) { int error; switch (minor(dev)) { case 0: case 1: if (flags & FWRITE) { error = securelevel_gt(td->td_ucred, 0); if (error != 0) return (error); } break; case 14: error = suser(td); if (error != 0) return (error); error = securelevel_gt(td->td_ucred, 0); if (error != 0) return (error); td->td_frame->tf_rflags |= PSL_IOPL; break; } return (0); } /*ARGSUSED*/ static int mmrw(dev_t dev, struct uio *uio, int flags) { int o; u_long c = 0, v; struct iovec *iov; int error = 0; vm_offset_t addr, eaddr; GIANT_REQUIRED; while (uio->uio_resid > 0 && error == 0) { iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iov++; uio->uio_iovcnt--; if (uio->uio_iovcnt < 0) panic("mmrw"); continue; } switch (minor(dev)) { /* minor device 0 is physical memory */ case 0: v = uio->uio_offset; kmemphys: o = v & PAGE_MASK; c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o)); error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio); continue; /* minor device 1 is kernel memory */ case 1: v = uio->uio_offset; if (v >= DMAP_MIN_ADDRESS && v < DMAP_MAX_ADDRESS) { v = DMAP_TO_PHYS(v); goto kmemphys; } c = iov->iov_len; + /* * Make sure that all of the pages are currently resident so * that we don't create any zero-fill pages. */ addr = trunc_page(v); eaddr = round_page(v + c); if (addr < (vm_offset_t)KERNBASE) return (EFAULT); for (; addr < eaddr; addr += PAGE_SIZE) if (pmap_extract(kernel_pmap, addr) == 0) return (EFAULT); if (!kernacc((caddr_t)(long)v, c, uio->uio_rw == UIO_READ ? VM_PROT_READ : VM_PROT_WRITE)) return (EFAULT); error = uiomove((caddr_t)(long)v, (int)c, uio); continue; default: return (ENODEV); } if (error) break; iov->iov_base = (char *)iov->iov_base + c; iov->iov_len -= c; uio->uio_offset += c; uio->uio_resid -= c; } return (error); } /*******************************************************\ * allow user processes to MMAP some memory sections * * instead of going through read/write * \*******************************************************/ static int memmmap(dev_t dev, vm_offset_t offset, vm_paddr_t *paddr, int prot) { switch (minor(dev)) { /* minor device 0 is physical memory */ case 0: *paddr = offset; break; /* minor device 1 is kernel memory */ case 1: *paddr = vtophys(offset); break; default: return (-1); } return (0); } /* * Operations for changing memory attributes. * * This is basically just an ioctl shim for mem_range_attr_get * and mem_range_attr_set. 
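 *
 * (MEMRANGE_GET below follows the usual two-call sizing protocol: pass
 * mo_arg[0] == 0 to learn the descriptor count, allocate, then call
 * again.  A plausible userland sequence -- error handling omitted, fd an
 * open /dev/mem, headers as for any ioctl consumer:
 *
 *	struct mem_range_op mo;
 *	mo.mo_arg[0] = 0;
 *	ioctl(fd, MEMRANGE_GET, &mo);	(returns the count in mo_arg[0])
 *	mo.mo_desc = malloc(mo.mo_arg[0] * sizeof(struct mem_range_desc));
 *	ioctl(fd, MEMRANGE_GET, &mo);	(fills in the descriptors)
 *
 * The count is left in mo_arg[0] by the first call, so the second call
 * can reuse it as the buffer size.)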
*/ static int mmioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td) { int nd, error = 0; struct mem_range_op *mo = (struct mem_range_op *)data; struct mem_range_desc *md; /* is this for us? */ if ((cmd != MEMRANGE_GET) && (cmd != MEMRANGE_SET)) return (ENOTTY); /* any chance we can handle this? */ if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); /* do we have any descriptors? */ if (mem_range_softc.mr_ndesc == 0) return (ENXIO); switch (cmd) { case MEMRANGE_GET: nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc); if (nd > 0) { md = (struct mem_range_desc *) malloc(nd * sizeof(struct mem_range_desc), M_MEMDESC, M_WAITOK); error = mem_range_attr_get(md, &nd); if (!error) error = copyout(md, mo->mo_desc, nd * sizeof(struct mem_range_desc)); free(md, M_MEMDESC); } else nd = mem_range_softc.mr_ndesc; mo->mo_arg[0] = nd; break; case MEMRANGE_SET: md = (struct mem_range_desc *)malloc(sizeof(struct mem_range_desc), M_MEMDESC, M_WAITOK); error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc)); /* clamp description string */ md->mr_owner[sizeof(md->mr_owner) - 1] = 0; if (error == 0) error = mem_range_attr_set(md, &mo->mo_arg[0]); free(md, M_MEMDESC); break; } return (error); } /* * Implementation-neutral, kernel-callable functions for manipulating * memory range attributes. */ int mem_range_attr_get(struct mem_range_desc *mrd, int *arg) { /* can we handle this? */ if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); if (*arg == 0) *arg = mem_range_softc.mr_ndesc; else bcopy(mem_range_softc.mr_desc, mrd, (*arg) * sizeof(struct mem_range_desc)); return (0); } int mem_range_attr_set(struct mem_range_desc *mrd, int *arg) { /* can we handle this? */ if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg)); } #ifdef SMP void mem_range_AP_init(void) { if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) (mem_range_softc.mr_op->initAP(&mem_range_softc)); } #endif static int mem_modevent(module_t mod, int type, void *data) { switch(type) { case MOD_LOAD: if (bootverbose) printf("mem: \n"); /* Initialise memory range handling */ if (mem_range_softc.mr_op != NULL) mem_range_softc.mr_op->init(&mem_range_softc); memdev = make_dev(&mem_cdevsw, 0, UID_ROOT, GID_KMEM, 0640, "mem"); kmemdev = make_dev(&mem_cdevsw, 1, UID_ROOT, GID_KMEM, 0640, "kmem"); iodev = make_dev(&mem_cdevsw, 14, UID_ROOT, GID_WHEEL, 0600, "io"); return (0); case MOD_UNLOAD: destroy_dev(memdev); destroy_dev(kmemdev); destroy_dev(iodev); return (0); case MOD_SHUTDOWN: return (0); default: return (EOPNOTSUPP); } } DEV_MODULE(mem, mem_modevent, NULL); Index: head/sys/amd64/amd64/nexus.c =================================================================== --- head/sys/amd64/amd64/nexus.c (revision 123179) +++ head/sys/amd64/amd64/nexus.c (revision 123180) @@ -1,542 +1,542 @@ /* * Copyright 1998 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. 
It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * This code implements a `root nexus' for Intel Architecture * machines. The function of the root nexus is to serve as an * attachment point for both processors and buses, and to manage * resources which are common to all of them. In particular, * this code implements the core resource managers for interrupt * requests, DMA requests (which rightfully should be a part of the * ISA code but it's easier to do it here for now), I/O port addresses, * and I/O memory address space. */ #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #endif #include static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); struct nexus_device { struct resource_list nx_resources; }; #define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) static struct rman irq_rman, drq_rman, port_rman, mem_rman; static int nexus_probe(device_t); static int nexus_attach(device_t); static int nexus_print_all_resources(device_t dev); static int nexus_print_child(device_t, device_t); static device_t nexus_add_child(device_t bus, int order, const char *name, int unit); static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); static int nexus_activate_resource(device_t, device_t, int, int, struct resource *); static int nexus_deactivate_resource(device_t, device_t, int, int, struct resource *); static int nexus_release_resource(device_t, device_t, int, int, struct resource *); static int nexus_setup_intr(device_t, device_t, struct resource *, int flags, void (*)(void *), void *, void **); static int nexus_teardown_intr(device_t, device_t, struct resource *, void *); static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long); static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *); static void nexus_delete_resource(device_t, device_t, int, int); static device_method_t nexus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_probe), DEVMETHOD(device_attach, nexus_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_print_child, nexus_print_child), DEVMETHOD(bus_add_child, nexus_add_child), DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), DEVMETHOD(bus_release_resource, nexus_release_resource), DEVMETHOD(bus_activate_resource, nexus_activate_resource), DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource), DEVMETHOD(bus_setup_intr, nexus_setup_intr), 
	DEVMETHOD(bus_teardown_intr,	nexus_teardown_intr),
	DEVMETHOD(bus_set_resource,	nexus_set_resource),
	DEVMETHOD(bus_get_resource,	nexus_get_resource),
	DEVMETHOD(bus_delete_resource,	nexus_delete_resource),

	{ 0, 0 }
};

static driver_t nexus_driver = {
	"nexus",
	nexus_methods,
	1,			/* no softc */
};
static devclass_t nexus_devclass;

DRIVER_MODULE(nexus, root, nexus_driver, nexus_devclass, 0, 0);

static int
nexus_probe(device_t dev)
{
	int irq, last;

	device_quiet(dev);	/* suppress attach message for neatness */

	/*
	 * XXX working notes:
	 *
	 * - IRQ resource creation should be moved to the PIC/APIC driver.
	 * - DRQ resource creation should be moved to the DMAC driver.
	 * - The above should be sorted to probe earlier than any child busses.
	 *
	 * - Leave I/O and memory creation here, as child probes may need them.
	 *   (especially eg. ACPI)
	 */

	/*
	 * IRQ's are on the mainboard on old systems, but on the ISA part
	 * of PCI->ISA bridges.  There would be multiple sets of IRQs on
	 * multi-ISA-bus systems.  PCI interrupts are routed to the ISA
	 * component, so in a way, PCI can be a partial child of an ISA bus(!).
	 * APIC interrupts are global though.
	 */
	irq_rman.rm_start = 0;
	irq_rman.rm_type = RMAN_ARRAY;
	irq_rman.rm_descr = "Interrupt request lines";
	irq_rman.rm_end = NUM_IO_INTS - 1;
	if (rman_init(&irq_rman))
		panic("nexus_probe irq_rman");

	/*
	 * We search for regions of existing IRQs and add those to the IRQ
	 * resource manager.
	 */
	last = -1;
	for (irq = 0; irq < NUM_IO_INTS; irq++)
		if (intr_lookup_source(irq) != NULL) {
			if (last == -1)
				last = irq;
		} else if (last != -1) {
-			if (rman_manage_region(&irq_rman, last, irq - 1) != 0)
+			if (rman_manage_region(&irq_rman, last, irq - 1) != 0)
				panic("nexus_probe irq_rman add");
			last = -1;
		}
	if (last != -1 && rman_manage_region(&irq_rman, last, irq - 1) != 0)
		panic("nexus_probe irq_rman add");

	/*
	 * ISA DMA on PCI systems is implemented in the ISA part of each
	 * PCI->ISA bridge and the channels can be duplicated if there are
	 * multiple bridges.  (eg: laptops with docking stations)
	 */
	drq_rman.rm_start = 0;
	drq_rman.rm_end = 7;
	drq_rman.rm_type = RMAN_ARRAY;
	drq_rman.rm_descr = "DMA request lines";
	/* XXX drq 0 not available on some machines */
	if (rman_init(&drq_rman)
	    || rman_manage_region(&drq_rman,
				  drq_rman.rm_start, drq_rman.rm_end))
		panic("nexus_probe drq_rman");

	/*
	 * However, IO ports and Memory truly are global at this level,
	 * as are APIC interrupts (however many IO APICS there turn out
	 * to be on large systems..)
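	 *
	 * (Aside: the IRQ scan above is a small run-length pass -- "last"
	 * marks the start of the current run of present interrupt sources
	 * and each run is handed to rman as one region when it ends.  The
	 * same shape in isolation, with illustrative names:
	 *
	 *	int last = -1, i;
	 *	for (i = 0; i < n; i++)
	 *		if (present(i)) {
	 *			if (last == -1)
	 *				last = i;	(run starts)
	 *		} else if (last != -1) {
	 *			add_region(last, i - 1);	(run ended)
	 *			last = -1;
	 *		}
	 *	if (last != -1)
	 *		add_region(last, n - 1);	(run reaching the end)
	 *
	 * including the trailing-run fixup after the loop, just as above.)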
*/ port_rman.rm_start = 0; port_rman.rm_end = 0xffff; port_rman.rm_type = RMAN_ARRAY; port_rman.rm_descr = "I/O ports"; if (rman_init(&port_rman) || rman_manage_region(&port_rman, 0, 0xffff)) panic("nexus_probe port_rman"); mem_rman.rm_start = 0; mem_rman.rm_end = ~0u; mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) || rman_manage_region(&mem_rman, 0, ~0)) panic("nexus_probe mem_rman"); return 0; } static int nexus_attach(device_t dev) { bus_generic_probe(dev); bus_generic_attach(dev); return 0; } static int nexus_print_all_resources(device_t dev) { struct nexus_device *ndev = DEVTONX(dev); struct resource_list *rl = &ndev->nx_resources; int retval = 0; if (SLIST_FIRST(rl)) retval += printf(" at"); retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx"); retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#lx"); retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld"); return retval; } static int nexus_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += nexus_print_all_resources(child); retval += printf(" on motherboard\n"); /* XXX "motherboard", ick */ return (retval); } static device_t nexus_add_child(device_t bus, int order, const char *name, int unit) { device_t child; struct nexus_device *ndev; ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); if (!ndev) return(0); resource_list_init(&ndev->nx_resources); child = device_add_child_ordered(bus, order, name, unit); /* should we free this in nexus_child_detached? */ device_set_ivars(child, ndev); return(child); } /* * Allocate a resource on behalf of child. NB: child is usually going to be a * child of one of our descendants, not a direct child of nexus0. */ static struct resource * nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct nexus_device *ndev = DEVTONX(child); struct resource *rv; struct resource_list_entry *rle; struct rman *rm; int needactivate = flags & RF_ACTIVE; /* * If this is an allocation of the "default" range for a given RID, and * we know what the resources for this device are (ie. they aren't maintained * by a child bus), then work out the start/end values. */ if ((start == 0UL) && (end == ~0UL) && (count == 1)) { if (ndev == NULL) return(NULL); rle = resource_list_find(&ndev->nx_resources, type, *rid); if (rle == NULL) return(NULL); start = rle->start; end = rle->end; count = rle->count; } flags &= ~RF_ACTIVE; switch (type) { case SYS_RES_IRQ: rm = &irq_rman; break; case SYS_RES_DRQ: rm = &drq_rman; break; case SYS_RES_IOPORT: rm = &port_rman; break; case SYS_RES_MEMORY: rm = &mem_rman; break; default: return 0; } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == 0) return 0; if (type == SYS_RES_MEMORY) { rman_set_bustag(rv, AMD64_BUS_SPACE_MEM); } else if (type == SYS_RES_IOPORT) { rman_set_bustag(rv, AMD64_BUS_SPACE_IO); rman_set_bushandle(rv, rv->r_start); } if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { rman_release_resource(rv); return 0; } } return rv; } static int nexus_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { /* * If this is a memory resource, map it into the kernel. */ if (rman_get_bustag(r) == AMD64_BUS_SPACE_MEM) { caddr_t vaddr = 0; if (rman_get_end(r) < 1024 * 1024) { /* * The first 1Mb is mapped at KERNBASE. 
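 *
 * (For the pmap_mapdev() case that follows, mappings must be made on
 * page boundaries even when the resource is not page aligned, so the
 * code rounds the physical base down, grows the size by the same
 * offset, and adds the offset back to the returned virtual address.
 * With made-up numbers and a 4K page:
 *
 *	paddr = 0xfec00040, psize = 0x20
 *	poffs = paddr - trunc_page(paddr) = 0x40
 *	map 0xfec00000 for 0x60 bytes, getting va back
 *	use va + 0x40 through va + 0x5f
 *
 * the same trunc/extend/re-offset sequence as the code below.)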
*/ vaddr = (caddr_t)(uintptr_t)(KERNBASE + rman_get_start(r)); } else { u_int64_t paddr; u_int64_t psize; u_int32_t poffs; paddr = rman_get_start(r); psize = rman_get_size(r); poffs = paddr - trunc_page(paddr); vaddr = (caddr_t) pmap_mapdev(paddr-poffs, psize+poffs) + poffs; } rman_set_virtual(r, vaddr); rman_set_bushandle(r, (bus_space_handle_t) vaddr); } return (rman_activate_resource(r)); } static int nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { /* * If this is a memory resource, unmap it. */ if ((rman_get_bustag(r) == AMD64_BUS_SPACE_MEM) && (rman_get_end(r) >= 1024 * 1024)) { u_int32_t psize; psize = rman_get_size(r); pmap_unmapdev((vm_offset_t)rman_get_virtual(r), psize); } return (rman_deactivate_resource(r)); } static int nexus_release_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { if (rman_get_flags(r) & RF_ACTIVE) { int error = bus_deactivate_resource(child, type, rid, r); if (error) return error; } return (rman_release_resource(r)); } /* * Currently this uses the really grody interface from kern/kern_intr.c * (which really doesn't belong in kern/anything.c). Eventually, all of * the code in kern_intr.c and machdep_intr.c should get moved here, since * this is going to be the official interface. */ static int nexus_setup_intr(device_t bus, device_t child, struct resource *irq, int flags, void (*ihand)(void *), void *arg, void **cookiep) { int error; /* somebody tried to setup an irq that failed to allocate! */ if (irq == NULL) panic("nexus_setup_intr: NULL irq resource!"); *cookiep = 0; if ((irq->r_flags & RF_SHAREABLE) == 0) flags |= INTR_EXCL; /* * We depend here on rman_activate_resource() being idempotent. */ error = rman_activate_resource(irq); if (error) return (error); error = intr_add_handler(device_get_nameunit(child), irq->r_start, ihand, arg, flags, cookiep); return (error); } static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { return (intr_remove_handler(ih)); } static int nexus_set_resource(device_t dev, device_t child, int type, int rid, u_long start, u_long count) { struct nexus_device *ndev = DEVTONX(child); struct resource_list *rl = &ndev->nx_resources; /* XXX this should return a success/failure indicator */ resource_list_add(rl, type, rid, start, start + count - 1, count); return(0); } static int nexus_get_resource(device_t dev, device_t child, int type, int rid, u_long *startp, u_long *countp) { struct nexus_device *ndev = DEVTONX(child); struct resource_list *rl = &ndev->nx_resources; struct resource_list_entry *rle; rle = resource_list_find(rl, type, rid); device_printf(child, "type %d rid %d startp %p countp %p - got %p\n", type, rid, startp, countp, rle); if (!rle) return(ENOENT); if (startp) *startp = rle->start; if (countp) *countp = rle->count; return(0); } static void nexus_delete_resource(device_t dev, device_t child, int type, int rid) { struct nexus_device *ndev = DEVTONX(child); struct resource_list *rl = &ndev->nx_resources; resource_list_delete(rl, type, rid); } #ifdef DEV_ISA /* * Placeholder which claims PnP 'devices' which describe system * resources. 
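 *
 * (The hex constants below are compressed EISA/PnP IDs: three 5-bit
 * letters ('A' encoded as 1) packed into 16 bits, then the 16-bit
 * product number, the whole thing stored byte-swapped.  A standalone
 * sketch of the encoding, assuming gcc's __builtin_bswap32; the names
 * are illustrative:
 *
 *	uint32_t
 *	pnp_id(const char v[3], uint16_t prod)
 *	{
 *		uint16_t ven = ((v[0] - '@') << 10) |
 *		    ((v[1] - '@') << 5) | (v[2] - '@');
 *		return (__builtin_bswap32(((uint32_t)ven << 16) | prod));
 *	}
 *
 * pnp_id("PNP", 0x0c01) yields 0x010cd041, matching the PNP0c01 entry
 * below.)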
*/ static struct isa_pnp_id sysresource_ids[] = { { 0x010cd041 /* PNP0c01 */, "System Memory" }, { 0x020cd041 /* PNP0c02 */, "System Resource" }, { 0 } }; static int sysresource_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, sysresource_ids)) <= 0) { device_quiet(dev); } return(result); } static int sysresource_attach(device_t dev) { return(0); } static device_method_t sysresource_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sysresource_probe), DEVMETHOD(device_attach, sysresource_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t sysresource_driver = { "sysresource", sysresource_methods, 1, /* no softc */ }; static devclass_t sysresource_devclass; DRIVER_MODULE(sysresource, isa, sysresource_driver, sysresource_devclass, 0, 0); #endif /* DEV_ISA */ Index: head/sys/amd64/pci/pci_bus.c =================================================================== --- head/sys/amd64/pci/pci_bus.c (revision 123179) +++ head/sys/amd64/pci/pci_bus.c (revision 123180) @@ -1,614 +1,614 @@ /* * Copyright (c) 1997, Stefan Esser * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include
__FBSDID("$FreeBSD$");

#include "opt_cpu.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "pcib_if.h"

int
legacy_pcib_maxslots(device_t dev)
{
	return 31;
}

/* read configuration space register */
u_int32_t
legacy_pcib_read_config(device_t dev, int bus, int slot, int func,
    int reg, int bytes)
{
	return(pci_cfgregread(bus, slot, func, reg, bytes));
}

/* write configuration space register */
void
legacy_pcib_write_config(device_t dev, int bus, int slot, int func,
    int reg, u_int32_t data, int bytes)
{
	pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}

/* route interrupt */
static int
legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
{
	/* No routing possible */
	return (PCI_INVALID_IRQ);
}

static const char *
legacy_pcib_is_host_bridge(int bus, int slot, int func,
-			u_int32_t id, u_int8_t class, u_int8_t subclass,
-			u_int8_t *busnum)
+    u_int32_t id, u_int8_t class, u_int8_t subclass,
+    u_int8_t *busnum)
{
	const char *s = NULL;
	static u_int8_t pxb[4];	/* hack for 450nx */

	*busnum = 0;

	switch (id) {
	case 0x12258086:
		s = "Intel 824?? host to PCI bridge";
		/* XXX This is a guess */
		/* *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x41, 1); */
		*busnum = bus;
		break;
	case 0x71208086:
		s = "Intel 82810 (i810 GMCH) Host To Hub bridge";
		break;
	case 0x71228086:
		s = "Intel 82810-DC100 (i810-DC100 GMCH) Host To Hub bridge";
		break;
	case 0x71248086:
		s = "Intel 82810E (i810E GMCH) Host To Hub bridge";
		break;
	case 0x11308086:
		s = "Intel 82815 (i815 GMCH) Host To Hub bridge";
		break;
	case 0x71808086:
		s = "Intel 82443LX (440 LX) host to PCI bridge";
		break;
	case 0x71908086:
		s = "Intel 82443BX (440 BX) host to PCI bridge";
		break;
	case 0x71928086:
		s = "Intel 82443BX host to PCI bridge (AGP disabled)";
		break;
	case 0x71948086:
		s = "Intel 82443MX host to PCI bridge";
		break;
	case 0x71a08086:
		s = "Intel 82443GX host to PCI bridge";
		break;
	case 0x71a18086:
		s = "Intel 82443GX host to AGP bridge";
		break;
	case 0x71a28086:
		s = "Intel 82443GX host to PCI bridge (AGP disabled)";
		break;
	case 0x84c48086:
		s = "Intel 82454KX/GX (Orion) host to PCI bridge";
		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x4a, 1);
		break;
	case 0x84ca8086:
		/*
		 * For the 450nx chipset, there is a whole bundle of
		 * things pretending to be host bridges.  The MIOC will
		 * be seen first and isn't really a pci bridge (the
		 * actual busses are attached to the PXB's).  We need to
		 * read the registers of the MIOC to figure out the
		 * bus numbers for the PXB channels.
		 *
		 * Since the MIOC doesn't have a pci bus attached, we
		 * pretend it wasn't there.
		 */
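		/*
		 * A worked example, not part of this commit: with
		 * hypothetical MIOC register values BUSNO[0]=1, SUBA[0]=2,
		 * BUSNO[1]=4, SUBA[1]=5, the reads below would fill pxb[]
		 * as sketched here -- each PXB's Bus#B apparently starts
		 * one past the subordinate bus number of its Bus#A.
		 */
#if 0	/* illustration only */
		pxb[0] = 1;	/* PXB#0 Bus#A = BUSNO[0] */
		pxb[1] = 3;	/* PXB#0 Bus#B = SUBA[0] + 1 */
		pxb[2] = 4;	/* PXB#1 Bus#A = BUSNO[1] */
		pxb[3] = 6;	/* PXB#1 Bus#B = SUBA[1] + 1 */
#endif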
		pxb[0] = legacy_pcib_read_config(0, bus, slot, func,
		    0xd0, 1);	/* BUSNO[0] */
		pxb[1] = legacy_pcib_read_config(0, bus, slot, func,
		    0xd1, 1) + 1;	/* SUBA[0]+1 */
		pxb[2] = legacy_pcib_read_config(0, bus, slot, func,
		    0xd3, 1);	/* BUSNO[1] */
		pxb[3] = legacy_pcib_read_config(0, bus, slot, func,
		    0xd4, 1) + 1;	/* SUBA[1]+1 */
		return NULL;
	case 0x84cb8086:
		switch (slot) {
		case 0x12:
			s = "Intel 82454NX PXB#0, Bus#A";
			*busnum = pxb[0];
			break;
		case 0x13:
			s = "Intel 82454NX PXB#0, Bus#B";
			*busnum = pxb[1];
			break;
		case 0x14:
			s = "Intel 82454NX PXB#1, Bus#A";
			*busnum = pxb[2];
			break;
		case 0x15:
			s = "Intel 82454NX PXB#1, Bus#B";
			*busnum = pxb[3];
			break;
		}
		break;

	/* AMD -- vendor 0x1022 */
	case 0x30001022:
		s = "AMD Elan SC520 host to PCI bridge";
#ifdef CPU_ELAN
		init_AMD_Elan_sc520();
#else
		printf(
"*** WARNING: missing CPU_ELAN -- timekeeping may be wrong\n");
#endif
		break;
	case 0x70061022:
		s = "AMD-751 host to PCI bridge";
		break;
	case 0x700e1022:
		s = "AMD-761 host to PCI bridge";
		break;

	/* SiS -- vendor 0x1039 */
	case 0x04961039:
		s = "SiS 85c496";
		break;
	case 0x04061039:
		s = "SiS 85c501";
		break;
	case 0x06011039:
		s = "SiS 85c601";
		break;
	case 0x55911039:
		s = "SiS 5591 host to PCI bridge";
		break;
	case 0x00011039:
		s = "SiS 5591 host to AGP bridge";
		break;

	/* VLSI -- vendor 0x1004 */
	case 0x00051004:
		s = "VLSI 82C592 Host to PCI bridge";
		break;

	/* XXX Here is MVP3; I have the datasheet but no M/B to test it */
	/* fully.  Please let me know if anything is wrong. -F */
	/* XXX need info on the MVP3 -- any takers? */
	case 0x05981106:
		s = "VIA 82C598MVP (Apollo MVP3) host bridge";
		break;

	/* AcerLabs -- vendor 0x10b9 */
	/* Funny: the datasheet says the vendor id is "10b8" and the */
	/* sub-vendor id is "10b9", but the register always shows "10b9". -Foxfair */
	case 0x154110b9:
		s = "AcerLabs M1541 (Aladdin-V) PCI host bridge";
		break;

	/* OPTi -- vendor 0x1045 */
	case 0xc7011045:
		s = "OPTi 82C700 host to PCI bridge";
		break;
	case 0xc8221045:
		s = "OPTi 82C822 host to PCI Bridge";
		break;

	/* ServerWorks -- vendor 0x1166 */
	case 0x00051166:
		s = "ServerWorks NB6536 2.0HE host to PCI bridge";
		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
		break;
	case 0x00061166:
		/* FALLTHROUGH */
	case 0x00081166:
		/* FALLTHROUGH */
	case 0x02011166:
		/* FALLTHROUGH */
	case 0x010f1014: /* IBM re-badged ServerWorks chipset */
		s = "ServerWorks host to PCI bridge";
		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
		break;
	case 0x00091166:
		s = "ServerWorks NB6635 3.0LE host to PCI bridge";
		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
		break;
	case 0x00101166:
		s = "ServerWorks CIOB30 host to PCI bridge";
		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
		break;
	case 0x00111166:
		/* FALLTHROUGH */
	case 0x03021014: /* IBM re-badged ServerWorks chipset */
		s = "ServerWorks CMIC-HE host to PCI-X bridge";
		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
		break;

	/* XXX unknown chipset, but working */
	case 0x00171166:
		/* FALLTHROUGH */
	case 0x01011166:
		s = "ServerWorks host to PCI bridge (unknown chipset)";
		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
		break;

	/* Integrated Micro Solutions -- vendor 0x10e0 */
	case 0x884910e0:
		s = "Integrated Micro Solutions VL Bridge";
		break;

	default:
		if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
			s = "Host to PCI bridge";
		break;
	}

	return s;
}
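/*
 * A minimal sketch, not part of this commit: the 32-bit keys matched in
 * legacy_pcib_is_host_bridge() above are PCIR_DEVVENDOR values, with the
 * vendor ID in the low 16 bits and the device ID in the high 16 bits.
 */
#if 0	/* illustration only */
	u_int32_t id = 0x12258086;		/* one of the cases above */
	u_int16_t vendor = id & 0xffff;		/* 0x8086, Intel */
	u_int16_t device = id >> 16;		/* 0x1225 */
#endif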
/*
 * Scan the first pci bus for host-pci bridges and add pcib instances
 * to the nexus for each bridge.
 */
static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
	int bus, slot, func;
	u_int8_t  hdrtype;
	int found = 0;
	int pcifunchigh;
	int found824xx = 0;
	int found_orion = 0;
	device_t child;
	devclass_t pci_devclass;

	if (pci_cfgregopen() == 0)
		return;
	/*
	 * Check to see if we haven't already had a PCI bus added
	 * via some other means.  If we have, bail since otherwise
	 * we're going to end up duplicating it.
	 */
	if ((pci_devclass = devclass_find("pci")) &&
	    devclass_get_device(pci_devclass, 0))
		return;

	bus = 0;
 retry:
	for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
		func = 0;
		hdrtype = legacy_pcib_read_config(0, bus, slot, func,
		    PCIR_HDRTYPE, 1);
		/*
		 * When enumerating bus devices, the standard says that
		 * one should check the header type and ignore slots whose
		 * header types the software doesn't know about.  We use
		 * this to filter out devices.
		 */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if ((hdrtype & PCIM_MFDEV) &&
		    (!found_orion || hdrtype != 0xff))
			pcifunchigh = PCI_FUNCMAX;
		else
			pcifunchigh = 0;
		for (func = 0; func <= pcifunchigh; func++) {
			/*
			 * Read the IDs and class from the device.
			 */
			u_int32_t id;
			u_int8_t class, subclass, busnum;
			const char *s;
			device_t *devs;
			int ndevs, i;

			id = legacy_pcib_read_config(0, bus, slot, func,
			    PCIR_DEVVENDOR, 4);
			if (id == -1)
				continue;
			class = legacy_pcib_read_config(0, bus, slot, func,
			    PCIR_CLASS, 1);
			subclass = legacy_pcib_read_config(0, bus, slot, func,
			    PCIR_SUBCLASS, 1);

			s = legacy_pcib_is_host_bridge(bus, slot, func,
			    id, class, subclass, &busnum);
			if (s == NULL)
				continue;

			/*
			 * Check to see if the physical bus has already
			 * been seen.  E.g.: hybrid 32 and 64 bit host
			 * bridges to the same logical bus.
			 */
			if (device_get_children(parent, &devs, &ndevs) == 0) {
				for (i = 0; s != NULL && i < ndevs; i++) {
					if (strcmp(device_get_name(devs[i]),
					    "pcib") != 0)
						continue;
					if (legacy_get_pcibus(devs[i]) == busnum)
						s = NULL;
				}
				free(devs, M_TEMP);
			}

			if (s == NULL)
				continue;

			/*
			 * Add at priority 100 to make sure we
			 * go after any motherboard resources
			 */
			child = BUS_ADD_CHILD(parent, 100,
			    "pcib", busnum);
			device_set_desc(child, s);
			legacy_set_pcibus(child, busnum);

			found = 1;
			if (id == 0x12258086)
				found824xx = 1;
			if (id == 0x84c48086)
				found_orion = 1;
		}
	}
	if (found824xx && bus == 0) {
		bus++;
		goto retry;
	}
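	/*
	 * A minimal sketch, not part of this commit: the header-type
	 * filter in the scan above relies on the PCIR_HDRTYPE layout.
	 */
#if 0	/* illustration only */
	type  = hdrtype & PCIM_HDRTYPE;	/* 0 device, 1 PCI-PCI, 2 CardBus */
	mfdev = hdrtype & PCIM_MFDEV;	/* bit 7: functions 1..7 may exist */
#endif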
	/*
	 * Make sure we add at least one bridge since some old
	 * hardware doesn't actually have a host-pci bridge device.
	 * Note that pci_cfgregopen() thinks we have PCI devices.
	 */
	if (!found) {
		if (bootverbose)
			printf(
	"legacy_pcib_identify: no bridge found, adding pcib0 anyway\n");
		child = BUS_ADD_CHILD(parent, 100, "pcib", 0);
		legacy_set_pcibus(child, 0);
	}
}

static int
legacy_pcib_probe(device_t dev)
{
	if (pci_cfgregopen() == 0)
		return ENXIO;
	return -100;
}

int
legacy_pcib_attach(device_t dev)
{
	device_add_child(dev, "pci", pcib_get_bus(dev));
	return bus_generic_attach(dev);
}

int
legacy_pcib_read_ivar(device_t dev, device_t child, int which,
    uintptr_t *result)
{
	switch (which) {
	case PCIB_IVAR_BUS:
		*result = legacy_get_pcibus(dev);
		return 0;
	}
	return ENOENT;
}

int
legacy_pcib_write_ivar(device_t dev, device_t child, int which,
    uintptr_t value)
{
	switch (which) {
	case PCIB_IVAR_BUS:
		legacy_set_pcibus(dev, value);
		return 0;
	}
	return ENOENT;
}

static device_method_t legacy_pcib_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	legacy_pcib_identify),
	DEVMETHOD(device_probe,		legacy_pcib_probe),
	DEVMETHOD(device_attach,	legacy_pcib_attach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_read_ivar,	legacy_pcib_read_ivar),
	DEVMETHOD(bus_write_ivar,	legacy_pcib_write_ivar),
	DEVMETHOD(bus_alloc_resource,	bus_generic_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	/* pcib interface */
	DEVMETHOD(pcib_maxslots,	legacy_pcib_maxslots),
	DEVMETHOD(pcib_read_config,	legacy_pcib_read_config),
	DEVMETHOD(pcib_write_config,	legacy_pcib_write_config),
	DEVMETHOD(pcib_route_interrupt,	legacy_pcib_route_interrupt),

	{ 0, 0 }
};

static driver_t legacy_pcib_driver = {
	"pcib",
	legacy_pcib_methods,
	1,
};

DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, pcib_devclass, 0, 0);

/*
 * Provide a device to "eat" the host->pci bridges that we dug up above
 * and stop them showing up twice on the probes.  This also stops them
 * showing up as 'none' in pciconf -l.
 */
static int
pci_hostb_probe(device_t dev)
{
	u_int32_t id;

	id = pci_get_devid(dev);

	switch (id) {

	/* VIA VT82C596 Power Management Function */
	case 0x30501106:
		return ENXIO;

	default:
		break;
	}

	if (pci_get_class(dev) == PCIC_BRIDGE &&
	    pci_get_subclass(dev) == PCIS_BRIDGE_HOST) {
		device_set_desc(dev, "Host to PCI bridge");
		device_quiet(dev);
		return -10000;
	}

	return ENXIO;
}

static int
pci_hostb_attach(device_t dev)
{
	return 0;
}

static device_method_t pci_hostb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_hostb_probe),
	DEVMETHOD(device_attach,	pci_hostb_attach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	{ 0, 0 }
};

static driver_t pci_hostb_driver = {
	"hostb",
	pci_hostb_methods,
	1,
};

static devclass_t pci_hostb_devclass;

DRIVER_MODULE(hostb, pci, pci_hostb_driver, pci_hostb_devclass, 0, 0);
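/*
 * A note and sketch, not part of this commit: the probe return values
 * above follow the newbus convention that zero is a definite match,
 * negative values are matches a more specific driver may outbid (closer
 * to zero wins), and positive errno values decline the device.  That is
 * why pci_hostb_probe() bids a very weak -10000: any real bridge driver
 * takes precedence.  MYDRV_DEVID and mydrv_probe() below are hypothetical.
 */
#if 0	/* illustration only */
static int
mydrv_probe(device_t dev)
{
	if (pci_get_devid(dev) != MYDRV_DEVID)	/* hypothetical ID */
		return (ENXIO);			/* decline */
	device_set_desc(dev, "My device");
	return (-10);	/* match, but let more specific drivers win */
}
#endif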
/*
 * Install placeholder to claim the resources owned by the
 * PCI bus interface.  This could be used to extract the
 * config space registers in the extreme case where the PnP
 * ID is available and the PCI BIOS isn't, but for now we just
 * eat the PnP ID and do nothing else.
 *
 * XXX we should silence this probe, as it will generally confuse
 * people.
 */
static struct isa_pnp_id pcibus_pnp_ids[] = {
	{ 0x030ad041 /* PNP0A03 */, "PCI Bus" },
	{ 0 }
};

static int
pcibus_pnp_probe(device_t dev)
{
	int result;

	if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev,
	    pcibus_pnp_ids)) <= 0)
		device_quiet(dev);
	return(result);
}

static int
pcibus_pnp_attach(device_t dev)
{
	return(0);
}

static device_method_t pcibus_pnp_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pcibus_pnp_probe),
	DEVMETHOD(device_attach,	pcibus_pnp_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),
	{ 0, 0 }
};

static driver_t pcibus_pnp_driver = {
	"pcibus_pnp",
	pcibus_pnp_methods,
	1,		/* no softc */
};

static devclass_t pcibus_pnp_devclass;

DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);

Index: head/sys/amd64/pci/pci_cfgreg.c
===================================================================
--- head/sys/amd64/pci/pci_cfgreg.c	(revision 123179)
+++ head/sys/amd64/pci/pci_cfgreg.c	(revision 123180)
@@ -1,348 +1,349 @@
/*
 * Copyright (c) 1997, Stefan Esser
 * Copyright (c) 2000, Michael Smith
 * Copyright (c) 2000, BSDi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include		/* XXX trim includes */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "pcib_if.h"

static int cfgmech;
static int devmax;

static int	pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
static void	pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
static int	pcireg_cfgopen(void);

static struct mtx pcicfg_mtx;

/*
 * Initialise access to PCI configuration space
 */
int
pci_cfgregopen(void)
{
	static int opened = 0;

	if (opened)
		return (1);

	if (pcireg_cfgopen() == 0)
		return (0);

	mtx_init(&pcicfg_mtx, "pcicfg", NULL, MTX_SPIN);
	opened = 1;
	return (1);
}

/*
 * Read configuration space register
 */
u_int32_t
pci_cfgregread(int bus, int slot, int func, int reg, int bytes)
{
	uint32_t line;

	/*
	 * Some BIOS writers seem to want to ignore the spec and put
	 * 0 in the intline rather than 255 to indicate none.
	 * Some use numbers in the range 128-254 to indicate something
	 * strange, apparently undocumented anywhere.  Assume these are
	 * completely bogus and map them to 255, which the rest of the
	 * PCI code recognizes as an invalid IRQ.
	 */
	if (reg == PCIR_INTLINE && bytes == 1) {
		line = pcireg_cfgread(bus, slot, func, PCIR_INTLINE, 1);
		if (line == 0 || line >= 128)
			line = PCI_INVALID_IRQ;
		return (line);
	}
	return (pcireg_cfgread(bus, slot, func, reg, bytes));
}

/*
 * Write configuration space register
 */
void
pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data,
    int bytes)
{
	pcireg_cfgwrite(bus, slot, func, reg, data, bytes);
}

/*
 * Route a PCI interrupt
 */
int
pci_cfgintr(int bus, int device, int pin, int oldirq)
{
	printf("pci_cfgintr: can't route an interrupt to %d:%d INT%c without ACPI\n",
	    bus, device, 'A' + pin - 1);
	return (PCI_INVALID_IRQ);
}

/*
 * Configuration space access using direct register operations
 */

/* enable configuration space accesses and return data port address */
static int
pci_cfgenable(unsigned bus, unsigned slot, unsigned func, int reg, int bytes)
{
	int dataport = 0;

	if (bus <= PCI_BUSMAX
	    && slot < devmax
	    && func <= PCI_FUNCMAX
	    && reg <= PCI_REGMAX
	    && bytes != 3
	    && (unsigned) bytes <= 4
	    && (reg & (bytes - 1)) == 0) {
		switch (cfgmech) {
		case 1:
			outl(CONF1_ADDR_PORT, (1 << 31)
			    | (bus << 16) | (slot << 11)
			    | (func << 8) | (reg & ~0x03));
			dataport = CONF1_DATA_PORT + (reg & 0x03);
			break;
		case 2:
			outb(CONF2_ENABLE_PORT, 0xf0 | (func << 1));
			outb(CONF2_FORWARD_PORT, bus);
			dataport = 0xc000 | (slot << 8) | reg;
			break;
		}
	}
	return (dataport);
}

/* disable configuration space accesses */
static void
pci_cfgdisable(void)
{
	switch (cfgmech) {
	case 1:
		outl(CONF1_ADDR_PORT, 0);
		break;
	case 2:
		outb(CONF2_ENABLE_PORT, 0);
		outb(CONF2_FORWARD_PORT, 0);
		break;
	}
}

static int
pcireg_cfgread(int bus, int slot, int func, int reg, int bytes)
{
	int data = -1;
	int port;

	mtx_lock_spin(&pcicfg_mtx);
	port = pci_cfgenable(bus, slot, func, reg, bytes);
	if (port != 0) {
		switch (bytes) {
		case 1:
			data = inb(port);
			break;
		case 2:
			data = inw(port);
			break;
		case 4:
			data = inl(port);
			break;
		}
		pci_cfgdisable();
	}
	mtx_unlock_spin(&pcicfg_mtx);
	return (data);
}

static void
pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes)
{
	int port;

	mtx_lock_spin(&pcicfg_mtx);
	port = pci_cfgenable(bus, slot, func, reg, bytes);
	if (port != 0) {
		switch (bytes) {
		case 1:
			outb(port, data);
			break;
		case 2:
			outw(port, data);
			break;
		case 4:
			outl(port, data);
			break;
		}
		pci_cfgdisable();
	}
	mtx_unlock_spin(&pcicfg_mtx);
}

/* check whether the configuration mechanism has been correctly identified */
static int
pci_cfgcheck(int maxdev)
{
	uint32_t id, class;
	uint8_t header;
	uint8_t device;
	int port;

	if (bootverbose)
		printf("pci_cfgcheck:\tdevice ");

	for (device = 0; device < maxdev; device++) {
		if (bootverbose)
			printf("%d ", device);

		port = pci_cfgenable(0, device, 0, 0, 4);
		id = inl(port);
		if (id == 0 || id == 0xffffffff)
			continue;

		port = pci_cfgenable(0, device, 0, 8, 4);
		class = inl(port) >> 8;
		if (bootverbose)
			printf("[class=%06x] ", class);
		if (class == 0 || (class & 0xf870ff) != 0)
			continue;

		port = pci_cfgenable(0, device, 0, 14, 1);
		header = inb(port);
		if (bootverbose)
			printf("[hdr=%02x] ", header);
		if ((header & 0x7e) != 0)
			continue;

		if (bootverbose)
			printf("is there (id=%08x)\n", id);

		pci_cfgdisable();
		return (1);
	}
	if (bootverbose)
		printf("-- nothing found\n");

	pci_cfgdisable();
	return (0);
}
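/*
 * A worked example, not part of this commit: under configuration
 * mechanism 1, pci_cfgenable() above packs the target into the dword
 * written to CONF1_ADDR_PORT (0xcf8).  For bus 0, slot 3, function 0,
 * register 0x04:
 */
#if 0	/* illustration only */
	u_int32_t addr = (1U << 31)	/* enable bit */
	    | (0 << 16)			/* bus */
	    | (3 << 11)			/* slot */
	    | (0 << 8)			/* function */
	    | (0x04 & ~0x03);		/* dword-aligned register */
	/* addr == 0x80001804; data then appears at 0xcfc + (reg & 0x03) */
#endif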
static int
pcireg_cfgopen(void)
{
	uint32_t mode1res, oldval1;
	uint8_t mode2res, oldval2;

	oldval1 = inl(CONF1_ADDR_PORT);

	if (bootverbose) {
		printf("pci_open(1):\tmode 1 addr port (0x0cf8) is 0x%08x\n",
		    oldval1);
	}

	if ((oldval1 & CONF1_ENABLE_MSK) == 0) {

		cfgmech = 1;
		devmax = 32;

		outl(CONF1_ADDR_PORT, CONF1_ENABLE_CHK);
		outb(CONF1_ADDR_PORT + 3, 0);
		mode1res = inl(CONF1_ADDR_PORT);
		outl(CONF1_ADDR_PORT, oldval1);

		if (bootverbose)
			printf("pci_open(1a):\tmode1res=0x%08x (0x%08lx)\n",
			    mode1res, CONF1_ENABLE_CHK);

		if (mode1res) {
			if (pci_cfgcheck(32))
				return (cfgmech);
		}

		outl(CONF1_ADDR_PORT, CONF1_ENABLE_CHK1);
		mode1res = inl(CONF1_ADDR_PORT);
		outl(CONF1_ADDR_PORT, oldval1);

		if (bootverbose)
			printf("pci_open(1b):\tmode1res=0x%08x (0x%08lx)\n",
			    mode1res, CONF1_ENABLE_CHK1);

		if ((mode1res & CONF1_ENABLE_MSK1) == CONF1_ENABLE_RES1) {
			if (pci_cfgcheck(32))
				return (cfgmech);
		}
	}

	oldval2 = inb(CONF2_ENABLE_PORT);

	if (bootverbose) {
		printf("pci_open(2):\tmode 2 enable port (0x0cf8) is 0x%02x\n",
		    oldval2);
	}

	if ((oldval2 & 0xf0) == 0) {

		cfgmech = 2;
		devmax = 16;

		outb(CONF2_ENABLE_PORT, CONF2_ENABLE_CHK);
		mode2res = inb(CONF2_ENABLE_PORT);
		outb(CONF2_ENABLE_PORT, oldval2);

		if (bootverbose)
			printf("pci_open(2a):\tmode2res=0x%02x (0x%02x)\n",
			    mode2res, CONF2_ENABLE_CHK);

		if (mode2res == CONF2_ENABLE_RES) {
			if (bootverbose)
				printf("pci_open(2a):\tnow trying mechanism 2\n");

			if (pci_cfgcheck(16))
				return (cfgmech);
		}
	}

	cfgmech = 0;
	devmax = 0;
	return (cfgmech);
}
+