Index: head/sys/alpha/alpha/machdep.c =================================================================== --- head/sys/alpha/alpha/machdep.c (revision 62572) +++ head/sys/alpha/alpha/machdep.c (revision 62573) @@ -1,2154 +1,2154 @@ /*- * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center and by Chris G. Demetriou. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. * All rights reserved. * * Author: Chris G. Demetriou * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include "opt_compat.h" #include "opt_ddb.h" #include "opt_simos.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct proc* curproc; struct proc* fpcurproc; struct pcb* curpcb; u_int64_t cycles_per_usec; u_int32_t cycles_per_sec; int whichqs, whichrtqs, whichidqs; int cold = 1; struct platform platform; alpha_chipset_t chipset; struct bootinfo_kernel bootinfo; struct timeval switchtime; int switchticks; struct user *proc0paddr; char machine[] = "alpha"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); static char cpu_model[128]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, ""); #ifdef DDB /* start and end of kernel symbol table */ void *ksym_start, *ksym_end; #endif int alpha_unaligned_print = 1; /* warn about unaligned accesses */ int alpha_unaligned_fix = 1; /* fix up unaligned accesses */ int alpha_unaligned_sigbus = 0; /* don't SIGBUS on fixed-up accesses */ SYSCTL_INT(_machdep, CPU_UNALIGNED_PRINT, unaligned_print, CTLFLAG_RW, &alpha_unaligned_print, 0, ""); SYSCTL_INT(_machdep, CPU_UNALIGNED_FIX, unaligned_fix, CTLFLAG_RW, &alpha_unaligned_fix, 0, ""); SYSCTL_INT(_machdep, CPU_UNALIGNED_SIGBUS, unaligned_sigbus, CTLFLAG_RW, &alpha_unaligned_sigbus, 0, ""); static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); struct msgbuf *msgbufp=0; int bootverbose = 0, Maxmem = 0; long dumplo; int totalphysmem; /* total amount of physical memory in system */ int physmem; /* physical memory used by NetBSD + some rsvd */ int resvmem; /* amount of memory reserved for PROM */ int unusedmem; /* amount of memory for OS that we don't use */ int unknownmem; /* amount of memory with an unknown use */ int ncpus; /* number of cpus */ vm_offset_t phys_avail[10]; static int -sysctl_hw_physmem (SYSCTL_HANDLER_ARGS) +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, alpha_ptob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int -sysctl_hw_usermem (SYSCTL_HANDLER_ARGS) +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, alpha_ptob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); SYSCTL_INT(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, ""); /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static void identifycpu __P((void)); static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; #define offsetof(type, member) ((size_t)(&((type *)0)->member)) /* * Hooked into the shutdown chain; if the system is to be halted, * unconditionally drop back to the SRM console. */ static void alpha_srm_shutdown(void *junk, int howto) { if (howto & RB_HALT) alpha_pal_halt(); } static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; vm_offset_t firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); identifycpu(); /* startrtclock(); */ #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ld (%ldK bytes)\n", alpha_ptob(Maxmem), alpha_ptob(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } /* * Calculate callout wheel size */ for (callwheelsize = 1, callwheelbits = 0; callwheelsize < ncallout; callwheelsize <<= 1, ++callwheelbits) ; callwheelmask = callwheelsize - 1; /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); valloc(callwheel, struct callout_tailq, callwheelsize); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/20 of our ram over 64MB. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / PAGE_SIZE; nbuf = 50; if (physmem > 1024) nbuf += min((physmem - 1024) / factor, 16384 / factor); if (physmem > 16384) nbuf += (physmem - 16384) * 2 / (factor * 5); } nswbuf = max(min(nbuf/4, 64), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); v = bufhashinit(v); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (vm_offset_t)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE)); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size); pager_map->system_map = 1; exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size); mb_map->system_map = 1; } /* * Initialize callouts */ SLIST_INIT(&callfree); for (i = 0; i < ncallout; i++) { callout_init(&callout[i]); callout[i].c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle); } for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&callwheel[i]); } #if defined(USERCONFIG) #if defined(USERCONFIG_BOOT) if (1) #else if (boothowto & RB_CONFIG) #endif { userconfig(); cninit(); /* the preferred console may have changed */ } #endif printf("avail memory = %ld (%ldK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, alpha_srm_shutdown, 0, SHUTDOWN_PRI_LAST); } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } int unregister_netisr(num) int num; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("unregister_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = NULL; return (0); } /* * Retrieve the platform name from the DSR. */ const char * alpha_dsr_sysname() { struct dsrdb *dsr; const char *sysname; /* * DSR does not exist on early HWRPB versions. */ if (hwrpb->rpb_version < HWRPB_DSRDB_MINVERS) return (NULL); dsr = (struct dsrdb *)(((caddr_t)hwrpb) + hwrpb->rpb_dsrdb_off); sysname = (const char *)((caddr_t)dsr + (dsr->dsr_sysname_off + sizeof(u_int64_t))); return (sysname); } /* * Lookup the system specified system variation in the provided table, * returning the model string on match. */ const char * alpha_variation_name(u_int64_t variation, const struct alpha_variation_table *avtp) { int i; for (i = 0; avtp[i].avt_model != NULL; i++) if (avtp[i].avt_variation == variation) return (avtp[i].avt_model); return (NULL); } /* * Generate a default platform name based for unknown system variations. */ const char * alpha_unknown_sysname() { static char s[128]; /* safe size */ snprintf(s, sizeof(s), "%s family, unknown model variation 0x%lx", platform.family, hwrpb->rpb_variation & SV_ST_MASK); return ((const char *)s); } static void identifycpu(void) { u_int64_t type, major, minor; u_int64_t amask; struct pcs *pcsp; char *cpuname[] = { "unknown", /* 0 */ "EV3", /* 1 */ "EV4 (21064)", /* 2 */ "Simulation", /* 3 */ "LCA Family", /* 4 */ "EV5 (21164)", /* 5 */ "EV45 (21064A)", /* 6 */ "EV56 (21164A)", /* 7 */ "EV6 (21264)", /* 8 */ "PCA56 (21164PC)" /* 9 */ }; /* * print out CPU identification information. */ printf("%s\n%s, %ldMHz\n", platform.family, platform.model, hwrpb->rpb_cc_freq / 1000000); /* XXX true for 21164? */ printf("%ld byte page size, %d processor%s.\n", hwrpb->rpb_page_size, ncpus, ncpus == 1 ? "" : "s"); #if 0 /* this isn't defined for any systems that we run on? */ printf("serial number 0x%lx 0x%lx\n", ((long *)hwrpb->rpb_ssn)[0], ((long *)hwrpb->rpb_ssn)[1]); /* and these aren't particularly useful! */ printf("variation: 0x%lx, revision 0x%lx\n", hwrpb->rpb_variation, *(long *)hwrpb->rpb_revision); #endif pcsp = LOCATE_PCS(hwrpb, hwrpb->rpb_primary_cpu_id); /* cpu type */ type = pcsp->pcs_proc_type; major = (type & PCS_PROC_MAJOR) >> PCS_PROC_MAJORSHIFT; minor = (type & PCS_PROC_MINOR) >> PCS_PROC_MINORSHIFT; if (major < sizeof(cpuname)/sizeof(char *)) printf("CPU: %s major=%lu minor=%lu", cpuname[major], major, minor); else printf("CPU: major=%lu minor=%lu\n", major, minor); /* amask */ if (major >= PCS_PROC_EV56) { amask = 0xffffffff; /* 32 bit for printf */ amask = (~alpha_amask(amask)) & amask; printf(" extensions=0x%b\n", (u_int32_t) amask, "\020" "\001BWX" "\002FIX" "\003CIX" "\011MVI" "\012PRECISE" ); } else printf("\n"); /* PAL code */ printf("OSF PAL rev: 0x%lx\n", pcsp->pcs_palrevisions[PALvar_OSF1]); } extern char kernel_text[], _end[]; void alpha_init(pfn, ptb, bim, bip, biv) u_long pfn; /* first free PFN number */ u_long ptb; /* PFN of current level 1 page table */ u_long bim; /* bootinfo magic */ u_long bip; /* bootinfo pointer */ u_long biv; /* bootinfo version */ { int phys_avail_cnt; char *bootinfo_msg; vm_offset_t kernstart, kernend; vm_offset_t kernstartpfn, kernendpfn, pfn0, pfn1; struct mddt *mddtp; struct mddt_cluster *memc; int i, mddtweird; int cputype; char *p; /* NO OUTPUT ALLOWED UNTIL FURTHER NOTICE */ /* * Turn off interrupts (not mchecks) and floating point. * Make sure the instruction and data streams are consistent. */ (void)alpha_pal_swpipl(ALPHA_PSL_IPL_HIGH); /* alpha_pal_wrfen(0); */ ALPHA_TBIA(); alpha_pal_imb(); /* * Get critical system information (if possible, from the * information provided by the boot program). */ bootinfo_msg = NULL; if (bim == BOOTINFO_MAGIC) { if (biv == 0) { /* backward compat */ biv = *(u_long *)bip; bip += 8; } switch (biv) { case 1: { struct bootinfo_v1 *v1p = (struct bootinfo_v1 *)bip; bootinfo.ssym = v1p->ssym; bootinfo.esym = v1p->esym; bootinfo.kernend = v1p->kernend; bootinfo.modptr = v1p->modptr; bootinfo.envp = v1p->envp; /* hwrpb may not be provided by boot block in v1 */ if (v1p->hwrpb != NULL) { bootinfo.hwrpb_phys = ((struct rpb *)v1p->hwrpb)->rpb_phys; bootinfo.hwrpb_size = v1p->hwrpbsize; } else { bootinfo.hwrpb_phys = ((struct rpb *)HWRPB_ADDR)->rpb_phys; bootinfo.hwrpb_size = ((struct rpb *)HWRPB_ADDR)->rpb_size; } bcopy(v1p->boot_flags, bootinfo.boot_flags, min(sizeof v1p->boot_flags, sizeof bootinfo.boot_flags)); bcopy(v1p->booted_kernel, bootinfo.booted_kernel, min(sizeof v1p->booted_kernel, sizeof bootinfo.booted_kernel)); /* booted dev not provided in bootinfo */ init_prom_interface((struct rpb *) ALPHA_PHYS_TO_K0SEG(bootinfo.hwrpb_phys)); prom_getenv(PROM_E_BOOTED_DEV, bootinfo.booted_dev, sizeof bootinfo.booted_dev); break; } default: bootinfo_msg = "unknown bootinfo version"; goto nobootinfo; } } else { bootinfo_msg = "boot program did not pass bootinfo"; nobootinfo: bootinfo.ssym = (u_long)&_end; bootinfo.esym = (u_long)&_end; #ifdef SIMOS { char* p = (char*)bootinfo.ssym + 8; if (p[EI_MAG0] == ELFMAG0 && p[EI_MAG1] == ELFMAG1 && p[EI_MAG2] == ELFMAG2 && p[EI_MAG3] == ELFMAG3) { bootinfo.ssym = (u_long) p; bootinfo.esym = (u_long)p + *(u_long*)(p - 8); } } #endif bootinfo.hwrpb_phys = ((struct rpb *)HWRPB_ADDR)->rpb_phys; bootinfo.hwrpb_size = ((struct rpb *)HWRPB_ADDR)->rpb_size; init_prom_interface((struct rpb *)HWRPB_ADDR); prom_getenv(PROM_E_BOOTED_OSFLAGS, bootinfo.boot_flags, sizeof bootinfo.boot_flags); #ifndef SIMOS prom_getenv(PROM_E_BOOTED_FILE, bootinfo.booted_kernel, sizeof bootinfo.booted_kernel); #endif prom_getenv(PROM_E_BOOTED_DEV, bootinfo.booted_dev, sizeof bootinfo.booted_dev); } /* * Initialize the kernel's mapping of the RPB. It's needed for * lots of things. */ hwrpb = (struct rpb *)ALPHA_PHYS_TO_K0SEG(bootinfo.hwrpb_phys); /* * Remember how many cycles there are per microsecond, * so that we can use delay(). Round up, for safety. */ cycles_per_usec = (hwrpb->rpb_cc_freq + 999999) / 1000000; /* * Remember how many cycles per closk for coping with missed * clock interrupts. */ cycles_per_sec = hwrpb->rpb_cc_freq; /* * Initalize the (temporary) bootstrap console interface, so * we can use printf until the VM system starts being setup. * The real console is initialized before then. */ init_bootstrap_console(); /* OUTPUT NOW ALLOWED */ /* delayed from above */ if (bootinfo_msg) printf("WARNING: %s (0x%lx, 0x%lx, 0x%lx)\n", bootinfo_msg, bim, bip, biv); /* * Point interrupt/exception vectors to our own. */ alpha_pal_wrent(XentInt, ALPHA_KENTRY_INT); alpha_pal_wrent(XentArith, ALPHA_KENTRY_ARITH); alpha_pal_wrent(XentMM, ALPHA_KENTRY_MM); alpha_pal_wrent(XentIF, ALPHA_KENTRY_IF); alpha_pal_wrent(XentUna, ALPHA_KENTRY_UNA); alpha_pal_wrent(XentSys, ALPHA_KENTRY_SYS); /* * Clear pending machine checks and error reports, and enable * system- and processor-correctable error reporting. */ alpha_pal_wrmces(alpha_pal_rdmces() & ~(ALPHA_MCES_DSC|ALPHA_MCES_DPC)); /* * Find out what hardware we're on, and do basic initialization. */ cputype = hwrpb->rpb_type; if (cputype < 0) { /* * At least some white-box (NT) systems have SRM which * reports a systype that's the negative of their * blue-box (UNIX/OVMS) counterpart. */ cputype = -cputype; } if (cputype >= API_ST_BASE) { if (cputype >= napi_cpuinit + API_ST_BASE) { platform_not_supported(cputype); /* NOTREACHED */ } cputype -= API_ST_BASE; api_cpuinit[cputype].init(cputype); } else { if (cputype >= ncpuinit) { platform_not_supported(cputype); /* NOTREACHED */ } cpuinit[cputype].init(cputype); } snprintf(cpu_model, sizeof(cpu_model), "%s", platform.model); /* * Initalize the real console, so the the bootstrap console is * no longer necessary. */ if (platform.cons_init) platform.cons_init(); /* NO MORE FIRMWARE ACCESS ALLOWED */ #ifdef _PMAP_MAY_USE_PROM_CONSOLE /* * XXX (unless _PMAP_MAY_USE_PROM_CONSOLE is defined and * XXX pmap_uses_prom_console() evaluates to non-zero.) */ #endif /* * find out this system's page size */ if (hwrpb->rpb_page_size != PAGE_SIZE) panic("page size %ld != 8192?!", hwrpb->rpb_page_size); /* * Find the beginning and end of the kernel (and leave a * bit of space before the beginning for the bootstrap * stack). */ kernstart = trunc_page(kernel_text) - 2 * PAGE_SIZE; #ifdef DDB ksym_start = (void *)bootinfo.ssym; ksym_end = (void *)bootinfo.esym; kernend = (vm_offset_t)round_page(ksym_end); #else kernend = (vm_offset_t)round_page(_end); #endif /* But if the bootstrap tells us otherwise, believe it! */ if (bootinfo.kernend) kernend = round_page(bootinfo.kernend); preload_metadata = (caddr_t)bootinfo.modptr; kern_envp = bootinfo.envp; p = getenv("kernelname"); if (p) strncpy(kernelname, p, sizeof(kernelname) - 1); kernstartpfn = atop(ALPHA_K0SEG_TO_PHYS(kernstart)); kernendpfn = atop(ALPHA_K0SEG_TO_PHYS(kernend)); #ifdef SIMOS /* * SimOS console puts the bootstrap stack after kernel */ kernendpfn += 4; #endif /* * Find out how much memory is available, by looking at * the memory cluster descriptors. This also tries to do * its best to detect things things that have never been seen * before... */ mddtp = (struct mddt *)(((caddr_t)hwrpb) + hwrpb->rpb_memdat_off); /* MDDT SANITY CHECKING */ mddtweird = 0; if (mddtp->mddt_cluster_cnt < 2) { mddtweird = 1; printf("WARNING: weird number of mem clusters: %ld\n", mddtp->mddt_cluster_cnt); } #ifdef DEBUG_CLUSTER printf("Memory cluster count: %d\n", mddtp->mddt_cluster_cnt); #endif phys_avail_cnt = 0; for (i = 0; i < mddtp->mddt_cluster_cnt; i++) { memc = &mddtp->mddt_clusters[i]; #ifdef DEBUG_CLUSTER printf("MEMC %d: pfn 0x%lx cnt 0x%lx usage 0x%lx\n", i, memc->mddt_pfn, memc->mddt_pg_cnt, memc->mddt_usage); #endif totalphysmem += memc->mddt_pg_cnt; if (memc->mddt_usage & MDDT_mbz) { mddtweird = 1; printf("WARNING: mem cluster %d has weird " "usage 0x%lx\n", i, memc->mddt_usage); unknownmem += memc->mddt_pg_cnt; continue; } if (memc->mddt_usage & MDDT_NONVOLATILE) { /* XXX should handle these... */ printf("WARNING: skipping non-volatile mem " "cluster %d\n", i); unusedmem += memc->mddt_pg_cnt; continue; } if (memc->mddt_usage & MDDT_PALCODE) { resvmem += memc->mddt_pg_cnt; continue; } /* * We have a memory cluster available for system * software use. We must determine if this cluster * holds the kernel. */ /* * XXX If the kernel uses the PROM console, we only use the * XXX memory after the kernel in the first system segment, * XXX to avoid clobbering prom mapping, data, etc. */ physmem += memc->mddt_pg_cnt; pfn0 = memc->mddt_pfn; pfn1 = memc->mddt_pfn + memc->mddt_pg_cnt; if (pfn0 <= kernendpfn && kernstartpfn <= pfn1) { /* * Must compute the location of the kernel * within the segment. */ #ifdef DEBUG_CLUSTER printf("Cluster %d contains kernel\n", i); #endif if (!pmap_uses_prom_console()) { if (pfn0 < kernstartpfn) { /* * There is a chunk before the kernel. */ #ifdef DEBUG_CLUSTER printf("Loading chunk before kernel: " "0x%lx / 0x%lx\n", pfn0, kernstartpfn); #endif phys_avail[phys_avail_cnt] = alpha_ptob(pfn0); phys_avail[phys_avail_cnt+1] = alpha_ptob(kernstartpfn); phys_avail_cnt += 2; } } if (kernendpfn < pfn1) { /* * There is a chunk after the kernel. */ #ifdef DEBUG_CLUSTER printf("Loading chunk after kernel: " "0x%lx / 0x%lx\n", kernendpfn, pfn1); #endif phys_avail[phys_avail_cnt] = alpha_ptob(kernendpfn); phys_avail[phys_avail_cnt+1] = alpha_ptob(pfn1); phys_avail_cnt += 2; } } else { /* * Just load this cluster as one chunk. */ #ifdef DEBUG_CLUSTER printf("Loading cluster %d: 0x%lx / 0x%lx\n", i, pfn0, pfn1); #endif phys_avail[phys_avail_cnt] = alpha_ptob(pfn0); phys_avail[phys_avail_cnt+1] = alpha_ptob(pfn1); phys_avail_cnt += 2; } } phys_avail[phys_avail_cnt] = 0; /* * Dump out the MDDT if it looks odd... */ if (mddtweird) { printf("\n"); printf("complete memory cluster information:\n"); for (i = 0; i < mddtp->mddt_cluster_cnt; i++) { printf("mddt %d:\n", i); printf("\tpfn %lx\n", mddtp->mddt_clusters[i].mddt_pfn); printf("\tcnt %lx\n", mddtp->mddt_clusters[i].mddt_pg_cnt); printf("\ttest %lx\n", mddtp->mddt_clusters[i].mddt_pg_test); printf("\tbva %lx\n", mddtp->mddt_clusters[i].mddt_v_bitaddr); printf("\tbpa %lx\n", mddtp->mddt_clusters[i].mddt_p_bitaddr); printf("\tbcksum %lx\n", mddtp->mddt_clusters[i].mddt_bit_cksum); printf("\tusage %lx\n", mddtp->mddt_clusters[i].mddt_usage); } printf("\n"); } Maxmem = physmem; /* * Initialize error message buffer (at end of core). */ { size_t sz = round_page(MSGBUF_SIZE); int i = phys_avail_cnt - 2; /* shrink so that it'll fit in the last segment */ if (phys_avail[i+1] - phys_avail[i] < sz) sz = phys_avail[i+1] - phys_avail[i]; phys_avail[i+1] -= sz; msgbufp = (struct msgbuf*) ALPHA_PHYS_TO_K0SEG(phys_avail[i+1]); msgbufinit(msgbufp, sz); /* Remove the last segment if it now has no pages. */ if (phys_avail[i] == phys_avail[i+1]) phys_avail[i] = 0; /* warn if the message buffer had to be shrunk */ if (sz != round_page(MSGBUF_SIZE)) printf("WARNING: %ld bytes not available for msgbuf in last cluster (%ld used)\n", round_page(MSGBUF_SIZE), sz); } /* * Init mapping for u page(s) for proc 0 */ proc0.p_addr = proc0paddr = (struct user *)pmap_steal_memory(UPAGES * PAGE_SIZE); /* * Initialize the virtual memory system, and set the * page table base register in proc 0's PCB. */ pmap_bootstrap(ALPHA_PHYS_TO_K0SEG(alpha_ptob(ptb)), hwrpb->rpb_max_asn); /* * Initialize the rest of proc 0's PCB, and cache its physical * address. */ proc0.p_md.md_pcbpaddr = (struct pcb *)ALPHA_K0SEG_TO_PHYS((vm_offset_t)&proc0paddr->u_pcb); /* * Set the kernel sp, reserving space for an (empty) trapframe, * and make proc0's trapframe pointer point to it for sanity. */ proc0paddr->u_pcb.pcb_hw.apcb_ksp = (u_int64_t)proc0paddr + USPACE - sizeof(struct trapframe); proc0.p_md.md_tf = (struct trapframe *)proc0paddr->u_pcb.pcb_hw.apcb_ksp; /* * Look at arguments passed to us and compute boothowto. */ boothowto = 0; #ifdef KADB boothowto |= RB_KDB; #endif /* boothowto |= RB_KDB | RB_GDB; */ for (p = bootinfo.boot_flags; p && *p != '\0'; p++) { /* * Note that we'd really like to differentiate case here, * but the Alpha AXP Architecture Reference Manual * says that we shouldn't. */ switch (*p) { case 'a': /* autoboot */ case 'A': boothowto &= ~RB_SINGLE; break; #ifdef DEBUG case 'c': /* crash dump immediately after autoconfig */ case 'C': boothowto |= RB_DUMP; break; #endif #if defined(DDB) case 'd': /* break into the kernel debugger ASAP */ case 'D': boothowto |= RB_KDB; break; case 'g': /* use kernel gdb */ case 'G': boothowto |= RB_GDB; break; #endif case 'h': /* always halt, never reboot */ case 'H': boothowto |= RB_HALT; break; #if 0 case 'm': /* mini root present in memory */ case 'M': boothowto |= RB_MINIROOT; break; #endif case 'n': /* askname */ case 'N': boothowto |= RB_ASKNAME; break; case 's': /* single-user (default, supported for sanity) */ case 'S': boothowto |= RB_SINGLE; break; case 'v': case 'V': boothowto |= RB_VERBOSE; bootverbose = 1; break; default: printf("Unrecognized boot flag '%c'.\n", *p); break; } } /* * Catch case of boot_verbose set in environment. */ if ((p = getenv("boot_verbose")) != NULL) { if (strcmp(p, "yes") == 0 || strcmp(p, "YES") == 0) { boothowto |= RB_VERBOSE; bootverbose = 1; } } /* * Initialize debuggers, and break into them if appropriate. */ #ifdef DDB kdb_init(); if (boothowto & RB_KDB) { printf("Boot flags requested debugger\n"); breakpoint(); } #endif /* * Figure out the number of cpus in the box, from RPB fields. * Really. We mean it. */ for (i = 0; i < hwrpb->rpb_pcs_cnt; i++) { struct pcs *pcsp; pcsp = (struct pcs *)((char *)hwrpb + hwrpb->rpb_pcs_off + (i * hwrpb->rpb_pcs_size)); if ((pcsp->pcs_flags & PCS_PP) != 0) ncpus++; } /* * Figure out our clock frequency, from RPB fields. */ hz = hwrpb->rpb_intr_freq >> 12; if (!(60 <= hz && hz <= 10240)) { hz = 1024; #ifdef DIAGNOSTIC printf("WARNING: unbelievable rpb_intr_freq: %ld (%d hz)\n", hwrpb->rpb_intr_freq, hz); #endif } alpha_pal_wrfen(0); } void bzero(void *buf, size_t len) { caddr_t p = buf; while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { *p++ = 0; len--; } while (len >= sizeof(u_long) * 8) { *(u_long*) p = 0; *((u_long*) p + 1) = 0; *((u_long*) p + 2) = 0; *((u_long*) p + 3) = 0; len -= sizeof(u_long) * 8; *((u_long*) p + 4) = 0; *((u_long*) p + 5) = 0; *((u_long*) p + 6) = 0; *((u_long*) p + 7) = 0; p += sizeof(u_long) * 8; } while (len >= sizeof(u_long)) { *(u_long*) p = 0; len -= sizeof(u_long); p += sizeof(u_long); } while (len) { *p++ = 0; len--; } } void DELAY(int n) { #ifndef SIMOS unsigned long pcc0, pcc1, curcycle, cycles; int usec; if (n == 0) return; pcc0 = alpha_rpcc() & 0xffffffffUL; cycles = 0; usec = 0; while (usec <= n) { /* * Get the next CPU cycle count. The assumption here * is that we can't have wrapped twice past 32 bits worth * of CPU cycles since we last checked. */ pcc1 = alpha_rpcc() & 0xffffffffUL; if (pcc1 < pcc0) { curcycle = (pcc1 + 0x100000000UL) - pcc0; } else { curcycle = pcc1 - pcc0; } /* * We now have the number of processor cycles since we * last checked. Add the current cycle count to the * running total. If it's over cycles_per_usec, increment * the usec counter. */ cycles += curcycle; while (cycles > cycles_per_usec) { usec++; cycles -= cycles_per_usec; } pcc0 = pcc1; } #endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) { struct proc *p = curproc; osiginfo_t *sip, ksi; struct trapframe *frame; struct sigacts *psp = p->p_sigacts; int oonstack, fsize, rndfsize; frame = p->p_md.md_tf; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; fsize = sizeof ksi; rndfsize = ((fsize + 15) / 16) * 16; /* * Allocate and validate space for the signal handler * context. Note that if the stack is in P0 space, the * call to grow() is a nop, and the useracc() check * will fail if the process has not already allocated * the space with a `brk'. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sip = (osiginfo_t *)((caddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size - rndfsize); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sip = (osiginfo_t *)(alpha_pal_rdusp() - rndfsize); (void)grow_stack(p, (u_long)sip); if (!useracc((caddr_t)sip, fsize, VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* * Build the signal context to be used by sigreturn. */ ksi.si_sc.sc_onstack = oonstack; SIG2OSIG(*mask, ksi.si_sc.sc_mask); ksi.si_sc.sc_pc = frame->tf_regs[FRAME_PC]; ksi.si_sc.sc_ps = frame->tf_regs[FRAME_PS]; /* copy the registers. */ fill_regs(p, (struct reg *)ksi.si_sc.sc_regs); ksi.si_sc.sc_regs[R_ZERO] = 0xACEDBADE; /* magic number */ ksi.si_sc.sc_regs[R_SP] = alpha_pal_rdusp(); /* save the floating-point state, if necessary, then copy it. */ alpha_fpstate_save(p, 1); /* XXX maybe write=0 */ ksi.si_sc.sc_ownedfp = p->p_md.md_flags & MDP_FPUSED; bcopy(&p->p_addr->u_pcb.pcb_fp, (struct fpreg *)ksi.si_sc.sc_fpregs, sizeof(struct fpreg)); ksi.si_sc.sc_fp_control = p->p_addr->u_pcb.pcb_fp_control; bzero(ksi.si_sc.sc_reserved, sizeof ksi.si_sc.sc_reserved); /* XXX */ ksi.si_sc.sc_xxx1[0] = 0; /* XXX */ ksi.si_sc.sc_xxx1[1] = 0; /* XXX */ ksi.si_sc.sc_traparg_a0 = frame->tf_regs[FRAME_TRAPARG_A0]; ksi.si_sc.sc_traparg_a1 = frame->tf_regs[FRAME_TRAPARG_A1]; ksi.si_sc.sc_traparg_a2 = frame->tf_regs[FRAME_TRAPARG_A2]; ksi.si_sc.sc_xxx2[0] = 0; /* XXX */ ksi.si_sc.sc_xxx2[1] = 0; /* XXX */ ksi.si_sc.sc_xxx2[2] = 0; /* XXX */ /* Fill in POSIX parts */ ksi.si_signo = sig; ksi.si_code = code; ksi.si_value.sigval_ptr = NULL; /* XXX */ /* * copy the frame out to userland. */ (void) copyout((caddr_t)&ksi, (caddr_t)sip, fsize); /* * Set up the registers to return to sigcode. */ frame->tf_regs[FRAME_PC] = PS_STRINGS - (esigcode - sigcode); frame->tf_regs[FRAME_A0] = sig; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) frame->tf_regs[FRAME_A1] = (u_int64_t)sip; else frame->tf_regs[FRAME_A1] = code; frame->tf_regs[FRAME_A2] = (u_int64_t)&sip->si_sc; frame->tf_regs[FRAME_T12] = (u_int64_t)catcher; /* t12 is pv */ alpha_pal_wrusp((unsigned long)sip); } void sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) { struct proc *p = curproc; struct trapframe *frame; struct sigacts *psp = p->p_sigacts; struct sigframe sf, *sfp; int oonstack, rndfsize; if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } frame = p->p_md.md_tf; oonstack = (p->p_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0; rndfsize = ((sizeof(sf) + 15) / 16) * 16; /* save user context */ bzero(&sf, sizeof(struct sigframe)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; fill_regs(p, (struct reg *)sf.sf_uc.uc_mcontext.mc_regs); sf.sf_uc.uc_mcontext.mc_regs[R_SP] = alpha_pal_rdusp(); sf.sf_uc.uc_mcontext.mc_regs[R_ZERO] = 0xACEDBADE; /* magic number */ sf.sf_uc.uc_mcontext.mc_regs[R_PS] = frame->tf_regs[FRAME_PS]; sf.sf_uc.uc_mcontext.mc_regs[R_PC] = frame->tf_regs[FRAME_PC]; sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A0] = frame->tf_regs[FRAME_TRAPARG_A0]; sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A1] = frame->tf_regs[FRAME_TRAPARG_A1]; sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A2] = frame->tf_regs[FRAME_TRAPARG_A2]; /* * Allocate and validate space for the signal handler * context. Note that if the stack is in P0 space, the * call to grow() is a nop, and the useracc() check * will fail if the process has not already allocated * the space with a `brk'. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)((caddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size - rndfsize); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sfp = (struct sigframe *)(alpha_pal_rdusp() - rndfsize); (void)grow_stack(p, (u_long)sfp); #ifdef DEBUG if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid) printf("sendsig(%d): sig %d ssp %p usp %p\n", p->p_pid, sig, &sf, sfp); #endif if (!useracc((caddr_t)sfp, sizeof(sf), VM_PROT_WRITE)) { #ifdef DEBUG if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid) printf("sendsig(%d): useracc failed on sig %d\n", p->p_pid, sig); #endif /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* save the floating-point state, if necessary, then copy it. */ alpha_fpstate_save(p, 1); sf.sf_uc.uc_mcontext.mc_ownedfp = p->p_md.md_flags & MDP_FPUSED; bcopy(&p->p_addr->u_pcb.pcb_fp, (struct fpreg *)sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(struct fpreg)); sf.sf_uc.uc_mcontext.mc_fp_control = p->p_addr->u_pcb.pcb_fp_control; #ifdef COMPAT_OSF1 /* * XXX Create an OSF/1-style sigcontext and associated goo. */ #endif /* * copy the frame out to userland. */ (void) copyout((caddr_t)&sf, (caddr_t)sfp, sizeof(sf)); #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sendsig(%d): sig %d sfp %p code %lx\n", p->p_pid, sig, sfp, code); #endif /* * Set up the registers to return to sigcode. */ frame->tf_regs[FRAME_PC] = PS_STRINGS - (esigcode - sigcode); frame->tf_regs[FRAME_A0] = sig; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { frame->tf_regs[FRAME_A1] = (u_int64_t)&(sfp->sf_si); /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void*)frame->tf_regs[FRAME_TRAPARG_A0]; } else frame->tf_regs[FRAME_A1] = code; frame->tf_regs[FRAME_A2] = (u_int64_t)&(sfp->sf_uc); frame->tf_regs[FRAME_T12] = (u_int64_t)catcher; /* t12 is pv */ alpha_pal_wrusp((unsigned long)sfp); #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sendsig(%d): pc %lx, catcher %lx\n", p->p_pid, frame->tf_regs[FRAME_PC], frame->tf_regs[FRAME_A3]); if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid) printf("sendsig(%d): sig %d returns\n", p->p_pid, sig); #endif } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ int osigreturn(struct proc *p, struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap) { struct osigcontext *scp, ksc; scp = uap->sigcntxp; /* * Fetch the entire context structure at once for speed. */ if (copyin((caddr_t)scp, (caddr_t)&ksc, sizeof ksc)) return (EFAULT); /* * XXX - Should we do this. What if we get a "handcrafted" * but valid sigcontext that hasn't the magic number? */ if (ksc.sc_regs[R_ZERO] != 0xACEDBADE) /* magic number */ return (EINVAL); /* * Restore the user-supplied information */ if (ksc.sc_onstack) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; /* * longjmp is still implemented by calling osigreturn. The new * sigmask is stored in sc_reserved, sc_mask is only used for * backward compatibility. */ SIGSETOLD(p->p_sigmask, ksc.sc_mask); SIG_CANTMASK(p->p_sigmask); set_regs(p, (struct reg *)ksc.sc_regs); p->p_md.md_tf->tf_regs[FRAME_PC] = ksc.sc_pc; p->p_md.md_tf->tf_regs[FRAME_PS] = (ksc.sc_ps | ALPHA_PSL_USERSET) & ~ALPHA_PSL_USERCLR; alpha_pal_wrusp(ksc.sc_regs[R_SP]); /* XXX ksc.sc_ownedfp ? */ alpha_fpstate_drop(p); bcopy((struct fpreg *)ksc.sc_fpregs, &p->p_addr->u_pcb.pcb_fp, sizeof(struct fpreg)); p->p_addr->u_pcb.pcb_fp_control = ksc.sc_fp_control; return (EJUSTRETURN); } int sigreturn(struct proc *p, struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap) { ucontext_t uc, *ucp; struct pcb *pcb; unsigned long val; if (((struct osigcontext*)uap->sigcntxp)->sc_regs[R_ZERO] == 0xACEDBADE) return osigreturn(p, (struct osigreturn_args *)uap); ucp = uap->sigcntxp; pcb = &p->p_addr->u_pcb; #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sigreturn: pid %d, scp %p\n", p->p_pid, ucp); #endif /* * Fetch the entire context structure at once for speed. */ if (copyin((caddr_t)ucp, (caddr_t)&uc, sizeof(ucontext_t))) return (EFAULT); /* * Restore the user-supplied information */ set_regs(p, (struct reg *)uc.uc_mcontext.mc_regs); val = (uc.uc_mcontext.mc_regs[R_PS] | ALPHA_PSL_USERSET) & ~ALPHA_PSL_USERCLR; p->p_md.md_tf->tf_regs[FRAME_PS] = val; p->p_md.md_tf->tf_regs[FRAME_PC] = uc.uc_mcontext.mc_regs[R_PC]; alpha_pal_wrusp(uc.uc_mcontext.mc_regs[R_SP]); if (uc.uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = uc.uc_sigmask; SIG_CANTMASK(p->p_sigmask); /* XXX ksc.sc_ownedfp ? */ alpha_fpstate_drop(p); bcopy((struct fpreg *)uc.uc_mcontext.mc_fpregs, &p->p_addr->u_pcb.pcb_fp, sizeof(struct fpreg)); p->p_addr->u_pcb.pcb_fp_control = uc.uc_mcontext.mc_fp_control; #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sigreturn(%d): returns\n", p->p_pid); #endif return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { /*alpha_pal_halt(); */ prom_halt(1); } /* * Clear registers on exec */ void setregs(struct proc *p, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tfp = p->p_md.md_tf; bzero(tfp->tf_regs, FRAME_SIZE * sizeof tfp->tf_regs[0]); bzero(&p->p_addr->u_pcb.pcb_fp, sizeof p->p_addr->u_pcb.pcb_fp); p->p_addr->u_pcb.pcb_fp_control = 0; p->p_addr->u_pcb.pcb_fp.fpr_cr = (FPCR_DYN_NORMAL | FPCR_INVD | FPCR_DZED | FPCR_OVFD | FPCR_INED | FPCR_UNFD); alpha_pal_wrusp(stack); tfp->tf_regs[FRAME_PS] = ALPHA_PSL_USERSET; tfp->tf_regs[FRAME_PC] = entry & ~3; tfp->tf_regs[FRAME_A0] = stack; /* a0 = sp */ tfp->tf_regs[FRAME_A1] = 0; /* a1 = rtld cleanup */ tfp->tf_regs[FRAME_A2] = 0; /* a2 = rtld object */ tfp->tf_regs[FRAME_A3] = PS_STRINGS; /* a3 = ps_strings */ tfp->tf_regs[FRAME_T12] = tfp->tf_regs[FRAME_PC]; /* a.k.a. PV */ p->p_md.md_flags &= ~MDP_FPUSED; alpha_fpstate_drop(p); } int ptrace_set_pc(struct proc *p, unsigned long addr) { struct trapframe *tp = p->p_md.md_tf; tp->tf_regs[FRAME_PC] = addr; return 0; } static int ptrace_read_int(struct proc *p, vm_offset_t addr, u_int32_t *v) { struct iovec iov; struct uio uio; iov.iov_base = (caddr_t) v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_procp = p; return procfs_domem(curproc, p, NULL, &uio); } static int ptrace_write_int(struct proc *p, vm_offset_t addr, u_int32_t v) { struct iovec iov; struct uio uio; iov.iov_base = (caddr_t) &v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_procp = p; return procfs_domem(curproc, p, NULL, &uio); } static u_int64_t ptrace_read_register(struct proc *p, int regno) { static int reg_to_frame[32] = { FRAME_V0, FRAME_T0, FRAME_T1, FRAME_T2, FRAME_T3, FRAME_T4, FRAME_T5, FRAME_T6, FRAME_T7, FRAME_S0, FRAME_S1, FRAME_S2, FRAME_S3, FRAME_S4, FRAME_S5, FRAME_S6, FRAME_A0, FRAME_A1, FRAME_A2, FRAME_A3, FRAME_A4, FRAME_A5, FRAME_T8, FRAME_T9, FRAME_T10, FRAME_T11, FRAME_RA, FRAME_T12, FRAME_AT, FRAME_GP, FRAME_SP, -1, /* zero */ }; if (regno == R_ZERO) return 0; return p->p_md.md_tf->tf_regs[reg_to_frame[regno]]; } static int ptrace_clear_bpt(struct proc *p, struct mdbpt *bpt) { return ptrace_write_int(p, bpt->addr, bpt->contents); } static int ptrace_set_bpt(struct proc *p, struct mdbpt *bpt) { int error; u_int32_t bpins = 0x00000080; error = ptrace_read_int(p, bpt->addr, &bpt->contents); if (error) return error; return ptrace_write_int(p, bpt->addr, bpins); } int ptrace_clear_single_step(struct proc *p) { if (p->p_md.md_flags & MDP_STEP2) { ptrace_clear_bpt(p, &p->p_md.md_sstep[1]); ptrace_clear_bpt(p, &p->p_md.md_sstep[0]); p->p_md.md_flags &= ~MDP_STEP2; } else if (p->p_md.md_flags & MDP_STEP1) { ptrace_clear_bpt(p, &p->p_md.md_sstep[0]); p->p_md.md_flags &= ~MDP_STEP1; } return 0; } int ptrace_single_step(struct proc *p) { int error; vm_offset_t pc = p->p_md.md_tf->tf_regs[FRAME_PC]; alpha_instruction ins; vm_offset_t addr[2]; /* places to set breakpoints */ int count = 0; /* count of breakpoints */ if (p->p_md.md_flags & (MDP_STEP1|MDP_STEP2)) panic("ptrace_single_step: step breakpoints not removed"); error = ptrace_read_int(p, pc, &ins.bits); if (error) return error; switch (ins.branch_format.opcode) { case op_j: /* Jump: target is register value */ addr[0] = ptrace_read_register(p, ins.jump_format.rs) & ~3; count = 1; break; case op_br: case op_fbeq: case op_fblt: case op_fble: case op_bsr: case op_fbne: case op_fbge: case op_fbgt: case op_blbc: case op_beq: case op_blt: case op_ble: case op_blbs: case op_bne: case op_bge: case op_bgt: /* Branch: target is pc+4+4*displacement */ addr[0] = pc + 4; addr[1] = pc + 4 + 4 * ins.branch_format.displacement; count = 2; break; default: addr[0] = pc + 4; count = 1; } p->p_md.md_sstep[0].addr = addr[0]; error = ptrace_set_bpt(p, &p->p_md.md_sstep[0]); if (error) return error; if (count == 2) { p->p_md.md_sstep[1].addr = addr[1]; error = ptrace_set_bpt(p, &p->p_md.md_sstep[1]); if (error) { ptrace_clear_bpt(p, &p->p_md.md_sstep[0]); return error; } p->p_md.md_flags |= MDP_STEP2; } else p->p_md.md_flags |= MDP_STEP1; return 0; } int ptrace_read_u_check(p, addr, len) struct proc *p; vm_offset_t addr; size_t len; { vm_offset_t gap; if ((vm_offset_t) (addr + len) < addr) return EPERM; if ((vm_offset_t) (addr + len) <= sizeof(struct user)) return 0; gap = (char *) p->p_md.md_tf - (char *) p->p_addr; if ((vm_offset_t) addr < gap) return EPERM; if ((vm_offset_t) (addr + len) <= (vm_offset_t) (gap + sizeof(struct trapframe))) return 0; return EPERM; } int ptrace_write_u(struct proc *p, vm_offset_t off, long data) { vm_offset_t min; #if 0 struct trapframe frame_copy; struct trapframe *tp; #endif /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_tf - (char *)p->p_addr; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { #if 0 tp = p->p_md.md_tf; frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); #endif *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_fp); if (off >= min && off <= min + sizeof(struct fpreg) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int alpha_pa_access(vm_offset_t pa) { #if 0 int i; for (i = 0; phys_avail[i] != 0; i += 2) { if (pa < phys_avail[i]) continue; if (pa < phys_avail[i+1]) return VM_PROT_READ|VM_PROT_WRITE; } return 0; #else return VM_PROT_READ|VM_PROT_WRITE; #endif } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb = &p->p_addr->u_pcb; struct trapframe *tp = p->p_md.md_tf; tp = p->p_md.md_tf; #define C(r) regs->r_regs[R_ ## r] = tp->tf_regs[FRAME_ ## r] C(V0); C(T0); C(T1); C(T2); C(T3); C(T4); C(T5); C(T6); C(T7); C(S0); C(S1); C(S2); C(S3); C(S4); C(S5); C(S6); C(A0); C(A1); C(A2); C(A3); C(A4); C(A5); C(T8); C(T9); C(T10); C(T11); C(RA); C(T12); C(AT); C(GP); #undef C regs->r_regs[R_ZERO] = tp->tf_regs[FRAME_PC]; regs->r_regs[R_SP] = pcb->pcb_hw.apcb_usp; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb = &p->p_addr->u_pcb; struct trapframe *tp = p->p_md.md_tf; tp = p->p_md.md_tf; #define C(r) tp->tf_regs[FRAME_ ## r] = regs->r_regs[R_ ## r] C(V0); C(T0); C(T1); C(T2); C(T3); C(T4); C(T5); C(T6); C(T7); C(S0); C(S1); C(S2); C(S3); C(S4); C(S5); C(S6); C(A0); C(A1); C(A2); C(A3); C(A4); C(A5); C(T8); C(T9); C(T10); C(T11); C(RA); C(T12); C(AT); C(GP); #undef C tp->tf_regs[FRAME_PC] = regs->r_regs[R_ZERO]; pcb->pcb_hw.apcb_usp = regs->r_regs[R_SP]; return (0); } int fill_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { alpha_fpstate_save(p, 0); bcopy(&p->p_addr->u_pcb.pcb_fp, fpregs, sizeof *fpregs); return (0); } int set_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { alpha_fpstate_drop(p); bcopy(fpregs, &p->p_addr->u_pcb.pcb_fp, sizeof *fpregs); return (0); } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #include /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel) { #if 0 struct partition *p = lp->d_partitions + dkpart(bp->bio_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR && (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->bio_blkno == maxsz) { bp->bio_resid = bp->bio_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->bio_blkno; if (sz <= 0) { bp->bio_error = EINVAL; goto bad; } bp->bio_bcount = sz << DEV_BSHIFT; } bp->bio_pblkno = bp->bio_blkno + p->p_offset; return(1); bad: bp->bio_flags |= BIO_ERROR; #endif return(-1); } static int -sysctl_machdep_adjkerntz (SYSCTL_HANDLER_ARGS) +sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); void alpha_fpstate_check(struct proc *p) { if (p->p_addr->u_pcb.pcb_hw.apcb_flags & ALPHA_PCB_FLAGS_FEN) if (p != fpcurproc) panic("alpha_check_fpcurproc: bogus"); } #define SET_FEN(p) \ (p)->p_addr->u_pcb.pcb_hw.apcb_flags |= ALPHA_PCB_FLAGS_FEN #define CLEAR_FEN(p) \ (p)->p_addr->u_pcb.pcb_hw.apcb_flags &= ~ALPHA_PCB_FLAGS_FEN /* * Save the floating point state in the pcb. Use this to get read-only * access to the floating point state. If write is true, the current * fp process is cleared so that fp state can safely be modified. The * process will automatically reload the changed state by generating a * FEN trap. */ void alpha_fpstate_save(struct proc *p, int write) { if (p == fpcurproc) { /* * If curproc != fpcurproc, then we need to enable FEN * so that we can dump the fp state. */ alpha_pal_wrfen(1); /* * Save the state in the pcb. */ savefpstate(&p->p_addr->u_pcb.pcb_fp); if (write) { /* * If fpcurproc == curproc, just ask the * PALcode to disable FEN, otherwise we must * clear the FEN bit in fpcurproc's pcb. */ if (fpcurproc == curproc) alpha_pal_wrfen(0); else CLEAR_FEN(fpcurproc); fpcurproc = NULL; } else { /* * Make sure that we leave FEN enabled if * curproc == fpcurproc. We must have at most * one process with FEN enabled. Note that FEN * must already be set in fpcurproc's pcb. */ if (curproc != fpcurproc) alpha_pal_wrfen(0); } } } /* * Relinquish ownership of the FP state. This is called instead of * alpha_save_fpstate() if the entire FP state is being changed * (e.g. on sigreturn). */ void alpha_fpstate_drop(struct proc *p) { if (p == fpcurproc) { if (p == curproc) { /* * Disable FEN via the PALcode. This will * clear the bit in the pcb as well. */ alpha_pal_wrfen(0); } else { /* * Clear the FEN bit of the pcb. */ CLEAR_FEN(p); } fpcurproc = NULL; } } /* * Switch the current owner of the fp state to p, reloading the state * from the pcb. */ void alpha_fpstate_switch(struct proc *p) { /* * Enable FEN so that we can access the fp registers. */ alpha_pal_wrfen(1); if (fpcurproc) { /* * Dump the old fp state if its valid. */ savefpstate(&fpcurproc->p_addr->u_pcb.pcb_fp); CLEAR_FEN(fpcurproc); } /* * Remember the new FP owner and reload its state. */ fpcurproc = p; restorefpstate(&fpcurproc->p_addr->u_pcb.pcb_fp); /* * If the new owner is curproc, leave FEN enabled, otherwise * mark its PCB so that it gets FEN when we context switch to * it later. */ if (p != curproc) { alpha_pal_wrfen(0); SET_FEN(p); } p->p_md.md_flags |= MDP_FPUSED; } Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c (revision 62572) +++ head/sys/amd64/amd64/machdep.c (revision 62573) @@ -1,2435 +1,2435 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "apm.h" #include "npx.h" #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" #include "opt_smp.h" #include "opt_user_ldt.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #ifdef SMP #include #include #endif #ifdef PERFMON #include #endif #ifdef OLD_BUS_ARCH #include #endif #include #include #include #include #include extern void init386 __P((int first)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif #ifdef PC98 static int ispc98 = 1; #else static int ispc98 = 0; #endif SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); int physmem = 0; int cold = 1; static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code)); static int -sysctl_hw_physmem (SYSCTL_HANDLER_ARGS) +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int -sysctl_hw_usermem (SYSCTL_HANDLER_ARGS) +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); static int -sysctl_hw_availpages (SYSCTL_HANDLER_ARGS) +sysctl_hw_availpages(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, i386_btop(avail_end - avail_start), req); return (error); } SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_availpages, "I", ""); static int -sysctl_machdep_msgbuf (SYSCTL_HANDLER_ARGS) +sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) { int error; /* Unwind the buffer, so that it's linear (possibly starting with * some initial nulls). */ error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, msgbufp->msg_size-msgbufp->msg_bufr,req); if(error) return(error); if(msgbufp->msg_bufr>0) { error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, msgbufp->msg_bufr,req); } return(error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); static int msgbuf_clear; static int -sysctl_machdep_msgbuf_clear (SYSCTL_HANDLER_ARGS) +sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) { /* Clear the buffer and reset write pointer */ bzero(msgbufp->msg_ptr,msgbufp->msg_size); msgbufp->msg_bufr=msgbufp->msg_bufx=0; msgbuf_clear=0; } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", "Clear kernel message buffer"); int bootverbose = 0, Maxmem = 0; long dumplo; vm_offset_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; #define offsetof(type, member) ((size_t)(&((type *)0)->member)) static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); earlysetcpuclass(); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %u (%uK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08x - 0x%08x, %u bytes (%u pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } /* * Calculate callout wheel size */ for (callwheelsize = 1, callwheelbits = 0; callwheelsize < ncallout; callwheelsize <<= 1, ++callwheelbits) ; callwheelmask = callwheelsize - 1; /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); valloc(callwheel, struct callout_tailq, callwheelsize); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/20 of our ram over 64MB. * * factor represents the 1/4 x ram conversion. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / PAGE_SIZE; nbuf = 50; if (physmem > 1024) nbuf += min((physmem - 1024) / factor, 16384 / factor); if (physmem > 16384) nbuf += (physmem - 16384) * 2 / (factor * 5); } /* * Do not allow the buffer_map to be more then 1/2 the size of the * kernel_map. */ if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2)) { nbuf = (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2); printf("Warning: nbufs capped at %d\n", nbuf); } nswbuf = max(min(nbuf/4, 256), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); v = bufhashinit(v); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE)); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size); pager_map->system_map = 1; exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size); mb_map->system_map = 1; } /* * Initialize callouts */ SLIST_INIT(&callfree); for (i = 0; i < ncallout; i++) { callout_init(&callout[i]); callout[i].c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle); } for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&callwheel[i]); } #if defined(USERCONFIG) userconfig(); cninit(); /* the preferred console may have changed */ #endif printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! */ mp_start(); /* fire up the APs and APICs */ mp_announce(); #endif /* SMP */ } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } int unregister_netisr(num) int num; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("unregister_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = NULL; return (0); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf; struct osigframe *fp; struct proc *p; struct sigacts *psp; struct trapframe *regs; int oonstack; p = curproc; psp = p->p_sigacts; regs = p->p_md.md_regs; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else fp = (struct osigframe *)regs->tf_esp - 1; /* * grow_stack() will return 0 if *fp does not fit inside the stack * and the stack can not be grown. * useracc() will return FALSE if access is denied. */ if (grow_stack(p, (int)fp) == 0 || !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = oonstack; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf; struct proc *p; struct sigacts *psp; struct trapframe *regs; struct sigframe *sfp; int oonstack; p = curproc; psp = p->p_sigacts; if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } regs = p->p_md.md_regs; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sfp = (struct sigframe *)regs->tf_esp - 1; /* * grow_stack() will return 0 if *sfp does not fit inside the stack * and the stack can not be grown. * useracc() will return FALSE if access is denied. */ if (grow_stack(p, (int)sfp) == 0 || !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG printf("process %d has trashed its stack\n", p->p_pid); #endif SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill siginfo structure. */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * We should never have PSL_T set when returning from vm86 * mode. It may be set here if we deliver a signal before * getting to vm86 mode, so turn it off. * * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ int osigreturn(p, uap) struct proc *p; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct trapframe *regs; struct osigcontext *scp; int eflags; regs = p->p_md.md_regs; scp = uap->sigcntxp; if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ)) return (EFAULT); eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; if (scp->sc_onstack & 01) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; return (EJUSTRETURN); } int sigreturn(p, uap) struct proc *p; struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap; { struct trapframe *regs; ucontext_t *ucp; int cs, eflags; ucp = uap->sigcntxp; if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ)) return (EFAULT); if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516) return (osigreturn(p, (struct osigreturn_args *)uap)); /* * Since ucp is not an osigcontext but a ucontext_t, we have to * check again if all of it is accessible. A ucontext_t is * much larger, so instead of just checking for the pointer * being valid for the size of an osigcontext, now check for * it being valid for a whole, new-style ucontext_t. */ if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ)) return (EFAULT); regs = p->p_md.md_regs; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Clear registers on exec */ void setregs(p, entry, stack, ps_strings) struct proc *p; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = p->p_md.md_regs; struct pcb *pcb = &p->p_addr->u_pcb; #ifdef USER_LDT /* was i386_user_cleanup() in NetBSD */ user_ldt_free(pcb); #endif bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* reset %gs as well */ if (pcb == curpcb) load_gs(_udatasel); else pcb->pcb_gs = _udatasel; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); #if NNPX > 0 /* Initialize the npx (if any) for the current process. */ npxinit(__INITIAL_NPXCW__); #endif /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ p->p_retval[1] = 0; } static int -sysctl_machdep_adjkerntz (SYSCTL_HANDLER_ARGS) +sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; #ifdef SMP union descriptor gdt[NGDT * NCPU]; /* global descriptor table */ #else union descriptor gdt[NGDT]; /* global descriptor table */ #endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif #ifndef SMP extern struct segment_descriptor common_tssd, *tss_gdt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. */ static void getmemsize(int first) { int i, physmap_idx, pa_indx; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_offset_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t pte; const char *cp; struct { u_int64_t base; u_int64_t length; u_int32_t type; } *smap; bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { pte = (pt_entry_t)vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. */ pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT)); *pte = (1 << PAGE_SHIFT) | PG_RW | PG_V; /* * get memory map with INT 15:E820 */ #define SMAPSIZ sizeof(*smap) #define SMAP_SIG 0x534D4150 /* 'SMAP' */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = SMAPSIZ; i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n", smap->type, *(u_int32_t *)((char *)&smap->base + 4), (u_int32_t)smap->base, *(u_int32_t *)((char *)&smap->length + 4), (u_int32_t)smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: } while (vmf.vmf_ebx != 0); if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.maxmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %uK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa(Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; #if 0 pte = (pt_entry_t)vtopte(KERNBASE); #else pte = (pt_entry_t)CMAP1; #endif /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_offset_t end; end = ptoa(Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; #if 0 int *ptr = 0; #else int *ptr = (int *)CADDR1; #endif /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } void init386(first) int first; { int x; struct gate_descriptor *gdp; int gsel_tss; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif int off; /* * Prevent lowering of the ipl if we call tsleep() early. */ safepri = cpl; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; #ifdef SMP gdt_segs[GPRIV_SEL].ssd_limit = i386_btop(sizeof(struct privatespace)) - 1; gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0]; gdt_segs[GPROC0_SEL].ssd_base = (int) &SMP_prvspace[0].globaldata.gd_common_tss; SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0]; #else gdt_segs[GPRIV_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GPROC0_SEL].ssd_base = (int) &common_tss; #endif for (x = 0; x < NGDT; x++) { #ifdef BDE_DEBUGGER /* avoid overwriting db entries with APM ones */ if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL) continue; #endif ssdtosd(&gdt_segs[x], &gdt[x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. */ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); #ifdef USER_LDT currentldt = _default_ldt; #endif /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; tss_gdt = &gdt[GPROC0_SEL].sd; common_tssd = *tss_gdt; common_tss.tss_ioopt = (sizeof common_tss) << 16; ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = (int)IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; #ifdef SMP proc0.p_addr->u_pcb.pcb_mpnest = 1; #endif proc0.p_addr->u_pcb.pcb_ext = 0; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(p, addr) struct proc *p; unsigned long addr; { p->p_md.md_regs->tf_eip = addr; return (0); } int ptrace_single_step(p) struct proc *p; { p->p_md.md_regs->tf_eflags |= PSL_T; return (0); } int ptrace_read_u_check(p, addr, len) struct proc *p; vm_offset_t addr; size_t len; { vm_offset_t gap; if ((vm_offset_t) (addr + len) < addr) return EPERM; if ((vm_offset_t) (addr + len) <= sizeof(struct user)) return 0; gap = (char *) p->p_md.md_regs - (char *) p->p_addr; if ((vm_offset_t) addr < gap) return EPERM; if ((vm_offset_t) (addr + len) <= (vm_offset_t) (gap + sizeof(struct trapframe))) return 0; return EPERM; } int ptrace_write_u(p, off, data) struct proc *p; vm_offset_t off; long data; { struct trapframe frame_copy; vm_offset_t min; struct trapframe *tp; /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_regs - (char *)p->p_addr; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = p->p_md.md_regs; frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = &p->p_addr->u_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = &p->p_addr->u_pcb; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs); return (0); } int set_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs); return (0); } int fill_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; pcb = &p->p_addr->u_pcb; dbregs->dr0 = pcb->pcb_dr0; dbregs->dr1 = pcb->pcb_dr1; dbregs->dr2 = pcb->pcb_dr2; dbregs->dr3 = pcb->pcb_dr3; dbregs->dr4 = 0; dbregs->dr5 = 0; dbregs->dr6 = pcb->pcb_dr6; dbregs->dr7 = pcb->pcb_dr7; return (0); } int set_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; pcb = &p->p_addr->u_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (p->p_ucred->cr_uid != 0) { if (dbregs->dr7 & 0x3) { /* dr0 is enabled */ if (dbregs->dr0 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr1 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr2 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr3 >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr0; pcb->pcb_dr1 = dbregs->dr1; pcb->pcb_dr2 = dbregs->dr2; pcb->pcb_dr3 = dbregs->dr3; pcb->pcb_dr6 = dbregs->dr6; pcb->pcb_dr7 = dbregs->dr7; pcb->pcb_flags |= PCB_DBREGS; return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i=0; i /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel) { struct partition *p = lp->d_partitions + dkpart(bp->bio_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR && (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->bio_blkno == maxsz) { bp->bio_resid = bp->bio_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->bio_blkno; if (sz <= 0) { bp->bio_error = EINVAL; goto bad; } bp->bio_bcount = sz << DEV_BSHIFT; } bp->bio_pblkno = bp->bio_blkno + p->p_offset; return(1); bad: bp->bio_flags |= BIO_ERROR; return(-1); } #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/amd64/amd64/prof_machdep.c =================================================================== --- head/sys/amd64/amd64/prof_machdep.c (revision 62572) +++ head/sys/amd64/amd64/prof_machdep.c (revision 62573) @@ -1,355 +1,355 @@ /*- * Copyright (c) 1996 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifdef GUPROF #include "opt_i586_guprof.h" #include "opt_perfmon.h" #include #include #include #include #include #include #include #include #undef MCOUNT #endif #include #ifdef PC98 #include #else #include #endif #include #ifdef GUPROF #define CPUTIME_CLOCK_UNINITIALIZED 0 #define CPUTIME_CLOCK_I8254 1 #define CPUTIME_CLOCK_TSC 2 #define CPUTIME_CLOCK_I586_PMC 3 #define CPUTIME_CLOCK_I8254_SHIFT 7 int cputime_bias = 1; /* initialize for locality of reference */ static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; #ifdef I586_PMC_GUPROF static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; static int cputime_clock_pmc_init; static struct gmonparam saved_gmp; #endif #endif /* GUPROF */ #ifdef __GNUC__ __asm(" \n\ GM_STATE = 0 \n\ GMON_PROF_OFF = 3 \n\ \n\ .text \n\ .p2align 4,0x90 \n\ .globl __mcount \n\ .type __mcount,@function \n\ __mcount: \n\ # \n\ # Check that we are profiling. Do it early for speed. \n\ # \n\ cmpl $GMON_PROF_OFF," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ je .mcount_exit \n\ # \n\ # __mcount is the same as [.]mcount except the caller \n\ # hasn't changed the stack except to call here, so the \n\ # caller's raddr is above our raddr. \n\ # \n\ movl 4(%esp),%edx \n\ jmp .got_frompc \n\ \n\ .p2align 4,0x90 \n\ .globl " __XSTRING(HIDENAME(mcount)) " \n\ " __XSTRING(HIDENAME(mcount)) ": \n\ cmpl $GMON_PROF_OFF," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ je .mcount_exit \n\ # \n\ # The caller's stack frame has already been built, so \n\ # %ebp is the caller's frame pointer. The caller's \n\ # raddr is in the caller's frame following the caller's \n\ # caller's frame pointer. \n\ # \n\ movl 4(%ebp),%edx \n\ .got_frompc: \n\ # \n\ # Our raddr is the caller's pc. \n\ # \n\ movl (%esp),%eax \n\ \n\ pushfl \n\ pushl %eax \n\ pushl %edx \n\ cli \n\ call " __XSTRING(CNAME(mcount)) " \n\ addl $8,%esp \n\ popfl \n\ .mcount_exit: \n\ ret \n\ "); #else /* !__GNUC__ */ #error #endif /* __GNUC__ */ #ifdef GUPROF /* * [.]mexitcount saves the return register(s), loads selfpc and calls * mexitcount(selfpc) to do the work. Someday it should be in a machine * dependent file together with cputime(), __mcount and [.]mcount. cputime() * can't just be put in machdep.c because it has to be compiled without -pg. */ #ifdef __GNUC__ __asm(" \n\ .text \n\ # \n\ # Dummy label to be seen when gprof -u hides [.]mexitcount. \n\ # \n\ .p2align 4,0x90 \n\ .globl __mexitcount \n\ .type __mexitcount,@function \n\ __mexitcount: \n\ nop \n\ \n\ GMON_PROF_HIRES = 4 \n\ \n\ .p2align 4,0x90 \n\ .globl " __XSTRING(HIDENAME(mexitcount)) " \n\ " __XSTRING(HIDENAME(mexitcount)) ": \n\ cmpl $GMON_PROF_HIRES," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ jne .mexitcount_exit \n\ pushl %edx \n\ pushl %eax \n\ movl 8(%esp),%eax \n\ pushfl \n\ pushl %eax \n\ cli \n\ call " __XSTRING(CNAME(mexitcount)) " \n\ addl $4,%esp \n\ popfl \n\ popl %eax \n\ popl %edx \n\ .mexitcount_exit: \n\ ret \n\ "); #else /* !__GNUC__ */ #error #endif /* __GNUC__ */ /* * Return the time elapsed since the last call. The units are machine- * dependent. */ int cputime() { u_int count; int delta; #if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) && \ defined(PERFMON) && defined(I586_PMC_GUPROF) u_quad_t event_count; #endif u_char high, low; static u_int prev_count; #if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) if (cputime_clock == CPUTIME_CLOCK_TSC) { count = (u_int)rdtsc(); delta = (int)(count - prev_count); prev_count = count; return (delta); } #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { /* * XXX permon_read() should be inlined so that the * perfmon module doesn't need to be compiled with * profiling disabled and so that it is fast. */ perfmon_read(0, &event_count); count = (u_int)event_count; delta = (int)(count - prev_count); prev_count = count; return (delta); } #endif /* PERFMON && I586_PMC_GUPROF */ #endif /* (I586_CPU || I686_CPU) && !SMP */ /* * Read the current value of the 8254 timer counter 0. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; /* * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. * While profiling is enabled, this routine is called at least twice * per timer reset (for mcounting and mexitcounting hardclock()), * so at most one reset has occurred since the last call, and one * has occurred iff the current count is larger than the previous * count. This allows counter underflow to be detected faster * than in microtime(). */ delta = prev_count - count; prev_count = count; if ((int) delta <= 0) return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT)); return (delta); } static int -sysctl_machdep_cputime_clock (SYSCTL_HANDLER_ARGS) +sysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) { int clock; int error; #if defined(PERFMON) && defined(I586_PMC_GUPROF) int event; struct pmc pmc; #endif clock = cputime_clock; #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (clock == CPUTIME_CLOCK_I586_PMC) { pmc.pmc_val = cputime_clock_pmc_conf; clock += pmc.pmc_event; } #endif error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); if (error == 0 && req->newptr != NULL) { #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (clock >= CPUTIME_CLOCK_I586_PMC) { event = clock - CPUTIME_CLOCK_I586_PMC; if (event >= 256) return (EINVAL); pmc.pmc_num = 0; pmc.pmc_event = event; pmc.pmc_unit = 0; pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; pmc.pmc_mask = 0; cputime_clock_pmc_conf = pmc.pmc_val; cputime_clock = CPUTIME_CLOCK_I586_PMC; } else #endif { if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) return (EINVAL); cputime_clock = clock; } } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); /* * The start and stop routines need not be here since we turn off profiling * before calling them. They are here for convenience. */ void startguprof(gp) struct gmonparam *gp; { if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { cputime_clock = CPUTIME_CLOCK_I8254; #if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) if (tsc_freq != 0) cputime_clock = CPUTIME_CLOCK_TSC; #endif } gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; #if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) if (cputime_clock == CPUTIME_CLOCK_TSC) gp->profrate = tsc_freq; #if defined(PERFMON) && defined(I586_PMC_GUPROF) else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { if (perfmon_avail() && perfmon_setup(0, cputime_clock_pmc_conf) == 0) { if (perfmon_start(0) != 0) perfmon_fini(0); else { /* XXX 1 event == 1 us. */ gp->profrate = 1000000; saved_gmp = *gp; /* Zap overheads. They are invalid. */ gp->cputime_overhead = 0; gp->mcount_overhead = 0; gp->mcount_post_overhead = 0; gp->mcount_pre_overhead = 0; gp->mexitcount_overhead = 0; gp->mexitcount_post_overhead = 0; gp->mexitcount_pre_overhead = 0; cputime_clock_pmc_init = TRUE; } } } #endif /* PERFMON && I586_PMC_GUPROF */ #endif /* (I586_CPU || I686_CPU) && !SMP */ cputime_bias = 0; cputime(); } void stopguprof(gp) struct gmonparam *gp; { #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (cputime_clock_pmc_init) { *gp = saved_gmp; perfmon_fini(0); cputime_clock_pmc_init = FALSE; } #endif } #else /* !GUPROF */ #ifdef __GNUC__ __asm(" \n\ .text \n\ .p2align 4,0x90 \n\ .globl " __XSTRING(HIDENAME(mexitcount)) " \n\ " __XSTRING(HIDENAME(mexitcount)) ": \n\ ret \n\ "); #else /* !__GNUC__ */ #error #endif /* __GNUC__ */ #endif /* GUPROF */ Index: head/sys/amd64/amd64/tsc.c =================================================================== --- head/sys/amd64/amd64/tsc.c (revision 62572) +++ head/sys/amd64/amd64/tsc.c (revision 62573) @@ -1,1279 +1,1279 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Routines to handle clock hardware. */ /* * inittodr, settodr and support routines written * by Christoph Robitschko * * reintroduced and updated by Chris Stenton 8/10/94 */ #include "opt_clock.h" #include "apm.h" #include #include #include #include #include #include #ifndef SMP #include #endif #include #include #include #ifdef CLK_CALIBRATION_LOOP #endif #include #include #include #include #include #include #ifdef APIC_IO #include #endif #if defined(SMP) || defined(APIC_IO) #include #endif /* SMP || APIC_IO */ #include #include #include #include #include #include #include #include "mca.h" #if NMCA > 0 #include #endif #ifdef SMP #define disable_intr() CLOCK_DISABLE_INTR() #define enable_intr() CLOCK_ENABLE_INTR() #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif #endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) #define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x)) /* * Time in timer cycles that it takes for microtime() to disable interrupts * and latch the count. microtime() currently uses "cli; outb ..." so it * normally takes less than 2 timer cycles. Add a few for cache misses. * Add a few more to allow for latency in bogus calls to microtime() with * interrupts already disabled. */ #define TIMER0_LATCH_COUNT 20 /* * Maximum frequency that we are willing to allow for timer0. Must be * low enough to guarantee that the timer interrupt handler returns * before the next timer interrupt. */ #define TIMER0_MAX_FREQ 20000 int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int disable_rtc_set; /* disable resettodr() if != 0 */ volatile u_int idelayed; int statclock_disable; u_int stat_imask = SWI_LOW_MASK; #ifndef TIMER_FREQ #define TIMER_FREQ 1193182 #endif u_int timer_freq = TIMER_FREQ; int timer0_max_count; u_int tsc_freq; int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; static u_int clk_imask = HWI_MASK | SWI_MASK; static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; /* * XXX new_function and timer_func should not handle clockframes, but * timer_func currently needs to hold hardclock to handle the * timer0_state == 0 case. We should use inthand_add()/inthand_remove() * to switch between clkintr() and a slightly different timerintr(). */ static void (*new_function) __P((struct clockframe *frame)); static u_int new_rate; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; static u_int timer0_prescaler_count; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer0_state; static u_char timer2_state; static void (*timer_func) __P((struct clockframe *frame)) = hardclock; static u_int tsc_present; static unsigned i8254_get_timecount __P((struct timecounter *tc)); static unsigned tsc_get_timecount __P((struct timecounter *tc)); static void set_timer_freq(u_int freq, int intr_freq); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "TSC" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, &tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", ""); static struct timecounter i8254_timecounter = { i8254_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "i8254" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", ""); static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { disable_intr(); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += timer0_max_count; i8254_lastcount = 0; } clkintr_pending = 0; enable_intr(); } timer_func(&frame); switch (timer0_state) { case RELEASED: setdelayed(); break; case ACQUIRED: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); setdelayed(); } break; case ACQUIRE_PENDING: disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); break; case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); setdelayed(); } break; } #if NMCA > 0 /* Reset clock interrupt by asserting bit 7 of port 0x61 */ if (MCA_system) outb(0x61, inb(0x61) | 0x80); #endif } /* * The acquire and release functions must be called at ipl >= splclock(). */ int acquire_timer0(int rate, void (*function) __P((struct clockframe *frame))) { static int old_rate; if (rate <= 0 || rate > TIMER0_MAX_FREQ) return (-1); switch (timer0_state) { case RELEASED: timer0_state = ACQUIRE_PENDING; break; case RELEASE_PENDING: if (rate != old_rate) return (-1); /* * The timer has been released recently, but is being * re-acquired before the release completed. In this * case, we simply reclaim it as if it had not been * released at all. */ timer0_state = ACQUIRED; break; default: return (-1); /* busy */ } new_function = function; old_rate = new_rate = rate; return (0); } int acquire_timer2(int mode) { if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); return (0); } int release_timer0() { switch (timer0_state) { case ACQUIRED: timer0_state = RELEASE_PENDING; break; case ACQUIRE_PENDING: /* Nothing happened yet, release quickly. */ timer0_state = RELEASED; break; default: return (-1); } return (0); } int release_timer2() { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); return (0); } /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static void rtcintr(struct clockframe frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) statclock(&frame); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ static int getit(void) { u_long ef; int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); write_eflags(ef); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Guard against the timer being uninitialized if we are called * early for console i/o. */ if (timer0_max_count == 0) set_timer_freq(timer_freq, hz); /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += timer0_max_count; /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void sysbeepstop(void *chan) { outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ release_timer2(); beeping = 0; } int sysbeep(int pitch, int period) { int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } splx(x); return (0); } /* * RTC support routines */ int rtcin(reg) int reg; { int s; u_char val; s = splhigh(); outb(IO_RTC, reg); inb(0x84); val = inb(IO_RTC + 1); inb(0x84); splx(s); return (val); } static __inline void writertc(u_char reg, u_char val) { int s; s = splhigh(); inb(0x84); outb(IO_RTC, reg); inb(0x84); outb(IO_RTC + 1, val); inb(0x84); /* XXX work around wrong order in rtcin() */ splx(s); } static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } static u_int calibrate_clocks(void) { u_int64_t old_tsc; u_int count, prev_count, tot_count; int sec, start_sec, timeout; if (bootverbose) printf("Calibrating clock(s) ... "); if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto fail; timeout = 100000000; /* Read the mc146818A seconds counter. */ for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); break; } if (--timeout == 0) goto fail; } /* Wait for the mC146818A seconds counter to change. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); if (sec != start_sec) break; } if (--timeout == 0) goto fail; } /* Start keeping track of the i8254 counter. */ prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) old_tsc = rdtsc(); else old_tsc = 0; /* shut up gcc */ /* * Wait for the mc146818A seconds counter to change. Read the i8254 * counter for each iteration since this is convenient and only * costs a few usec of inaccuracy. The timing of the final reads * of the counters almost matches the timing of the initial reads, * so the main cause of inaccuracy is the varying latency from * inside getit() or rtcin(RTC_STATUSA) to the beginning of the * rtcin(RTC_SEC) that returns a changed seconds count. The * maximum inaccuracy from this cause is < 10 usec on 486's. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) sec = rtcin(RTC_SEC); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if (sec != start_sec) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc() - old_tsc; if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } static void set_timer_freq(u_int freq, int intr_freq) { u_long ef; int new_timer0_max_count; ef = read_eflags(); disable_intr(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { timer0_max_count = new_timer0_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); write_eflags(ef); } /* * i8254_restore is called from apm_default_resume() to reload * the countdown register. * this should not be necessary but there are broken laptops that * do not restore the countdown register on resume. * when it happnes, it messes up the hardclock interval and system clock, * which leads to the infamous "calcru: negative time" problem. */ void i8254_restore(void) { u_long ef; ef = read_eflags(); disable_intr(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); write_eflags(ef); } /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. */ void startrtclock() { u_int delta, freq; if (cpu_feature & CPUID_TSC) tsc_present = 1; else tsc_present = 0; writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); set_timer_freq(timer_freq, hz); freq = calibrate_clocks(); #ifdef CLK_CALIBRATION_LOOP if (bootverbose) { printf( "Press a key on the console to abort clock calibration\n"); while (cncheckc() == -1) calibrate_clocks(); } #endif /* * Use the calibrated i8254 frequency if it seems reasonable. * Otherwise use the default, and don't use the calibrated i586 * frequency. */ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq; if (delta < timer_freq / 100) { #ifndef CLK_USE_I8254_CALIBRATION if (bootverbose) printf( "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); freq = timer_freq; #endif timer_freq = freq; } else { if (bootverbose) printf( "%d Hz differs from default of %d Hz by more than 1%%\n", freq, timer_freq); tsc_freq = 0; } set_timer_freq(timer_freq, hz); i8254_timecounter.tc_frequency = timer_freq; tc_init(&i8254_timecounter); #ifndef CLK_USE_TSC_CALIBRATION if (tsc_freq != 0) { if (bootverbose) printf( "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); tsc_freq = 0; } #endif if (tsc_present && tsc_freq == 0) { /* * Calibration of the i586 clock relative to the mc146818A * clock failed. Do a less accurate calibration relative * to the i8254 clock. */ u_int64_t old_tsc = rdtsc(); DELAY(1000000); tsc_freq = rdtsc() - old_tsc; #ifdef CLK_USE_TSC_CALIBRATION if (bootverbose) printf("TSC clock: %u Hz (Method B)\n", tsc_freq); #endif } #if !defined(SMP) /* * We can not use the TSC in SMP mode, until we figure out a * cheap (impossible), reliable and precise (yeah right!) way * to synchronize the TSCs of all the CPUs. * Curse Intel for leaving the counter out of the I/O APIC. */ #if NAPM > 0 /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 looses too of course * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ return; #endif /* NAPM > 0 */ if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } #endif /* !defined(SMP) */ } /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { unsigned long sec, days; int yd; int year, month; int y, m, s; struct timespec ts; if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; yd = days; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; if (disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); } /* * Start both clocks running. */ void cpu_initclocks() { int diag; #ifdef APIC_IO int apic_8254_trial; struct intrec *clkdesc; #endif /* APIC_IO */ if (statclock_disable) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt * flag which would normally cause the RTC to generate * interrupts. */ stat_imask = HWI_MASK | SWI_MASK; rtc_statusb = RTCSB_24HR; } else { /* Setting stathz to nonzero early helps avoid races. */ stathz = RTC_NOPROFRATE; profhz = RTC_PROFRATE; } /* Finish initializing 8253 timer 0. */ #ifdef APIC_IO apic_8254_intr = isa_apic_irq(0); apic_8254_trial = 0; if (apic_8254_intr >= 0 ) { if (apic_int_type(0, 0) == 3) apic_8254_trial = 1; } else { /* look for ExtInt on pin 0 */ if (apic_int_type(0, 0) == 3) { apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); #else /* APIC_IO */ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(IRQ0); #endif /* APIC_IO */ /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ if (statclock_disable) return; diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); #endif /* APIC_IO */ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, INTR_EXCL); #ifdef APIC_IO INTREN(APIC_IRQ8); #else INTREN(IRQ8); #endif /* APIC_IO */ writertc(RTC_STATUSB, rtc_statusb); #ifdef APIC_IO if (apic_8254_trial) { printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ if (read_intr_count(apic_8254_intr) < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ INTRDIS(1 << apic_8254_intr); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); /* * Revoke current ISA IRQ 0 assignment and * configure a fallback interrupt routing from * the 8254 Timer via the 8259 PIC to the * an ExtInt interrupt line on IOAPIC #0 intpin 0. * We reuse the low level interrupt handler number. */ if (apic_irq(0, 0) < 0) { revoke_apic_irq(apic_8254_intr); assign_apic_irq(0, 0, apic_8254_intr); } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); } } if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); else printf("APIC_IO: " "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); #endif } #ifdef APIC_IO static u_long read_intr_count(int vec) { u_long *up; up = intr_countp[vec]; if (up) return *up; return 0UL; } static void setup_8254_mixed_mode() { /* * Allow 8254 timer to INTerrupt 8259: * re-initialize master 8259: * reset; prog 4 bytes, single ICU, edge triggered */ outb(IO_ICU1, 0x13); outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 1, 0x00); /* ignore slave */ outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ /* program IO APIC for type 3 INT on INT0 */ if (ext_int_setup(0, 0) < 0) panic("8254 redirect via APIC pin0 impossible!"); } #endif void setstatclockrate(int newhz) { if (newhz == RTC_PROFRATE) rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; else rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); } static int -sysctl_machdep_i8254_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is is too generic. Should use it everywhere. */ freq = timer_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { if (timer0_state != RELEASED) return (EBUSY); /* too much trouble to handle */ set_timer_freq(freq, hz); i8254_timecounter.tc_frequency = freq; tc_update(&i8254_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", ""); static int -sysctl_machdep_tsc_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (tsc_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = tsc_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { tsc_freq = freq; tsc_timecounter.tc_frequency = tsc_freq; tc_update(&tsc_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", ""); static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; u_long ef; u_int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. */ (lapic_irr1 & (1 << apic_8254_intr)))) #else (inb(IO_ICU1) & 1))) #endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; } i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); write_eflags(ef); return (count); } static unsigned tsc_get_timecount(struct timecounter *tc) { return (rdtsc()); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int attimer_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids)) <= 0) device_quiet(dev); return(result); } static int attimer_attach(device_t dev) { return(0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX restart statclock? */ { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, 1, /* no softc */ }; static devclass_t attimer_devclass; DRIVER_MODULE(attimer, isa, attimer_driver, attimer_devclass, 0, 0); Index: head/sys/amd64/isa/clock.c =================================================================== --- head/sys/amd64/isa/clock.c (revision 62572) +++ head/sys/amd64/isa/clock.c (revision 62573) @@ -1,1279 +1,1279 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Routines to handle clock hardware. */ /* * inittodr, settodr and support routines written * by Christoph Robitschko * * reintroduced and updated by Chris Stenton 8/10/94 */ #include "opt_clock.h" #include "apm.h" #include #include #include #include #include #include #ifndef SMP #include #endif #include #include #include #ifdef CLK_CALIBRATION_LOOP #endif #include #include #include #include #include #include #ifdef APIC_IO #include #endif #if defined(SMP) || defined(APIC_IO) #include #endif /* SMP || APIC_IO */ #include #include #include #include #include #include #include #include "mca.h" #if NMCA > 0 #include #endif #ifdef SMP #define disable_intr() CLOCK_DISABLE_INTR() #define enable_intr() CLOCK_ENABLE_INTR() #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif #endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) #define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x)) /* * Time in timer cycles that it takes for microtime() to disable interrupts * and latch the count. microtime() currently uses "cli; outb ..." so it * normally takes less than 2 timer cycles. Add a few for cache misses. * Add a few more to allow for latency in bogus calls to microtime() with * interrupts already disabled. */ #define TIMER0_LATCH_COUNT 20 /* * Maximum frequency that we are willing to allow for timer0. Must be * low enough to guarantee that the timer interrupt handler returns * before the next timer interrupt. */ #define TIMER0_MAX_FREQ 20000 int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int disable_rtc_set; /* disable resettodr() if != 0 */ volatile u_int idelayed; int statclock_disable; u_int stat_imask = SWI_LOW_MASK; #ifndef TIMER_FREQ #define TIMER_FREQ 1193182 #endif u_int timer_freq = TIMER_FREQ; int timer0_max_count; u_int tsc_freq; int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; static u_int clk_imask = HWI_MASK | SWI_MASK; static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; /* * XXX new_function and timer_func should not handle clockframes, but * timer_func currently needs to hold hardclock to handle the * timer0_state == 0 case. We should use inthand_add()/inthand_remove() * to switch between clkintr() and a slightly different timerintr(). */ static void (*new_function) __P((struct clockframe *frame)); static u_int new_rate; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; static u_int timer0_prescaler_count; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer0_state; static u_char timer2_state; static void (*timer_func) __P((struct clockframe *frame)) = hardclock; static u_int tsc_present; static unsigned i8254_get_timecount __P((struct timecounter *tc)); static unsigned tsc_get_timecount __P((struct timecounter *tc)); static void set_timer_freq(u_int freq, int intr_freq); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "TSC" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, &tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", ""); static struct timecounter i8254_timecounter = { i8254_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "i8254" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", ""); static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { disable_intr(); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += timer0_max_count; i8254_lastcount = 0; } clkintr_pending = 0; enable_intr(); } timer_func(&frame); switch (timer0_state) { case RELEASED: setdelayed(); break; case ACQUIRED: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); setdelayed(); } break; case ACQUIRE_PENDING: disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); break; case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); setdelayed(); } break; } #if NMCA > 0 /* Reset clock interrupt by asserting bit 7 of port 0x61 */ if (MCA_system) outb(0x61, inb(0x61) | 0x80); #endif } /* * The acquire and release functions must be called at ipl >= splclock(). */ int acquire_timer0(int rate, void (*function) __P((struct clockframe *frame))) { static int old_rate; if (rate <= 0 || rate > TIMER0_MAX_FREQ) return (-1); switch (timer0_state) { case RELEASED: timer0_state = ACQUIRE_PENDING; break; case RELEASE_PENDING: if (rate != old_rate) return (-1); /* * The timer has been released recently, but is being * re-acquired before the release completed. In this * case, we simply reclaim it as if it had not been * released at all. */ timer0_state = ACQUIRED; break; default: return (-1); /* busy */ } new_function = function; old_rate = new_rate = rate; return (0); } int acquire_timer2(int mode) { if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); return (0); } int release_timer0() { switch (timer0_state) { case ACQUIRED: timer0_state = RELEASE_PENDING; break; case ACQUIRE_PENDING: /* Nothing happened yet, release quickly. */ timer0_state = RELEASED; break; default: return (-1); } return (0); } int release_timer2() { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); return (0); } /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static void rtcintr(struct clockframe frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) statclock(&frame); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ static int getit(void) { u_long ef; int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); write_eflags(ef); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Guard against the timer being uninitialized if we are called * early for console i/o. */ if (timer0_max_count == 0) set_timer_freq(timer_freq, hz); /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += timer0_max_count; /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void sysbeepstop(void *chan) { outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ release_timer2(); beeping = 0; } int sysbeep(int pitch, int period) { int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } splx(x); return (0); } /* * RTC support routines */ int rtcin(reg) int reg; { int s; u_char val; s = splhigh(); outb(IO_RTC, reg); inb(0x84); val = inb(IO_RTC + 1); inb(0x84); splx(s); return (val); } static __inline void writertc(u_char reg, u_char val) { int s; s = splhigh(); inb(0x84); outb(IO_RTC, reg); inb(0x84); outb(IO_RTC + 1, val); inb(0x84); /* XXX work around wrong order in rtcin() */ splx(s); } static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } static u_int calibrate_clocks(void) { u_int64_t old_tsc; u_int count, prev_count, tot_count; int sec, start_sec, timeout; if (bootverbose) printf("Calibrating clock(s) ... "); if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto fail; timeout = 100000000; /* Read the mc146818A seconds counter. */ for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); break; } if (--timeout == 0) goto fail; } /* Wait for the mC146818A seconds counter to change. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); if (sec != start_sec) break; } if (--timeout == 0) goto fail; } /* Start keeping track of the i8254 counter. */ prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) old_tsc = rdtsc(); else old_tsc = 0; /* shut up gcc */ /* * Wait for the mc146818A seconds counter to change. Read the i8254 * counter for each iteration since this is convenient and only * costs a few usec of inaccuracy. The timing of the final reads * of the counters almost matches the timing of the initial reads, * so the main cause of inaccuracy is the varying latency from * inside getit() or rtcin(RTC_STATUSA) to the beginning of the * rtcin(RTC_SEC) that returns a changed seconds count. The * maximum inaccuracy from this cause is < 10 usec on 486's. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) sec = rtcin(RTC_SEC); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if (sec != start_sec) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc() - old_tsc; if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } static void set_timer_freq(u_int freq, int intr_freq) { u_long ef; int new_timer0_max_count; ef = read_eflags(); disable_intr(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { timer0_max_count = new_timer0_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); write_eflags(ef); } /* * i8254_restore is called from apm_default_resume() to reload * the countdown register. * this should not be necessary but there are broken laptops that * do not restore the countdown register on resume. * when it happnes, it messes up the hardclock interval and system clock, * which leads to the infamous "calcru: negative time" problem. */ void i8254_restore(void) { u_long ef; ef = read_eflags(); disable_intr(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); write_eflags(ef); } /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. */ void startrtclock() { u_int delta, freq; if (cpu_feature & CPUID_TSC) tsc_present = 1; else tsc_present = 0; writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); set_timer_freq(timer_freq, hz); freq = calibrate_clocks(); #ifdef CLK_CALIBRATION_LOOP if (bootverbose) { printf( "Press a key on the console to abort clock calibration\n"); while (cncheckc() == -1) calibrate_clocks(); } #endif /* * Use the calibrated i8254 frequency if it seems reasonable. * Otherwise use the default, and don't use the calibrated i586 * frequency. */ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq; if (delta < timer_freq / 100) { #ifndef CLK_USE_I8254_CALIBRATION if (bootverbose) printf( "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); freq = timer_freq; #endif timer_freq = freq; } else { if (bootverbose) printf( "%d Hz differs from default of %d Hz by more than 1%%\n", freq, timer_freq); tsc_freq = 0; } set_timer_freq(timer_freq, hz); i8254_timecounter.tc_frequency = timer_freq; tc_init(&i8254_timecounter); #ifndef CLK_USE_TSC_CALIBRATION if (tsc_freq != 0) { if (bootverbose) printf( "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); tsc_freq = 0; } #endif if (tsc_present && tsc_freq == 0) { /* * Calibration of the i586 clock relative to the mc146818A * clock failed. Do a less accurate calibration relative * to the i8254 clock. */ u_int64_t old_tsc = rdtsc(); DELAY(1000000); tsc_freq = rdtsc() - old_tsc; #ifdef CLK_USE_TSC_CALIBRATION if (bootverbose) printf("TSC clock: %u Hz (Method B)\n", tsc_freq); #endif } #if !defined(SMP) /* * We can not use the TSC in SMP mode, until we figure out a * cheap (impossible), reliable and precise (yeah right!) way * to synchronize the TSCs of all the CPUs. * Curse Intel for leaving the counter out of the I/O APIC. */ #if NAPM > 0 /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 looses too of course * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ return; #endif /* NAPM > 0 */ if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } #endif /* !defined(SMP) */ } /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { unsigned long sec, days; int yd; int year, month; int y, m, s; struct timespec ts; if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; yd = days; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; if (disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); } /* * Start both clocks running. */ void cpu_initclocks() { int diag; #ifdef APIC_IO int apic_8254_trial; struct intrec *clkdesc; #endif /* APIC_IO */ if (statclock_disable) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt * flag which would normally cause the RTC to generate * interrupts. */ stat_imask = HWI_MASK | SWI_MASK; rtc_statusb = RTCSB_24HR; } else { /* Setting stathz to nonzero early helps avoid races. */ stathz = RTC_NOPROFRATE; profhz = RTC_PROFRATE; } /* Finish initializing 8253 timer 0. */ #ifdef APIC_IO apic_8254_intr = isa_apic_irq(0); apic_8254_trial = 0; if (apic_8254_intr >= 0 ) { if (apic_int_type(0, 0) == 3) apic_8254_trial = 1; } else { /* look for ExtInt on pin 0 */ if (apic_int_type(0, 0) == 3) { apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); #else /* APIC_IO */ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(IRQ0); #endif /* APIC_IO */ /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ if (statclock_disable) return; diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); #endif /* APIC_IO */ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, INTR_EXCL); #ifdef APIC_IO INTREN(APIC_IRQ8); #else INTREN(IRQ8); #endif /* APIC_IO */ writertc(RTC_STATUSB, rtc_statusb); #ifdef APIC_IO if (apic_8254_trial) { printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ if (read_intr_count(apic_8254_intr) < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ INTRDIS(1 << apic_8254_intr); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); /* * Revoke current ISA IRQ 0 assignment and * configure a fallback interrupt routing from * the 8254 Timer via the 8259 PIC to the * an ExtInt interrupt line on IOAPIC #0 intpin 0. * We reuse the low level interrupt handler number. */ if (apic_irq(0, 0) < 0) { revoke_apic_irq(apic_8254_intr); assign_apic_irq(0, 0, apic_8254_intr); } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); } } if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); else printf("APIC_IO: " "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); #endif } #ifdef APIC_IO static u_long read_intr_count(int vec) { u_long *up; up = intr_countp[vec]; if (up) return *up; return 0UL; } static void setup_8254_mixed_mode() { /* * Allow 8254 timer to INTerrupt 8259: * re-initialize master 8259: * reset; prog 4 bytes, single ICU, edge triggered */ outb(IO_ICU1, 0x13); outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 1, 0x00); /* ignore slave */ outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ /* program IO APIC for type 3 INT on INT0 */ if (ext_int_setup(0, 0) < 0) panic("8254 redirect via APIC pin0 impossible!"); } #endif void setstatclockrate(int newhz) { if (newhz == RTC_PROFRATE) rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; else rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); } static int -sysctl_machdep_i8254_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is is too generic. Should use it everywhere. */ freq = timer_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { if (timer0_state != RELEASED) return (EBUSY); /* too much trouble to handle */ set_timer_freq(freq, hz); i8254_timecounter.tc_frequency = freq; tc_update(&i8254_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", ""); static int -sysctl_machdep_tsc_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (tsc_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = tsc_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { tsc_freq = freq; tsc_timecounter.tc_frequency = tsc_freq; tc_update(&tsc_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", ""); static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; u_long ef; u_int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. */ (lapic_irr1 & (1 << apic_8254_intr)))) #else (inb(IO_ICU1) & 1))) #endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; } i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); write_eflags(ef); return (count); } static unsigned tsc_get_timecount(struct timecounter *tc) { return (rdtsc()); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int attimer_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids)) <= 0) device_quiet(dev); return(result); } static int attimer_attach(device_t dev) { return(0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX restart statclock? */ { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, 1, /* no softc */ }; static devclass_t attimer_devclass; DRIVER_MODULE(attimer, isa, attimer_driver, attimer_devclass, 0, 0); Index: head/sys/compat/linux/linux_mib.c =================================================================== --- head/sys/compat/linux/linux_mib.c (revision 62572) +++ head/sys/compat/linux/linux_mib.c (revision 62573) @@ -1,231 +1,231 @@ /*- * Copyright (c) 1999 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include struct linux_prison { char pr_osname[LINUX_MAX_UTSNAME]; char pr_osrelease[LINUX_MAX_UTSNAME]; int pr_oss_version; }; SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0, "Linux mode"); static char linux_osname[LINUX_MAX_UTSNAME] = "Linux"; static int -linux_sysctl_osname (SYSCTL_HANDLER_ARGS) +linux_sysctl_osname(SYSCTL_HANDLER_ARGS) { char osname[LINUX_MAX_UTSNAME]; int error; strcpy(osname, linux_get_osname(req->p)); error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); if (error || req->newptr == NULL) return (error); error = linux_set_osname(req->p, osname); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, osname, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, 0, 0, linux_sysctl_osname, "A", "Linux kernel OS name"); static char linux_osrelease[LINUX_MAX_UTSNAME] = "2.2.12"; static int -linux_sysctl_osrelease (SYSCTL_HANDLER_ARGS) +linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) { char osrelease[LINUX_MAX_UTSNAME]; int error; strcpy(osrelease, linux_get_osrelease(req->p)); error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); if (error || req->newptr == NULL) return (error); error = linux_set_osrelease(req->p, osrelease); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, 0, 0, linux_sysctl_osrelease, "A", "Linux kernel OS release"); static int linux_oss_version = 0x030600; static int -linux_sysctl_oss_version (SYSCTL_HANDLER_ARGS) +linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) { int oss_version; int error; oss_version = linux_get_oss_version(req->p); error = sysctl_handle_int(oidp, &oss_version, 0, req); if (error || req->newptr == NULL) return (error); error = linux_set_oss_version(req->p, oss_version); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON, 0, 0, linux_sysctl_oss_version, "I", "Linux OSS version"); static struct linux_prison * get_prison(struct proc *p) { register struct prison *pr; register struct linux_prison *lpr; pr = p->p_prison; if (pr == NULL) return (NULL); if (pr->pr_linux == NULL) { MALLOC(lpr, struct linux_prison *, sizeof *lpr, M_PRISON, M_WAITOK); bzero((caddr_t)lpr, sizeof *lpr); pr->pr_linux = lpr; } return (pr->pr_linux); } char * linux_get_osname(p) struct proc *p; { register struct prison *pr; register struct linux_prison *lpr; pr = p->p_prison; if (pr != NULL && pr->pr_linux != NULL) { lpr = pr->pr_linux; if (lpr->pr_osname[0]) return (lpr->pr_osname); } return (linux_osname); } int linux_set_osname(p, osname) struct proc *p; char *osname; { register struct linux_prison *lpr; lpr = get_prison(p); if (lpr != NULL) strcpy(lpr->pr_osname, osname); else strcpy(linux_osname, osname); return (0); } char * linux_get_osrelease(p) struct proc *p; { register struct prison *pr; register struct linux_prison *lpr; pr = p->p_prison; if (pr != NULL && pr->pr_linux != NULL) { lpr = pr->pr_linux; if (lpr->pr_osrelease[0]) return (lpr->pr_osrelease); } return (linux_osrelease); } int linux_set_osrelease(p, osrelease) struct proc *p; char *osrelease; { register struct linux_prison *lpr; lpr = get_prison(p); if (lpr != NULL) strcpy(lpr->pr_osrelease, osrelease); else strcpy(linux_osrelease, osrelease); return (0); } int linux_get_oss_version(p) struct proc *p; { register struct prison *pr; register struct linux_prison *lpr; pr = p->p_prison; if (pr != NULL && pr->pr_linux != NULL) { lpr = pr->pr_linux; if (lpr->pr_oss_version) return (lpr->pr_oss_version); } return (linux_oss_version); } int linux_set_oss_version(p, oss_version) struct proc *p; int oss_version; { register struct linux_prison *lpr; lpr = get_prison(p); if (lpr != NULL) lpr->pr_oss_version = oss_version; else linux_oss_version = oss_version; return (0); } Index: head/sys/ddb/db_sysctl.c =================================================================== --- head/sys/ddb/db_sysctl.c (revision 62572) +++ head/sys/ddb/db_sysctl.c (revision 62573) @@ -1,77 +1,77 @@ /* * db_sysctl.c * * Copyright (c) 2000 Whistle Communications, Inc. * All rights reserved. * * Subject to the following obligations and disclaimer of warranty, use and * redistribution of this software, in source or object code forms, with or * without modifications are expressly permitted by Whistle Communications; * provided, however, that: * 1. Any and all reproductions of the source or object code must include the * copyright notice above and the following disclaimer of warranties; and * 2. No rights are granted, in any manner or form, to use Whistle * Communications, Inc. trademarks, including the mark "WHISTLE * COMMUNICATIONS" on advertising, endorsements, or otherwise except as * such appears in the above copyright notice or in the software. * * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Author: Archie Cobbs * * $FreeBSD$ */ #include #include #include #include #include #define DESCRIPTION "Set to `ddb' or `gdb' to enter the kernel debugger" #define READ_MODE "" #define MODE_DDB "ddb" #define MODE_GDB "gdb" static int -sysctl_debug_enter_debugger (SYSCTL_HANDLER_ARGS) +sysctl_debug_enter_debugger(SYSCTL_HANDLER_ARGS) { char dmode[64]; int error; strncpy(dmode, READ_MODE, sizeof(dmode) - 1); dmode[sizeof(dmode) - 1] = '\0'; error = sysctl_handle_string(oidp, &dmode[0], sizeof(dmode), req); if (error == 0 && req->newptr != NULL) { if (strcmp(dmode, MODE_DDB) == 0) boothowto &= ~RB_GDB; else if (strcmp(dmode, MODE_GDB) == 0) boothowto |= RB_GDB; else return EINVAL; Debugger("debug.enter_debugger"); } return error; } SYSCTL_PROC(_debug, OID_AUTO, enter_debugger, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_debug_enter_debugger, "A", DESCRIPTION); Index: head/sys/dev/ata/ata-all.c =================================================================== --- head/sys/dev/ata/ata-all.c (revision 62572) +++ head/sys/dev/ata/ata-all.c (revision 62573) @@ -1,1685 +1,1685 @@ /*- * Copyright (c) 1998,1999,2000 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "ata.h" #include "isa.h" #include "card.h" #include "pci.h" #include "atadisk.h" #include "atapicd.h" #include "atapifd.h" #include "atapist.h" #include "opt_global.h" #include "opt_ata.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if NPCI > 0 #include #include #endif #include #include #include #ifdef __i386__ #include #include #endif #ifdef __alpha__ #include #endif #include #include #include /* misc defines */ #define IOMASK 0xfffffffc #define ATA_IOADDR_RID 0 #define ATA_ALTADDR_RID 1 #define ATA_BMADDR_RID 2 /* prototypes */ static int ata_probe(device_t); static int ata_attach(device_t); static int ata_detach(device_t); static int ata_resume(device_t); static void ata_boot_attach(void); static void ata_intr(void *); static int32_t ata_getparam(struct ata_softc *, int32_t, u_int8_t); static int8_t *active2str(int32_t); static void bswap(int8_t *, int32_t); static void btrim(int8_t *, int32_t); static void bpack(int8_t *, int8_t *, int32_t); /* local vars */ static devclass_t ata_devclass; static devclass_t ata_pci_devclass; static struct intr_config_hook *ata_delayed_attach = NULL; static char ata_conf[256]; MALLOC_DEFINE(M_ATA, "ATA generic", "ATA driver generic layer"); #if NISA > 0 static struct isa_pnp_id ata_ids[] = { {0x0006d041, "Generic ESDI/IDE/ATA controller"}, /* PNP0600 */ {0x0106d041, "Plus Hardcard II"}, /* PNP0601 */ {0x0206d041, "Plus Hardcard IIXL/EZ"}, /* PNP0602 */ {0x0306d041, "Generic ATA"}, /* PNP0603 */ {0} }; static int ata_isa_probe(device_t dev) { struct ata_softc *scp = device_get_softc(dev); struct resource *port; int rid; u_long tmp; /* check isapnp ids */ if (ISA_PNP_PROBE(device_get_parent(dev), dev, ata_ids) == ENXIO) return ENXIO; /* allocate the port range */ rid = ATA_IOADDR_RID; port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, ATA_IOSIZE, RF_ACTIVE); if (!port) return ENOMEM; /* alloctate the altport range */ if (bus_get_resource(dev, SYS_RES_IOPORT, 1, &tmp, &tmp)) { bus_set_resource(dev, SYS_RES_IOPORT, 1, rman_get_start(port) + ATA_ALTPORT, ATA_ALTIOSIZE); } bus_release_resource(dev, SYS_RES_IOPORT, 0, port); scp->unit = device_get_unit(dev); scp->flags |= ATA_USE_16BIT; return ata_probe(dev); } static device_method_t ata_isa_methods[] = { /* device interface */ DEVMETHOD(device_probe, ata_isa_probe), DEVMETHOD(device_attach, ata_attach), DEVMETHOD(device_resume, ata_resume), { 0, 0 } }; static driver_t ata_isa_driver = { "ata", ata_isa_methods, sizeof(struct ata_softc), }; DRIVER_MODULE(ata, isa, ata_isa_driver, ata_devclass, 0, 0); #endif #if NCARD > 0 static int ata_pccard_probe(device_t dev) { struct ata_softc *scp = device_get_softc(dev); struct resource *port; int rid, len; /* allocate the port range */ rid = ATA_IOADDR_RID; len = bus_get_resource_count(dev, SYS_RES_IOPORT, rid); port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, len, RF_ACTIVE); if (!port) return ENOMEM; /* * if we got more than the default ATA_IOSIZE ports, this is likely * a pccard system where the altio ports are located just after the * normal io ports, so no need to allocate them. */ if (len <= ATA_IOSIZE) { bus_set_resource(dev, SYS_RES_IOPORT, ATA_ALTADDR_RID, rman_get_start(port) + ATA_ALTPORT, ATA_ALTIOSIZE); } bus_release_resource(dev, SYS_RES_IOPORT, 0, port); scp->unit = device_get_unit(dev); scp->flags |= ATA_USE_16BIT; return ata_probe(dev); } static device_method_t ata_pccard_methods[] = { /* device interface */ DEVMETHOD(device_probe, ata_pccard_probe), DEVMETHOD(device_attach, ata_attach), DEVMETHOD(device_detach, ata_detach), { 0, 0 } }; static driver_t ata_pccard_driver = { "ata", ata_pccard_methods, sizeof(struct ata_softc), }; DRIVER_MODULE(ata, pccard, ata_pccard_driver, ata_devclass, 0, 0); #endif #if NPCI > 0 struct ata_pci_softc { struct resource *bmio; struct resource bmio_1; struct resource bmio_2; struct resource *irq; int32_t irqcnt; }; int32_t ata_find_dev(device_t dev, int32_t type, int32_t revid) { device_t *children, child; int nchildren, i; if (device_get_children(device_get_parent(dev), &children, &nchildren)) return 0; for (i = 0; i < nchildren; i++) { child = children[i]; /* check that it's on the same silicon and the device we want */ if (pci_get_slot(dev) == pci_get_slot(child) && pci_get_vendor(child) == (type & 0xffff) && pci_get_device(child) == ((type & 0xffff0000) >> 16) && pci_get_revid(child) >= revid) { free(children, M_TEMP); return 1; } } free(children, M_TEMP); return 0; } static const char * ata_pci_match(device_t dev) { if (pci_get_class(dev) != PCIC_STORAGE) return NULL; switch (pci_get_devid(dev)) { /* supported chipsets */ case 0x12308086: return "Intel PIIX ATA controller"; case 0x70108086: return "Intel PIIX3 ATA controller"; case 0x71118086: case 0x71998086: return "Intel PIIX4 ATA33 controller"; case 0x24118086: return "Intel ICH ATA66 controller"; case 0x24218086: return "Intel ICH0 ATA33 controller"; case 0x522910b9: return "AcerLabs Aladdin ATA33 controller"; case 0x05711106: if (ata_find_dev(dev, 0x05861106, 0)) return "VIA 82C586 ATA33 controller"; if (ata_find_dev(dev, 0x05961106, 0x12)) return "VIA 82C596B ATA66 controller"; if (ata_find_dev(dev, 0x05961106, 0)) return "VIA 82C596 ATA33 controller"; if (ata_find_dev(dev, 0x06861106, 0)) return "VIA 82C686 ATA66 controller"; return "VIA Apollo ATA controller"; case 0x55131039: return "SiS 5591 ATA33 controller"; case 0x06461095: return "CMD 646 ATA controller"; case 0xc6931080: if (pci_get_subclass(dev) == PCIS_STORAGE_IDE) return "Cypress 82C693 ATA controller"; break; case 0x74091022: return "AMD 756 ATA66 controller"; case 0x4d33105a: return "Promise ATA33 controller"; case 0x4d38105a: return "Promise ATA66 controller"; case 0x00041103: return "HighPoint HPT366 ATA66 controller"; /* unsupported but known chipsets, generic DMA only */ case 0x10001042: case 0x10011042: return "RZ 100? ATA controller !WARNING! buggy chip data loss possible"; case 0x06401095: return "CMD 640 ATA controller !WARNING! buggy chip data loss possible"; case 0x01021078: return "Cyrix 5530 ATA controller (generic mode)"; /* unknown chipsets, try generic DMA if it seems possible */ default: if (pci_get_class(dev) == PCIC_STORAGE && (pci_get_subclass(dev) == PCIS_STORAGE_IDE)) return "Generic PCI ATA controller"; } return NULL; } static int ata_pci_probe(device_t dev) { const char *desc = ata_pci_match(dev); if (desc) { device_set_desc(dev, desc); return 0; } else return ENXIO; } static int ata_pci_add_child(device_t dev, int unit) { device_t child; /* check if this is located at one of the std addresses */ if (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) { if (!(child = device_add_child(dev, "ata", unit))) return ENOMEM; } else { if (!(child = device_add_child(dev, "ata", 2))) return ENOMEM; } device_set_ivars(child, (void *)(uintptr_t) unit); return 0; } static int ata_pci_attach(device_t dev) { struct ata_pci_softc *sc = device_get_softc(dev); u_int8_t class, subclass; u_int32_t type, cmd; int rid; /* set up vendor-specific stuff */ type = pci_get_devid(dev); class = pci_get_class(dev); subclass = pci_get_subclass(dev); cmd = pci_read_config(dev, PCIR_COMMAND, 4); /* is this controller busmaster DMA capable ? */ if (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) { /* is busmastering support turned on ? */ if ((cmd & (PCIM_CMD_PORTEN | PCIM_CMD_BUSMASTEREN)) == (PCIM_CMD_PORTEN | PCIM_CMD_BUSMASTEREN)) { /* is there a valid port range to connect to ? */ rid = 0x20; sc->bmio = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, 1, RF_ACTIVE); if (!sc->bmio) device_printf(dev, "Busmastering DMA not configured\n"); } else device_printf(dev, "Busmastering DMA not enabled\n"); } else { if (type == 0x4d33105a || type == 0x4d38105a || type == 0x00041103) { /* Promise and HPT366 controllers support busmastering DMA */ rid = 0x20; sc->bmio = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, 1, RF_ACTIVE); } else /* we dont know this controller, no busmastering DMA */ device_printf(dev, "Busmastering DMA not supported\n"); } /* do extra chipset specific setups */ switch (type) { case 0x522910b9: /* Aladdin need to activate the ATAPI FIFO */ pci_write_config(dev, 0x53, (pci_read_config(dev, 0x53, 1) & ~0x01) | 0x02, 1); break; case 0x4d38105a: /* Promise 66's need their clock changed */ outb(rman_get_start(sc->bmio) + 0x11, inb(rman_get_start(sc->bmio) + 0x11) | 0x0a); /* FALLTHROUGH */ case 0x4d33105a: /* Promise's need burst mode to be turned on */ outb(rman_get_start(sc->bmio) + 0x1f, inb(rman_get_start(sc->bmio) + 0x1f) | 0x01); break; case 0x00041103: /* HPT366 turn of fast interrupt prediction */ pci_write_config(dev, 0x51, (pci_read_config(dev, 0x51, 1) & ~0x80), 1); break; case 0x05711106: case 0x74091022: /* VIA 82C586, 82C596, 82C686 & AMD 756 default setup */ /* set prefetch, postwrite */ pci_write_config(dev, 0x41, pci_read_config(dev, 0x41, 1) | 0xf0, 1); /* set fifo configuration half'n'half */ pci_write_config(dev, 0x43, (pci_read_config(dev, 0x43, 1) & 0x90) | 0x2a, 1); /* set status register read retry */ pci_write_config(dev, 0x44, pci_read_config(dev, 0x44, 1) | 0x08, 1); /* set DMA read & end-of-sector fifo flush */ pci_write_config(dev, 0x46, (pci_read_config(dev, 0x46, 1) & 0x0c) | 0xf0, 1); /* set sector size */ pci_write_config(dev, 0x60, DEV_BSIZE, 2); pci_write_config(dev, 0x68, DEV_BSIZE, 2); /* prepare for ATA-66 on the 82C686 and rev 0x12 and newer 82C596's */ if (ata_find_dev(dev, 0x06861106, 0) || ata_find_dev(dev, 0x05961106, 0x12)) { pci_write_config(dev, 0x50, pci_read_config(dev, 0x50, 4) | 0x070f070f, 4); } break; } /* * the Cypress chip is a mess, it contains two ATA functions, but * both channels are visible on the first one. * simply ignore the second function for now, as the right * solution (ignoring the second channel on the first function) * doesn't work with the crappy ATA interrupt setup on the alpha. */ if (pci_get_devid(dev) == 0xc6931080 && pci_get_function(dev) > 1) return 0; ata_pci_add_child(dev, 0); if (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV || pci_read_config(dev, 0x18, 4) & IOMASK) ata_pci_add_child(dev, 1); return bus_generic_attach(dev); } static int ata_pci_print_child(device_t dev, device_t child) { struct ata_softc *scp = device_get_softc(child); int unit = (uintptr_t) device_get_ivars(child); int retval = 0; retval += bus_print_child_header(dev, child); retval += printf(": at 0x%x", scp->ioaddr); if (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) retval += printf(" irq %d", 14 + unit); retval += bus_print_child_footer(dev, child); return retval; } static struct resource * ata_pci_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct ata_pci_softc *sc = device_get_softc(dev); int masterdev = pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV; int unit = (int)device_get_ivars(child); int myrid; if (type == SYS_RES_IOPORT) { switch (*rid) { case ATA_IOADDR_RID: if (masterdev) { myrid = 0; start = (unit == 0 ? IO_WD1 : IO_WD2); end = start + ATA_IOSIZE - 1; count = ATA_IOSIZE; } else myrid = 0x10 + 8 * unit; break; case ATA_ALTADDR_RID: if (masterdev) { myrid = 0; start = (unit == 0 ? IO_WD1 : IO_WD2) + ATA_ALTPORT; end = start + ATA_ALTIOSIZE - 1; count = ATA_ALTIOSIZE; } else myrid = 0x14 + 8 * unit; break; case ATA_BMADDR_RID: /* the busmaster resource is shared between the two channels */ if (sc->bmio) { if (unit == 0) { sc->bmio_1 = *sc->bmio; sc->bmio_1.r_end = sc->bmio->r_start + ATA_BM_OFFSET1; return &sc->bmio_1; } else { sc->bmio_2 = *sc->bmio; sc->bmio_2.r_start = sc->bmio->r_start + ATA_BM_OFFSET1; sc->bmio_2.r_end = sc->bmio_2.r_start + ATA_BM_OFFSET1; return &sc->bmio_2; } } break; default: return 0; } if (masterdev) /* make the parent just pass through the allocation. */ return BUS_ALLOC_RESOURCE(device_get_parent(dev), child, SYS_RES_IOPORT, &myrid, start, end, count, flags); else /* we are using the parent resource directly. */ return BUS_ALLOC_RESOURCE(device_get_parent(dev), dev, SYS_RES_IOPORT, &myrid, start, end, count, flags); } if (type == SYS_RES_IRQ) { if (*rid != 0) return 0; if (masterdev) { #ifdef __i386__ int irq = (unit == 0 ? 14 : 15); return BUS_ALLOC_RESOURCE(device_get_parent(dev), child, SYS_RES_IRQ, rid, irq, irq, 1, flags & ~RF_SHAREABLE); #else return alpha_platform_alloc_ide_intr(unit); #endif } else { /* primary and secondary channels share the same interrupt */ sc->irqcnt++; if (!sc->irq) sc->irq = BUS_ALLOC_RESOURCE(device_get_parent(dev), dev, SYS_RES_IRQ, rid, 0, ~0, 1, flags); return sc->irq; } } return 0; } static int ata_pci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { struct ata_pci_softc *sc = device_get_softc(dev); int unit = (uintptr_t) device_get_ivars(child); int masterdev = pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV; int myrid = 0; if (type == SYS_RES_IOPORT) { switch (rid) { case ATA_IOADDR_RID: if (masterdev) myrid = 0; else myrid = 0x10 + 8 * unit; break; case ATA_ALTADDR_RID: if (masterdev) myrid = 0; else myrid = 0x14 + 8 * unit; break; case ATA_BMADDR_RID: return 0; default: return ENOENT; } if (masterdev) /* make the parent just pass through the allocation. */ return BUS_RELEASE_RESOURCE(device_get_parent(dev), child, SYS_RES_IOPORT, myrid, r); else /* we are using the parent resource directly. */ return BUS_RELEASE_RESOURCE(device_get_parent(dev), dev, SYS_RES_IOPORT, myrid, r); } if (type == SYS_RES_IRQ) { if (rid != 0) return ENOENT; if (masterdev) { #ifdef __i386__ return BUS_RELEASE_RESOURCE(device_get_parent(dev), child, SYS_RES_IRQ, rid, r); #else return alpha_platform_release_ide_intr(unit, r); #endif } else { if (--sc->irqcnt) return 0; return BUS_RELEASE_RESOURCE(device_get_parent(dev), dev, SYS_RES_IRQ, rid, r); } } return EINVAL; } static int ata_pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_intr_t *intr, void *arg, void **cookiep) { if (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) { #ifdef __i386__ return BUS_SETUP_INTR(device_get_parent(dev), child, irq, flags, intr, arg, cookiep); #else return alpha_platform_setup_ide_intr(irq, intr, arg, cookiep); #endif } else return BUS_SETUP_INTR(device_get_parent(dev), dev, irq, flags, intr, arg, cookiep); } static int ata_pci_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { if (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) { #ifdef __i386__ return BUS_TEARDOWN_INTR(device_get_parent(dev), child, irq, cookie); #else return alpha_platform_teardown_ide_intr(irq, cookie); #endif } else return BUS_TEARDOWN_INTR(device_get_parent(dev), dev, irq, cookie); } static device_method_t ata_pci_methods[] = { /* device interface */ DEVMETHOD(device_probe, ata_pci_probe), DEVMETHOD(device_attach, ata_pci_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* bus methods */ DEVMETHOD(bus_print_child, ata_pci_print_child), DEVMETHOD(bus_alloc_resource, ata_pci_alloc_resource), DEVMETHOD(bus_release_resource, ata_pci_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, ata_pci_setup_intr), DEVMETHOD(bus_teardown_intr, ata_pci_teardown_intr), { 0, 0 } }; static driver_t ata_pci_driver = { "atapci", ata_pci_methods, sizeof(struct ata_pci_softc), }; DRIVER_MODULE(atapci, pci, ata_pci_driver, ata_pci_devclass, 0, 0); static int ata_pcisub_probe(device_t dev) { struct ata_softc *scp = device_get_softc(dev); /* kids of pci ata chipsets has their physical unit number in ivars */ scp->unit = (uintptr_t) device_get_ivars(dev); /* set the chiptype to the hostchip ID, makes life easier */ if (ata_find_dev(device_get_parent(dev), 0x05861106, 0)) scp->chiptype = 0x05861106; else if (ata_find_dev(device_get_parent(dev), 0x05961106, 0)) scp->chiptype = 0x05961106; else if (ata_find_dev(device_get_parent(dev), 0x06861106, 0)) scp->chiptype = 0x06861106; else scp->chiptype = pci_get_devid(device_get_parent(dev)); return ata_probe(dev); } static device_method_t ata_pcisub_methods[] = { /* device interface */ DEVMETHOD(device_probe, ata_pcisub_probe), DEVMETHOD(device_attach, ata_attach), DEVMETHOD(device_detach, ata_detach), DEVMETHOD(device_resume, ata_resume), { 0, 0 } }; static driver_t ata_pcisub_driver = { "ata", ata_pcisub_methods, sizeof(struct ata_softc), }; DRIVER_MODULE(ata, atapci, ata_pcisub_driver, ata_devclass, 0, 0); #endif static int ata_probe(device_t dev) { struct ata_softc *scp = device_get_softc(dev); struct resource *io = 0; struct resource *altio = 0; struct resource *bmio = 0; int rid; int32_t ioaddr, altioaddr, bmaddr; int32_t mask = 0; u_int8_t status0, status1; if (!scp || scp->flags & ATA_ATTACHED) return ENXIO; /* initialize the softc basics */ scp->active = ATA_IDLE; scp->dev = dev; scp->devices = 0; rid = ATA_IOADDR_RID; io = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, ATA_IOSIZE, RF_ACTIVE); if (!io) goto failure; ioaddr = rman_get_start(io); rid = ATA_ALTADDR_RID; altio = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, ATA_ALTIOSIZE, RF_ACTIVE); if (altio) altioaddr = rman_get_start(altio); else altioaddr = ioaddr + ATA_IOSIZE; rid = ATA_BMADDR_RID; bmio = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, 1, RF_ACTIVE); bmaddr = bmio ? rman_get_start(bmio) : 0; /* store the IO resources for eventual later release */ scp->r_io = io; scp->r_altio = altio; scp->r_bmio = bmio; /* store the physical IO addresse for easy access */ scp->ioaddr = ioaddr; scp->altioaddr = altioaddr; scp->bmaddr = bmaddr; if (bootverbose) ata_printf(scp, -1, "iobase=0x%04x altiobase=0x%04x bmaddr=0x%04x\n", scp->ioaddr, scp->altioaddr, scp->bmaddr); /* do we have any signs of ATA/ATAPI HW being present ? */ outb(scp->ioaddr + ATA_DRIVE, ATA_D_IBM | ATA_MASTER); DELAY(1); status0 = inb(scp->ioaddr + ATA_STATUS); outb(scp->ioaddr + ATA_DRIVE, ATA_D_IBM | ATA_SLAVE); DELAY(1); status1 = inb(scp->ioaddr + ATA_STATUS); if ((status0 & 0xf8) != 0xf8 && status0 != 0xa5) mask |= 0x01; if ((status1 & 0xf8) != 0xf8 && status1 != 0xa5) mask |= 0x02; if (bootverbose) ata_printf(scp, -1, "mask=%02x status0=%02x status1=%02x\n", mask, status0, status1); if (!mask) goto failure; ata_reset(scp, &mask); if (!mask) goto failure; /* * OK, we have at least one device on the chain, check for ATAPI * signatures, if none check if its a good old ATA device. */ outb(scp->ioaddr + ATA_DRIVE, (ATA_D_IBM | ATA_MASTER)); DELAY(1); if (inb(scp->ioaddr + ATA_CYL_LSB) == ATAPI_MAGIC_LSB && inb(scp->ioaddr + ATA_CYL_MSB) == ATAPI_MAGIC_MSB) { scp->devices |= ATA_ATAPI_MASTER; } outb(scp->ioaddr + ATA_DRIVE, (ATA_D_IBM | ATA_SLAVE)); DELAY(1); if (inb(scp->ioaddr + ATA_CYL_LSB) == ATAPI_MAGIC_LSB && inb(scp->ioaddr + ATA_CYL_MSB) == ATAPI_MAGIC_MSB) { scp->devices |= ATA_ATAPI_SLAVE; } if (status0 != 0x00 && !(scp->devices & ATA_ATAPI_MASTER)) { outb(scp->ioaddr + ATA_DRIVE, (ATA_D_IBM | ATA_MASTER)); DELAY(1); outb(scp->ioaddr + ATA_ERROR, 0x58); outb(scp->ioaddr + ATA_CYL_LSB, 0xa5); if (inb(scp->ioaddr + ATA_ERROR) != 0x58 && inb(scp->ioaddr + ATA_CYL_LSB) == 0xa5) { scp->devices |= ATA_ATA_MASTER; } } if (status1 != 0x00 && !(scp->devices & ATA_ATAPI_SLAVE)) { outb(scp->ioaddr + ATA_DRIVE, (ATA_D_IBM | ATA_SLAVE)); DELAY(1); outb(scp->ioaddr + ATA_ERROR, 0x58); outb(scp->ioaddr + ATA_CYL_LSB, 0xa5); if (inb(scp->ioaddr + ATA_ERROR) != 0x58 && inb(scp->ioaddr + ATA_CYL_LSB) == 0xa5) { scp->devices |= ATA_ATA_SLAVE; } } if (bootverbose) ata_printf(scp, -1, "devices = 0x%x\n", scp->devices); if (!scp->devices) { goto failure; } TAILQ_INIT(&scp->ata_queue); TAILQ_INIT(&scp->atapi_queue); return 0; failure: if (io) bus_release_resource(dev, SYS_RES_IOPORT, ATA_IOADDR_RID, io); if (altio) bus_release_resource(dev, SYS_RES_IOPORT, ATA_ALTADDR_RID, altio); if (bmio) bus_release_resource(dev, SYS_RES_IOPORT, ATA_BMADDR_RID, bmio); if (bootverbose) ata_printf(scp, -1, "probe allocation failed\n"); return ENXIO; } static int ata_attach(device_t dev) { struct ata_softc *scp = device_get_softc(dev); int error, rid = 0; if (!scp || scp->flags & ATA_ATTACHED) return ENXIO; scp->r_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 1, RF_SHAREABLE | RF_ACTIVE); if (!scp->r_irq) { ata_printf(scp, -1, "unable to allocate interrupt\n"); return ENXIO; } if ((error = bus_setup_intr(dev, scp->r_irq, INTR_TYPE_BIO, ata_intr, scp, &scp->ih))) return error; /* * do not attach devices if we are in early boot, this is done later * when interrupts are enabled by a hook into the boot process. * otherwise attach what the probe has found in scp->devices. */ if (!ata_delayed_attach) { if (scp->devices & ATA_ATA_SLAVE) if (ata_getparam(scp, ATA_SLAVE, ATA_C_ATA_IDENTIFY)) scp->devices &= ~ATA_ATA_SLAVE; if (scp->devices & ATA_ATAPI_SLAVE) if (ata_getparam(scp, ATA_SLAVE, ATA_C_ATAPI_IDENTIFY)) scp->devices &= ~ATA_ATAPI_SLAVE; if (scp->devices & ATA_ATA_MASTER) if (ata_getparam(scp, ATA_MASTER, ATA_C_ATA_IDENTIFY)) scp->devices &= ~ATA_ATA_MASTER; if (scp->devices & ATA_ATAPI_MASTER) if (ata_getparam(scp, ATA_MASTER,ATA_C_ATAPI_IDENTIFY)) scp->devices &= ~ATA_ATAPI_MASTER; #if NATADISK > 0 if (scp->devices & ATA_ATA_MASTER) ad_attach(scp, ATA_MASTER); if (scp->devices & ATA_ATA_SLAVE) ad_attach(scp, ATA_SLAVE); #endif #if NATAPICD > 0 || NATAPIFD > 0 || NATAPIST > 0 if (scp->devices & ATA_ATAPI_MASTER) atapi_attach(scp, ATA_MASTER); if (scp->devices & ATA_ATAPI_SLAVE) atapi_attach(scp, ATA_SLAVE); #endif } scp->flags |= ATA_ATTACHED; return 0; } static int ata_detach(device_t dev) { struct ata_softc *scp = device_get_softc(dev); if (!scp || !(scp->flags & ATA_ATTACHED)) return ENXIO; #if NATADISK > 0 if (scp->devices & ATA_ATA_MASTER) ad_detach(scp->dev_softc[0]); if (scp->devices & ATA_ATA_SLAVE) ad_detach(scp->dev_softc[1]); #endif #if NATAPICD > 0 || NATAPIFD > 0 || NATAPIST > 0 if (scp->devices & ATA_ATAPI_MASTER) atapi_detach(scp->dev_softc[0]); if (scp->devices & ATA_ATAPI_SLAVE) atapi_detach(scp->dev_softc[1]); #endif if (scp->dev_param[ATA_DEV(ATA_MASTER)]) { free(scp->dev_param[ATA_DEV(ATA_MASTER)], M_ATA); scp->dev_param[ATA_DEV(ATA_MASTER)] = NULL; } if (scp->dev_param[ATA_DEV(ATA_SLAVE)]) { free(scp->dev_param[ATA_DEV(ATA_SLAVE)], M_ATA); scp->dev_param[ATA_DEV(ATA_SLAVE)] = NULL; } scp->dev_softc[ATA_DEV(ATA_MASTER)] = NULL; scp->dev_softc[ATA_DEV(ATA_SLAVE)] = NULL; scp->mode[ATA_DEV(ATA_MASTER)] = ATA_PIO; scp->mode[ATA_DEV(ATA_SLAVE)] = ATA_PIO; bus_teardown_intr(dev, scp->r_irq, scp->ih); bus_release_resource(dev, SYS_RES_IRQ, 0, scp->r_irq); if (scp->r_bmio) bus_release_resource(dev, SYS_RES_IOPORT, ATA_BMADDR_RID, scp->r_bmio); if (scp->r_altio) bus_release_resource(dev, SYS_RES_IOPORT, ATA_ALTADDR_RID,scp->r_altio); bus_release_resource(dev, SYS_RES_IOPORT, ATA_IOADDR_RID, scp->r_io); scp->flags &= ~ATA_ATTACHED; return 0; } static int ata_resume(device_t dev) { struct ata_softc *scp = device_get_softc(dev); ata_reinit(scp); return 0; } static int32_t ata_getparam(struct ata_softc *scp, int32_t device, u_int8_t command) { struct ata_params *ata_parm; int8_t buffer[DEV_BSIZE]; int retry = 0; /* select drive */ outb(scp->ioaddr + ATA_DRIVE, ATA_D_IBM | device); DELAY(1); /* enable interrupt */ outb(scp->altioaddr, ATA_A_4BIT); DELAY(1); /* apparently some devices needs this repeated */ do { if (ata_command(scp, device, command, 0, 0, 0, 0, 0, ATA_WAIT_INTR)) { ata_printf(scp, device, "identify failed\n"); return -1; } if (retry++ > 4) { ata_printf(scp, device, "identify retries exceeded\n"); return -1; } } while (ata_wait(scp, device, ((command == ATA_C_ATAPI_IDENTIFY) ? ATA_S_DRQ : (ATA_S_READY | ATA_S_DSC | ATA_S_DRQ)))); insw(scp->ioaddr + ATA_DATA, buffer, sizeof(buffer)/sizeof(int16_t)); ata_parm = malloc(sizeof(struct ata_params), M_ATA, M_NOWAIT); if (!ata_parm) { ata_printf(scp, device, "malloc for identify data failed\n"); return -1; } bcopy(buffer, ata_parm, sizeof(struct ata_params)); if (command == ATA_C_ATA_IDENTIFY || !((ata_parm->model[0] == 'N' && ata_parm->model[1] == 'E') || (ata_parm->model[0] == 'F' && ata_parm->model[1] == 'X'))) bswap(ata_parm->model, sizeof(ata_parm->model)); btrim(ata_parm->model, sizeof(ata_parm->model)); bpack(ata_parm->model, ata_parm->model, sizeof(ata_parm->model)); bswap(ata_parm->revision, sizeof(ata_parm->revision)); btrim(ata_parm->revision, sizeof(ata_parm->revision)); bpack(ata_parm->revision, ata_parm->revision, sizeof(ata_parm->revision)); scp->dev_param[ATA_DEV(device)] = ata_parm; return 0; } static void ata_boot_attach(void) { struct ata_softc *scp; int32_t ctlr; /* * run through all ata devices and look for real ATA & ATAPI devices * using the hints we found in the early probe, this avoids some of * the delays probing of non-exsistent devices can cause. */ for (ctlr=0; ctlrdevices & ATA_ATA_SLAVE) if (ata_getparam(scp, ATA_SLAVE, ATA_C_ATA_IDENTIFY)) scp->devices &= ~ATA_ATA_SLAVE; if (scp->devices & ATA_ATAPI_SLAVE) if (ata_getparam(scp, ATA_SLAVE, ATA_C_ATAPI_IDENTIFY)) scp->devices &= ~ATA_ATAPI_SLAVE; if (scp->devices & ATA_ATA_MASTER) if (ata_getparam(scp, ATA_MASTER, ATA_C_ATA_IDENTIFY)) scp->devices &= ~ATA_ATA_MASTER; if (scp->devices & ATA_ATAPI_MASTER) if (ata_getparam(scp, ATA_MASTER,ATA_C_ATAPI_IDENTIFY)) scp->devices &= ~ATA_ATAPI_MASTER; } #if NATADISK > 0 /* now we know whats there, do the real attach, first the ATA disks */ for (ctlr=0; ctlrdevices & ATA_ATA_MASTER) ad_attach(scp, ATA_MASTER); if (scp->devices & ATA_ATA_SLAVE) ad_attach(scp, ATA_SLAVE); } #endif #if NATAPICD > 0 || NATAPIFD > 0 || NATAPIST > 0 /* then the atapi devices */ for (ctlr=0; ctlrdevices & ATA_ATAPI_MASTER) atapi_attach(scp, ATA_MASTER); if (scp->devices & ATA_ATAPI_SLAVE) atapi_attach(scp, ATA_SLAVE); } #endif if (ata_delayed_attach) { config_intrhook_disestablish(ata_delayed_attach); free(ata_delayed_attach, M_ATA); ata_delayed_attach = NULL; } } static void ata_intr(void *data) { struct ata_softc *scp = (struct ata_softc *)data; u_int8_t dmastat; /* * since we might share the IRQ with another device, and in some * cases with our twin channel, we only want to process interrupts * that we know this channel generated. */ switch (scp->chiptype) { #if NPCI > 0 case 0x00041103: /* HighPoint HPT366 */ if (!((dmastat = ata_dmastatus(scp)) & ATA_BMSTAT_INTERRUPT)) return; outb(scp->bmaddr + ATA_BMSTAT_PORT, dmastat | ATA_BMSTAT_INTERRUPT); break; case 0x4d33105a: /* Promise 33's */ case 0x4d38105a: /* Promise 66's */ { struct ata_pci_softc *sc=device_get_softc(device_get_parent(scp->dev)); if (!(inl(rman_get_start(sc->bmio) + 0x1c) & ((scp->unit) ? 0x00004000 : 0x00000400))) return; } /* FALLTHROUGH */ #endif default: if (scp->flags & ATA_DMA_ACTIVE) { if (!((dmastat = ata_dmastatus(scp)) & ATA_BMSTAT_INTERRUPT)) return; else outb(scp->bmaddr+ATA_BMSTAT_PORT, dmastat|ATA_BMSTAT_INTERRUPT); } } DELAY(1); /* get status, if drive is busy it didn't interrupt so return */ if ((scp->status = inb(scp->ioaddr + ATA_STATUS)) & ATA_S_BUSY) return; /* find & call the responsible driver to process this interrupt */ switch (scp->active) { #if NATADISK > 0 case ATA_ACTIVE_ATA: if (!scp->running) return; if (ad_interrupt(scp->running) == ATA_OP_CONTINUES) return; break; #endif #if NATAPICD > 0 || NATAPIFD > 0 || NATAPIST > 0 case ATA_ACTIVE_ATAPI: if (!scp->running) return; if (atapi_interrupt(scp->running) == ATA_OP_CONTINUES) return; break; #endif case ATA_WAIT_INTR: wakeup((caddr_t)scp); break; case ATA_WAIT_READY: break; case ATA_REINITING: return; default: case ATA_IDLE: #ifdef ATA_DEBUG { static int32_t intr_count = 0; if (intr_count++ < 10) ata_printf(scp, -1, "unwanted interrupt %d status = %02x\n", intr_count, scp->status); } #endif } scp->active = ATA_IDLE; scp->running = NULL; ata_start(scp); } void ata_start(struct ata_softc *scp) { #if NATADISK > 0 struct ad_request *ad_request; #endif #if NATAPICD > 0 || NATAPIFD > 0 || NATAPIST > 0 struct atapi_request *atapi_request; #endif if (scp->active != ATA_IDLE) return; scp->active = ATA_ACTIVE; #if NATADISK > 0 /* find & call the responsible driver if anything on the ATA queue */ if (TAILQ_EMPTY(&scp->ata_queue)) { if (scp->devices & (ATA_ATA_MASTER) && scp->dev_softc[0]) ad_start((struct ad_softc *)scp->dev_softc[0]); if (scp->devices & (ATA_ATA_SLAVE) && scp->dev_softc[1]) ad_start((struct ad_softc *)scp->dev_softc[1]); } if ((ad_request = TAILQ_FIRST(&scp->ata_queue))) { TAILQ_REMOVE(&scp->ata_queue, ad_request, chain); scp->active = ATA_ACTIVE_ATA; scp->running = ad_request; ad_transfer(ad_request); return; } #endif #if NATAPICD > 0 || NATAPIFD > 0 || NATAPIST > 0 /* find & call the responsible driver if anything on the ATAPI queue */ if (TAILQ_EMPTY(&scp->atapi_queue)) { if (scp->devices & (ATA_ATAPI_MASTER) && scp->dev_softc[0]) atapi_start((struct atapi_softc *)scp->dev_softc[0]); if (scp->devices & (ATA_ATAPI_SLAVE) && scp->dev_softc[1]) atapi_start((struct atapi_softc *)scp->dev_softc[1]); } if ((atapi_request = TAILQ_FIRST(&scp->atapi_queue))) { TAILQ_REMOVE(&scp->atapi_queue, atapi_request, chain); scp->active = ATA_ACTIVE_ATAPI; scp->running = atapi_request; atapi_transfer(atapi_request); return; } #endif scp->active = ATA_IDLE; } void ata_reset(struct ata_softc *scp, int32_t *mask) { int32_t timeout; u_int8_t status0 = 0, status1 = 0; /* reset channel */ outb(scp->ioaddr + ATA_DRIVE, ATA_D_IBM | ATA_MASTER); DELAY(1); inb(scp->ioaddr + ATA_STATUS); outb(scp->altioaddr, ATA_A_IDS | ATA_A_RESET); DELAY(10000); outb(scp->altioaddr, ATA_A_IDS); DELAY(10000); inb(scp->ioaddr + ATA_ERROR); DELAY(3000); /* wait for BUSY to go inactive */ for (timeout = 0; timeout < 310000; timeout++) { outb(scp->ioaddr + ATA_DRIVE, ATA_D_IBM | ATA_MASTER); DELAY(1); status0 = inb(scp->ioaddr + ATA_STATUS); outb(scp->ioaddr + ATA_DRIVE, ATA_D_IBM | ATA_SLAVE); DELAY(1); status1 = inb(scp->ioaddr + ATA_STATUS); if (*mask == 0x01) /* wait for master only */ if (!(status0 & ATA_S_BUSY)) break; if (*mask == 0x02) /* wait for slave only */ if (!(status1 & ATA_S_BUSY)) break; if (*mask == 0x03) /* wait for both master & slave */ if (!(status0 & ATA_S_BUSY) && !(status1 & ATA_S_BUSY)) break; DELAY(100); } DELAY(1); outb(scp->altioaddr, ATA_A_4BIT); if (status0 & ATA_S_BUSY) *mask &= ~0x01; if (status1 & ATA_S_BUSY) *mask &= ~0x02; if (bootverbose) ata_printf(scp, -1, "mask=%02x status0=%02x status1=%02x\n", *mask, status0, status1); } int32_t ata_reinit(struct ata_softc *scp) { int32_t mask = 0, omask; scp->active = ATA_REINITING; scp->running = NULL; if (scp->devices & (ATA_ATA_MASTER | ATA_ATAPI_MASTER)) mask |= 0x01; if (scp->devices & (ATA_ATA_SLAVE | ATA_ATAPI_SLAVE)) mask |= 0x02; if (mask) { omask = mask; ata_printf(scp, -1, "resetting devices .. "); ata_reset(scp, &mask); if (omask != mask) printf(" device dissapeared! %d ", omask & ~mask); #if NATADISK > 0 if (scp->devices & (ATA_ATA_MASTER) && scp->dev_softc[0]) ad_reinit((struct ad_softc *)scp->dev_softc[0]); if (scp->devices & (ATA_ATA_SLAVE) && scp->dev_softc[1]) ad_reinit((struct ad_softc *)scp->dev_softc[1]); #endif #if NATAPICD > 0 || NATAPIFD > 0 || NATAPIST > 0 if (scp->devices & (ATA_ATAPI_MASTER) && scp->dev_softc[0]) atapi_reinit((struct atapi_softc *)scp->dev_softc[0]); if (scp->devices & (ATA_ATAPI_SLAVE) && scp->dev_softc[1]) atapi_reinit((struct atapi_softc *)scp->dev_softc[1]); #endif printf("done\n"); } scp->active = ATA_IDLE; ata_start(scp); return 0; } int32_t ata_wait(struct ata_softc *scp, int32_t device, u_int8_t mask) { u_int32_t timeout = 0; DELAY(1); while (timeout < 5000000) { /* timeout 5 secs */ scp->status = inb(scp->ioaddr + ATA_STATUS); /* if drive fails status, reselect the drive just to be sure */ if (scp->status == 0xff) { ata_printf(scp, device, "no status, reselecting device\n"); outb(scp->ioaddr + ATA_DRIVE, ATA_D_IBM | device); DELAY(1); scp->status = inb(scp->ioaddr + ATA_STATUS); } /* are we done ? */ if (!(scp->status & ATA_S_BUSY)) break; if (timeout > 1000) { timeout += 1000; DELAY(1000); } else { timeout += 10; DELAY(10); } } if (scp->status & ATA_S_ERROR) scp->error = inb(scp->ioaddr + ATA_ERROR); if (timeout >= 5000000) return -1; if (!mask) return (scp->status & ATA_S_ERROR); /* Wait 50 msec for bits wanted. */ timeout = 5000; while (timeout--) { scp->status = inb(scp->ioaddr + ATA_STATUS); if ((scp->status & mask) == mask) { if (scp->status & ATA_S_ERROR) scp->error = inb(scp->ioaddr + ATA_ERROR); return (scp->status & ATA_S_ERROR); } DELAY (10); } return -1; } int32_t ata_command(struct ata_softc *scp, int32_t device, u_int32_t command, u_int32_t cylinder, u_int32_t head, u_int32_t sector, u_int32_t count, u_int32_t feature, int32_t flags) { #ifdef ATA_DEBUG ata_printf(scp, device, "ata_command: addr=%04x, cmd=%02x, " "c=%d, h=%d, s=%d, count=%d, flags=%02x\n", scp->ioaddr, command, cylinder, head, sector, count, flags); #endif /* ready to issue command ? */ if (ata_wait(scp, device, 0) < 0) { ata_printf(scp, device, "timeout waiting to give command=%02x s=%02x e=%02x\n", command, scp->status, scp->error); return -1; } outb(scp->ioaddr + ATA_FEATURE, feature); outb(scp->ioaddr + ATA_CYL_LSB, cylinder); outb(scp->ioaddr + ATA_CYL_MSB, cylinder >> 8); outb(scp->ioaddr + ATA_DRIVE, ATA_D_IBM | device | head); outb(scp->ioaddr + ATA_SECTOR, sector); outb(scp->ioaddr + ATA_COUNT, count); switch (flags) { case ATA_WAIT_INTR: if (scp->active != ATA_IDLE) ata_printf(scp, device, "WARNING: WAIT_INTR active=%s\n", active2str(scp->active)); scp->active = ATA_WAIT_INTR; asleep((caddr_t)scp, PRIBIO, "atacmd", 10 * hz); outb(scp->ioaddr + ATA_CMD, command); if (await(PRIBIO, 10 * hz)) { ata_printf(scp, device, "ata_command: timeout waiting for intr\n"); scp->active = ATA_IDLE; return -1; } break; case ATA_WAIT_READY: if (scp->active != ATA_IDLE && scp->active != ATA_REINITING) ata_printf(scp, device, "WARNING: WAIT_READY active=%s\n", active2str(scp->active)); if (scp->active != ATA_REINITING) scp->active = ATA_WAIT_READY; outb(scp->ioaddr + ATA_CMD, command); if (ata_wait(scp, device, ATA_S_READY) < 0) { ata_printf(scp, device, "timeout waiting for command=%02x s=%02x e=%02x\n", command, scp->status, scp->error); if (scp->active != ATA_REINITING) scp->active = ATA_IDLE; return -1; } if (scp->active != ATA_REINITING) scp->active = ATA_IDLE; break; case ATA_IMMEDIATE: outb(scp->ioaddr + ATA_CMD, command); break; default: ata_printf(scp, device, "DANGER: illegal interrupt flag=%s\n", active2str(flags)); } return 0; } int ata_printf(struct ata_softc *scp, int32_t device, const char * fmt, ...) { va_list ap; int ret; if (device == -1) ret = printf("ata%d: ", device_get_unit(scp->dev)); else ret = printf("ata%d-%s: ", device_get_unit(scp->dev), (device == ATA_MASTER) ? "master" : "slave"); va_start(ap, fmt); ret += vprintf(fmt, ap); va_end(ap); return ret; } int ata_get_lun(u_int32_t *map) { int lun = ffs(~*map) - 1; *map |= (1 << lun); return lun; } void ata_free_lun(u_int32_t *map, int lun) { *map &= ~(1 << lun); } int8_t * ata_mode2str(int32_t mode) { switch (mode) { case ATA_PIO: return "BIOSPIO"; case ATA_PIO0: return "PIO0"; case ATA_PIO1: return "PIO1"; case ATA_PIO2: return "PIO2"; case ATA_PIO3: return "PIO3"; case ATA_PIO4: return "PIO4"; case ATA_WDMA2: return "WDMA2"; case ATA_UDMA2: return "UDMA33"; case ATA_UDMA4: return "UDMA66"; case ATA_DMA: return "BIOSDMA"; default: return "???"; } } int8_t ata_pio2mode(int32_t pio) { switch (pio) { default: case 0: return ATA_PIO0; case 1: return ATA_PIO1; case 2: return ATA_PIO2; case 3: return ATA_PIO3; case 4: return ATA_PIO4; } } int ata_pmode(struct ata_params *ap) { if (ap->atavalid & ATA_FLAG_64_70) { if (ap->apiomodes & 2) return 4; if (ap->apiomodes & 1) return 3; } if (ap->opiomode == 2) return 2; if (ap->opiomode == 1) return 1; if (ap->opiomode == 0) return 0; return -1; } int ata_wmode(struct ata_params *ap) { if (ap->wdmamodes & 4) return 2; if (ap->wdmamodes & 2) return 1; if (ap->wdmamodes & 1) return 0; return -1; } int ata_umode(struct ata_params *ap) { if (ap->atavalid & ATA_FLAG_88) { if (ap->udmamodes & 0x10) return 4; if (ap->udmamodes & 0x08) return 3; if (ap->udmamodes & 0x04) return 2; if (ap->udmamodes & 0x02) return 1; if (ap->udmamodes & 0x01) return 0; } return -1; } static int8_t * active2str(int32_t active) { static char buf[8]; switch (active) { case ATA_IDLE: return("ATA_IDLE"); case ATA_IMMEDIATE: return("ATA_IMMEDIATE"); case ATA_WAIT_INTR: return("ATA_WAIT_INTR"); case ATA_WAIT_READY: return("ATA_WAIT_READY"); case ATA_ACTIVE: return("ATA_ACTIVE"); case ATA_ACTIVE_ATA: return("ATA_ACTIVE_ATA"); case ATA_ACTIVE_ATAPI: return("ATA_ACTIVE_ATAPI"); case ATA_REINITING: return("ATA_REINITING"); default: sprintf(buf, "0x%02x", active); return buf; } } static void bswap(int8_t *buf, int32_t len) { u_int16_t *ptr = (u_int16_t*)(buf + len); while (--ptr >= (u_int16_t*)buf) *ptr = ntohs(*ptr); } static void btrim(int8_t *buf, int32_t len) { int8_t *ptr; for (ptr = buf; ptr < buf+len; ++ptr) if (!*ptr) *ptr = ' '; for (ptr = buf + len - 1; ptr >= buf && *ptr == ' '; --ptr) *ptr = 0; } static void bpack(int8_t *src, int8_t *dst, int32_t len) { int32_t i, j, blank; for (i = j = blank = 0 ; i < len; i++) { if (blank && src[i] == ' ') continue; if (blank && src[i] != ' ') { dst[j++] = src[i]; blank = 0; continue; } if (src[i] == ' ') { blank = 1; if (i == 0) continue; } dst[j++] = src[i]; } if (j < len) dst[j] = 0x00; } static void ata_change_mode(struct ata_softc *scp, int32_t device, int32_t mode) { int32_t s = splbio(); while (scp->active != ATA_IDLE) tsleep((caddr_t)&s, PRIBIO, "atachm", hz/4); scp->active = ATA_REINITING; ata_dmainit(scp, device, ata_pmode(ATA_PARAM(scp, device)), mode < ATA_DMA ? -1 : ata_wmode(ATA_PARAM(scp, device)), mode < ATA_DMA ? -1 : ata_umode(ATA_PARAM(scp, device))); scp->active = ATA_IDLE; ata_start(scp); splx(s); } static int -sysctl_hw_ata (SYSCTL_HANDLER_ARGS) +sysctl_hw_ata(SYSCTL_HANDLER_ARGS) { struct ata_softc *scp; int ctlr, error, i; /* readout internal state */ bzero(ata_conf, sizeof(ata_conf)); for (ctlr=0; ctlrdev_softc[i]) strcat(ata_conf, "---,"); else if (scp->mode[i] >= ATA_DMA) strcat(ata_conf, "dma,"); else strcat(ata_conf, "pio,"); } } error = sysctl_handle_string(oidp, ata_conf, sizeof(ata_conf), req); if (error == 0 && req->newptr != NULL) { char *ptr = ata_conf; /* update internal state */ i = 0; while (*ptr) { if (!strncmp(ptr, "pio", 3) || !strncmp(ptr, "PIO", 3)) { if ((scp = devclass_get_softc(ata_devclass, i >> 1)) && scp->dev_softc[i & 1] && scp->mode[i & 1] >= ATA_DMA) ata_change_mode(scp, (i & 1)?ATA_SLAVE:ATA_MASTER, ATA_PIO); } else if (!strncmp(ptr, "dma", 3) || !strncmp(ptr, "DMA", 3)) { if ((scp = devclass_get_softc(ata_devclass, i >> 1)) && scp->dev_softc[i & 1] && scp->mode[i & 1] < ATA_DMA) ata_change_mode(scp, (i & 1)?ATA_SLAVE:ATA_MASTER, ATA_DMA); } else if (strncmp(ptr, "---", 3)) break; ptr+=3; if (*ptr++ != ',' || ++i > (devclass_get_maxunit(ata_devclass) << 1)) break; } } return error; } SYSCTL_PROC(_hw, OID_AUTO, atamodes, CTLTYPE_STRING | CTLFLAG_RW, 0, sizeof(ata_conf), sysctl_hw_ata, "A", ""); static void ata_init(void) { /* register boot attach to be run when interrupts are enabled */ if (!(ata_delayed_attach = (struct intr_config_hook *) malloc(sizeof(struct intr_config_hook), M_TEMP, M_NOWAIT))) { printf("ata: malloc of delayed attach hook failed\n"); return; } bzero(ata_delayed_attach, sizeof(struct intr_config_hook)); ata_delayed_attach->ich_func = (void*)ata_boot_attach; if (config_intrhook_establish(ata_delayed_attach) != 0) { printf("ata: config_intrhook_establish failed\n"); free(ata_delayed_attach, M_TEMP); } } SYSINIT(atadev, SI_SUB_DRIVERS, SI_ORDER_SECOND, ata_init, NULL) Index: head/sys/dev/sio/sio.c =================================================================== --- head/sys/dev/sio/sio.c (revision 62572) +++ head/sys/dev/sio/sio.c (revision 62573) @@ -1,3292 +1,3292 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * from: @(#)com.c 7.5 (Berkeley) 5/16/91 * from: i386/isa sio.c,v 1.234 */ #include "opt_comconsole.h" #include "opt_compat.h" #include "opt_ddb.h" #include "opt_sio.h" #include "card.h" #include "pci.h" #include "sio.h" /* * Serial driver, based on 386BSD-0.1 com driver. * Mostly rewritten to use pseudo-DMA. * Works for National Semiconductor NS8250-NS16550AF UARTs. * COM driver, based on HP dca driver. * * Changes for PC-Card integration: * - Added PC-Card driver table and handlers */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if NPCI > 0 #include #include #endif #include #include #include #ifndef SMP #include #endif #include #include #ifdef COM_ESP #include #endif #include #ifndef __i386__ #define disable_intr() #define enable_intr() #endif #ifdef SMP #define disable_intr() COM_DISABLE_INTR() #define enable_intr() COM_ENABLE_INTR() #endif /* SMP */ #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ #define CALLOUT_MASK 0x80 #define CONTROL_MASK 0x60 #define CONTROL_INIT_STATE 0x20 #define CONTROL_LOCK_STATE 0x40 #define DEV_TO_UNIT(dev) (MINOR_TO_UNIT(minor(dev))) #define MINOR_MAGIC_MASK (CALLOUT_MASK | CONTROL_MASK) #define MINOR_TO_UNIT(mynor) ((mynor) & ~MINOR_MAGIC_MASK) #ifdef COM_MULTIPORT /* checks in flags for multiport and which is multiport "master chip" * for a given card */ #define COM_ISMULTIPORT(flags) ((flags) & 0x01) #define COM_MPMASTER(flags) (((flags) >> 8) & 0x0ff) #define COM_NOTAST4(flags) ((flags) & 0x04) #endif /* COM_MULTIPORT */ #define COM_CONSOLE(flags) ((flags) & 0x10) #define COM_FORCECONSOLE(flags) ((flags) & 0x20) #define COM_LLCONSOLE(flags) ((flags) & 0x40) #define COM_DEBUGGER(flags) ((flags) & 0x80) #define COM_LOSESOUTINTS(flags) ((flags) & 0x08) #define COM_NOFIFO(flags) ((flags) & 0x02) #define COM_ST16650A(flags) ((flags) & 0x20000) #define COM_C_NOPROBE (0x40000) #define COM_NOPROBE(flags) ((flags) & COM_C_NOPROBE) #define COM_C_IIR_TXRDYBUG (0x80000) #define COM_IIR_TXRDYBUG(flags) ((flags) & COM_C_IIR_TXRDYBUG) #define COM_FIFOSIZE(flags) (((flags) & 0xff000000) >> 24) #define com_scr 7 /* scratch register for 16450-16550 (R/W) */ #define sio_getreg(com, off) \ (bus_space_read_1((com)->bst, (com)->bsh, (off))) #define sio_setreg(com, off, value) \ (bus_space_write_1((com)->bst, (com)->bsh, (off), (value))) /* * com state bits. * (CS_BUSY | CS_TTGO) and (CS_BUSY | CS_TTGO | CS_ODEVREADY) must be higher * than the other bits so that they can be tested as a group without masking * off the low bits. * * The following com and tty flags correspond closely: * CS_BUSY = TS_BUSY (maintained by comstart(), siopoll() and * comstop()) * CS_TTGO = ~TS_TTSTOP (maintained by comparam() and comstart()) * CS_CTS_OFLOW = CCTS_OFLOW (maintained by comparam()) * CS_RTS_IFLOW = CRTS_IFLOW (maintained by comparam()) * TS_FLUSH is not used. * XXX I think TIOCSETA doesn't clear TS_TTSTOP when it clears IXON. * XXX CS_*FLOW should be CF_*FLOW in com->flags (control flags not state). */ #define CS_BUSY 0x80 /* output in progress */ #define CS_TTGO 0x40 /* output not stopped by XOFF */ #define CS_ODEVREADY 0x20 /* external device h/w ready (CTS) */ #define CS_CHECKMSR 1 /* check of MSR scheduled */ #define CS_CTS_OFLOW 2 /* use CTS output flow control */ #define CS_DTR_OFF 0x10 /* DTR held off */ #define CS_ODONE 4 /* output completed */ #define CS_RTS_IFLOW 8 /* use RTS input flow control */ #define CSE_BUSYCHECK 1 /* siobusycheck() scheduled */ static char const * const error_desc[] = { #define CE_OVERRUN 0 "silo overflow", #define CE_INTERRUPT_BUF_OVERFLOW 1 "interrupt-level buffer overflow", #define CE_TTY_BUF_OVERFLOW 2 "tty-level buffer overflow", }; #define CE_NTYPES 3 #define CE_RECORD(com, errnum) (++(com)->delta_error_counts[errnum]) /* types. XXX - should be elsewhere */ typedef u_int Port_t; /* hardware port */ typedef u_char bool_t; /* boolean */ /* queue of linear buffers */ struct lbq { u_char *l_head; /* next char to process */ u_char *l_tail; /* one past the last char to process */ struct lbq *l_next; /* next in queue */ bool_t l_queued; /* nonzero if queued */ }; /* com device structure */ struct com_s { u_int flags; /* Copy isa device flags */ u_char state; /* miscellaneous flag bits */ bool_t active_out; /* nonzero if the callout device is open */ u_char cfcr_image; /* copy of value written to CFCR */ #ifdef COM_ESP bool_t esp; /* is this unit a hayes esp board? */ #endif u_char extra_state; /* more flag bits, separate for order trick */ u_char fifo_image; /* copy of value written to FIFO */ bool_t hasfifo; /* nonzero for 16550 UARTs */ bool_t st16650a; /* Is a Startech 16650A or RTS/CTS compat */ bool_t loses_outints; /* nonzero if device loses output interrupts */ u_char mcr_image; /* copy of value written to MCR */ #ifdef COM_MULTIPORT bool_t multiport; /* is this unit part of a multiport device? */ #endif /* COM_MULTIPORT */ bool_t no_irq; /* nonzero if irq is not attached */ bool_t gone; /* hardware disappeared */ bool_t poll; /* nonzero if polling is required */ bool_t poll_output; /* nonzero if polling for output is required */ int unit; /* unit number */ int dtr_wait; /* time to hold DTR down on close (* 1/hz) */ u_int tx_fifo_size; u_int wopeners; /* # processes waiting for DCD in open() */ /* * The high level of the driver never reads status registers directly * because there would be too many side effects to handle conveniently. * Instead, it reads copies of the registers stored here by the * interrupt handler. */ u_char last_modem_status; /* last MSR read by intr handler */ u_char prev_modem_status; /* last MSR handled by high level */ u_char hotchar; /* ldisc-specific char to be handled ASAP */ u_char *ibuf; /* start of input buffer */ u_char *ibufend; /* end of input buffer */ u_char *ibufold; /* old input buffer, to be freed */ u_char *ihighwater; /* threshold in input buffer */ u_char *iptr; /* next free spot in input buffer */ int ibufsize; /* size of ibuf (not include error bytes) */ int ierroff; /* offset of error bytes in ibuf */ struct lbq obufq; /* head of queue of output buffers */ struct lbq obufs[2]; /* output buffers */ bus_space_tag_t bst; bus_space_handle_t bsh; Port_t data_port; /* i/o ports */ #ifdef COM_ESP Port_t esp_port; #endif Port_t int_id_port; Port_t modem_ctl_port; Port_t line_status_port; Port_t modem_status_port; Port_t intr_ctl_port; /* Ports of IIR register */ struct tty *tp; /* cross reference */ /* Initial state. */ struct termios it_in; /* should be in struct tty */ struct termios it_out; /* Lock state. */ struct termios lt_in; /* should be in struct tty */ struct termios lt_out; bool_t do_timestamp; bool_t do_dcd_timestamp; struct timeval timestamp; struct timeval dcd_timestamp; struct pps_state pps; u_long bytes_in; /* statistics */ u_long bytes_out; u_int delta_error_counts[CE_NTYPES]; u_long error_counts[CE_NTYPES]; struct resource *irqres; struct resource *ioportres; void *cookie; /* * Data area for output buffers. Someday we should build the output * buffer queue without copying data. */ u_char obuf1[256]; u_char obuf2[256]; }; #ifdef COM_ESP static int espattach __P((struct com_s *com, Port_t esp_port)); #endif static int sioattach __P((device_t dev, int rid)); static int sio_isa_attach __P((device_t dev)); static timeout_t siobusycheck; static timeout_t siodtrwakeup; static void comhardclose __P((struct com_s *com)); static void sioinput __P((struct com_s *com)); static void siointr1 __P((struct com_s *com)); static void siointr __P((void *arg)); static int commctl __P((struct com_s *com, int bits, int how)); static int comparam __P((struct tty *tp, struct termios *t)); static swihand_t siopoll; static int sioprobe __P((device_t dev, int xrid)); static int sio_isa_probe __P((device_t dev)); static void siosettimeout __P((void)); static int siosetwater __P((struct com_s *com, speed_t speed)); static void comstart __P((struct tty *tp)); static void comstop __P((struct tty *tp, int rw)); static timeout_t comwakeup; static void disc_optim __P((struct tty *tp, struct termios *t, struct com_s *com)); #if NCARD > 0 static int sio_pccard_attach __P((device_t dev)); static int sio_pccard_detach __P((device_t dev)); static int sio_pccard_probe __P((device_t dev)); #endif /* NCARD > 0 */ #if NPCI > 0 static int sio_pci_attach __P((device_t dev)); static void sio_pci_kludge_unit __P((device_t dev)); static int sio_pci_probe __P((device_t dev)); #endif /* NPCI > 0 */ static char driver_name[] = "sio"; /* table and macro for fast conversion from a unit number to its com struct */ static devclass_t sio_devclass; #define com_addr(unit) ((struct com_s *) \ devclass_get_softc(sio_devclass, unit)) static device_method_t sio_isa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_isa_probe), DEVMETHOD(device_attach, sio_isa_attach), { 0, 0 } }; static driver_t sio_isa_driver = { driver_name, sio_isa_methods, sizeof(struct com_s), }; #if NCARD > 0 static device_method_t sio_pccard_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_pccard_probe), DEVMETHOD(device_attach, sio_pccard_attach), DEVMETHOD(device_detach, sio_pccard_detach), { 0, 0 } }; static driver_t sio_pccard_driver = { driver_name, sio_pccard_methods, sizeof(struct com_s), }; #endif /* NCARD > 0 */ #if NPCI > 0 static device_method_t sio_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_pci_probe), DEVMETHOD(device_attach, sio_pci_attach), { 0, 0 } }; static driver_t sio_pci_driver = { driver_name, sio_pci_methods, sizeof(struct com_s), }; #endif /* NPCI > 0 */ static d_open_t sioopen; static d_close_t sioclose; static d_read_t sioread; static d_write_t siowrite; static d_ioctl_t sioioctl; #define CDEV_MAJOR 28 static struct cdevsw sio_cdevsw = { /* open */ sioopen, /* close */ sioclose, /* read */ sioread, /* write */ siowrite, /* ioctl */ sioioctl, /* poll */ ttypoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ driver_name, /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ D_TTY, /* bmaj */ -1 }; int comconsole = -1; static volatile speed_t comdefaultrate = CONSPEED; #ifdef __alpha__ static volatile speed_t gdbdefaultrate = CONSPEED; #endif static u_int com_events; /* input chars + weighted output completions */ static Port_t siocniobase; static int siocnunit; static Port_t siogdbiobase; static int siogdbunit = -1; static bool_t sio_registered; static int sio_timeout; static int sio_timeouts_until_log; static struct callout_handle sio_timeout_handle = CALLOUT_HANDLE_INITIALIZER(&sio_timeout_handle); static int sio_numunits; static struct speedtab comspeedtab[] = { { 0, 0 }, { 50, COMBRD(50) }, { 75, COMBRD(75) }, { 110, COMBRD(110) }, { 134, COMBRD(134) }, { 150, COMBRD(150) }, { 200, COMBRD(200) }, { 300, COMBRD(300) }, { 600, COMBRD(600) }, { 1200, COMBRD(1200) }, { 1800, COMBRD(1800) }, { 2400, COMBRD(2400) }, { 4800, COMBRD(4800) }, { 9600, COMBRD(9600) }, { 19200, COMBRD(19200) }, { 38400, COMBRD(38400) }, { 57600, COMBRD(57600) }, { 115200, COMBRD(115200) }, { -1, -1 } }; #ifdef COM_ESP /* XXX configure this properly. */ static Port_t likely_com_ports[] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, }; static Port_t likely_esp_ports[] = { 0x140, 0x180, 0x280, 0 }; #endif /* * handle sysctl read/write requests for console speed * * In addition to setting comdefaultrate for I/O through /dev/console, * also set the initial and lock values for the /dev/ttyXX device * if there is one associated with the console. Finally, if the /dev/tty * device has already been open, change the speed on the open running port * itself. */ static int -sysctl_machdep_comdefaultrate (SYSCTL_HANDLER_ARGS) +sysctl_machdep_comdefaultrate(SYSCTL_HANDLER_ARGS) { int error, s; speed_t newspeed; struct com_s *com; struct tty *tp; newspeed = comdefaultrate; error = sysctl_handle_opaque(oidp, &newspeed, sizeof newspeed, req); if (error || !req->newptr) return (error); comdefaultrate = newspeed; if (comconsole < 0) /* serial console not selected? */ return (0); com = com_addr(comconsole); if (com == NULL) return (ENXIO); /* * set the initial and lock rates for /dev/ttydXX and /dev/cuaXX * (note, the lock rates really are boolean -- if non-zero, disallow * speed changes) */ com->it_in.c_ispeed = com->it_in.c_ospeed = com->lt_in.c_ispeed = com->lt_in.c_ospeed = com->it_out.c_ispeed = com->it_out.c_ospeed = com->lt_out.c_ispeed = com->lt_out.c_ospeed = comdefaultrate; /* * if we're open, change the running rate too */ tp = com->tp; if (tp && (tp->t_state & TS_ISOPEN)) { tp->t_termios.c_ispeed = tp->t_termios.c_ospeed = comdefaultrate; s = spltty(); error = comparam(tp, &tp->t_termios); splx(s); } return error; } SYSCTL_PROC(_machdep, OID_AUTO, conspeed, CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_machdep_comdefaultrate, "I", ""); #define SET_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) | (bit)) #define CLR_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) & ~(bit)) #if NCARD > 0 static int sio_pccard_probe(dev) device_t dev; { /* Do not probe IRQ - pccard doesn't turn on the interrupt line */ /* until bus_setup_intr */ SET_FLAG(dev, COM_C_NOPROBE); return (sioprobe(dev, 0)); } static int sio_pccard_attach(dev) device_t dev; { return (sioattach(dev, 0)); } /* * sio_detach - unload the driver and clear the table. * XXX TODO: * This is usually called when the card is ejected, but * can be caused by a modunload of a controller driver. * The idea is to reset the driver's view of the device * and ensure that any driver entry points such as * read and write do not hang. */ static int sio_pccard_detach(dev) device_t dev; { struct com_s *com; com = (struct com_s *) device_get_softc(dev); if (com == NULL) { device_printf(dev, "NULL com in siounload\n"); return (0); } com->gone = 1; if (com->irqres) { bus_teardown_intr(dev, com->irqres, com->cookie); bus_release_resource(dev, SYS_RES_IRQ, 0, com->irqres); } if (com->ioportres) bus_release_resource(dev, SYS_RES_IOPORT, 0, com->ioportres); if (com->tp && (com->tp->t_state & TS_ISOPEN)) { device_printf(dev, "still open, forcing close\n"); com->tp->t_gen++; ttyclose(com->tp); ttwakeup(com->tp); ttwwakeup(com->tp); } else { if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); } device_printf(dev, "unloaded\n"); return (0); } #endif /* NCARD > 0 */ #if NPCI > 0 struct pci_ids { u_int32_t type; const char *desc; int rid; }; static struct pci_ids pci_ids[] = { { 0x100812b9, "3COM PCI FaxModem", 0x10 }, { 0x048011c1, "ActionTec 56k FAX PCI Modem", 0x14 }, { 0x00000000, NULL, 0 } }; static int sio_pci_attach(dev) device_t dev; { u_int32_t type; struct pci_ids *id; type = pci_get_devid(dev); id = pci_ids; while (id->type && id->type != type) id++; if (id->desc == NULL) return (ENXIO); sio_pci_kludge_unit(dev); return (sioattach(dev, id->rid)); } /* * Don't cut and paste this to other drivers. It is a horrible kludge * which will fail to work and also be unnecessary in future versions. */ static void sio_pci_kludge_unit(dev) device_t dev; { devclass_t dc; int err; int start; int unit; unit = 0; start = 0; while (resource_int_value("sio", unit, "port", &start) == 0 && start > 0) unit++; if (device_get_unit(dev) < unit) { dc = device_get_devclass(dev); while (devclass_get_device(dc, unit)) unit++; device_printf(dev, "moving to sio%d\n", unit); err = device_set_unit(dev, unit); /* EVIL DO NOT COPY */ if (err) device_printf(dev, "error moving device %d\n", err); } } static int sio_pci_probe(dev) device_t dev; { u_int32_t type; struct pci_ids *id; type = pci_get_devid(dev); id = pci_ids; while (id->type && id->type != type) id++; if (id->desc == NULL) return (ENXIO); device_set_desc(dev, id->desc); return (sioprobe(dev, id->rid)); } #endif /* NPCI > 0 */ static struct isa_pnp_id sio_ids[] = { {0x0005d041, "Standard PC COM port"}, /* PNP0500 */ {0x0105d041, "16550A-compatible COM port"}, /* PNP0501 */ {0x0205d041, "Multiport serial device (non-intelligent 16550)"}, /* PNP0502 */ {0x1005d041, "Generic IRDA-compatible device"}, /* PNP0510 */ {0x1105d041, "Generic IRDA-compatible device"}, /* PNP0511 */ /* Devices that do not have a compatid */ {0x12206804, NULL}, /* ACH2012 - 5634BTS 56K Video Ready Modem */ {0x7602a904, NULL}, /* AEI0276 - 56K v.90 Fax Modem (LKT) */ {0x00007905, NULL}, /* AKY0000 - 56K Plug&Play Modem */ {0x01405407, NULL}, /* AZT4001 - AZT3000 PnP SOUND DEVICE, MODEM */ {0x56039008, NULL}, /* BDP0356 - Best Data 56x2 */ {0x36339008, NULL}, /* BDP3336 - Best Data Prods. 336F */ {0x0014490a, NULL}, /* BRI1400 - Boca 33.6 PnP */ {0x0015490a, NULL}, /* BRI1500 - Internal Fax Data */ {0x0034490a, NULL}, /* BRI3400 - Internal ACF Modem */ {0x00b4490a, NULL}, /* BRIB400 - Boca 56k PnP */ {0x0030320d, NULL}, /* CIR3000 - Cirrus Logic V43 */ {0x0100440e, NULL}, /* CRD0001 - Cardinal MVP288IV ? */ {0x36033610, NULL}, /* DAV0336 - DAVICOM 336PNP MODEM */ {0x0000aa1a, NULL}, /* FUJ0000 - FUJITSU Modem 33600 PNP/I2 */ {0x1200c31e, NULL}, /* GVC0012 - VF1128HV-R9 (win modem?) */ {0x0303c31e, NULL}, /* GVC0303 - MaxTech 33.6 PnP D/F/V */ {0x0505c31e, NULL}, /* GVC0505 - GVC 56k Faxmodem */ {0x0050c31e, NULL}, /* GVC5000 - some GVC modem */ {0x3800f91e, NULL}, /* GWY0038 - Telepath with v.90 */ {0x9062f91e, NULL}, /* GWY6290 - Telepath with x2 Technology */ {0x0000f435, NULL}, /* MOT0000 - Motorola ModemSURFR 33.6 Intern */ {0x5015f435, NULL}, /* MOT1550 - Motorola ModemSURFR 56K Modem */ {0xf015f435, NULL}, /* MOT15F0 - Motorola VoiceSURFR 56K Modem */ {0x6045f435, NULL}, /* MOT4560 - Motorola ? */ {0x61e7a338, NULL}, /* NECE761 - 33.6Modem */ {0x0f804f3f, NULL}, /* OZO800f - Zoom 2812 (56k Modem) */ {0x39804f3f, NULL}, /* OZO8039 - Zoom 56k flex */ {0x3024a341, NULL}, /* PMC2430 - Pace 56 Voice Internal Modem */ {0x1000eb49, NULL}, /* ROK0010 - Rockwell ? */ {0x5002734a, NULL}, /* RSS0250 - 5614Jx3(G) Internal Modem */ {0x6202734a, NULL}, /* RSS0262 - 5614Jx3[G] V90+K56Flex Modem */ {0xc100ad4d, NULL}, /* SMM00C1 - Leopard 56k PnP */ {0x9012b04e, NULL}, /* SUP1290 - Supra ? */ {0x1013b04e, NULL}, /* SUP1310 - SupraExpress 336i PnP */ {0x8013b04e, NULL}, /* SUP1380 - SupraExpress 288i PnP Voice */ {0x8113b04e, NULL}, /* SUP1381 - SupraExpress 336i PnP Voice */ {0x5016b04e, NULL}, /* SUP1650 - Supra 336i Sp Intl */ {0x7016b04e, NULL}, /* SUP1670 - Supra 336i V+ Intl */ {0x7420b04e, NULL}, /* SUP2070 - Supra ? */ {0x8020b04e, NULL}, /* SUP2080 - Supra ? */ {0x8420b04e, NULL}, /* SUP2084 - SupraExpress 56i PnP */ {0x7121b04e, NULL}, /* SUP2171 - SupraExpress 56i Sp? */ {0x8024b04e, NULL}, /* SUP2480 - Supra ? */ {0x01007256, NULL}, /* USR0001 - U.S. Robotics Inc., Sportster W */ {0x02007256, NULL}, /* USR0002 - U.S. Robotics Inc. Sportster 33. */ {0x04007256, NULL}, /* USR0004 - USR Sportster 14.4k */ {0x06007256, NULL}, /* USR0006 - USR Sportster 33.6k */ {0x11007256, NULL}, /* USR0011 - USR ? */ {0x01017256, NULL}, /* USR0101 - USR ? */ {0x30207256, NULL}, /* USR2030 - U.S.Robotics Inc. Sportster 560 */ {0x50207256, NULL}, /* USR2050 - U.S.Robotics Inc. Sportster 33. */ {0x70207256, NULL}, /* USR2070 - U.S.Robotics Inc. Sportster 560 */ {0x30307256, NULL}, /* USR3030 - U.S. Robotics 56K FAX INT */ {0x31307256, NULL}, /* USR3031 - U.S. Robotics 56K FAX INT */ {0x50307256, NULL}, /* USR3050 - U.S. Robotics 56K FAX INT */ {0x70307256, NULL}, /* USR3070 - U.S. Robotics 56K Voice INT */ {0x90307256, NULL}, /* USR3090 - USR ? */ {0x90917256, NULL}, /* USR9190 - USR 56k Voice INT */ {0x0300695c, NULL}, /* WCI0003 - Fax/Voice/Modem/Speakphone/Asvd */ {0x61f7896a, NULL}, /* ZTIF761 - Zoom ComStar 33.6 */ {0} }; static int sio_isa_probe(dev) device_t dev; { /* Check isapnp ids */ if (ISA_PNP_PROBE(device_get_parent(dev), dev, sio_ids) == ENXIO) return (ENXIO); return (sioprobe(dev, 0)); } static int sioprobe(dev, xrid) device_t dev; int xrid; { #if 0 static bool_t already_init; device_t xdev; #endif struct com_s *com; bool_t failures[10]; int fn; device_t idev; Port_t iobase; intrmask_t irqmap[4]; intrmask_t irqs; u_char mcr_image; int result; u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; rid = xrid; port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); if (!port) return (ENXIO); com = device_get_softc(dev); com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); #if 0 /* * XXX this is broken - when we are first called, there are no * previously configured IO ports. We could hard code * 0x3f8, 0x2f8, 0x3e8, 0x2e8 etc but that's probably worse. * This code has been doing nothing since the conversion since * "count" is zero the first time around. */ if (!already_init) { /* * Turn off MCR_IENABLE for all likely serial ports. An unused * port with its MCR_IENABLE gate open will inhibit interrupts * from any used port that shares the interrupt vector. * XXX the gate enable is elsewhere for some multiports. */ device_t *devs; int count, i, xioport; devclass_get_devices(sio_devclass, &devs, &count); for (i = 0; i < count; i++) { xdev = devs[i]; if (device_is_enabled(xdev) && bus_get_resource(xdev, SYS_RES_IOPORT, 0, &xioport, NULL) == 0) outb(xioport + com_mcr, 0); } free(devs, M_TEMP); already_init = TRUE; } #endif if (COM_LLCONSOLE(flags)) { printf("sio%d: reserved for low-level i/o\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } /* * If the device is on a multiport card and has an AST/4 * compatible interrupt control register, initialize this * register and prepare to leave MCR_IENABLE clear in the mcr. * Otherwise, prepare to set MCR_IENABLE in the mcr. * Point idev to the device struct giving the correct id_irq. * This is the struct for the master device if there is one. */ idev = dev; mcr_image = MCR_IENABLE; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { Port_t xiobase; u_long io; idev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); if (idev == NULL) { printf("sio%d: master device %d not configured\n", device_get_unit(dev), COM_MPMASTER(flags)); idev = dev; } if (!COM_NOTAST4(flags)) { if (bus_get_resource(idev, SYS_RES_IOPORT, 0, &io, NULL) == 0) { xiobase = io; if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) == 0) outb(xiobase + com_scr, 0x80); else outb(xiobase + com_scr, 0); } mcr_image = 0; } } #endif /* COM_MULTIPORT */ if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) mcr_image = 0; bzero(failures, sizeof failures); iobase = rman_get_start(port); /* * We don't want to get actual interrupts, just masked ones. * Interrupts from this line should already be masked in the ICU, * but mask them in the processor as well in case there are some * (misconfigured) shared interrupts. */ disable_intr(); /* EXTRA DELAY? */ /* * Initialize the speed and the word size and wait long enough to * drain the maximum of 16 bytes of junk in device output queues. * The speed is undefined after a master reset and must be set * before relying on anything related to output. There may be * junk after a (very fast) soft reboot and (apparently) after * master reset. * XXX what about the UART bug avoided by waiting in comparam()? * We don't want to to wait long enough to drain at 2 bps. */ if (iobase == siocniobase) DELAY((16 + 1) * 1000000 / (comdefaultrate / 10)); else { sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); sio_setreg(com, com_dlbl, COMBRD(SIO_TEST_SPEED) & 0xff); sio_setreg(com, com_dlbh, (u_int) COMBRD(SIO_TEST_SPEED) >> 8); sio_setreg(com, com_cfcr, CFCR_8BITS); DELAY((16 + 1) * 1000000 / (SIO_TEST_SPEED / 10)); } /* * Enable the interrupt gate and disable device interupts. This * should leave the device driving the interrupt line low and * guarantee an edge trigger if an interrupt can be generated. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ irqmap[0] = isa_irq_pending(); /* * Attempt to set loopback mode so that we can send a null byte * without annoying any external device. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image | MCR_LOOPBACK); /* * Attempt to generate an output interrupt. On 8250's, setting * IER_ETXRDY generates an interrupt independent of the current * setting and independent of whether the THR is empty. On 16450's, * setting IER_ETXRDY generates an interrupt independent of the * current setting. On 16550A's, setting IER_ETXRDY only * generates an interrupt when IER_ETXRDY is not already set. */ sio_setreg(com, com_ier, IER_ETXRDY); /* * On some 16x50 incompatibles, setting IER_ETXRDY doesn't generate * an interrupt. They'd better generate one for actually doing * output. Loopback may be broken on the same incompatibles but * it's unlikely to do more than allow the null byte out. */ sio_setreg(com, com_data, 0); DELAY((1 + 2) * 1000000 / (SIO_TEST_SPEED / 10)); /* * Turn off loopback mode so that the interrupt gate works again * (MCR_IENABLE was hidden). This should leave the device driving * an interrupt line high. It doesn't matter if the interrupt * line oscillates while we are not looking at it, since interrupts * are disabled. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); /* * Some pcmcia cards have the "TXRDY bug", so we check everyone * for IIR_TXRDY implementation ( Palido 321s, DC-1S... ) */ if (COM_NOPROBE(flags)) { /* Reading IIR register twice */ for (fn = 0; fn < 2; fn ++) { DELAY(10000); failures[6] = sio_getreg(com, com_iir); } /* Check IIR_TXRDY clear ? */ result = 0; if (failures[6] & IIR_TXRDY) { /* Nop, Double check with clearing IER */ sio_setreg(com, com_ier, 0); if (sio_getreg(com, com_iir) & IIR_NOPEND) { /* Ok. we're familia this gang */ SET_FLAG(dev, COM_C_IIR_TXRDYBUG); } else { /* Unknown, Just omit this chip.. XXX */ result = ENXIO; } } else { /* OK. this is well-known guys */ CLR_FLAG(dev, COM_C_IIR_TXRDYBUG); } sio_setreg(com, com_cfcr, CFCR_8BITS); enable_intr(); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 0 : result); } /* * Check that * o the CFCR, IER and MCR in UART hold the values written to them * (the values happen to be all distinct - this is good for * avoiding false positive tests from bus echoes). * o an output interrupt is generated and its vector is correct. * o the interrupt goes away when the IIR in the UART is read. */ /* EXTRA DELAY? */ failures[0] = sio_getreg(com, com_cfcr) - CFCR_8BITS; failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ irqmap[1] = isa_irq_pending(); failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; DELAY(1000); /* XXX */ irqmap[2] = isa_irq_pending(); failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; /* * Turn off all device interrupts and check that they go off properly. * Leave MCR_IENABLE alone. For ports without a master port, it gates * the OUT2 output of the UART to * the ICU input. Closing the gate would give a floating ICU input * (unless there is another device driving it) and spurious interrupts. * (On the system that this was first tested on, the input floats high * and gives a (masked) interrupt as soon as the gate is closed.) */ sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); /* dummy to avoid bus echo */ failures[7] = sio_getreg(com, com_ier); DELAY(1000); /* XXX */ irqmap[3] = isa_irq_pending(); failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; enable_intr(); irqs = irqmap[1] & ~irqmap[0]; if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && ((1 << xirq) & irqs) == 0) printf( "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", device_get_unit(dev), xirq, irqs); if (bootverbose) printf("sio%d: irq maps: %#x %#x %#x %#x\n", device_get_unit(dev), irqmap[0], irqmap[1], irqmap[2], irqmap[3]); result = 0; for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) { sio_setreg(com, com_mcr, 0); result = ENXIO; if (bootverbose) { printf("sio%d: probe failed test(s):", device_get_unit(dev)); for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) printf(" %d", fn); printf("\n"); } break; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 0 : result); } #ifdef COM_ESP static int espattach(com, esp_port) struct com_s *com; Port_t esp_port; { u_char dips; u_char val; /* * Check the ESP-specific I/O port to see if we're an ESP * card. If not, return failure immediately. */ if ((inb(esp_port) & 0xf3) == 0) { printf(" port 0x%x is not an ESP board?\n", esp_port); return (0); } /* * We've got something that claims to be a Hayes ESP card. * Let's hope so. */ /* Get the dip-switch configuration */ outb(esp_port + ESP_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP_STATUS1); /* * Bits 0,1 of dips say which COM port we are. */ if (rman_get_start(com->ioportres) == likely_com_ports[dips & 0x03]) printf(" : ESP"); else { printf(" esp_port has com %d\n", dips & 0x03); return (0); } /* * Check for ESP version 2.0 or later: bits 4,5,6 = 010. */ outb(esp_port + ESP_CMD1, ESP_GETTEST); val = inb(esp_port + ESP_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP_STATUS2); if ((val & 0x70) < 0x20) { printf("-old (%o)", val & 0x70); return (0); } /* * Check for ability to emulate 16550: bit 7 == 1 */ if ((dips & 0x80) == 0) { printf(" slave"); return (0); } /* * Okay, we seem to be a Hayes ESP card. Whee. */ com->esp = TRUE; com->esp_port = esp_port; return (1); } #endif /* COM_ESP */ static int sio_isa_attach(dev) device_t dev; { return (sioattach(dev, 0)); } static int sioattach(dev, xrid) device_t dev; int xrid; { struct com_s *com; #ifdef COM_ESP Port_t *espp; #endif Port_t iobase; int unit; u_int flags; int rid; struct resource *port; int ret; rid = xrid; port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); if (!port) return (ENXIO); iobase = rman_get_start(port); unit = device_get_unit(dev); com = device_get_softc(dev); flags = device_get_flags(dev); if (unit >= sio_numunits) sio_numunits = unit + 1; /* * sioprobe() has initialized the device registers as follows: * o cfcr = CFCR_8BITS. * It is most important that CFCR_DLAB is off, so that the * data port is not hidden when we enable interrupts. * o ier = 0. * Interrupts are only enabled when the line is open. * o mcr = MCR_IENABLE, or 0 if the port has AST/4 compatible * interrupt control register or the config specifies no irq. * Keeping MCR_DTR and MCR_RTS off might stop the external * device from sending before we are ready. */ bzero(com, sizeof *com); com->unit = unit; com->ioportres = port; com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); com->cfcr_image = CFCR_8BITS; com->dtr_wait = 3 * hz; com->loses_outints = COM_LOSESOUTINTS(flags) != 0; com->no_irq = bus_get_resource(dev, SYS_RES_IRQ, 0, NULL, NULL) != 0; com->tx_fifo_size = 1; com->obufs[0].l_head = com->obuf1; com->obufs[1].l_head = com->obuf2; com->data_port = iobase + com_data; com->int_id_port = iobase + com_iir; com->modem_ctl_port = iobase + com_mcr; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + com_lsr; com->modem_status_port = iobase + com_msr; com->intr_ctl_port = iobase + com_ier; /* * We don't use all the flags from since they * are only relevant for logins. It's important to have echo off * initially so that the line doesn't start blathering before the * echo flag can be turned off. */ com->it_in.c_iflag = 0; com->it_in.c_oflag = 0; com->it_in.c_cflag = TTYDEF_CFLAG; com->it_in.c_lflag = 0; if (unit == comconsole) { com->it_in.c_iflag = TTYDEF_IFLAG; com->it_in.c_oflag = TTYDEF_OFLAG; com->it_in.c_cflag = TTYDEF_CFLAG | CLOCAL; com->it_in.c_lflag = TTYDEF_LFLAG; com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL; com->lt_out.c_ispeed = com->lt_out.c_ospeed = com->lt_in.c_ispeed = com->lt_in.c_ospeed = com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate; } else com->it_in.c_ispeed = com->it_in.c_ospeed = TTYDEF_SPEED; if (siosetwater(com, com->it_in.c_ispeed) != 0) { enable_intr(); /* * Leave i/o resources allocated if this is a `cn'-level * console, so that other devices can't snarf them. */ if (iobase != siocniobase) bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } enable_intr(); termioschars(&com->it_in); com->it_out = com->it_in; /* attempt to determine UART type */ printf("sio%d: type", unit); #ifdef COM_MULTIPORT if (!COM_ISMULTIPORT(flags) && !COM_IIR_TXRDYBUG(flags)) #else if (!COM_IIR_TXRDYBUG(flags)) #endif { u_char scr; u_char scr1; u_char scr2; scr = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0xa5); scr1 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0x5a); scr2 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, scr); if (scr1 != 0xa5 || scr2 != 0x5a) { printf(" 8250"); goto determined_type; } } sio_setreg(com, com_fifo, FIFO_ENABLE | FIFO_RX_HIGH); DELAY(100); com->st16650a = 0; switch (inb(com->int_id_port) & IIR_FIFO_MASK) { case FIFO_RX_LOW: printf(" 16450"); break; case FIFO_RX_MEDL: printf(" 16450?"); break; case FIFO_RX_MEDH: printf(" 16550?"); break; case FIFO_RX_HIGH: if (COM_NOFIFO(flags)) { printf(" 16550A fifo disabled"); } else { com->hasfifo = TRUE; if (COM_ST16650A(flags)) { com->st16650a = 1; com->tx_fifo_size = 32; printf(" ST16650A"); } else { com->tx_fifo_size = COM_FIFOSIZE(flags); printf(" 16550A"); } } #ifdef COM_ESP for (espp = likely_esp_ports; *espp != 0; espp++) if (espattach(com, *espp)) { com->tx_fifo_size = 1024; break; } #endif if (!com->st16650a) { if (!com->tx_fifo_size) com->tx_fifo_size = 16; else printf(" lookalike with %d bytes FIFO", com->tx_fifo_size); } break; } #ifdef COM_ESP if (com->esp) { /* * Set 16550 compatibility mode. * We don't use the ESP_MODE_SCALE bit to increase the * fifo trigger levels because we can't handle large * bursts of input. * XXX flow control should be set in comparam(), not here. */ outb(com->esp_port + ESP_CMD1, ESP_SETMODE); outb(com->esp_port + ESP_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); /* Set RTS/CTS flow control. */ outb(com->esp_port + ESP_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP_CMD2, ESP_FLOW_CTS); /* Set flow-control levels. */ outb(com->esp_port + ESP_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP_CMD2, HIBYTE(768)); outb(com->esp_port + ESP_CMD2, LOBYTE(768)); outb(com->esp_port + ESP_CMD2, HIBYTE(512)); outb(com->esp_port + ESP_CMD2, LOBYTE(512)); } #endif /* COM_ESP */ sio_setreg(com, com_fifo, 0); determined_type: ; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { device_t masterdev; com->multiport = TRUE; printf(" (multiport"); if (unit == COM_MPMASTER(flags)) printf(" master"); printf(")"); masterdev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); com->no_irq = (masterdev == NULL || bus_get_resource(masterdev, SYS_RES_IRQ, 0, NULL, NULL) != 0); } #endif /* COM_MULTIPORT */ if (unit == comconsole) printf(", console"); if (COM_IIR_TXRDYBUG(flags)) printf(" with a bogus IIR_TXRDY register"); printf("\n"); if (!sio_registered) { register_swi(SWI_TTY, siopoll); sio_registered = TRUE; } make_dev(&sio_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, "ttyd%r", unit); make_dev(&sio_cdevsw, unit | CONTROL_INIT_STATE, UID_ROOT, GID_WHEEL, 0600, "ttyid%r", unit); make_dev(&sio_cdevsw, unit | CONTROL_LOCK_STATE, UID_ROOT, GID_WHEEL, 0600, "ttyld%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK, UID_UUCP, GID_DIALER, 0660, "cuaa%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK | CONTROL_INIT_STATE, UID_UUCP, GID_DIALER, 0660, "cuaia%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK | CONTROL_LOCK_STATE, UID_UUCP, GID_DIALER, 0660, "cuala%r", unit); com->flags = flags; com->pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR; pps_init(&com->pps); rid = 0; com->irqres = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0ul, ~0ul, 1, RF_ACTIVE); if (com->irqres) { ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres, INTR_TYPE_TTY | INTR_TYPE_FAST, siointr, com, &com->cookie); if (ret) { ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres, INTR_TYPE_TTY, siointr, com, &com->cookie); if (ret == 0) device_printf(dev, "unable to activate interrupt in fast mode - using normal mode"); } if (ret) device_printf(dev, "could not activate interrupt\n"); } return (0); } static int sioopen(dev, flag, mode, p) dev_t dev; int flag; int mode; struct proc *p; { struct com_s *com; int error; int mynor; int s; struct tty *tp; int unit; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); if (com == NULL) return (ENXIO); if (com->gone) return (ENXIO); if (mynor & CONTROL_MASK) return (0); tp = dev->si_tty = com->tp = ttymalloc(com->tp); s = spltty(); /* * We jump to this label after all non-interrupted sleeps to pick * up any changes of the device state. */ open_top: while (com->state & CS_DTR_OFF) { error = tsleep(&com->dtr_wait, TTIPRI | PCATCH, "siodtr", 0); if (com_addr(unit) == NULL) return (ENXIO); if (error != 0 || com->gone) goto out; } if (tp->t_state & TS_ISOPEN) { /* * The device is open, so everything has been initialized. * Handle conflicts. */ if (mynor & CALLOUT_MASK) { if (!com->active_out) { error = EBUSY; goto out; } } else { if (com->active_out) { if (flag & O_NONBLOCK) { error = EBUSY; goto out; } error = tsleep(&com->active_out, TTIPRI | PCATCH, "siobi", 0); if (com_addr(unit) == NULL) return (ENXIO); if (error != 0 || com->gone) goto out; goto open_top; } } if (tp->t_state & TS_XCLUDE && suser(p)) { error = EBUSY; goto out; } } else { /* * The device isn't open, so there are no conflicts. * Initialize it. Initialization is done twice in many * cases: to preempt sleeping callin opens if we are * callout, and to complete a callin open after DCD rises. */ tp->t_oproc = comstart; tp->t_param = comparam; tp->t_stop = comstop; tp->t_dev = dev; tp->t_termios = mynor & CALLOUT_MASK ? com->it_out : com->it_in; (void)commctl(com, TIOCM_DTR | TIOCM_RTS, DMSET); com->poll = com->no_irq; com->poll_output = com->loses_outints; ++com->wopeners; error = comparam(tp, &tp->t_termios); --com->wopeners; if (error != 0) goto out; /* * XXX we should goto open_top if comparam() slept. */ if (com->hasfifo) { /* * (Re)enable and drain fifos. * * Certain SMC chips cause problems if the fifos * are enabled while input is ready. Turn off the * fifo if necessary to clear the input. We test * the input ready bit after enabling the fifos * since we've already enabled them in comparam() * and to handle races between enabling and fresh * input. */ while (TRUE) { sio_setreg(com, com_fifo, FIFO_RCV_RST | FIFO_XMT_RST | com->fifo_image); /* * XXX the delays are for superstitious * historical reasons. It must be less than * the character time at the maximum * supported speed (87 usec at 115200 bps * 8N1). Otherwise we might loop endlessly * if data is streaming in. We used to use * delays of 100. That usually worked * because DELAY(100) used to usually delay * for about 85 usec instead of 100. */ DELAY(50); if (!(inb(com->line_status_port) & LSR_RXRDY)) break; sio_setreg(com, com_fifo, 0); DELAY(50); (void) inb(com->data_port); } } disable_intr(); (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status = inb(com->modem_status_port); if (COM_IIR_TXRDYBUG(com->flags)) { outb(com->intr_ctl_port, IER_ERXRDY | IER_ERLS | IER_EMSC); } else { outb(com->intr_ctl_port, IER_ERXRDY | IER_ETXRDY | IER_ERLS | IER_EMSC); } enable_intr(); /* * Handle initial DCD. Callout devices get a fake initial * DCD (trapdoor DCD). If we are callout, then any sleeping * callin opens get woken up and resume sleeping on "siobi" * instead of "siodcd". */ /* * XXX `mynor & CALLOUT_MASK' should be * `tp->t_cflag & (SOFT_CARRIER | TRAPDOOR_CARRIER) where * TRAPDOOR_CARRIER is the default initial state for callout * devices and SOFT_CARRIER is like CLOCAL except it hides * the true carrier. */ if (com->prev_modem_status & MSR_DCD || mynor & CALLOUT_MASK) (*linesw[tp->t_line].l_modem)(tp, 1); } /* * Wait for DCD if necessary. */ if (!(tp->t_state & TS_CARR_ON) && !(mynor & CALLOUT_MASK) && !(tp->t_cflag & CLOCAL) && !(flag & O_NONBLOCK)) { ++com->wopeners; error = tsleep(TSA_CARR_ON(tp), TTIPRI | PCATCH, "siodcd", 0); if (com_addr(unit) == NULL) return (ENXIO); --com->wopeners; if (error != 0 || com->gone) goto out; goto open_top; } error = (*linesw[tp->t_line].l_open)(dev, tp); disc_optim(tp, &tp->t_termios, com); if (tp->t_state & TS_ISOPEN && mynor & CALLOUT_MASK) com->active_out = TRUE; siosettimeout(); out: splx(s); if (!(tp->t_state & TS_ISOPEN) && com->wopeners == 0) comhardclose(com); return (error); } static int sioclose(dev, flag, mode, p) dev_t dev; int flag; int mode; struct proc *p; { struct com_s *com; int mynor; int s; struct tty *tp; mynor = minor(dev); if (mynor & CONTROL_MASK) return (0); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL) return (ENODEV); tp = com->tp; s = spltty(); (*linesw[tp->t_line].l_close)(tp, flag); disc_optim(tp, &tp->t_termios, com); comstop(tp, FREAD | FWRITE); comhardclose(com); ttyclose(tp); siosettimeout(); splx(s); if (com->gone) { printf("sio%d: gone\n", com->unit); s = spltty(); if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); bzero(tp, sizeof *tp); splx(s); } return (0); } static void comhardclose(com) struct com_s *com; { int s; struct tty *tp; int unit; unit = com->unit; s = spltty(); com->poll = FALSE; com->poll_output = FALSE; com->do_timestamp = FALSE; com->do_dcd_timestamp = FALSE; com->pps.ppsparam.mode = 0; sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); { sio_setreg(com, com_ier, 0); tp = com->tp; if (tp->t_cflag & HUPCL /* * XXX we will miss any carrier drop between here and the * next open. Perhaps we should watch DCD even when the * port is closed; it is not sufficient to check it at * the next open because it might go up and down while * we're not watching. */ || (!com->active_out && !(com->prev_modem_status & MSR_DCD) && !(com->it_in.c_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN)) { (void)commctl(com, TIOCM_DTR, DMBIC); if (com->dtr_wait != 0 && !(com->state & CS_DTR_OFF)) { timeout(siodtrwakeup, com, com->dtr_wait); com->state |= CS_DTR_OFF; } } } if (com->hasfifo) { /* * Disable fifos so that they are off after controlled * reboots. Some BIOSes fail to detect 16550s when the * fifos are enabled. */ sio_setreg(com, com_fifo, 0); } com->active_out = FALSE; wakeup(&com->active_out); wakeup(TSA_CARR_ON(tp)); /* restart any wopeners */ splx(s); } static int sioread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int mynor; struct com_s *com; mynor = minor(dev); if (mynor & CONTROL_MASK) return (ENODEV); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL || com->gone) return (ENODEV); return ((*linesw[com->tp->t_line].l_read)(com->tp, uio, flag)); } static int siowrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int mynor; struct com_s *com; int unit; mynor = minor(dev); if (mynor & CONTROL_MASK) return (ENODEV); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); if (com == NULL || com->gone) return (ENODEV); /* * (XXX) We disallow virtual consoles if the physical console is * a serial port. This is in case there is a display attached that * is not the console. In that situation we don't need/want the X * server taking over the console. */ if (constty != NULL && unit == comconsole) constty = NULL; return ((*linesw[com->tp->t_line].l_write)(com->tp, uio, flag)); } static void siobusycheck(chan) void *chan; { struct com_s *com; int s; com = (struct com_s *)chan; /* * Clear TS_BUSY if low-level output is complete. * spl locking is sufficient because siointr1() does not set CS_BUSY. * If siointr1() clears CS_BUSY after we look at it, then we'll get * called again. Reading the line status port outside of siointr1() * is safe because CS_BUSY is clear so there are no output interrupts * to lose. */ s = spltty(); if (com->state & CS_BUSY) com->extra_state &= ~CSE_BUSYCHECK; /* False alarm. */ else if ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY)) { com->tp->t_state &= ~TS_BUSY; ttwwakeup(com->tp); com->extra_state &= ~CSE_BUSYCHECK; } else timeout(siobusycheck, com, hz / 100); splx(s); } static void siodtrwakeup(chan) void *chan; { struct com_s *com; com = (struct com_s *)chan; com->state &= ~CS_DTR_OFF; wakeup(&com->dtr_wait); } static void sioinput(com) struct com_s *com; { u_char *buf; int incc; u_char line_status; int recv_data; struct tty *tp; buf = com->ibuf; tp = com->tp; if (!(tp->t_state & TS_ISOPEN) || !(tp->t_cflag & CREAD)) { com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; return; } if (tp->t_state & TS_CAN_BYPASS_L_RINT) { /* * Avoid the grotesquely inefficient lineswitch routine * (ttyinput) in "raw" mode. It usually takes about 450 * instructions (that's without canonical processing or echo!). * slinput is reasonably fast (usually 40 instructions plus * call overhead). */ do { enable_intr(); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat && (com->state & CS_RTS_IFLOW || tp->t_iflag & IXOFF) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); com->delta_error_counts[CE_TTY_BUF_OVERFLOW] += b_to_q((char *)buf, incc, &tp->t_rawq); buf += incc; tk_nin += incc; tk_rawcc += incc; tp->t_rawcc += incc; ttwakeup(tp); if (tp->t_state & TS_TTSTOP && (tp->t_iflag & IXANY || tp->t_cc[VSTART] == tp->t_cc[VSTOP])) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; comstart(tp); } disable_intr(); } while (buf < com->iptr); } else { do { enable_intr(); line_status = buf[com->ierroff]; recv_data = *buf++; if (line_status & (LSR_BI | LSR_FE | LSR_OE | LSR_PE)) { if (line_status & LSR_BI) recv_data |= TTY_BI; if (line_status & LSR_FE) recv_data |= TTY_FE; if (line_status & LSR_OE) recv_data |= TTY_OE; if (line_status & LSR_PE) recv_data |= TTY_PE; } (*linesw[tp->t_line].l_rint)(recv_data, tp); disable_intr(); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; /* * There is now room for another low-level buffer full of input, * so enable RTS if it is now disabled and there is room in the * high-level buffer. */ if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } void siointr(arg) void *arg; { #ifndef COM_MULTIPORT COM_LOCK(); siointr1((struct com_s *) arg); COM_UNLOCK(); #else /* COM_MULTIPORT */ bool_t possibly_more_intrs; int unit; struct com_s *com; /* * Loop until there is no activity on any port. This is necessary * to get an interrupt edge more than to avoid another interrupt. * If the IRQ signal is just an OR of the IRQ signals from several * devices, then the edge from one may be lost because another is * on. */ COM_LOCK(); do { possibly_more_intrs = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); /* * XXX COM_LOCK(); * would it work here, or be counter-productive? */ if (com != NULL && !com->gone && (inb(com->int_id_port) & IIR_IMASK) != IIR_NOPEND) { siointr1(com); possibly_more_intrs = TRUE; } /* XXX COM_UNLOCK(); */ } } while (possibly_more_intrs); COM_UNLOCK(); #endif /* COM_MULTIPORT */ } static void siointr1(com) struct com_s *com; { u_char line_status; u_char modem_status; u_char *ioptr; u_char recv_data; u_char int_ctl; u_char int_ctl_new; struct timecounter *tc; u_int count; int_ctl = inb(com->intr_ctl_port); int_ctl_new = int_ctl; while (!com->gone) { if (com->pps.ppsparam.mode & PPS_CAPTUREBOTH) { modem_status = inb(com->modem_status_port); if ((modem_status ^ com->last_modem_status) & MSR_DCD) { tc = timecounter; count = tc->tc_get_timecount(tc); pps_event(&com->pps, tc, count, (modem_status & MSR_DCD) ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR); } } line_status = inb(com->line_status_port); /* input event? (check first to help avoid overruns) */ while (line_status & LSR_RCV_MASK) { /* break/unnattached error bits or real input? */ if (!(line_status & LSR_RXRDY)) recv_data = 0; else recv_data = inb(com->data_port); #if defined(DDB) && defined(ALT_BREAK_TO_DEBUGGER) /* * Solaris implements a new BREAK which is initiated * by a character sequence CR ~ ^b which is similar * to a familiar pattern used on Sun servers by the * Remote Console. */ #define KEY_CRTLB 2 /* ^B */ #define KEY_CR 13 /* CR '\r' */ #define KEY_TILDE 126 /* ~ */ if (com->unit == comconsole) { static int brk_state1 = 0, brk_state2 = 0; if (recv_data == KEY_CR) { brk_state1 = recv_data; brk_state2 = 0; } else if (brk_state1 == KEY_CR && (recv_data == KEY_TILDE || recv_data == KEY_CRTLB)) { if (recv_data == KEY_TILDE) brk_state2 = recv_data; else if (brk_state2 == KEY_TILDE && recv_data == KEY_CRTLB) { breakpoint(); brk_state1 = brk_state2 = 0; goto cont; } else brk_state2 = 0; } else brk_state1 = 0; } #endif if (line_status & (LSR_BI | LSR_FE | LSR_PE)) { /* * Don't store BI if IGNBRK or FE/PE if IGNPAR. * Otherwise, push the work to a higher level * (to handle PARMRK) if we're bypassing. * Otherwise, convert BI/FE and PE+INPCK to 0. * * This makes bypassing work right in the * usual "raw" case (IGNBRK set, and IGNPAR * and INPCK clear). * * Note: BI together with FE/PE means just BI. */ if (line_status & LSR_BI) { #if defined(DDB) && defined(BREAK_TO_DEBUGGER) if (com->unit == comconsole) { breakpoint(); goto cont; } #endif if (com->tp == NULL || com->tp->t_iflag & IGNBRK) goto cont; } else { if (com->tp == NULL || com->tp->t_iflag & IGNPAR) goto cont; } if (com->tp->t_state & TS_CAN_BYPASS_L_RINT && (line_status & (LSR_BI | LSR_FE) || com->tp->t_iflag & INPCK)) recv_data = 0; } ++com->bytes_in; if (com->hotchar != 0 && recv_data == com->hotchar) setsofttty(); ioptr = com->iptr; if (ioptr >= com->ibufend) CE_RECORD(com, CE_INTERRUPT_BUF_OVERFLOW); else { if (com->do_timestamp) microtime(&com->timestamp); ++com_events; schedsofttty(); #if 0 /* for testing input latency vs efficiency */ if (com->iptr - com->ibuf == 8) setsofttty(); #endif ioptr[0] = recv_data; ioptr[com->ierroff] = line_status; com->iptr = ++ioptr; if (ioptr == com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); if (line_status & LSR_OE) CE_RECORD(com, CE_OVERRUN); } cont: /* * "& 0x7F" is to avoid the gcc-1.40 generating a slow * jump from the top of the loop to here */ line_status = inb(com->line_status_port) & 0x7F; } /* modem status change? (always check before doing output) */ modem_status = inb(com->modem_status_port); if (modem_status != com->last_modem_status) { if (com->do_dcd_timestamp && !(com->last_modem_status & MSR_DCD) && modem_status & MSR_DCD) microtime(&com->dcd_timestamp); /* * Schedule high level to handle DCD changes. Note * that we don't use the delta bits anywhere. Some * UARTs mess them up, and it's easy to remember the * previous bits and calculate the delta. */ com->last_modem_status = modem_status; if (!(com->state & CS_CHECKMSR)) { com_events += LOTS_OF_EVENTS; com->state |= CS_CHECKMSR; setsofttty(); } /* handle CTS change immediately for crisp flow ctl */ if (com->state & CS_CTS_OFLOW) { if (modem_status & MSR_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } } /* output queued and everything ready? */ if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { ioptr = com->obufq.l_head; if (com->tx_fifo_size > 1) { u_int ocount; ocount = com->obufq.l_tail - ioptr; if (ocount > com->tx_fifo_size) ocount = com->tx_fifo_size; com->bytes_out += ocount; do outb(com->data_port, *ioptr++); while (--ocount != 0); } else { outb(com->data_port, *ioptr++); ++com->bytes_out; } com->obufq.l_head = ioptr; if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl_new = int_ctl | IER_ETXRDY; } if (ioptr >= com->obufq.l_tail) { struct lbq *qp; qp = com->obufq.l_next; qp->l_queued = FALSE; qp = qp->l_next; if (qp != NULL) { com->obufq.l_head = qp->l_head; com->obufq.l_tail = qp->l_tail; com->obufq.l_next = qp; } else { /* output just completed */ if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl_new = int_ctl & ~IER_ETXRDY; } com->state &= ~CS_BUSY; } if (!(com->state & CS_ODONE)) { com_events += LOTS_OF_EVENTS; com->state |= CS_ODONE; setsofttty(); /* handle at high level ASAP */ } } if (COM_IIR_TXRDYBUG(com->flags) && (int_ctl != int_ctl_new)) { outb(com->intr_ctl_port, int_ctl_new); } } /* finished? */ #ifndef COM_MULTIPORT if ((inb(com->int_id_port) & IIR_IMASK) == IIR_NOPEND) #endif /* COM_MULTIPORT */ return; } } static int sioioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { struct com_s *com; int error; int mynor; int s; struct tty *tp; #if defined(COMPAT_43) || defined(COMPAT_SUNOS) u_long oldcmd; struct termios term; #endif mynor = minor(dev); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL || com->gone) return (ENODEV); if (mynor & CONTROL_MASK) { struct termios *ct; switch (mynor & CONTROL_MASK) { case CONTROL_INIT_STATE: ct = mynor & CALLOUT_MASK ? &com->it_out : &com->it_in; break; case CONTROL_LOCK_STATE: ct = mynor & CALLOUT_MASK ? &com->lt_out : &com->lt_in; break; default: return (ENODEV); /* /dev/nodev */ } switch (cmd) { case TIOCSETA: error = suser(p); if (error != 0) return (error); *ct = *(struct termios *)data; return (0); case TIOCGETA: *(struct termios *)data = *ct; return (0); case TIOCGETD: *(int *)data = TTYDISC; return (0); case TIOCGWINSZ: bzero(data, sizeof(struct winsize)); return (0); default: return (ENOTTY); } } tp = com->tp; #if defined(COMPAT_43) || defined(COMPAT_SUNOS) term = tp->t_termios; oldcmd = cmd; error = ttsetcompat(tp, &cmd, data, &term); if (error != 0) return (error); if (cmd != oldcmd) data = (caddr_t)&term; #endif if (cmd == TIOCSETA || cmd == TIOCSETAW || cmd == TIOCSETAF) { int cc; struct termios *dt = (struct termios *)data; struct termios *lt = mynor & CALLOUT_MASK ? &com->lt_out : &com->lt_in; dt->c_iflag = (tp->t_iflag & lt->c_iflag) | (dt->c_iflag & ~lt->c_iflag); dt->c_oflag = (tp->t_oflag & lt->c_oflag) | (dt->c_oflag & ~lt->c_oflag); dt->c_cflag = (tp->t_cflag & lt->c_cflag) | (dt->c_cflag & ~lt->c_cflag); dt->c_lflag = (tp->t_lflag & lt->c_lflag) | (dt->c_lflag & ~lt->c_lflag); for (cc = 0; cc < NCCS; ++cc) if (lt->c_cc[cc] != 0) dt->c_cc[cc] = tp->t_cc[cc]; if (lt->c_ispeed != 0) dt->c_ispeed = tp->t_ispeed; if (lt->c_ospeed != 0) dt->c_ospeed = tp->t_ospeed; } error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); if (error != ENOIOCTL) return (error); s = spltty(); error = ttioctl(tp, cmd, data, flag); disc_optim(tp, &tp->t_termios, com); if (error != ENOIOCTL) { splx(s); return (error); } switch (cmd) { case TIOCSBRK: sio_setreg(com, com_cfcr, com->cfcr_image |= CFCR_SBREAK); break; case TIOCCBRK: sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); break; case TIOCSDTR: (void)commctl(com, TIOCM_DTR, DMBIS); break; case TIOCCDTR: (void)commctl(com, TIOCM_DTR, DMBIC); break; /* * XXX should disallow changing MCR_RTS if CS_RTS_IFLOW is set. The * changes get undone on the next call to comparam(). */ case TIOCMSET: (void)commctl(com, *(int *)data, DMSET); break; case TIOCMBIS: (void)commctl(com, *(int *)data, DMBIS); break; case TIOCMBIC: (void)commctl(com, *(int *)data, DMBIC); break; case TIOCMGET: *(int *)data = commctl(com, 0, DMGET); break; case TIOCMSDTRWAIT: /* must be root since the wait applies to following logins */ error = suser(p); if (error != 0) { splx(s); return (error); } com->dtr_wait = *(int *)data * hz / 100; break; case TIOCMGDTRWAIT: *(int *)data = com->dtr_wait * 100 / hz; break; case TIOCTIMESTAMP: com->do_timestamp = TRUE; *(struct timeval *)data = com->timestamp; break; case TIOCDCDTIMESTAMP: com->do_dcd_timestamp = TRUE; *(struct timeval *)data = com->dcd_timestamp; break; default: splx(s); error = pps_ioctl(cmd, data, &com->pps); if (error == ENODEV) error = ENOTTY; return (error); } splx(s); return (0); } static void siopoll() { int unit; if (com_events == 0) return; repeat: for (unit = 0; unit < sio_numunits; ++unit) { struct com_s *com; int incc; struct tty *tp; com = com_addr(unit); if (com == NULL) continue; tp = com->tp; if (tp == NULL || com->gone) { /* * Discard any events related to never-opened or * going-away devices. */ disable_intr(); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { incc += LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; } com_events -= incc; enable_intr(); continue; } if (com->iptr != com->ibuf) { disable_intr(); sioinput(com); enable_intr(); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; disable_intr(); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; enable_intr(); if (delta_modem_status & MSR_DCD) (*linesw[tp->t_line].l_modem) (tp, com->prev_modem_status & MSR_DCD); } if (com->state & CS_ODONE) { disable_intr(); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; enable_intr(); if (!(com->state & CS_BUSY) && !(com->extra_state & CSE_BUSYCHECK)) { timeout(siobusycheck, com, hz / 100); com->extra_state |= CSE_BUSYCHECK; } (*linesw[tp->t_line].l_start)(tp); } if (com_events == 0) break; } if (com_events >= LOTS_OF_EVENTS) goto repeat; } static int comparam(tp, t) struct tty *tp; struct termios *t; { u_int cfcr; int cflag; struct com_s *com; int divisor; u_char dlbh; u_char dlbl; int s; int unit; /* do historical conversions */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; /* check requested parameters */ divisor = ttspeedtab(t->c_ospeed, comspeedtab); if (divisor < 0 || (divisor > 0 && t->c_ispeed != t->c_ospeed)) return (EINVAL); /* parameters are OK, convert them to the com struct and the device */ unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); if (com == NULL) return (ENODEV); s = spltty(); if (divisor == 0) (void)commctl(com, TIOCM_DTR, DMBIC); /* hang up line */ else (void)commctl(com, TIOCM_DTR, DMBIS); cflag = t->c_cflag; switch (cflag & CSIZE) { case CS5: cfcr = CFCR_5BITS; break; case CS6: cfcr = CFCR_6BITS; break; case CS7: cfcr = CFCR_7BITS; break; default: cfcr = CFCR_8BITS; break; } if (cflag & PARENB) { cfcr |= CFCR_PENAB; if (!(cflag & PARODD)) cfcr |= CFCR_PEVEN; } if (cflag & CSTOPB) cfcr |= CFCR_STOPB; if (com->hasfifo && divisor != 0) { /* * Use a fifo trigger level low enough so that the input * latency from the fifo is less than about 16 msec and * the total latency is less than about 30 msec. These * latencies are reasonable for humans. Serial comms * protocols shouldn't expect anything better since modem * latencies are larger. */ com->fifo_image = t->c_ospeed <= 4800 ? FIFO_ENABLE : FIFO_ENABLE | FIFO_RX_HIGH; #ifdef COM_ESP /* * The Hayes ESP card needs the fifo DMA mode bit set * in compatibility mode. If not, it will interrupt * for each character received. */ if (com->esp) com->fifo_image |= FIFO_DMA_MODE; #endif sio_setreg(com, com_fifo, com->fifo_image); } /* * This returns with interrupts disabled so that we can complete * the speed change atomically. Keeping interrupts disabled is * especially important while com_data is hidden. */ (void) siosetwater(com, t->c_ispeed); if (divisor != 0) { sio_setreg(com, com_cfcr, cfcr | CFCR_DLAB); /* * Only set the divisor registers if they would change, * since on some 16550 incompatibles (UMC8669F), setting * them while input is arriving them loses sync until * data stops arriving. */ dlbl = divisor & 0xFF; if (sio_getreg(com, com_dlbl) != dlbl) sio_setreg(com, com_dlbl, dlbl); dlbh = (u_int) divisor >> 8; if (sio_getreg(com, com_dlbh) != dlbh) sio_setreg(com, com_dlbh, dlbh); } sio_setreg(com, com_cfcr, com->cfcr_image = cfcr); if (!(tp->t_state & TS_TTSTOP)) com->state |= CS_TTGO; if (cflag & CRTS_IFLOW) { if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) | 0x40); } com->state |= CS_RTS_IFLOW; /* * If CS_RTS_IFLOW just changed from off to on, the change * needs to be propagated to MCR_RTS. This isn't urgent, * so do it later by calling comstart() instead of repeating * a lot of code from comstart() here. */ } else if (com->state & CS_RTS_IFLOW) { com->state &= ~CS_RTS_IFLOW; /* * CS_RTS_IFLOW just changed from on to off. Force MCR_RTS * on here, since comstart() won't do it later. */ outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) & ~0x40); } } /* * Set up state to handle output flow control. * XXX - worth handling MDMBUF (DCD) flow control at the lowest level? * Now has 10+ msec latency, while CTS flow has 50- usec latency. */ com->state |= CS_ODEVREADY; com->state &= ~CS_CTS_OFLOW; if (cflag & CCTS_OFLOW) { com->state |= CS_CTS_OFLOW; if (!(com->last_modem_status & MSR_CTS)) com->state &= ~CS_ODEVREADY; if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) | 0x80); } } else { if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) & ~0x80); } } sio_setreg(com, com_cfcr, com->cfcr_image); /* XXX shouldn't call functions while intrs are disabled. */ disc_optim(tp, t, com); /* * Recover from fiddling with CS_TTGO. We used to call siointr1() * unconditionally, but that defeated the careful discarding of * stale input in sioopen(). */ if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); enable_intr(); splx(s); comstart(tp); if (com->ibufold != NULL) { free(com->ibufold, M_DEVBUF); com->ibufold = NULL; } return (0); } static int siosetwater(com, speed) struct com_s *com; speed_t speed; { int cp4ticks; u_char *ibuf; int ibufsize; struct tty *tp; /* * Make the buffer size large enough to handle a softtty interrupt * latency of about 2 ticks without loss of throughput or data * (about 3 ticks if input flow control is not used or not honoured, * but a bit less for CS5-CS7 modes). */ cp4ticks = speed / 10 / hz * 4; for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; if (ibufsize == com->ibufsize) { disable_intr(); return (0); } /* * Allocate input buffer. The extra factor of 2 in the size is * to allow for an error byte for each input byte. */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { disable_intr(); return (ENOMEM); } /* Initialize non-critical variables. */ com->ibufold = com->ibuf; com->ibufsize = ibufsize; tp = com->tp; if (tp != NULL) { tp->t_ififosize = 2 * ibufsize; tp->t_ispeedwat = (speed_t)-1; tp->t_ospeedwat = (speed_t)-1; } /* * Read current input buffer, if any. Continue with interrupts * disabled. */ disable_intr(); if (com->iptr != com->ibuf) sioinput(com); /*- * Initialize critical variables, including input buffer watermarks. * The external device is asked to stop sending when the buffer * exactly reaches high water, or when the high level requests it. * The high level is notified immediately (rather than at a later * clock tick) when this watermark is reached. * The buffer size is chosen so the watermark should almost never * be reached. * The low watermark is invisibly 0 since the buffer is always * emptied all at once. */ com->iptr = com->ibuf = ibuf; com->ibufend = ibuf + ibufsize; com->ierroff = ibufsize; com->ihighwater = ibuf + 3 * ibufsize / 4; return (0); } static void comstart(tp) struct tty *tp; { struct com_s *com; int s; int unit; unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); if (com == NULL) return; s = spltty(); disable_intr(); if (tp->t_state & TS_TTSTOP) com->state &= ~CS_TTGO; else com->state |= CS_TTGO; if (tp->t_state & TS_TBLOCK) { if (com->mcr_image & MCR_RTS && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); } else { if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } enable_intr(); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); return; } if (tp->t_outq.c_cc != 0) { struct lbq *qp; struct lbq *next; if (!com->obufs[0].l_queued) { com->obufs[0].l_tail = com->obuf1 + q_to_b(&tp->t_outq, com->obuf1, sizeof com->obuf1); com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; disable_intr(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[0]; } else { com->obufq.l_head = com->obufs[0].l_head; com->obufq.l_tail = com->obufs[0].l_tail; com->obufq.l_next = &com->obufs[0]; com->state |= CS_BUSY; } enable_intr(); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { com->obufs[1].l_tail = com->obuf2 + q_to_b(&tp->t_outq, com->obuf2, sizeof com->obuf2); com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; disable_intr(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[1]; } else { com->obufq.l_head = com->obufs[1].l_head; com->obufq.l_tail = com->obufs[1].l_tail; com->obufq.l_next = &com->obufs[1]; com->state |= CS_BUSY; } enable_intr(); } tp->t_state |= TS_BUSY; } disable_intr(); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ enable_intr(); ttwwakeup(tp); splx(s); } static void comstop(tp, rw) struct tty *tp; int rw; { struct com_s *com; com = com_addr(DEV_TO_UNIT(tp->t_dev)); if (com == NULL || com->gone) return; disable_intr(); if (rw & FWRITE) { if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); com->obufs[0].l_queued = FALSE; com->obufs[1].l_queued = FALSE; if (com->state & CS_ODONE) com_events -= LOTS_OF_EVENTS; com->state &= ~(CS_ODONE | CS_BUSY); com->tp->t_state &= ~TS_BUSY; } if (rw & FREAD) { if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_RCV_RST | com->fifo_image); com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } enable_intr(); comstart(tp); } static int commctl(com, bits, how) struct com_s *com; int bits; int how; { int mcr; int msr; if (how == DMGET) { bits = TIOCM_LE; /* XXX - always enabled while open */ mcr = com->mcr_image; if (mcr & MCR_DTR) bits |= TIOCM_DTR; if (mcr & MCR_RTS) bits |= TIOCM_RTS; msr = com->prev_modem_status; if (msr & MSR_CTS) bits |= TIOCM_CTS; if (msr & MSR_DCD) bits |= TIOCM_CD; if (msr & MSR_DSR) bits |= TIOCM_DSR; /* * XXX - MSR_RI is naturally volatile, and we make MSR_TERI * more volatile by reading the modem status a lot. Perhaps * we should latch both bits until the status is read here. */ if (msr & (MSR_RI | MSR_TERI)) bits |= TIOCM_RI; return (bits); } mcr = 0; if (bits & TIOCM_DTR) mcr |= MCR_DTR; if (bits & TIOCM_RTS) mcr |= MCR_RTS; if (com->gone) return(0); disable_intr(); switch (how) { case DMSET: outb(com->modem_ctl_port, com->mcr_image = mcr | (com->mcr_image & MCR_IENABLE)); break; case DMBIS: outb(com->modem_ctl_port, com->mcr_image |= mcr); break; case DMBIC: outb(com->modem_ctl_port, com->mcr_image &= ~mcr); break; } enable_intr(); return (0); } static void siosettimeout() { struct com_s *com; bool_t someopen; int unit; /* * Set our timeout period to 1 second if no polled devices are open. * Otherwise set it to max(1/200, 1/hz). * Enable timeouts iff some device is open. */ untimeout(comwakeup, (void *)NULL, sio_timeout_handle); sio_timeout = hz; someopen = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && com->tp != NULL && com->tp->t_state & TS_ISOPEN && !com->gone) { someopen = TRUE; if (com->poll || com->poll_output) { sio_timeout = hz > 200 ? hz / 200 : 1; break; } } } if (someopen) { sio_timeouts_until_log = hz / sio_timeout; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); } else { /* Flush error messages, if any. */ sio_timeouts_until_log = 1; comwakeup((void *)NULL); untimeout(comwakeup, (void *)NULL, sio_timeout_handle); } } static void comwakeup(chan) void *chan; { struct com_s *com; int unit; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); /* * Recover from lost output interrupts. * Poll any lines that don't use interrupts. */ for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && !com->gone && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { disable_intr(); siointr1(com); enable_intr(); } } /* * Check for and log errors, but not too often. */ if (--sio_timeouts_until_log > 0) return; sio_timeouts_until_log = hz / sio_timeout; for (unit = 0; unit < sio_numunits; ++unit) { int errnum; com = com_addr(unit); if (com == NULL) continue; if (com->gone) continue; for (errnum = 0; errnum < CE_NTYPES; ++errnum) { u_int delta; u_long total; disable_intr(); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; enable_intr(); if (delta == 0) continue; total = com->error_counts[errnum] += delta; log(LOG_ERR, "sio%d: %u more %s%s (total %lu)\n", unit, delta, error_desc[errnum], delta == 1 ? "" : "s", total); } } } static void disc_optim(tp, t, com) struct tty *tp; struct termios *t; struct com_s *com; { if (!(t->c_iflag & (ICRNL | IGNCR | IMAXBEL | INLCR | ISTRIP | IXON)) && (!(t->c_iflag & BRKINT) || (t->c_iflag & IGNBRK)) && (!(t->c_iflag & PARMRK) || (t->c_iflag & (IGNPAR | IGNBRK)) == (IGNPAR | IGNBRK)) && !(t->c_lflag & (ECHO | ICANON | IEXTEN | ISIG | PENDIN)) && linesw[tp->t_line].l_rint == ttyinput) tp->t_state |= TS_CAN_BYPASS_L_RINT; else tp->t_state &= ~TS_CAN_BYPASS_L_RINT; com->hotchar = linesw[tp->t_line].l_hotchar; } /* * Following are all routines needed for SIO to act as console */ #include struct siocnstate { u_char dlbl; u_char dlbh; u_char ier; u_char cfcr; u_char mcr; }; static speed_t siocngetspeed __P((Port_t, struct speedtab *)); static void siocnclose __P((struct siocnstate *sp, Port_t iobase)); static void siocnopen __P((struct siocnstate *sp, Port_t iobase, int speed)); static void siocntxwait __P((Port_t iobase)); static cn_probe_t siocnprobe; static cn_init_t siocninit; static cn_checkc_t siocncheckc; static cn_getc_t siocngetc; static cn_putc_t siocnputc; #ifdef __i386__ CONS_DRIVER(sio, siocnprobe, siocninit, NULL, siocngetc, siocncheckc, siocnputc, NULL); #endif /* To get the GDB related variables */ #if DDB > 0 #include #endif static void siocntxwait(iobase) Port_t iobase; { int timo; /* * Wait for any pending transmission to finish. Required to avoid * the UART lockup bug when the speed is changed, and for normal * transmits. */ timo = 100000; while ((inb(iobase + com_lsr) & (LSR_TSRE | LSR_TXRDY)) != (LSR_TSRE | LSR_TXRDY) && --timo != 0) ; } /* * Read the serial port specified and try to figure out what speed * it's currently running at. We're assuming the serial port has * been initialized and is basicly idle. This routine is only intended * to be run at system startup. * * If the value read from the serial port doesn't make sense, return 0. */ static speed_t siocngetspeed(iobase, table) Port_t iobase; struct speedtab *table; { int code; u_char dlbh; u_char dlbl; u_char cfcr; cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); dlbl = inb(iobase + com_dlbl); dlbh = inb(iobase + com_dlbh); outb(iobase + com_cfcr, cfcr); code = dlbh << 8 | dlbl; for (; table->sp_speed != -1; table++) if (table->sp_code == code) return (table->sp_speed); return (0); /* didn't match anything sane */ } static void siocnopen(sp, iobase, speed) struct siocnstate *sp; Port_t iobase; int speed; { int divisor; u_char dlbh; u_char dlbl; /* * Save all the device control registers except the fifo register * and set our default ones (cs8 -parenb speed=comdefaultrate). * We can't save the fifo register since it is read-only. */ sp->ier = inb(iobase + com_ier); outb(iobase + com_ier, 0); /* spltty() doesn't stop siointr() */ siocntxwait(iobase); sp->cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); sp->dlbl = inb(iobase + com_dlbl); sp->dlbh = inb(iobase + com_dlbh); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (Startech), setting them clears the * data input register. This also reduces the effects of the * UMC8669F bug. */ divisor = ttspeedtab(speed, comspeedtab); dlbl = divisor & 0xFF; if (sp->dlbl != dlbl) outb(iobase + com_dlbl, dlbl); dlbh = (u_int) divisor >> 8; if (sp->dlbh != dlbh) outb(iobase + com_dlbh, dlbh); outb(iobase + com_cfcr, CFCR_8BITS); sp->mcr = inb(iobase + com_mcr); /* * We don't want interrupts, but must be careful not to "disable" * them by clearing the MCR_IENABLE bit, since that might cause * an interrupt by floating the IRQ line. */ outb(iobase + com_mcr, (sp->mcr & MCR_IENABLE) | MCR_DTR | MCR_RTS); } static void siocnclose(sp, iobase) struct siocnstate *sp; Port_t iobase; { /* * Restore the device control registers. */ siocntxwait(iobase); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); if (sp->dlbl != inb(iobase + com_dlbl)) outb(iobase + com_dlbl, sp->dlbl); if (sp->dlbh != inb(iobase + com_dlbh)) outb(iobase + com_dlbh, sp->dlbh); outb(iobase + com_cfcr, sp->cfcr); /* * XXX damp oscillations of MCR_DTR and MCR_RTS by not restoring them. */ outb(iobase + com_mcr, sp->mcr | MCR_DTR | MCR_RTS); outb(iobase + com_ier, sp->ier); } static void siocnprobe(cp) struct consdev *cp; { speed_t boot_speed; u_char cfcr; int s, unit; struct siocnstate sp; /* * Find our first enabled console, if any. If it is a high-level * console device, then initialize it and return successfully. * If it is a low-level console device, then initialize it and * return unsuccessfully. It must be initialized in both cases * for early use by console drivers and debuggers. Initializing * the hardware is not necessary in all cases, since the i/o * routines initialize it on the fly, but it is necessary if * input might arrive while the hardware is switched back to an * uninitialized state. We can't handle multiple console devices * yet because our low-level routines don't take a device arg. * We trust the user to set the console flags properly so that we * don't need to probe. */ cp->cn_pri = CN_DEAD; for (unit = 0; unit < 16; unit++) { /* XXX need to know how many */ int flags; int disabled; if (resource_int_value("sio", unit, "disabled", &disabled) == 0) { if (disabled) continue; } if (resource_int_value("sio", unit, "flags", &flags)) continue; if (COM_CONSOLE(flags) || COM_DEBUGGER(flags)) { int port; Port_t iobase; if (resource_int_value("sio", unit, "port", &port)) continue; iobase = port; s = spltty(); if (boothowto & RB_SERIAL) { boot_speed = siocngetspeed(iobase, comspeedtab); if (boot_speed) comdefaultrate = boot_speed; } /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); outb(iobase + com_dlbl, COMBRD(comdefaultrate) & 0xff); outb(iobase + com_dlbh, (u_int) COMBRD(comdefaultrate) >> 8); outb(iobase + com_cfcr, cfcr); siocnopen(&sp, iobase, comdefaultrate); splx(s); if (COM_CONSOLE(flags) && !COM_LLCONSOLE(flags)) { cp->cn_dev = makedev(CDEV_MAJOR, unit); cp->cn_pri = COM_FORCECONSOLE(flags) || boothowto & RB_SERIAL ? CN_REMOTE : CN_NORMAL; siocniobase = iobase; siocnunit = unit; } if (COM_DEBUGGER(flags)) { printf("sio%d: gdb debugging port\n", unit); siogdbiobase = iobase; siogdbunit = unit; #if DDB > 0 gdbdev = makedev(CDEV_MAJOR, unit); gdb_getc = siocngetc; gdb_putc = siocnputc; #endif } } } #ifdef __i386__ #if DDB > 0 /* * XXX Ugly Compatability. * If no gdb port has been specified, set it to be the console * as some configuration files don't specify the gdb port. */ if (gdbdev == NODEV && (boothowto & RB_GDB)) { printf("Warning: no GDB port specified. Defaulting to sio%d.\n", siocnunit); printf("Set flag 0x80 on desired GDB port in your\n"); printf("configuration file (currently sio only).\n"); siogdbiobase = siocniobase; siogdbunit = siocnunit; gdbdev = makedev(CDEV_MAJOR, siocnunit); gdb_getc = siocngetc; gdb_putc = siocnputc; } #endif #endif } #ifdef __alpha__ CONS_DRIVER(sio, NULL, NULL, NULL, siocngetc, siocncheckc, siocnputc, NULL); int siocnattach(port, speed) int port; int speed; { int s; u_char cfcr; struct siocnstate sp; siocniobase = port; comdefaultrate = speed; sio_consdev.cn_pri = CN_NORMAL; sio_consdev.cn_dev = makedev(CDEV_MAJOR, 0); s = spltty(); /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(siocniobase + com_cfcr); outb(siocniobase + com_cfcr, CFCR_DLAB | cfcr); outb(siocniobase + com_dlbl, COMBRD(comdefaultrate) & 0xff); outb(siocniobase + com_dlbh, (u_int) COMBRD(comdefaultrate) >> 8); outb(siocniobase + com_cfcr, cfcr); siocnopen(&sp, siocniobase, comdefaultrate); splx(s); cn_tab = &sio_consdev; return (0); } int siogdbattach(port, speed) int port; int speed; { int s; u_char cfcr; struct siocnstate sp; siogdbiobase = port; gdbdefaultrate = speed; s = spltty(); /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(siogdbiobase + com_cfcr); outb(siogdbiobase + com_cfcr, CFCR_DLAB | cfcr); outb(siogdbiobase + com_dlbl, COMBRD(gdbdefaultrate) & 0xff); outb(siogdbiobase + com_dlbh, (u_int) COMBRD(gdbdefaultrate) >> 8); outb(siogdbiobase + com_cfcr, cfcr); siocnopen(&sp, siogdbiobase, gdbdefaultrate); splx(s); return (0); } #endif static void siocninit(cp) struct consdev *cp; { comconsole = DEV_TO_UNIT(cp->cn_dev); } static int siocncheckc(dev) dev_t dev; { int c; Port_t iobase; int s; struct siocnstate sp; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); if (inb(iobase + com_lsr) & LSR_RXRDY) c = inb(iobase + com_data); else c = -1; siocnclose(&sp, iobase); splx(s); return (c); } int siocngetc(dev) dev_t dev; { int c; Port_t iobase; int s; struct siocnstate sp; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); while (!(inb(iobase + com_lsr) & LSR_RXRDY)) ; c = inb(iobase + com_data); siocnclose(&sp, iobase); splx(s); return (c); } void siocnputc(dev, c) dev_t dev; int c; { int s; struct siocnstate sp; Port_t iobase; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); siocntxwait(iobase); outb(iobase + com_data, c); siocnclose(&sp, iobase); splx(s); } #ifdef __alpha__ int siogdbgetc() { int c; Port_t iobase; int s; struct siocnstate sp; iobase = siogdbiobase; s = spltty(); siocnopen(&sp, iobase, gdbdefaultrate); while (!(inb(iobase + com_lsr) & LSR_RXRDY)) ; c = inb(iobase + com_data); siocnclose(&sp, iobase); splx(s); return (c); } void siogdbputc(c) int c; { int s; struct siocnstate sp; s = spltty(); siocnopen(&sp, siogdbiobase, gdbdefaultrate); siocntxwait(siogdbiobase); outb(siogdbiobase + com_data, c); siocnclose(&sp, siogdbiobase); splx(s); } #endif DRIVER_MODULE(sio, isa, sio_isa_driver, sio_devclass, 0, 0); #if NCARD > 0 DRIVER_MODULE(sio, pccard, sio_pccard_driver, sio_devclass, 0, 0); #endif #if NPCI > 0 DRIVER_MODULE(sio, pci, sio_pci_driver, sio_devclass, 0, 0); #endif Index: head/sys/fs/nwfs/nwfs_node.c =================================================================== --- head/sys/fs/nwfs/nwfs_node.c (revision 62572) +++ head/sys/fs/nwfs/nwfs_node.c (revision 62573) @@ -1,316 +1,316 @@ /* * Copyright (c) 1999, Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Boris Popov. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NWNOHASH(fhsum) (&nwhashtbl[(fhsum.f_id) & nwnodehash]) extern vop_t **nwfs_vnodeop_p; static LIST_HEAD(nwnode_hash_head,nwnode) *nwhashtbl; static u_long nwnodehash; static int nwhashlock = 0; MALLOC_DEFINE(M_NWNODE, "NWFS node", "NWFS vnode private part"); MALLOC_DEFINE(M_NWFSHASH, "NWFS hash", "NWFS has table"); -static int nwfs_sysctl_vnprint (SYSCTL_HANDLER_ARGS); +static int nwfs_sysctl_vnprint(SYSCTL_HANDLER_ARGS); extern struct linker_set sysctl_vfs_nwfs; SYSCTL_DECL(_vfs_nwfs); SYSCTL_PROC(_vfs_nwfs, OID_AUTO, vnprint, CTLFLAG_WR|CTLTYPE_OPAQUE, NULL, 0, nwfs_sysctl_vnprint, "S,vnlist", "vnode hash"); void nwfs_hash_init(void) { nwhashtbl = hashinit(desiredvnodes, M_NWFSHASH, &nwnodehash); } void nwfs_hash_free(void) { free(nwhashtbl, M_NWFSHASH); } int -nwfs_sysctl_vnprint (SYSCTL_HANDLER_ARGS) { +nwfs_sysctl_vnprint(SYSCTL_HANDLER_ARGS) { struct nwnode *np; struct nwnode_hash_head *nhpp; struct vnode *vp; int i; if (nwfs_debuglevel == 0) return 0; printf("Name:uc:hc:fid:pfid\n"); for(i = 0; i <= nwnodehash; i++) { nhpp = &nwhashtbl[i]; for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { vp = NWTOV(np); printf("%s:%d:%d:%d:%d\n",np->n_name,vp->v_usecount,vp->v_holdcnt, np->n_fid.f_id, np->n_fid.f_parent); } } return 0; } /* * Allocate new nwfsnode/vnode from given nwnode. * Vnode referenced and not locked. */ int nwfs_allocvp(struct mount *mp, ncpfid fid, struct vnode **vpp) { struct proc *p = curproc; /* XXX */ struct nwnode *np, *np2; struct nwnode_hash_head *nhpp; struct vnode *vp; int error; retry: nhpp = NWNOHASH(fid); loop: for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { vp = NWTOV(np); if (mp != vp->v_mount || !NWCMPF(&fid, &np->n_fid)) continue; if (vget(vp, LK_EXCLUSIVE, p)) goto loop; *vpp = vp; return(0); } /* lock list, or waiting in malloc can cause problems */ if (nwhashlock) { while(nwhashlock) { nwhashlock = -1; tsleep((caddr_t) &nwhashlock, PVM, "nwfsvp", 0); } goto loop; } nwhashlock = 1; /* * Do the MALLOC before the getnewvnode since doing so afterward * might cause a bogus v_data pointer to get dereferenced * elsewhere if MALLOC should block. */ MALLOC(np, struct nwnode *, sizeof *np, M_NWNODE, M_WAITOK); error = getnewvnode(VT_NWFS, mp, nwfs_vnodeop_p, &vp); if (error) { if (nwhashlock < 0) wakeup(&nwhashlock); nwhashlock = 0; *vpp = 0; FREE(np, M_NWNODE); return (error); } *vpp = vp; bzero(np,sizeof(*np)); vp->v_data = np; np->n_vnode = vp; np->n_mount = VFSTONWFS(mp); np->n_fid = fid; for (np2 = nhpp->lh_first; np2 != 0; np2 = np->n_hash.le_next) { if (mp != NWTOV(np2)->v_mount || !NWCMPF(&fid, &np2->n_fid)) continue; vrele(vp); FREE(np, M_NWNODE); if (nwhashlock < 0) wakeup(&nwhashlock); nwhashlock = 0; goto retry; } LIST_INSERT_HEAD(nhpp, np, n_hash); if (nwhashlock < 0) wakeup(&nwhashlock); nwhashlock = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); np->n_flag |= NNEW; return (error); } int nwfs_lookupnp(struct nwmount *nmp, ncpfid fid, struct nwnode **npp) { struct nwnode *np; struct nwnode_hash_head *nhpp; nhpp = NWNOHASH(fid); for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { if (nmp != np->n_mount || !NWCMPF(&fid, &np->n_fid)) continue; *npp = np; return(0); } return ENOENT; } /* * Free nwnode, and give vnode back to system */ int nwfs_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *dvp = NULL, *vp = ap->a_vp; struct nwnode *dnp, *np = VTONW(vp); struct nwmount *nmp=VTONWFS(vp); NCPVNDEBUG("%s,%d\n", np->n_name, vp->v_usecount); if (np->n_refparent) { np->n_refparent = 0; if (nwfs_lookupnp(nmp, np->n_parent, &dnp) == 0) { dvp = dnp->n_vnode; } else { NCPVNDEBUG("%s: has no parent ?\n",np->n_name); } } LIST_REMOVE(np, n_hash); cache_purge(vp); if (nmp->n_root == np) { nmp->n_root = NULL; } vp->v_data = NULL; FREE(np, M_NWNODE); if (dvp) { vrele(dvp); } return (0); } int nwfs_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct ucred *cred = p->p_ucred; struct vnode *vp = ap->a_vp; struct nwnode *np = VTONW(vp); int error; NCPVNDEBUG("%s: %d\n", VTONW(vp)->n_name, vp->v_usecount); if (np->opened) { error = nwfs_vinvalbuf(vp, V_SAVE, cred, p, 1); error = ncp_close_file(NWFSTOCONN(VTONWFS(vp)), &np->n_fh, p, cred); np->opened = 0; } VOP_UNLOCK(vp, 0, p); return (0); } /* * routines to maintain vnode attributes cache * nwfs_attr_cacheenter: unpack np.i to va structure */ void nwfs_attr_cacheenter(struct vnode *vp, struct nw_entry_info *fi) { struct nwnode *np = VTONW(vp); struct nwmount *nmp = VTONWFS(vp); register struct vattr *va = &np->n_vattr; va->va_type = vp->v_type; /* vnode type (for create) */ if (vp->v_type == VREG) { if (va->va_size != fi->dataStreamSize) { va->va_size = fi->dataStreamSize; vnode_pager_setsize(vp, va->va_size); } va->va_mode = nmp->m.file_mode; /* files access mode and type */ } else if (vp->v_type == VDIR) { va->va_size = 16384; /* should be a better way ... */ va->va_mode = nmp->m.dir_mode; /* files access mode and type */ } else return; np->n_size = va->va_size; va->va_nlink = 1; /* number of references to file */ va->va_uid = nmp->m.uid; /* owner user id */ va->va_gid = nmp->m.gid; /* owner group id */ va->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; va->va_fileid = np->n_fid.f_id; /* file id */ if (va->va_fileid == 0) va->va_fileid = NWFS_ROOT_INO; va->va_blocksize=nmp->connh->nh_conn->buffer_size;/* blocksize preferred for i/o */ /* time of last modification */ ncp_dos2unixtime(fi->modifyDate, fi->modifyTime, 0, nmp->m.tz, &va->va_mtime); /* time of last access */ ncp_dos2unixtime(fi->lastAccessDate, 0, 0, nmp->m.tz, &va->va_atime); va->va_ctime = va->va_mtime; /* time file changed */ va->va_gen = VNOVAL; /* generation number of file */ va->va_flags = 0; /* flags defined for file */ va->va_rdev = VNOVAL; /* device the special file represents */ va->va_bytes = va->va_size; /* bytes of disk space held by file */ va->va_filerev = 0; /* file modification number */ va->va_vaflags = 0; /* operations flags */ np->n_vattr = *va; if (np->n_mtime == 0) { np->n_mtime = va->va_mtime.tv_sec; } np->n_atime = time_second; return; } int nwfs_attr_cachelookup(struct vnode *vp, struct vattr *va) { struct nwnode *np = VTONW(vp); int diff; diff = time_second - np->n_atime; if (diff > 2) { /* XXX should be configurable */ return ENOENT; } *va = np->n_vattr; return 0; } Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 62572) +++ head/sys/i386/i386/machdep.c (revision 62573) @@ -1,2435 +1,2435 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "apm.h" #include "npx.h" #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" #include "opt_smp.h" #include "opt_user_ldt.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #ifdef SMP #include #include #endif #ifdef PERFMON #include #endif #ifdef OLD_BUS_ARCH #include #endif #include #include #include #include #include extern void init386 __P((int first)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif #ifdef PC98 static int ispc98 = 1; #else static int ispc98 = 0; #endif SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); int physmem = 0; int cold = 1; static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code)); static int -sysctl_hw_physmem (SYSCTL_HANDLER_ARGS) +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int -sysctl_hw_usermem (SYSCTL_HANDLER_ARGS) +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); static int -sysctl_hw_availpages (SYSCTL_HANDLER_ARGS) +sysctl_hw_availpages(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, i386_btop(avail_end - avail_start), req); return (error); } SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_availpages, "I", ""); static int -sysctl_machdep_msgbuf (SYSCTL_HANDLER_ARGS) +sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) { int error; /* Unwind the buffer, so that it's linear (possibly starting with * some initial nulls). */ error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, msgbufp->msg_size-msgbufp->msg_bufr,req); if(error) return(error); if(msgbufp->msg_bufr>0) { error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, msgbufp->msg_bufr,req); } return(error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); static int msgbuf_clear; static int -sysctl_machdep_msgbuf_clear (SYSCTL_HANDLER_ARGS) +sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) { /* Clear the buffer and reset write pointer */ bzero(msgbufp->msg_ptr,msgbufp->msg_size); msgbufp->msg_bufr=msgbufp->msg_bufx=0; msgbuf_clear=0; } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", "Clear kernel message buffer"); int bootverbose = 0, Maxmem = 0; long dumplo; vm_offset_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; #define offsetof(type, member) ((size_t)(&((type *)0)->member)) static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); earlysetcpuclass(); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %u (%uK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08x - 0x%08x, %u bytes (%u pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } /* * Calculate callout wheel size */ for (callwheelsize = 1, callwheelbits = 0; callwheelsize < ncallout; callwheelsize <<= 1, ++callwheelbits) ; callwheelmask = callwheelsize - 1; /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); valloc(callwheel, struct callout_tailq, callwheelsize); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/20 of our ram over 64MB. * * factor represents the 1/4 x ram conversion. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / PAGE_SIZE; nbuf = 50; if (physmem > 1024) nbuf += min((physmem - 1024) / factor, 16384 / factor); if (physmem > 16384) nbuf += (physmem - 16384) * 2 / (factor * 5); } /* * Do not allow the buffer_map to be more then 1/2 the size of the * kernel_map. */ if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2)) { nbuf = (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2); printf("Warning: nbufs capped at %d\n", nbuf); } nswbuf = max(min(nbuf/4, 256), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); v = bufhashinit(v); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE)); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size); pager_map->system_map = 1; exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size); mb_map->system_map = 1; } /* * Initialize callouts */ SLIST_INIT(&callfree); for (i = 0; i < ncallout; i++) { callout_init(&callout[i]); callout[i].c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle); } for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&callwheel[i]); } #if defined(USERCONFIG) userconfig(); cninit(); /* the preferred console may have changed */ #endif printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! */ mp_start(); /* fire up the APs and APICs */ mp_announce(); #endif /* SMP */ } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } int unregister_netisr(num) int num; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("unregister_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = NULL; return (0); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf; struct osigframe *fp; struct proc *p; struct sigacts *psp; struct trapframe *regs; int oonstack; p = curproc; psp = p->p_sigacts; regs = p->p_md.md_regs; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else fp = (struct osigframe *)regs->tf_esp - 1; /* * grow_stack() will return 0 if *fp does not fit inside the stack * and the stack can not be grown. * useracc() will return FALSE if access is denied. */ if (grow_stack(p, (int)fp) == 0 || !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = oonstack; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf; struct proc *p; struct sigacts *psp; struct trapframe *regs; struct sigframe *sfp; int oonstack; p = curproc; psp = p->p_sigacts; if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } regs = p->p_md.md_regs; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sfp = (struct sigframe *)regs->tf_esp - 1; /* * grow_stack() will return 0 if *sfp does not fit inside the stack * and the stack can not be grown. * useracc() will return FALSE if access is denied. */ if (grow_stack(p, (int)sfp) == 0 || !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG printf("process %d has trashed its stack\n", p->p_pid); #endif SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill siginfo structure. */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * We should never have PSL_T set when returning from vm86 * mode. It may be set here if we deliver a signal before * getting to vm86 mode, so turn it off. * * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ int osigreturn(p, uap) struct proc *p; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct trapframe *regs; struct osigcontext *scp; int eflags; regs = p->p_md.md_regs; scp = uap->sigcntxp; if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ)) return (EFAULT); eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; if (scp->sc_onstack & 01) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; return (EJUSTRETURN); } int sigreturn(p, uap) struct proc *p; struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap; { struct trapframe *regs; ucontext_t *ucp; int cs, eflags; ucp = uap->sigcntxp; if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ)) return (EFAULT); if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516) return (osigreturn(p, (struct osigreturn_args *)uap)); /* * Since ucp is not an osigcontext but a ucontext_t, we have to * check again if all of it is accessible. A ucontext_t is * much larger, so instead of just checking for the pointer * being valid for the size of an osigcontext, now check for * it being valid for a whole, new-style ucontext_t. */ if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ)) return (EFAULT); regs = p->p_md.md_regs; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Clear registers on exec */ void setregs(p, entry, stack, ps_strings) struct proc *p; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = p->p_md.md_regs; struct pcb *pcb = &p->p_addr->u_pcb; #ifdef USER_LDT /* was i386_user_cleanup() in NetBSD */ user_ldt_free(pcb); #endif bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* reset %gs as well */ if (pcb == curpcb) load_gs(_udatasel); else pcb->pcb_gs = _udatasel; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); #if NNPX > 0 /* Initialize the npx (if any) for the current process. */ npxinit(__INITIAL_NPXCW__); #endif /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ p->p_retval[1] = 0; } static int -sysctl_machdep_adjkerntz (SYSCTL_HANDLER_ARGS) +sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; #ifdef SMP union descriptor gdt[NGDT * NCPU]; /* global descriptor table */ #else union descriptor gdt[NGDT]; /* global descriptor table */ #endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif #ifndef SMP extern struct segment_descriptor common_tssd, *tss_gdt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. */ static void getmemsize(int first) { int i, physmap_idx, pa_indx; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_offset_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t pte; const char *cp; struct { u_int64_t base; u_int64_t length; u_int32_t type; } *smap; bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { pte = (pt_entry_t)vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. */ pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT)); *pte = (1 << PAGE_SHIFT) | PG_RW | PG_V; /* * get memory map with INT 15:E820 */ #define SMAPSIZ sizeof(*smap) #define SMAP_SIG 0x534D4150 /* 'SMAP' */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = SMAPSIZ; i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n", smap->type, *(u_int32_t *)((char *)&smap->base + 4), (u_int32_t)smap->base, *(u_int32_t *)((char *)&smap->length + 4), (u_int32_t)smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: } while (vmf.vmf_ebx != 0); if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.maxmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %uK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa(Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; #if 0 pte = (pt_entry_t)vtopte(KERNBASE); #else pte = (pt_entry_t)CMAP1; #endif /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_offset_t end; end = ptoa(Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; #if 0 int *ptr = 0; #else int *ptr = (int *)CADDR1; #endif /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } void init386(first) int first; { int x; struct gate_descriptor *gdp; int gsel_tss; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif int off; /* * Prevent lowering of the ipl if we call tsleep() early. */ safepri = cpl; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; #ifdef SMP gdt_segs[GPRIV_SEL].ssd_limit = i386_btop(sizeof(struct privatespace)) - 1; gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0]; gdt_segs[GPROC0_SEL].ssd_base = (int) &SMP_prvspace[0].globaldata.gd_common_tss; SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0]; #else gdt_segs[GPRIV_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GPROC0_SEL].ssd_base = (int) &common_tss; #endif for (x = 0; x < NGDT; x++) { #ifdef BDE_DEBUGGER /* avoid overwriting db entries with APM ones */ if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL) continue; #endif ssdtosd(&gdt_segs[x], &gdt[x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. */ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); #ifdef USER_LDT currentldt = _default_ldt; #endif /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; tss_gdt = &gdt[GPROC0_SEL].sd; common_tssd = *tss_gdt; common_tss.tss_ioopt = (sizeof common_tss) << 16; ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = (int)IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; #ifdef SMP proc0.p_addr->u_pcb.pcb_mpnest = 1; #endif proc0.p_addr->u_pcb.pcb_ext = 0; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(p, addr) struct proc *p; unsigned long addr; { p->p_md.md_regs->tf_eip = addr; return (0); } int ptrace_single_step(p) struct proc *p; { p->p_md.md_regs->tf_eflags |= PSL_T; return (0); } int ptrace_read_u_check(p, addr, len) struct proc *p; vm_offset_t addr; size_t len; { vm_offset_t gap; if ((vm_offset_t) (addr + len) < addr) return EPERM; if ((vm_offset_t) (addr + len) <= sizeof(struct user)) return 0; gap = (char *) p->p_md.md_regs - (char *) p->p_addr; if ((vm_offset_t) addr < gap) return EPERM; if ((vm_offset_t) (addr + len) <= (vm_offset_t) (gap + sizeof(struct trapframe))) return 0; return EPERM; } int ptrace_write_u(p, off, data) struct proc *p; vm_offset_t off; long data; { struct trapframe frame_copy; vm_offset_t min; struct trapframe *tp; /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_regs - (char *)p->p_addr; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = p->p_md.md_regs; frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = &p->p_addr->u_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = &p->p_addr->u_pcb; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs); return (0); } int set_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs); return (0); } int fill_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; pcb = &p->p_addr->u_pcb; dbregs->dr0 = pcb->pcb_dr0; dbregs->dr1 = pcb->pcb_dr1; dbregs->dr2 = pcb->pcb_dr2; dbregs->dr3 = pcb->pcb_dr3; dbregs->dr4 = 0; dbregs->dr5 = 0; dbregs->dr6 = pcb->pcb_dr6; dbregs->dr7 = pcb->pcb_dr7; return (0); } int set_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; pcb = &p->p_addr->u_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (p->p_ucred->cr_uid != 0) { if (dbregs->dr7 & 0x3) { /* dr0 is enabled */ if (dbregs->dr0 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr1 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr2 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr3 >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr0; pcb->pcb_dr1 = dbregs->dr1; pcb->pcb_dr2 = dbregs->dr2; pcb->pcb_dr3 = dbregs->dr3; pcb->pcb_dr6 = dbregs->dr6; pcb->pcb_dr7 = dbregs->dr7; pcb->pcb_flags |= PCB_DBREGS; return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i=0; i /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel) { struct partition *p = lp->d_partitions + dkpart(bp->bio_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR && (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->bio_blkno == maxsz) { bp->bio_resid = bp->bio_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->bio_blkno; if (sz <= 0) { bp->bio_error = EINVAL; goto bad; } bp->bio_bcount = sz << DEV_BSHIFT; } bp->bio_pblkno = bp->bio_blkno + p->p_offset; return(1); bad: bp->bio_flags |= BIO_ERROR; return(-1); } #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/i386/i386/mp_clock.c =================================================================== --- head/sys/i386/i386/mp_clock.c (revision 62572) +++ head/sys/i386/i386/mp_clock.c (revision 62573) @@ -1,142 +1,142 @@ /* * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * Just when we thought life were beautiful, reality pops its grim face over * the edge again: * * ] 20. ACPI Timer Errata * ] * ] Problem: The power management timer may return improper result when * ] read. Although the timer value settles properly after incrementing, * ] while incrementing there is a 3nS window every 69.8nS where the * ] timer value is indeterminate (a 4.2% chance that the data will be * ] incorrect when read). As a result, the ACPI free running count up * ] timer specification is violated due to erroneous reads. Implication: * ] System hangs due to the "inaccuracy" of the timer when used by * ] software for time critical events and delays. * ] * ] Workaround: Read the register twice and compare. * ] Status: This will not be fixed in the PIIX4 or PIIX4E. * * The counter is in other words not latched to the PCI bus clock when * read. Notice the workaround isn't: We need to read until we have * three monotonic samples and then use the middle one, otherwise we are * not protected against the fact that the bits can be wrong in two * directions. If we only cared about monosity two reads would be enough. * * $FreeBSD$ * */ /* #include "opt_bus.h" */ /* #include "opt_pci.h" */ /* #include "opt_smp.h" */ #include #include #include #include #include #include #include static unsigned piix_get_timecount(struct timecounter *tc); static u_int32_t piix_timecounter_address; static u_int piix_freq = 14318182/4; static struct timecounter piix_timecounter = { piix_get_timecount, 0, 0xffffff, 0, "PIIX" }; SYSCTL_OPAQUE(_debug, OID_AUTO, piix_timecounter, CTLFLAG_RD, &piix_timecounter, sizeof(piix_timecounter), "S,timecounter", ""); static int -sysctl_machdep_piix_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_piix_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (piix_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = piix_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { piix_freq = freq; piix_timecounter.tc_frequency = piix_freq; tc_update(&piix_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, piix_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_piix_freq, "I", ""); static unsigned piix_get_timecount(struct timecounter *tc) { unsigned u1, u2, u3; u2 = inl(piix_timecounter_address); u3 = inl(piix_timecounter_address); do { u1 = u2; u2 = u3; u3 = inl(piix_timecounter_address); } while (u1 > u2 || u2 > u3); return (u2); } static int piix_probe (device_t dev) { u_int32_t d; switch (pci_get_devid(dev)) { case 0x71138086: d = pci_read_config(dev, 0x4, 2); if (!(d & 1)) return 0; /* IO space not mapped */ d = pci_read_config(dev, 0x40, 4); piix_timecounter_address = (d & 0xffc0) + 8; piix_timecounter.tc_frequency = piix_freq; tc_init(&piix_timecounter); return (ENXIO); }; return (ENXIO); } static int piix_attach (device_t dev) { return 0; } static device_method_t piix_methods[] = { /* Device interface */ DEVMETHOD(device_probe, piix_probe), DEVMETHOD(device_attach, piix_attach), { 0, 0 } }; static driver_t piix_driver = { "piix", piix_methods, 1, }; static devclass_t piix_devclass; DRIVER_MODULE(piix, pci, piix_driver, piix_devclass, 0, 0); Index: head/sys/i386/i386/tsc.c =================================================================== --- head/sys/i386/i386/tsc.c (revision 62572) +++ head/sys/i386/i386/tsc.c (revision 62573) @@ -1,1279 +1,1279 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Routines to handle clock hardware. */ /* * inittodr, settodr and support routines written * by Christoph Robitschko * * reintroduced and updated by Chris Stenton 8/10/94 */ #include "opt_clock.h" #include "apm.h" #include #include #include #include #include #include #ifndef SMP #include #endif #include #include #include #ifdef CLK_CALIBRATION_LOOP #endif #include #include #include #include #include #include #ifdef APIC_IO #include #endif #if defined(SMP) || defined(APIC_IO) #include #endif /* SMP || APIC_IO */ #include #include #include #include #include #include #include #include "mca.h" #if NMCA > 0 #include #endif #ifdef SMP #define disable_intr() CLOCK_DISABLE_INTR() #define enable_intr() CLOCK_ENABLE_INTR() #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif #endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) #define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x)) /* * Time in timer cycles that it takes for microtime() to disable interrupts * and latch the count. microtime() currently uses "cli; outb ..." so it * normally takes less than 2 timer cycles. Add a few for cache misses. * Add a few more to allow for latency in bogus calls to microtime() with * interrupts already disabled. */ #define TIMER0_LATCH_COUNT 20 /* * Maximum frequency that we are willing to allow for timer0. Must be * low enough to guarantee that the timer interrupt handler returns * before the next timer interrupt. */ #define TIMER0_MAX_FREQ 20000 int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int disable_rtc_set; /* disable resettodr() if != 0 */ volatile u_int idelayed; int statclock_disable; u_int stat_imask = SWI_LOW_MASK; #ifndef TIMER_FREQ #define TIMER_FREQ 1193182 #endif u_int timer_freq = TIMER_FREQ; int timer0_max_count; u_int tsc_freq; int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; static u_int clk_imask = HWI_MASK | SWI_MASK; static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; /* * XXX new_function and timer_func should not handle clockframes, but * timer_func currently needs to hold hardclock to handle the * timer0_state == 0 case. We should use inthand_add()/inthand_remove() * to switch between clkintr() and a slightly different timerintr(). */ static void (*new_function) __P((struct clockframe *frame)); static u_int new_rate; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; static u_int timer0_prescaler_count; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer0_state; static u_char timer2_state; static void (*timer_func) __P((struct clockframe *frame)) = hardclock; static u_int tsc_present; static unsigned i8254_get_timecount __P((struct timecounter *tc)); static unsigned tsc_get_timecount __P((struct timecounter *tc)); static void set_timer_freq(u_int freq, int intr_freq); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "TSC" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, &tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", ""); static struct timecounter i8254_timecounter = { i8254_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "i8254" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", ""); static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { disable_intr(); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += timer0_max_count; i8254_lastcount = 0; } clkintr_pending = 0; enable_intr(); } timer_func(&frame); switch (timer0_state) { case RELEASED: setdelayed(); break; case ACQUIRED: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); setdelayed(); } break; case ACQUIRE_PENDING: disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); break; case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); setdelayed(); } break; } #if NMCA > 0 /* Reset clock interrupt by asserting bit 7 of port 0x61 */ if (MCA_system) outb(0x61, inb(0x61) | 0x80); #endif } /* * The acquire and release functions must be called at ipl >= splclock(). */ int acquire_timer0(int rate, void (*function) __P((struct clockframe *frame))) { static int old_rate; if (rate <= 0 || rate > TIMER0_MAX_FREQ) return (-1); switch (timer0_state) { case RELEASED: timer0_state = ACQUIRE_PENDING; break; case RELEASE_PENDING: if (rate != old_rate) return (-1); /* * The timer has been released recently, but is being * re-acquired before the release completed. In this * case, we simply reclaim it as if it had not been * released at all. */ timer0_state = ACQUIRED; break; default: return (-1); /* busy */ } new_function = function; old_rate = new_rate = rate; return (0); } int acquire_timer2(int mode) { if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); return (0); } int release_timer0() { switch (timer0_state) { case ACQUIRED: timer0_state = RELEASE_PENDING; break; case ACQUIRE_PENDING: /* Nothing happened yet, release quickly. */ timer0_state = RELEASED; break; default: return (-1); } return (0); } int release_timer2() { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); return (0); } /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static void rtcintr(struct clockframe frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) statclock(&frame); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ static int getit(void) { u_long ef; int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); write_eflags(ef); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Guard against the timer being uninitialized if we are called * early for console i/o. */ if (timer0_max_count == 0) set_timer_freq(timer_freq, hz); /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += timer0_max_count; /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void sysbeepstop(void *chan) { outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ release_timer2(); beeping = 0; } int sysbeep(int pitch, int period) { int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } splx(x); return (0); } /* * RTC support routines */ int rtcin(reg) int reg; { int s; u_char val; s = splhigh(); outb(IO_RTC, reg); inb(0x84); val = inb(IO_RTC + 1); inb(0x84); splx(s); return (val); } static __inline void writertc(u_char reg, u_char val) { int s; s = splhigh(); inb(0x84); outb(IO_RTC, reg); inb(0x84); outb(IO_RTC + 1, val); inb(0x84); /* XXX work around wrong order in rtcin() */ splx(s); } static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } static u_int calibrate_clocks(void) { u_int64_t old_tsc; u_int count, prev_count, tot_count; int sec, start_sec, timeout; if (bootverbose) printf("Calibrating clock(s) ... "); if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto fail; timeout = 100000000; /* Read the mc146818A seconds counter. */ for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); break; } if (--timeout == 0) goto fail; } /* Wait for the mC146818A seconds counter to change. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); if (sec != start_sec) break; } if (--timeout == 0) goto fail; } /* Start keeping track of the i8254 counter. */ prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) old_tsc = rdtsc(); else old_tsc = 0; /* shut up gcc */ /* * Wait for the mc146818A seconds counter to change. Read the i8254 * counter for each iteration since this is convenient and only * costs a few usec of inaccuracy. The timing of the final reads * of the counters almost matches the timing of the initial reads, * so the main cause of inaccuracy is the varying latency from * inside getit() or rtcin(RTC_STATUSA) to the beginning of the * rtcin(RTC_SEC) that returns a changed seconds count. The * maximum inaccuracy from this cause is < 10 usec on 486's. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) sec = rtcin(RTC_SEC); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if (sec != start_sec) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc() - old_tsc; if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } static void set_timer_freq(u_int freq, int intr_freq) { u_long ef; int new_timer0_max_count; ef = read_eflags(); disable_intr(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { timer0_max_count = new_timer0_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); write_eflags(ef); } /* * i8254_restore is called from apm_default_resume() to reload * the countdown register. * this should not be necessary but there are broken laptops that * do not restore the countdown register on resume. * when it happnes, it messes up the hardclock interval and system clock, * which leads to the infamous "calcru: negative time" problem. */ void i8254_restore(void) { u_long ef; ef = read_eflags(); disable_intr(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); write_eflags(ef); } /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. */ void startrtclock() { u_int delta, freq; if (cpu_feature & CPUID_TSC) tsc_present = 1; else tsc_present = 0; writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); set_timer_freq(timer_freq, hz); freq = calibrate_clocks(); #ifdef CLK_CALIBRATION_LOOP if (bootverbose) { printf( "Press a key on the console to abort clock calibration\n"); while (cncheckc() == -1) calibrate_clocks(); } #endif /* * Use the calibrated i8254 frequency if it seems reasonable. * Otherwise use the default, and don't use the calibrated i586 * frequency. */ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq; if (delta < timer_freq / 100) { #ifndef CLK_USE_I8254_CALIBRATION if (bootverbose) printf( "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); freq = timer_freq; #endif timer_freq = freq; } else { if (bootverbose) printf( "%d Hz differs from default of %d Hz by more than 1%%\n", freq, timer_freq); tsc_freq = 0; } set_timer_freq(timer_freq, hz); i8254_timecounter.tc_frequency = timer_freq; tc_init(&i8254_timecounter); #ifndef CLK_USE_TSC_CALIBRATION if (tsc_freq != 0) { if (bootverbose) printf( "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); tsc_freq = 0; } #endif if (tsc_present && tsc_freq == 0) { /* * Calibration of the i586 clock relative to the mc146818A * clock failed. Do a less accurate calibration relative * to the i8254 clock. */ u_int64_t old_tsc = rdtsc(); DELAY(1000000); tsc_freq = rdtsc() - old_tsc; #ifdef CLK_USE_TSC_CALIBRATION if (bootverbose) printf("TSC clock: %u Hz (Method B)\n", tsc_freq); #endif } #if !defined(SMP) /* * We can not use the TSC in SMP mode, until we figure out a * cheap (impossible), reliable and precise (yeah right!) way * to synchronize the TSCs of all the CPUs. * Curse Intel for leaving the counter out of the I/O APIC. */ #if NAPM > 0 /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 looses too of course * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ return; #endif /* NAPM > 0 */ if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } #endif /* !defined(SMP) */ } /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { unsigned long sec, days; int yd; int year, month; int y, m, s; struct timespec ts; if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; yd = days; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; if (disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); } /* * Start both clocks running. */ void cpu_initclocks() { int diag; #ifdef APIC_IO int apic_8254_trial; struct intrec *clkdesc; #endif /* APIC_IO */ if (statclock_disable) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt * flag which would normally cause the RTC to generate * interrupts. */ stat_imask = HWI_MASK | SWI_MASK; rtc_statusb = RTCSB_24HR; } else { /* Setting stathz to nonzero early helps avoid races. */ stathz = RTC_NOPROFRATE; profhz = RTC_PROFRATE; } /* Finish initializing 8253 timer 0. */ #ifdef APIC_IO apic_8254_intr = isa_apic_irq(0); apic_8254_trial = 0; if (apic_8254_intr >= 0 ) { if (apic_int_type(0, 0) == 3) apic_8254_trial = 1; } else { /* look for ExtInt on pin 0 */ if (apic_int_type(0, 0) == 3) { apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); #else /* APIC_IO */ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(IRQ0); #endif /* APIC_IO */ /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ if (statclock_disable) return; diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); #endif /* APIC_IO */ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, INTR_EXCL); #ifdef APIC_IO INTREN(APIC_IRQ8); #else INTREN(IRQ8); #endif /* APIC_IO */ writertc(RTC_STATUSB, rtc_statusb); #ifdef APIC_IO if (apic_8254_trial) { printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ if (read_intr_count(apic_8254_intr) < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ INTRDIS(1 << apic_8254_intr); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); /* * Revoke current ISA IRQ 0 assignment and * configure a fallback interrupt routing from * the 8254 Timer via the 8259 PIC to the * an ExtInt interrupt line on IOAPIC #0 intpin 0. * We reuse the low level interrupt handler number. */ if (apic_irq(0, 0) < 0) { revoke_apic_irq(apic_8254_intr); assign_apic_irq(0, 0, apic_8254_intr); } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); } } if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); else printf("APIC_IO: " "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); #endif } #ifdef APIC_IO static u_long read_intr_count(int vec) { u_long *up; up = intr_countp[vec]; if (up) return *up; return 0UL; } static void setup_8254_mixed_mode() { /* * Allow 8254 timer to INTerrupt 8259: * re-initialize master 8259: * reset; prog 4 bytes, single ICU, edge triggered */ outb(IO_ICU1, 0x13); outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 1, 0x00); /* ignore slave */ outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ /* program IO APIC for type 3 INT on INT0 */ if (ext_int_setup(0, 0) < 0) panic("8254 redirect via APIC pin0 impossible!"); } #endif void setstatclockrate(int newhz) { if (newhz == RTC_PROFRATE) rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; else rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); } static int -sysctl_machdep_i8254_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is is too generic. Should use it everywhere. */ freq = timer_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { if (timer0_state != RELEASED) return (EBUSY); /* too much trouble to handle */ set_timer_freq(freq, hz); i8254_timecounter.tc_frequency = freq; tc_update(&i8254_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", ""); static int -sysctl_machdep_tsc_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (tsc_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = tsc_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { tsc_freq = freq; tsc_timecounter.tc_frequency = tsc_freq; tc_update(&tsc_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", ""); static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; u_long ef; u_int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. */ (lapic_irr1 & (1 << apic_8254_intr)))) #else (inb(IO_ICU1) & 1))) #endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; } i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); write_eflags(ef); return (count); } static unsigned tsc_get_timecount(struct timecounter *tc) { return (rdtsc()); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int attimer_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids)) <= 0) device_quiet(dev); return(result); } static int attimer_attach(device_t dev) { return(0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX restart statclock? */ { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, 1, /* no softc */ }; static devclass_t attimer_devclass; DRIVER_MODULE(attimer, isa, attimer_driver, attimer_devclass, 0, 0); Index: head/sys/i386/i386/userconfig.c =================================================================== --- head/sys/i386/i386/userconfig.c (revision 62572) +++ head/sys/i386/i386/userconfig.c (revision 62573) @@ -1,3447 +1,3447 @@ /** ** Copyright (c) 1995 ** Michael Smith, msmith@freebsd.org. All rights reserved. ** ** This code contains a module marked : * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 Jordan K. Hubbard * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * Many additional changes by Bruce Evans * * This code is derived from software contributed by the * University of California Berkeley, Jordan K. Hubbard, * David Greenman and Bruce Evans. ** As such, it contains code subject to the above copyrights. ** The module and its copyright can be found below. ** ** Redistribution and use in source and binary forms, with or without ** modification, are permitted provided that the following conditions ** are met: ** 1. Redistributions of source code must retain the above copyright ** notice, this list of conditions and the following disclaimer as ** the first lines of this file unmodified. ** 2. Redistributions in binary form must reproduce the above copyright ** notice, this list of conditions and the following disclaimer in the ** documentation and/or other materials provided with the distribution. ** 3. All advertising materials mentioning features or use of this software ** must display the following acknowledgment: ** This product includes software developed by Michael Smith. ** 4. The name of the author may not be used to endorse or promote products ** derived from this software without specific prior written permission. ** ** THIS SOFTWARE IS PROVIDED BY MICHAEL SMITH ``AS IS'' AND ANY EXPRESS OR ** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ** IN NO EVENT SHALL MICHAEL SMITH BE LIABLE FOR ANY DIRECT, INDIRECT, ** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ** ** $FreeBSD$ **/ /** ** USERCONFIG ** ** Kernel boot-time configuration manipulation tool for FreeBSD. ** ** Two modes of operation are supported : the default is the line-editor mode, ** the command "visual" invokes the fullscreen mode. ** ** The line-editor mode is the old favorite from FreeBSD 2.0/20.05 &c., the ** fullscreen mode requires syscons or a minimal-ansi serial console. **/ /** ** USERCONFIG, visual mode. ** ** msmith@freebsd.org ** ** Look for "EDIT THIS LIST" to add to the list of known devices ** ** ** There are a number of assumptions made in this code. ** ** - That the console supports a minimal set of ANSI escape sequences ** (See the screen manipulation section for a summary) ** and has at least 24 rows. ** - That values less than or equal to zero for any of the device ** parameters indicate that the driver does not use the parameter. ** - That flags are _always_ editable. ** ** Devices marked as disabled are imported as such. ** ** For this tool to be useful, the list of devices below _MUST_ be updated ** when a new driver is brought into the kernel. It is not possible to ** extract this information from the drivers in the kernel. ** ** XXX - TODO: ** ** - Display _what_ a device conflicts with. ** - Implement page up/down (as what?) ** - Wizard mode (no restrictions) ** - Find out how to put syscons back into low-intensity mode so that the ** !b escape is useful on the console. (It seems to be that it actually ** gets low/high intensity backwards. That looks OK.) ** ** - Only display headings with devices under them. (difficult) **/ #include "opt_userconfig.h" #define COMPAT_OLDISA /* get the definitions */ #include #include #include #include #include #include #include #include #include #include #include #define _I386_ISA_ISA_DEVICE_H_ #undef NPNP #define NPNP 0 #if NPNP > 0 #include #endif static MALLOC_DEFINE(M_DEVL, "uc_devlist", "uc_device lists in userconfig()"); #include static struct uc_device *uc_devlist; /* list read by kget to extract changes */ static struct uc_device *uc_devtab; /* fake uc_device table */ static int userconfig_boot_parsing; /* set if we are reading from the boot instructions */ #define putchar(x) cnputc(x) static void load_devtab(void); static void free_devtab(void); static void save_resource(struct uc_device *); static int -sysctl_machdep_uc_devlist (SYSCTL_HANDLER_ARGS) +sysctl_machdep_uc_devlist(SYSCTL_HANDLER_ARGS) { struct uc_device *id; int error=0; char name[8]; if(!req->oldptr) { /* Only sizing */ id=uc_devlist; while(id) { error+=sizeof(struct uc_device)+8; id=id->id_next; } return(SYSCTL_OUT(req,0,error)); } else { /* Output the data. The buffer is filled with consecutive * struct uc_device and char buf[8], containing the name * (not guaranteed to end with '\0'). */ id=uc_devlist; while(id) { error=sysctl_handle_opaque(oidp,id, sizeof(struct uc_device),req); if(error) return(error); strncpy(name,id->id_name,8); error=sysctl_handle_opaque(oidp,name, 8,req); if(error) return(error); id=id->id_next; } return(0); } } SYSCTL_PROC( _machdep, OID_AUTO, uc_devlist, CTLFLAG_RD, 0, 0, sysctl_machdep_uc_devlist, "A", "List of ISA devices changed in UserConfig"); /* ** Obtain command input. ** ** Initially, input is read from a possibly-loaded script. ** At the end of the script, or if no script is supplied, ** behaviour is determined by the RB_CONFIG (-c) flag. If ** the flag is set, user input is read from the console; if ** unset, the 'quit' command is invoked and userconfig ** will exit. ** ** Note that quit commands encountered in the script will be ** ignored if the RB_CONFIG flag is supplied. */ static const char *config_script; static int config_script_size; /* use of int for -ve magic value */ #define has_config_script() (config_script_size > 0) static int init_config_script(void) { caddr_t autoentry, autoattr; /* Look for loaded userconfig script */ autoentry = preload_search_by_type("userconfig_script"); if (autoentry != NULL) { /* We have one, get size and data */ config_script_size = 0; if ((autoattr = preload_search_info(autoentry, MODINFO_SIZE)) != NULL) config_script_size = (size_t)*(u_int32_t *)autoattr; config_script = NULL; if ((autoattr = preload_search_info(autoentry, MODINFO_ADDR)) != NULL) config_script = *(const char **)autoattr; /* sanity check */ if ((config_script_size == 0) || (config_script == NULL)) { config_script_size = 0; config_script = NULL; } } return has_config_script(); } static int getchar(void) { int c = -1; #ifdef INTRO_USERCONFIG static int intro = 0; #endif if (has_config_script()) { /* Consume character from loaded userconfig script, display */ userconfig_boot_parsing = 1; c = *config_script; config_script++; config_script_size--; } else { #ifdef INTRO_USERCONFIG if (userconfig_boot_parsing) { if (!(boothowto & RB_CONFIG)) { /* userconfig_script, !RB_CONFIG -> quit */ if (intro == 0) { c = 'q'; config_script = "uit\n"; config_script_size = strlen(config_script); /* userconfig_script will be 1 on the next pass */ } } else { /* userconfig_script, RB_CONFIG -> cngetc() */ } } else { if (!(boothowto & RB_CONFIG)) { /* no userconfig_script, !RB_CONFIG -> show intro */ if (intro == 0) { intro = 1; c = 'i'; config_script = "ntro\n"; config_script_size = strlen(config_script); /* userconfig_script will be 1 on the next pass */ } } else { /* no userconfig_script, RB_CONFIG -> cngetc() */ } } #else /* !INTRO_USERCONFIG */ /* assert(boothowto & RB_CONFIG) */ #endif /* INTRO_USERCONFIG */ userconfig_boot_parsing = 0; if (c <= 0) c = cngetc(); } return(c); } #ifndef FALSE #define FALSE (0) #define TRUE (!FALSE) #endif #ifdef VISUAL_USERCONFIG typedef struct { char dev[16]; /* device basename */ char name[60]; /* long name */ int attrib; /* things to do with the device */ int class; /* device classification */ } DEV_INFO; #define FLG_INVISIBLE (1<<0) /* device should not be shown */ #define FLG_MANDATORY (1<<1) /* device can be edited but not disabled */ #define FLG_FIXIRQ (1<<2) /* device IRQ cannot be changed */ #define FLG_FIXIOBASE (1<<3) /* device iobase cannot be changed */ #define FLG_FIXMADDR (1<<4) /* device maddr cannot be changed */ #define FLG_FIXMSIZE (1<<5) /* device msize cannot be changed */ #define FLG_FIXDRQ (1<<6) /* device DRQ cannot be changed */ #define FLG_FIXED (FLG_FIXIRQ|FLG_FIXIOBASE|FLG_FIXMADDR|FLG_FIXMSIZE|FLG_FIXDRQ) #define FLG_IMMUTABLE (FLG_FIXED|FLG_MANDATORY) #define CLS_STORAGE 1 /* storage devices */ #define CLS_NETWORK 2 /* network interfaces */ #define CLS_COMMS 3 /* serial, parallel ports */ #define CLS_INPUT 4 /* user input : mice, keyboards, joysticks etc */ #define CLS_MMEDIA 5 /* "multimedia" devices (sound, video, etc) */ #define CLS_MISC 255 /* none of the above */ typedef struct { char name[60]; int number; } DEVCLASS_INFO; static DEVCLASS_INFO devclass_names[] = { { "Storage : ", CLS_STORAGE}, { "Network : ", CLS_NETWORK}, { "Communications : ", CLS_COMMS}, { "Input : ", CLS_INPUT}, { "Multimedia : ", CLS_MMEDIA}, { "Miscellaneous : ", CLS_MISC}, { "",0}}; /********************* EDIT THIS LIST **********************/ /** Notes : ** ** - Devices that shouldn't be seen or removed should be marked FLG_INVISIBLE. ** - XXX The list below should be reviewed by the driver authors to verify ** that the correct flags have been set for each driver, and that the ** descriptions are accurate. **/ static DEV_INFO device_info[] = { /*---Name----- ---Description---------------------------------------------- */ {"adv", "AdvanSys SCSI narrow controller", 0, CLS_STORAGE}, {"bt", "Buslogic SCSI controller", 0, CLS_STORAGE}, {"aha", "Adaptec 154x SCSI controller", 0, CLS_STORAGE}, {"aic", "Adaptec 152x SCSI and compatible SCSI cards", 0, CLS_STORAGE}, {"nca", "ProAudio Spectrum SCSI and compatibles", 0, CLS_STORAGE}, {"sea", "Seagate ST01/ST02 SCSI and compatibles", 0, CLS_STORAGE}, {"ata", "ATA/ATAPI compatible disk controller", 0, CLS_STORAGE}, {"fdc", "Floppy disk controller", FLG_FIXED, CLS_STORAGE}, {"mcd", "Mitsumi CD-ROM", 0, CLS_STORAGE}, {"scd", "Sony CD-ROM", 0, CLS_STORAGE}, {"matcd", "Matsushita/Panasonic/Creative CDROM", 0, CLS_STORAGE}, {"wt", "Wangtek/Archive QIC-02 Tape drive", 0, CLS_STORAGE}, {"wd", "IDE or ST506 compatible storage device", FLG_INVISIBLE, CLS_STORAGE}, {"ad", "ATA/ATAPI compatible storage device", FLG_INVISIBLE, CLS_STORAGE}, {"fd", "Floppy disk device", FLG_INVISIBLE, CLS_STORAGE}, {"cs", "IBM EtherJet, CS89x0-based Ethernet adapters",0, CLS_NETWORK}, {"ed", "NE1000,NE2000,3C503,WD/SMC80xx Ethernet adapters",0, CLS_NETWORK}, {"el", "3C501 Ethernet adapter", 0, CLS_NETWORK}, {"ep", "3C509 Ethernet adapter", 0, CLS_NETWORK}, {"ex", "Intel EtherExpress Pro/10 Ethernet adapter", 0, CLS_NETWORK}, {"fe", "Fujitsu MD86960A/MB869685A Ethernet adapters", 0, CLS_NETWORK}, {"ie", "AT&T Starlan 10 and EN100, 3C507, NI5210 Ethernet adapters",0,CLS_NETWORK}, {"le", "DEC Etherworks 2 and 3 Ethernet adapters", 0, CLS_NETWORK}, {"lnc", "Isolan, Novell NE2100/NE32-VL Ethernet adapters", 0,CLS_NETWORK}, {"sn", "SMC/Megahertz Ethernet adapters", 0,CLS_NETWORK}, {"xe", "Xircom PC Card Ethernet adapter", 0, CLS_NETWORK}, {"rdp", "RealTek RTL8002 Pocket Ethernet", 0, CLS_NETWORK}, {"sio", "8250/16450/16550 Serial port", 0, CLS_COMMS}, {"cx", "Cronyx/Sigma multiport sync/async adapter",0, CLS_COMMS}, {"rc", "RISCom/8 multiport async adapter", 0, CLS_COMMS}, {"cy", "Cyclades multiport async adapter", 0, CLS_COMMS}, {"dgb", "Digiboard PC/Xe, PC/Xi async adapter", 0, CLS_COMMS}, {"si", "Specialix SI/XIO/SX async adapter", 0, CLS_COMMS}, {"stl", "Stallion EasyIO/Easy Connection 8/32 async adapter",0, CLS_COMMS}, {"stli", "Stallion intelligent async adapter" ,0, CLS_COMMS}, {"ppc", "Parallel Port chipset", 0, CLS_COMMS}, {"gp", "National Instruments AT-GPIB/TNT driver", 0, CLS_COMMS}, {"atkbdc", "Keyboard controller", FLG_INVISIBLE, CLS_INPUT}, {"atkbd", "Keyboard", FLG_FIXED, CLS_INPUT}, {"mse", "Microsoft Bus Mouse", 0, CLS_INPUT}, {"psm", "PS/2 Mouse", FLG_FIXED, CLS_INPUT}, {"joy", "Joystick", FLG_FIXED, CLS_INPUT}, {"vt", "PCVT console driver", FLG_IMMUTABLE, CLS_INPUT}, {"sc", "Syscons console driver", FLG_IMMUTABLE, CLS_INPUT}, {"sbc", "PCM Creative SoundBlaster/ESS/Avance sounce cards", 0,CLS_MMEDIA}, {"gusc", "PCM Gravis UltraSound sound cards", 0, CLS_MMEDIA}, {"pcm", "PCM Generic soundcard support", 0, CLS_MMEDIA}, {"sb", "VOXWARE Soundblaster PCM (SB/Pro/16, ProAudio Spectrum)",0,CLS_MMEDIA}, {"sbxvi", "VOXWARE Soundblaster 16", 0, CLS_MMEDIA}, {"sbmidi", "VOXWARE Soundblaster MIDI interface", 0, CLS_MMEDIA}, {"awe", "VOXWARE AWE32 MIDI", 0, CLS_MMEDIA}, {"pas", "VOXWARE ProAudio Spectrum PCM and MIDI", 0, CLS_MMEDIA}, {"gus", "VOXWARE Gravis Ultrasound, Ultrasound 16 and Ultrasound MAX",0,CLS_MMEDIA}, {"gusxvi", "VOXWARE Gravis Ultrasound 16-bit PCM", 0, CLS_MMEDIA}, {"gusmax", "VOXWARE Gravis Ultrasound MAX", 0, CLS_MMEDIA}, {"mss", "VOXWARE Microsoft Sound System", 0, CLS_MMEDIA}, {"opl", "VOXWARE OPL-2/3 FM, SB/Pro/16, ProAudio Spectrum",0,CLS_MMEDIA}, {"mpu", "VOXWARE Roland MPU401 MIDI", 0, CLS_MMEDIA}, {"sscape", "VOXWARE Ensoniq Soundscape MIDI interface", 0, CLS_MMEDIA}, {"sscape_mss", "VOXWARE Ensoniq Soundscape PCM", 0, CLS_MMEDIA}, {"uart", "VOXWARE 6850 MIDI UART", 0, CLS_MMEDIA}, {"pca", "PC speaker PCM audio driver", FLG_FIXED, CLS_MMEDIA}, {"ctx", "Coretex-I frame grabber", 0, CLS_MMEDIA}, {"spigot", "Creative Labs Video Spigot video capture", 0, CLS_MMEDIA}, {"scc", "IBM Smart Capture Card", 0, CLS_MMEDIA}, {"gsc", "Genius GS-4500 hand scanner", 0, CLS_MMEDIA}, {"asc", "AmiScan scanner", 0, CLS_MMEDIA}, {"apm", "Advanced Power Management", FLG_FIXED, CLS_MISC}, {"labpc", "National Instruments Lab-PC/Lab-PC+", 0, CLS_MISC}, {"pcic", "PC-card controller", 0, CLS_MISC}, {"npx", "Math coprocessor", FLG_IMMUTABLE, CLS_MISC}, {"vga", "Catchall PCI VGA driver", FLG_INVISIBLE, CLS_MISC}, {"","",0,0}}; typedef struct _devlist_struct { char name[80]; int attrib; /* flag values as per the FLG_* defines above */ int class; /* disk, etc as per the CLS_* defines above */ char dev[16]; int iobase,irq,drq,maddr,msize,unit,flags,id; int comment; /* 0 = device, 1 = comment, 2 = collapsed comment */ int conflicts; /* set/reset by findconflict, count of conflicts */ int changed; /* nonzero if the device has been edited */ struct uc_device *device; struct _devlist_struct *prev,*next; } DEV_LIST; #define DEV_DEVICE 0 #define DEV_COMMENT 1 #define DEV_ZOOMED 2 #define LIST_CURRENT (1<<0) #define LIST_SELECTED (1<<1) #define KEY_EXIT 0 /* return codes from dolist() and friends */ #define KEY_DO 1 #define KEY_DEL 2 #define KEY_TAB 3 #define KEY_REDRAW 4 #define KEY_UP 5 /* these only returned from editval() */ #define KEY_DOWN 6 #define KEY_LEFT 7 #define KEY_RIGHT 8 #define KEY_NULL 9 /* this allows us to spin & redraw */ #define KEY_ZOOM 10 /* these for zoom all/collapse all */ #define KEY_UNZOOM 11 #define KEY_HELP 12 /* duh? */ static void redraw(void); static void insdev(DEV_LIST *dev, DEV_LIST *list); static int devinfo(DEV_LIST *dev); static int visuserconfig(void); static DEV_LIST *active = NULL,*inactive = NULL; /* driver lists */ static DEV_LIST *alist,*ilist; /* visible heads of the driver lists */ static DEV_LIST scratch; /* scratch record */ static int conflicts; /* total conflict count */ static char lines[] = "--------------------------------------------------------------------------------"; static char spaces[] = " "; /** ** Device manipulation stuff : find, describe, configure. **/ /** ** setdev ** ** Sets the device referenced by (*dev) to the parameters in the struct, ** and the enable flag according to (enabled) **/ static void setdev(DEV_LIST *dev, int enabled) { dev->device->id_iobase = dev->iobase; /* copy happy */ dev->device->id_irq = (u_short)(dev->irq < 16 ? 1<irq : 0); /* IRQ is bitfield */ dev->device->id_drq = (short)dev->drq; dev->device->id_maddr = (caddr_t)dev->maddr; dev->device->id_msize = dev->msize; dev->device->id_flags = dev->flags; dev->device->id_enabled = enabled; } /** ** getdevs ** ** Walk the kernel device tables and build the active and inactive lists **/ static void getdevs(void) { int i; struct uc_device *ap; ap = uc_devtab; /* pointer to array of devices */ for (i = 0; ap[i].id_id; i++) /* for each device in this table */ { scratch.unit = ap[i].id_unit; /* device parameters */ strcpy(scratch.dev,ap[i].id_name); scratch.iobase = ap[i].id_iobase; scratch.irq = ffs(ap[i].id_irq)-1; scratch.drq = ap[i].id_drq; scratch.maddr = (int)ap[i].id_maddr; scratch.msize = ap[i].id_msize; scratch.flags = ap[i].id_flags; scratch.comment = DEV_DEVICE; /* admin stuff */ scratch.conflicts = 0; scratch.device = &ap[i]; /* save pointer for later reference */ scratch.changed = 0; if (!devinfo(&scratch)) /* get more info on the device */ insdev(&scratch,ap[i].id_enabled?active:inactive); } } /** ** Devinfo ** ** Fill in (dev->name), (dev->attrib) and (dev->type) from the device_info array. ** If the device is unknown, put it in the CLS_MISC class, with no flags. ** ** If the device is marked "invisible", return nonzero; the caller should ** not insert any such device into either list. ** **/ static int devinfo(DEV_LIST *dev) { int i; for (i = 0; device_info[i].class; i++) { if (!strcmp(dev->dev,device_info[i].dev)) { if (device_info[i].attrib & FLG_INVISIBLE) /* forget we ever saw this one */ return(1); strcpy(dev->name,device_info[i].name); /* get the name */ dev->attrib = device_info[i].attrib; dev->class = device_info[i].class; return(0); } } strcpy(dev->name,"Unknown device"); dev->attrib = 0; dev->class = CLS_MISC; return(0); } /** ** List manipulation stuff : add, move, initialise, free, traverse ** ** Note that there are assumptions throughout this code that ** the first entry in a list will never move. (assumed to be ** a comment). **/ /** ** Adddev ** ** appends a copy of (dev) to the end of (*list) **/ static void addev(DEV_LIST *dev, DEV_LIST **list) { DEV_LIST *lp,*ap; lp = (DEV_LIST *)malloc(sizeof(DEV_LIST),M_DEVL,M_WAITOK); bcopy(dev,lp,sizeof(DEV_LIST)); /* create copied record */ if (*list) /* list exists */ { ap = *list; while(ap->next) ap = ap->next; /* scoot to end of list */ lp->prev = ap; lp->next = NULL; ap->next = lp; }else{ /* list does not yet exist */ *list = lp; lp->prev = lp->next = NULL; /* list now exists */ } } /** ** Findspot ** ** Finds the 'appropriate' place for (dev) in (list) ** ** 'Appropriate' means in numeric order with other devices of the same type, ** or in alphabetic order following a comment of the appropriate type. ** or at the end of the list if an appropriate comment is not found. (this should ** never happen) ** (Note that the appropriate point is never the top, but may be the bottom) **/ static DEV_LIST * findspot(DEV_LIST *dev, DEV_LIST *list) { DEV_LIST *ap = NULL; /* search for a previous instance of the same device */ for (ap = list; ap; ap = ap->next) { if (ap->comment != DEV_DEVICE) /* ignore comments */ continue; if (!strcmp(dev->dev,ap->dev)) /* same base device */ { if ((dev->unit <= ap->unit) /* belongs before (equal is bad) */ || !ap->next) /* or end of list */ { ap = ap->prev; /* back up one */ break; /* done here */ } if (ap->next) /* if the next item exists */ { if (ap->next->comment != DEV_DEVICE) /* next is a comment */ break; if (strcmp(dev->dev,ap->next->dev)) /* next is a different device */ break; } } } if (!ap) /* not sure yet */ { /* search for a class that the device might belong to */ for (ap = list; ap; ap = ap->next) { if (ap->comment != DEV_DEVICE) /* look for simlar devices */ continue; if (dev->class != ap->class) /* of same class too 8) */ continue; if (strcmp(dev->dev,ap->dev) < 0) /* belongs before the current entry */ { ap = ap->prev; /* back up one */ break; /* done here */ } if (ap->next) /* if the next item exists */ if (ap->next->comment != DEV_DEVICE) /* next is a comment, go here */ break; } } if (!ap) /* didn't find a match */ { for (ap = list; ap->next; ap = ap->next) /* try for a matching comment */ if ((ap->comment != DEV_DEVICE) && (ap->class == dev->class)) /* appropriate place? */ break; } /* or just put up with last */ return(ap); } /** ** Insdev ** ** Inserts a copy of (dev) at the appropriate point in (list) **/ static void insdev(DEV_LIST *dev, DEV_LIST *list) { DEV_LIST *lp,*ap; lp = (DEV_LIST *)malloc(sizeof(DEV_LIST),M_DEVL,M_WAITOK); bcopy(dev,lp,sizeof(DEV_LIST)); /* create copied record */ ap = findspot(lp,list); /* find appropriate spot */ lp->next = ap->next; /* point to next */ if (ap->next) ap->next->prev = lp; /* point next to new */ lp->prev = ap; /* point new to current */ ap->next = lp; /* and current to new */ } /** ** Movedev ** ** Moves (dev) from its current list to an appropriate place in (list) ** (dev) may not come from the top of a list, but it may from the bottom. **/ static void movedev(DEV_LIST *dev, DEV_LIST *list) { DEV_LIST *ap; ap = findspot(dev,list); dev->prev->next = dev->next; /* remove from old list */ if (dev->next) dev->next->prev = dev->prev; dev->next = ap->next; /* insert in new list */ if (ap->next) ap->next->prev = dev; /* point next to new */ dev->prev = ap; /* point new to current */ ap->next = dev; /* and current to new */ } /** ** Initlist ** ** Initialises (*list) with the basic headings **/ static void initlist(DEV_LIST **list) { int i; for(i = 0; devclass_names[i].name[0]; i++) /* for each devtype name */ { strcpy(scratch.name,devclass_names[i].name); scratch.comment = DEV_ZOOMED; scratch.class = devclass_names[i].number; scratch.attrib = FLG_MANDATORY; /* can't be moved */ addev(&scratch,list); /* add to the list */ } } /** ** savelist ** ** Walks (list) and saves the settings of any entry marked as changed. ** ** The device's active field is set according to (active). ** ** Builds the uc_devlist used by kget to extract the changed device information. ** The code for this was taken almost verbatim from the original module. **/ static void savelist(DEV_LIST *list, int active) { struct uc_device *id_p,*id_pn; char *name; while (list) { if ((list->comment == DEV_DEVICE) && /* is a device */ (list->changed) && /* has been changed */ (list->device != NULL)) { /* has an uc_device structure */ setdev(list,active); /* set the device itself */ id_pn = NULL; for (id_p=uc_devlist; id_p; id_p=id_p->id_next) { /* look on the list for it */ if (id_p->id_id == list->device->id_id) { name = list->device->id_name; id_pn = id_p->id_next; if (id_p->id_name) free(id_p->id_name, M_DEVL); bcopy(list->device,id_p,sizeof(struct uc_device)); save_resource(list->device); id_p->id_name = malloc(strlen(name) + 1, M_DEVL,M_WAITOK); strcpy(id_p->id_name, name); id_pn->id_next = uc_devlist; id_p->id_next = id_pn; break; } } if (!id_pn) /* not already on the list */ { name = list->device->id_name; id_pn = malloc(sizeof(struct uc_device),M_DEVL,M_WAITOK); bcopy(list->device,id_pn,sizeof(struct uc_device)); save_resource(list->device); id_pn->id_name = malloc(strlen(name) + 1, M_DEVL,M_WAITOK); strcpy(id_pn->id_name, name); id_pn->id_next = uc_devlist; uc_devlist = id_pn; /* park at top of list */ } } list = list->next; } } /** ** nukelist ** ** Frees all storage in use by a (list). **/ static void nukelist(DEV_LIST *list) { DEV_LIST *dp; if (!list) return; while(list->prev) /* walk to head of list */ list = list->prev; while(list) { dp = list; list = list->next; free(dp,M_DEVL); } } /** ** prevent ** ** Returns the previous entry in (list), skipping zoomed regions. Returns NULL ** if there is no previous entry. (Only possible if list->prev == NULL given the ** premise that there is always a comment at the head of the list) **/ static DEV_LIST * prevent(DEV_LIST *list) { DEV_LIST *dp; if (!list) return(NULL); dp = list->prev; /* start back one */ while(dp) { if (dp->comment == DEV_ZOOMED) /* previous section is zoomed */ return(dp); /* so skip to comment */ if (dp->comment == DEV_COMMENT) /* not zoomed */ return(list->prev); /* one back as normal */ dp = dp->prev; /* backpedal */ } return(dp); /* NULL, we can assume */ } /** ** nextent ** ** Returns the next entry in (list), skipping zoomed regions. Returns NULL ** if there is no next entry. (Possible if the current entry is last, or ** if the current entry is the last heading and it's collapsed) **/ static DEV_LIST * nextent(DEV_LIST *list) { DEV_LIST *dp; if (!list) return(NULL); if (list->comment != DEV_ZOOMED) /* no reason to skip */ return(list->next); dp = list->next; while(dp) { if (dp->comment != DEV_DEVICE) /* found another heading */ break; dp = dp->next; } return(dp); /* back we go */ } /** ** ofsent ** ** Returns the (ofs)th entry down from (list), or NULL if it doesn't exist **/ static DEV_LIST * ofsent(int ofs, DEV_LIST *list) { while (ofs-- && list) list = nextent(list); return(list); } /** ** findconflict ** ** Scans every element of (list) and sets the conflict tags appropriately ** Returns the number of conflicts found. **/ static int findconflict(DEV_LIST *list) { int count = 0; /* number of conflicts found */ DEV_LIST *dp,*sp; for (dp = list; dp; dp = dp->next) /* over the whole list */ { if (dp->comment != DEV_DEVICE) /* comments don't usually conflict */ continue; dp->conflicts = 0; /* assume the best */ for (sp = list; sp; sp = sp->next) /* scan the entire list for conflicts */ { if (sp->comment != DEV_DEVICE) /* likewise */ continue; if (sp == dp) /* always conflict with itself */ continue; if ((dp->iobase > 0) && /* iobase conflict? */ (dp->iobase == sp->iobase)) dp->conflicts = 1; if ((dp->irq > 0) && /* irq conflict? */ (dp->irq == sp->irq)) dp->conflicts = 1; if ((dp->drq > 0) && /* drq conflict? */ (dp->drq == sp->drq)) dp->conflicts = 1; if ((sp->maddr > 0) && /* maddr/msize conflict? */ (dp->maddr > 0) && (sp->maddr + ((sp->msize == 0) ? 1 : sp->msize) > dp->maddr) && (dp->maddr + ((dp->msize == 0) ? 1 : dp->msize) > sp->maddr)) dp->conflicts = 1; } count += dp->conflicts; /* count conflicts */ } return(count); } /** ** expandlist ** ** Unzooms all headings in (list) **/ static void expandlist(DEV_LIST *list) { while(list) { if (list->comment == DEV_COMMENT) list->comment = DEV_ZOOMED; list = list->next; } } /** ** collapselist ** ** Zooms all headings in (list) **/ static void collapselist(DEV_LIST *list) { while(list) { if (list->comment == DEV_ZOOMED) list->comment = DEV_COMMENT; list = list->next; } } /** ** Screen-manipulation stuff ** ** This is the basic screen layout : ** ** 0 5 10 15 20 25 30 35 40 45 50 55 60 67 70 75 ** |....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|.... ** +--------------------------------------------------------------------------------+ ** 0 -|---Active Drivers----------------------------xx Conflicts------Dev---IRQ--Port--| ** 1 -| ........................ ....... .. 0x....| ** 2 -| ........................ ....... .. 0x....| ** 3 -| ........................ ....... .. 0x....| ** 4 -| ........................ ....... .. 0x....| ** 5 -| ........................ ....... .. 0x....| ** 6 -| ........................ ....... .. 0x....| ** 7 -| ........................ ....... .. 0x....| ** 8 -| ........................ ....... .. 0x....| ** 9 -|---Inactive Drivers--------------------------------------------Dev--------------| ** 10-| ........................ ....... | ** 11-| ........................ ....... | ** 12-| ........................ ....... | ** 13-| ........................ ....... | ** 14-| ........................ ....... | ** 15-| ........................ ....... | ** 16-| ........................ ....... | ** 17-|------------------------------------------------------UP-DOWN-------------------| ** 18-| Relevant parameters for the current device | ** 19-| | ** 20-| | ** 21-|--------------------------------------------------------------------------------| ** 22-| Help texts go here | ** 23-| | ** +--------------------------------------------------------------------------------+ ** ** Help texts ** ** On a collapsed comment : ** ** [Enter] Expand device list [z] Expand all lists ** [TAB] Change fields [Q] Save and Exit ** ** On an expanded comment : ** ** [Enter] Collapse device list [Z] Collapse all lists ** [TAB] Change fields [Q] Save and Exit ** ** On a comment with no followers ** ** ** [TAB] Change fields [Q] Save and Exit ** ** On a device in the active list ** ** [Enter] Edit device parameters [DEL] Disable device ** [TAB] Change fields [Q] Save and Exit [?] Help ** ** On a device in the inactive list ** ** [Enter] Enable device ** [TAB] Change fields [Q] Save and Exit [?] Help ** ** While editing parameters ** ** ** [TAB] Change fields [Q] Save device parameters **/ /** ** ** The base-level screen primitives : ** ** bold() - enter bold mode \E[1m (md) ** inverse() - enter inverse mode \E[7m (so) ** normal() - clear bold/inverse mode \E[m (se) ** clear() - clear the screen \E[H\E[J (ce) ** move(x,y) - move the cursor to x,y \E[y;xH: (cm) **/ static void bold(void) { printf("\033[1m"); } static void inverse(void) { printf("\033[7m"); } static void normal(void) { printf("\033[m"); } static void clear(void) { normal(); printf("\033[H\033[J"); } static void move(int x, int y) { printf("\033[%d;%dH",y+1,x+1); } /** ** ** High-level screen primitives : ** ** putxyl(x,y,str,len) - put (len) bytes of (str) at (x,y), supports embedded formatting ** putxy(x,y,str) - put (str) at (x,y), supports embedded formatting ** erase(x,y,w,h) - clear the box (x,y,w,h) ** txtbox(x,y,w,y,str) - put (str) in a region at (x,y,w,h) ** putmsg(str) - put (str) in the message area ** puthelp(str) - put (str) in the upper helpline ** pad(str,len) - pad (str) to (len) with spaces ** drawline(row,detail,list,inverse,*dhelp) ** - draws a line for (*list) at (row) onscreen. If (detail) is ** nonzero, include port, IRQ and maddr, if (inverse) is nonzero, ** draw the line in inverse video, and display (*dhelp) on the ** helpline. ** drawlist(row,num,detail,list) ** - draw (num) entries from (list) at (row) onscreen, passile (detail) ** through to drawline(). ** showparams(dev) - displays the relevant parameters for (dev) below the lists onscreen. ** yesno(str) - displays (str) in the message area, and returns nonzero on 'y' or 'Y' ** redraw(); - Redraws the entire screen layout, including the ** - two list panels. **/ /** ** putxy ** writes (str) at x,y onscreen ** putxyl ** writes up to (len) of (str) at x,y onscreen. ** ** Supports embedded formatting : ** !i - inverse mode. ** !b - bold mode. ** !n - normal mode. **/ static void putxyl(int x, int y, char *str, int len) { move(x,y); normal(); while((*str) && (len--)) { if (*str == '!') /* format escape? */ { switch(*(str+1)) /* depending on the next character */ { case 'i': inverse(); str +=2; /* skip formatting */ len++; /* doesn't count for length */ break; case 'b': bold(); str +=2; /* skip formatting */ len++; /* doesn't count for length */ break; case 'n': normal(); str +=2; /* skip formatting */ len++; /* doesn't count for length */ break; default: putchar(*str++); /* not an escape */ } }else{ putchar(*str++); /* emit the character */ } } } #define putxy(x,y,str) putxyl(x,y,str,-1) /** ** erase ** ** Erases the region (x,y,w,h) **/ static void erase(int x, int y, int w, int h) { int i; normal(); for (i = 0; i < h; i++) putxyl(x,y++,spaces,w); } /** ** txtbox ** ** Writes (str) into the region (x,y,w,h), supports embedded formatting using ** putxy. Lines are not wrapped, newlines must be forced with \n. **/ static void txtbox(int x, int y, int w, int h, char *str) { int i = 0; h--; while((str[i]) && h) { if (str[i] == '\n') /* newline */ { putxyl(x,y,str,(i= len) /* no padding needed */ return; while(i < len) /* pad */ str[i++] = ' '; str[i] = '\0'; } /** ** drawline ** ** Displays entry (ofs) of (list) in region at (row) onscreen, optionally displaying ** the port and IRQ fields if (detail) is nonzero. If (inverse), in inverse video. ** ** The text (dhelp) is displayed if the item is a normal device, otherwise ** help is shown for normal or zoomed comments **/ static void drawline(int row, int detail, DEV_LIST *list, int inverse, char *dhelp) { char lbuf[90],nb[70],db[20],ib[16],pb[16]; if (list->comment == DEV_DEVICE) { nb[0] = ' '; strncpy(nb+1,list->name,57); }else{ strncpy(nb,list->name,58); if ((list->comment == DEV_ZOOMED) && (list->next)) if (list->next->comment == DEV_DEVICE) /* only mention if there's something hidden */ strcat(nb," (Collapsed)"); } nb[58] = '\0'; pad(nb,60); if (list->conflicts) /* device in conflict? */ { if (inverse) { strcpy(nb+54," !nCONF!i "); /* tag conflict, careful of length */ }else{ strcpy(nb+54," !iCONF!n "); /* tag conflict, careful of length */ } } if (list->comment == DEV_DEVICE) { sprintf(db,"%s%d",list->dev,list->unit); pad(db,8); }else{ strcpy(db," "); } if ((list->irq > 0) && detail && (list->comment == DEV_DEVICE)) { sprintf(ib," %d",list->irq); pad(ib,4); }else{ strcpy(ib," "); } if ((list->iobase > 0) && detail && (list->comment == DEV_DEVICE)) { sprintf(pb,"0x%x",list->iobase); pad(pb,7); }else{ strcpy(pb," "); } sprintf(lbuf," %s%s%s%s%s",inverse?"!i":"",nb,db,ib,pb); putxyl(0,row,lbuf,80); if (dhelp) { switch(list->comment) { case DEV_DEVICE: /* ordinary device */ puthelp(dhelp); break; case DEV_COMMENT: puthelp(""); if (list->next) if (list->next->comment == DEV_DEVICE) puthelp(" [!bEnter!n] Collapse device list [!bC!n] Collapse all lists"); break; case DEV_ZOOMED: puthelp(""); if (list->next) if (list->next->comment == DEV_DEVICE) puthelp(" [!bEnter!n] Expand device list [!bX!n] Expand all lists"); break; default: puthelp(" WARNING: This list entry corrupted!"); break; } } move(0,row); /* put the cursor somewhere relevant */ } /** ** drawlist ** ** Displays (num) lines of the contents of (list) at (row), optionally displaying the ** port and IRQ fields as well if (detail) is nonzero ** ** printf in the kernel is essentially useless, so we do most of the hard work ourselves here. **/ static void drawlist(int row, int num, int detail, DEV_LIST *list) { int ofs; for(ofs = 0; ofs < num; ofs++) { if (list) { drawline(row+ofs,detail,list,0,NULL); /* NULL -> don't draw empty help string */ list = nextent(list); /* move down visible list */ }else{ erase(0,row+ofs,80,1); } } } /** ** redrawactive ** ** Redraws the active list **/ static void redrawactive(void) { char cbuf[16]; if (conflicts) { sprintf(cbuf,"!i%d conflict%s-",conflicts,(conflicts>1)?"s":""); putxy(45,0,cbuf); }else{ putxyl(45,0,lines,16); } drawlist(1,8,1,alist); /* draw device lists */ } /** ** redrawinactive ** ** Redraws the inactive list **/ static void redrawinactive(void) { drawlist(10,7,0,ilist); /* draw device lists */ } /** ** redraw ** ** Clear the screen and redraw the entire layout **/ static void redraw(void) { clear(); putxy(0,0,lines); putxy(3,0,"!bActive!n-!bDrivers"); putxy(63,0,"!bDev!n---!bIRQ!n--!bPort"); putxy(0,9,lines); putxy(3,9,"!bInactive!n-!bDrivers"); putxy(63,9,"!bDev"); putxy(0,17,lines); putxy(0,21,lines); masterhelp(" [!bTAB!n] Change fields [!bQ!n] Save and Exit [!b?!n] Help"); redrawactive(); redrawinactive(); } /** ** yesnocancel ** ** Put (str) in the message area, and return 1 if the user hits 'y' or 'Y', ** 2 if they hit 'c' or 'C', or 0 for 'n' or 'N'. **/ static int yesnocancel(char *str) { putmsg(str); for(;;) switch(getchar()) { case -1: case 'n': case 'N': return(0); case 'y': case 'Y': return(1); case 'c': case 'C': return(2); } } /** ** showparams ** ** Show device parameters in the region below the lists ** ** 0 5 10 15 20 25 30 35 40 45 50 55 60 67 70 75 ** |....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|.... ** +--------------------------------------------------------------------------------+ ** 17-|--------------------------------------------------------------------------------| ** 18-| Port address : 0x0000 Memory address : 0x00000 Conflict allowed | ** 19-| IRQ number : 00 Memory size : 0x0000 | ** 20-| Flags : 0x0000 DRQ number : 00 | ** 21-|--------------------------------------------------------------------------------| **/ static void showparams(DEV_LIST *dev) { char buf[80]; erase(0,18,80,3); /* clear area */ if (!dev) return; if (dev->comment != DEV_DEVICE) return; if (dev->iobase > 0) { sprintf(buf,"Port address : 0x%x",dev->iobase); putxy(1,18,buf); } if (dev->irq > 0) { sprintf(buf,"IRQ number : %d",dev->irq); putxy(1,19,buf); } sprintf(buf,"Flags : 0x%x",dev->flags); putxy(1,20,buf); if (dev->maddr > 0) { sprintf(buf,"Memory address : 0x%x",dev->maddr); putxy(26,18,buf); } if (dev->msize > 0) { sprintf(buf,"Memory size : 0x%x",dev->msize); putxy(26,19,buf); } if (dev->drq > 0) { sprintf(buf,"DRQ number : %d",dev->drq); putxy(26,20,buf); } } /** ** Editing functions for device parameters ** ** editval(x,y,width,hex,min,max,val) - Edit (*val) in a field (width) wide at (x,y) ** onscreen. Refuse values outsise (min) and (max). ** editparams(dev) - Edit the parameters for (dev) **/ #define VetRet(code) \ { \ if ((i >= min) && (i <= max)) /* legit? */ \ { \ *val = i; \ sprintf(buf,hex?"0x%x":"%d",i); \ putxy(hex?x-2:x,y,buf); \ return(code); /* all done and exit */ \ } \ i = *val; /* restore original value */ \ delta = 1; /* restore other stuff */ \ } /** ** editval ** ** Edit (*val) at (x,y) in (hex)?hex:decimal mode, allowing values between (min) and (max) ** in a field (width) wide. (Allow one space) ** If (ro) is set, we're in "readonly" mode, so disallow edits. ** ** Return KEY_TAB on \t, KEY_EXIT on 'q' **/ static int editval(int x, int y, int width, int hex, int min, int max, int *val, int ro) { int i = *val; /* work with copy of the value */ char buf[2+11+1],tc[11+1]; /* display buffer, text copy */ int xp = 0; /* cursor offset into text copy */ int delta = 1; /* force redraw first time in */ int c; int extended = 0; /* stage counter for extended key sequences */ if (hex) /* we presume there's a leading 0x onscreen */ putxy(x-2,y,"!i0x"); /* coz there sure is now */ for (;;) { if (delta) /* only update if necessary */ { sprintf(tc,hex?"%x":"%d",i); /* make a text copy of the value */ sprintf(buf,"!i%s",tc); /* format for printing */ erase(x,y,width,1); /* clear the area */ putxy(x,y,buf); /* write */ xp = strlen(tc); /* cursor always at end */ move(x+xp,y); /* position the cursor */ } c = getchar(); switch(extended) /* escape handling */ { case 0: if (c == 0x1b) /* esc? */ { extended = 1; /* flag and spin */ continue; } extended = 0; break; /* nope, drop through */ case 1: /* there was an escape prefix */ if (c == '[' || c == 'O') /* second character in sequence */ { extended = 2; continue; } if (c == 0x1b) return(KEY_EXIT); /* double esc exits */ extended = 0; break; /* nup, not a sequence. */ case 2: extended = 0; switch(c) /* looks like the real McCoy */ { case 'A': VetRet(KEY_UP); /* leave if OK */ continue; case 'B': VetRet(KEY_DOWN); /* leave if OK */ continue; case 'C': VetRet(KEY_RIGHT); /* leave if OK */ continue; case 'D': VetRet(KEY_LEFT); /* leave if OK */ continue; default: continue; } } switch(c) { case '\t': /* trying to tab off */ VetRet(KEY_TAB); /* verify and maybe return */ break; case -1: case 'q': case 'Q': VetRet(KEY_EXIT); break; case '\b': case '\177': /* BS or DEL */ if (ro) /* readonly? */ { puthelp(" !iThis value cannot be edited (Press ESC)"); while(getchar() != 0x1b); /* wait for key */ return(KEY_NULL); /* spin */ } if (xp) /* still something left to delete */ { i = (hex ? i/0x10u : i/10); /* strip last digit */ delta = 1; /* force update */ } break; case 588: VetRet(KEY_UP); break; case '\r': case '\n': case 596: VetRet(KEY_DOWN); break; case 591: VetRet(KEY_LEFT); break; case 593: VetRet(KEY_RIGHT); break; default: if (ro) /* readonly? */ { puthelp(" !iThis value cannot be edited (Press ESC)"); while(getchar() != 0x1b); /* wait for key */ return(KEY_NULL); /* spin */ } if (xp >= width) /* no room for more characters anyway */ break; if (hex) { if ((c >= '0') && (c <= '9')) { i = i*0x10 + (c-'0'); /* update value */ delta = 1; break; } if ((c >= 'a') && (c <= 'f')) { i = i*0x10 + (c-'a'+0xa); delta = 1; break; } if ((c >= 'A') && (c <= 'F')) { i = i*0x10 + (c-'A'+0xa); delta = 1; break; } }else{ if ((c >= '0') && (c <= '9')) { i = i*10 + (c-'0'); /* update value */ delta = 1; /* force redraw */ break; } } break; } } } /** ** editparams ** ** Edit the parameters for (dev) ** ** Note that it's _always_ possible to edit the flags, otherwise it might be ** possible for this to spin in an endless loop... ** 0 5 10 15 20 25 30 35 40 45 50 55 60 67 70 75 ** |....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|.... ** +--------------------------------------------------------------------------------+ ** 17-|--------------------------------------------------------------------------------| ** 18-| Port address : 0x0000 Memory address : 0x00000 Conflict allowed | ** 19-| IRQ number : 00 Memory size : 0x0000 | ** 20-| Flags : 0x0000 DRQ number : 00 | ** 21-|--------------------------------------------------------------------------------| ** ** The "intelligence" in this function that hops around based on the directional ** returns from editval isn't very smart, and depends on the layout above. **/ static void editparams(DEV_LIST *dev) { int ret; char buf[16]; /* needs to fit the device name */ putxy(2,17,"!bParameters!n-!bfor!n-!bdevice!n-"); sprintf(buf,"!b%s",dev->dev); putxy(24,17,buf); erase(1,22,80,1); for (;;) { ep_iobase: if (dev->iobase > 0) { puthelp(" IO Port address (Hexadecimal, 0x1-0xffff)"); ret = editval(18,18,5,1,0x1,0xffff,&(dev->iobase),(dev->attrib & FLG_FIXIOBASE)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_RIGHT: if (dev->maddr > 0) goto ep_maddr; break; case KEY_TAB: case KEY_DOWN: goto ep_irq; } goto ep_iobase; } ep_irq: if (dev->irq > 0) { puthelp(" Interrupt number (Decimal, 1-15)"); ret = editval(16,19,3,0,1,15,&(dev->irq),(dev->attrib & FLG_FIXIRQ)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_RIGHT: if (dev->msize > 0) goto ep_msize; break; case KEY_UP: if (dev->iobase > 0) goto ep_iobase; break; case KEY_TAB: case KEY_DOWN: goto ep_flags; } goto ep_irq; } ep_flags: puthelp(" Device-specific flag values."); ret = editval(18,20,8,1,INT_MIN,INT_MAX,&(dev->flags),0); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_RIGHT: if (dev->drq > 0) goto ep_drq; break; case KEY_UP: if (dev->irq > 0) goto ep_irq; if (dev->iobase > 0) goto ep_iobase; break; case KEY_DOWN: if (dev->maddr > 0) goto ep_maddr; if (dev->msize > 0) goto ep_msize; if (dev->drq > 0) goto ep_drq; break; case KEY_TAB: goto ep_maddr; } goto ep_flags; ep_maddr: if (dev->maddr > 0) { puthelp(" Device memory start address (Hexadecimal, 0x1-0xfffff)"); ret = editval(45,18,6,1,0x1,0xfffff,&(dev->maddr),(dev->attrib & FLG_FIXMADDR)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_LEFT: if (dev->iobase > 0) goto ep_iobase; break; case KEY_UP: goto ep_flags; case KEY_DOWN: if (dev->msize > 0) goto ep_msize; if (dev->drq > 0) goto ep_drq; break; case KEY_TAB: goto ep_msize; } goto ep_maddr; } ep_msize: if (dev->msize > 0) { puthelp(" Device memory size (Hexadecimal, 0x1-0x10000)"); ret = editval(45,19,5,1,0x1,0x10000,&(dev->msize),(dev->attrib & FLG_FIXMSIZE)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_LEFT: if (dev->irq > 0) goto ep_irq; break; case KEY_UP: if (dev->maddr > 0) goto ep_maddr; goto ep_flags; case KEY_DOWN: if (dev->drq > 0) goto ep_drq; break; case KEY_TAB: goto ep_drq; } goto ep_msize; } ep_drq: if (dev->drq > 0) { puthelp(" Device DMA request number (Decimal, 1-7)"); ret = editval(43,20,2,0,1,7,&(dev->drq),(dev->attrib & FLG_FIXDRQ)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_LEFT: goto ep_flags; case KEY_UP: if (dev->msize > 0) goto ep_msize; if (dev->maddr > 0) goto ep_maddr; goto ep_flags; case KEY_TAB: goto ep_iobase; } goto ep_drq; } } ep_exit: dev->changed = 1; /* mark as changed */ } static char *helptext[] = { " Using the UserConfig kernel settings editor", " -------------------------------------------", "", "VISUAL MODE:", "", "- - Layout -", "", "The screen displays a list of available drivers, divided into two", "scrolling lists: Active Drivers, and Inactive Drivers. Each list is", "by default collapsed and can be expanded to show all the drivers", "available in each category. The parameters for the currently selected", "driver are shown at the bottom of the screen.", "", "- - Moving around -", "", "To move in the current list, use the UP and DOWN cursor keys to select", "an item (the selected item will be highlighted). If the item is a", "category name, you may alternatively expand or collapse the list of", "drivers for that category by pressing [!bENTER!n]. Once the category is", "expanded, you can select each driver in the same manner and either:", "", " - change its parameters using [!bENTER!n]", " - move it to the Inactive list using [!bDEL!n]", "", "Use the [!bTAB!n] key to toggle between the Active and Inactive list; if", "you need to move a driver from the Inactive list back to the Active", "one, select it in the Inactive list, using [!bTAB!n] to change lists if", "necessary, and press [!bENTER!n] -- the device will be moved back to", "its place in the Active list.", "", "- - Altering the list/parameters -", "", "Any drivers for devices not installed in your system should be moved", "to the Inactive list, until there are no remaining parameter conflicts", "between the drivers, as indicated at the top.", "", "Once the list of Active drivers only contains entries for the devices", "present in your system, you can set their parameters (Interrupt, DMA", "channel, I/O addresses). To do this, select the driver and press", "[!bENTER!n]: it is now possible to edit the settings at the", "bottom of the screen. Use [!bTAB!n] to change fields, and when you are", "finished, use [!bQ!n] to return to the list.", "", "- - Saving changes -", "", "When all settings seem correct, and you wish to proceed with the", "kernel device probing and boot, press [!bQ!n] -- you will be asked to", "confirm your choice.", "", NULL }; /** ** helpscreen ** ** Displays help text onscreen for people that are confused, using a simple ** pager. **/ static void helpscreen(void) { int topline = 0; /* where we are in the text */ int c, delta = 1; char prompt[80]; for (;;) /* loop until user quits */ { int line = 0; /* display help text */ if (delta) { clear(); /* remove everything else */ for (line = topline; (line < (topline + 24)) && (helptext[line]); line++) putxy(0,line-topline,helptext[line]); delta = 0; } /* prompt */ sprintf(prompt,"!i --%s-- [U]p [D]own [Q]uit !n",helptext[line] ? "MORE" : "END"); putxy(0,24,prompt); c = getchar(); /* so what do they say? */ switch (c) { case 'u': case 'U': case 'b': case 'B': /* wired into 'more' users' fingers */ if (topline > 0) /* room to go up? */ { topline -= 24; if (topline < 0) /* don't go too far */ topline = 0; delta = 1; } break; case 'd': case 'D': case ' ': /* expected by most people */ if (helptext[line]) /* maybe more below? */ { topline += 24; delta = 1; } break; case 'q': case 'Q': redraw(); /* restore the screen */ return; } } } /** ** High-level control functions **/ /** ** dolist ** ** Handle user movement within (*list) in the region starting at (row) onscreen with ** (num) lines, starting at (*ofs) offset from row onscreen. ** Pass (detail) on to drawing routines. ** ** If the user hits a key other than a cursor key, maybe return a code. ** ** (*list) points to the device at the top line in the region, (*ofs) is the ** position of the highlight within the region. All routines below ** this take only a device and an absolute row : use ofsent() to find the ** device, and add (*ofs) to (row) to find the absolute row. **/ static int dolist(int row, int num, int detail, int *ofs, DEV_LIST **list, char *dhelp) { int extended = 0; int c; DEV_LIST *lp; int delta = 1; for(;;) { if (delta) { showparams(ofsent(*ofs,*list)); /* show device parameters */ drawline(row+*ofs,detail,ofsent(*ofs,*list),1,dhelp); /* highlight current line */ delta = 0; } c = getchar(); /* get a character */ if ((extended == 2) || (c==588) || (c==596)) /* console gives "alternative" codes */ { extended = 0; /* no longer */ switch(c) { case 588: /* syscons' idea of 'up' */ case 'A': /* up */ if (*ofs) /* just a move onscreen */ { drawline(row+*ofs,detail,ofsent(*ofs,*list),0,dhelp);/* unhighlight current line */ (*ofs)--; /* move up */ }else{ lp = prevent(*list); /* can we go up? */ if (!lp) /* no */ break; *list = lp; /* yes, move up list */ drawlist(row,num,detail,*list); } delta = 1; break; case 596: /* dooby-do */ case 'B': /* down */ lp = ofsent(*ofs,*list); /* get current item */ if (!nextent(lp)) break; /* nothing more to move to */ drawline(row+*ofs,detail,ofsent(*ofs,*list),0,dhelp); /* unhighlight current line */ if (*ofs < (num-1)) /* room to move onscreen? */ { (*ofs)++; }else{ *list = nextent(*list); /* scroll region down */ drawlist(row,num,detail,*list); } delta = 1; break; } }else{ switch(c) { case '\033': extended=1; break; case '[': /* cheat : always preceeds cursor move */ case 'O': /* ANSI application key mode */ if (extended==1) extended=2; else extended=0; break; case 'Q': case 'q': return(KEY_EXIT); /* user requests exit */ case '\r': case '\n': return(KEY_DO); /* "do" something */ case '\b': case '\177': case 599: return(KEY_DEL); /* "delete" response */ case 'X': case 'x': return(KEY_UNZOOM); /* expand everything */ case 'C': case 'c': return(KEY_ZOOM); /* collapse everything */ case '\t': drawline(row+*ofs,detail,ofsent(*ofs,*list),0,dhelp); /* unhighlight current line */ return(KEY_TAB); /* "move" response */ case '\014': /* ^L, redraw */ return(KEY_REDRAW); case '?': /* helptext */ return(KEY_HELP); } } } } /** ** visuserconfig ** ** Do the fullscreen config thang **/ static int visuserconfig(void) { int actofs = 0, inactofs = 0, mode = 0, ret = -1, i; DEV_LIST *dp; initlist(&active); initlist(&inactive); alist = active; ilist = inactive; getdevs(); conflicts = findconflict(active); /* find conflicts in the active list only */ redraw(); for(;;) { switch(mode) { case 0: /* active devices */ ret = dolist(1,8,1,&actofs,&alist, " [!bEnter!n] Edit device parameters [!bDEL!n] Disable device"); switch(ret) { case KEY_TAB: mode = 1; /* swap lists */ break; case KEY_REDRAW: redraw(); break; case KEY_ZOOM: alist = active; actofs = 0; expandlist(active); redrawactive(); break; case KEY_UNZOOM: alist = active; actofs = 0; collapselist(active); redrawactive(); break; case KEY_DEL: dp = ofsent(actofs,alist); /* get current device */ if (dp) /* paranoia... */ { if (dp->attrib & FLG_MANDATORY) /* can't be deleted */ break; if (dp == alist) /* moving top item on list? */ { if (dp->next) { alist = dp->next; /* point list to non-moving item */ }else{ alist = dp->prev; /* end of list, go back instead */ } }else{ if (!dp->next) /* moving last item on list? */ actofs--; } dp->conflicts = 0; /* no conflicts on the inactive list */ movedev(dp,inactive); /* shift to inactive list */ conflicts = findconflict(active); /* update conflict tags */ dp->changed = 1; redrawactive(); /* redraw */ redrawinactive(); } break; case KEY_DO: /* edit device parameters */ dp = ofsent(actofs,alist); /* get current device */ if (dp) /* paranoia... */ { if (dp->comment == DEV_DEVICE) /* can't edit comments, zoom? */ { masterhelp(" [!bTAB!n] Change fields [!bQ!n] Save device parameters"); editparams(dp); masterhelp(" [!bTAB!n] Change fields [!bQ!n] Save and Exit [!b?!n] Help"); putxy(0,17,lines); conflicts = findconflict(active); /* update conflict tags */ }else{ /* DO on comment = zoom */ switch(dp->comment) /* Depends on current state */ { case DEV_COMMENT: /* not currently zoomed */ dp->comment = DEV_ZOOMED; break; case DEV_ZOOMED: dp->comment = DEV_COMMENT; break; } } redrawactive(); } break; } break; case 1: /* inactive devices */ ret = dolist(10,7,0,&inactofs,&ilist, " [!bEnter!n] Enable device "); switch(ret) { case KEY_TAB: mode = 0; break; case KEY_REDRAW: redraw(); break; case KEY_ZOOM: ilist = inactive; inactofs = 0; expandlist(inactive); redrawinactive(); break; case KEY_UNZOOM: ilist = inactive; inactofs = 0; collapselist(inactive); redrawinactive(); break; case KEY_DO: dp = ofsent(inactofs,ilist); /* get current device */ if (dp) /* paranoia... */ { if (dp->comment == DEV_DEVICE) /* can't move comments, zoom? */ { if (dp == ilist) /* moving top of list? */ { if (dp->next) { ilist = dp->next; /* point list to non-moving item */ }else{ ilist = dp->prev; /* can't go down, go up instead */ } }else{ if (!dp->next) /* last entry on list? */ inactofs--; /* shift cursor up one */ } movedev(dp,active); /* shift to active list */ conflicts = findconflict(active); /* update conflict tags */ dp->changed = 1; alist = dp; /* put at top and current */ actofs = 0; while(dp->comment == DEV_DEVICE) dp = dp->prev; /* forcibly unzoom section */ dp ->comment = DEV_COMMENT; mode = 0; /* and swap modes to follow it */ }else{ /* DO on comment = zoom */ switch(dp->comment) /* Depends on current state */ { case DEV_COMMENT: /* not currently zoomed */ dp->comment = DEV_ZOOMED; break; case DEV_ZOOMED: dp->comment = DEV_COMMENT; break; } } redrawactive(); /* redraw */ redrawinactive(); } break; default: /* nothing else relevant here */ break; } break; default: mode = 0; /* shouldn't happen... */ } /* handle returns that are the same for both modes */ switch (ret) { case KEY_HELP: helpscreen(); break; case KEY_EXIT: i = yesnocancel(" Save these parameters before exiting? ([!bY!n]es/[!bN!n]o/[!bC!n]ancel) "); switch(i) { case 2: /* cancel */ redraw(); break; case 1: /* save and exit */ savelist(active,1); savelist(inactive,0); case 0: /* exit */ nukelist(active); /* clean up after ourselves */ nukelist(inactive); normal(); clear(); return(1); } break; } } } #endif /* VISUAL_USERCONFIG */ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 Jordan K. Hubbard * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * Many additional changes by Bruce Evans * * This code is derived from software contributed by the * University of California Berkeley, Jordan K. Hubbard, * David Greenman and Bruce Evans. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #define PARM_DEVSPEC 0x1 #define PARM_INT 0x2 #define PARM_ADDR 0x3 #define PARM_STRING 0x4 typedef struct _cmdparm { int type; union { struct uc_device *dparm; int iparm; union { void *aparm; const char *sparm; } u; } parm; } CmdParm; typedef int (*CmdFunc)(CmdParm *); typedef struct _cmd { char *name; CmdFunc handler; CmdParm *parms; } Cmd; #if 0 static void lsscsi(void); static int list_scsi(CmdParm *); #endif static int lsdevtab(struct uc_device *); static struct uc_device *find_device(char *, int); static struct uc_device *search_devtable(struct uc_device *, char *, int); static void cngets(char *, int); static Cmd *parse_cmd(char *); static int parse_args(const char *, CmdParm *); static int save_dev(struct uc_device *); static int list_devices(CmdParm *); static int set_device_ioaddr(CmdParm *); static int set_device_irq(CmdParm *); static int set_device_drq(CmdParm *); static int set_device_iosize(CmdParm *); static int set_device_mem(CmdParm *); static int set_device_flags(CmdParm *); static int set_device_enable(CmdParm *); static int set_device_disable(CmdParm *); static int quitfunc(CmdParm *); static int helpfunc(CmdParm *); #if defined(INTRO_USERCONFIG) static int introfunc(CmdParm *); #endif #if NPNP > 0 static int lspnp(void); static int set_pnp_parms(CmdParm *); #endif static int lineno; #include "eisa.h" #if NEISA > 0 #include static int set_num_eisa_slots(CmdParm *); #endif /* NEISA > 0 */ static CmdParm addr_parms[] = { { PARM_DEVSPEC, {} }, { PARM_ADDR, {} }, { -1, {} }, }; static CmdParm int_parms[] = { { PARM_DEVSPEC, {} }, { PARM_INT, {} }, { -1, {} }, }; static CmdParm dev_parms[] = { { PARM_DEVSPEC, {} }, { -1, {} }, }; #if NPNP > 0 static CmdParm string_arg[] = { { PARM_STRING, {} }, { -1, {} }, }; #endif #if NEISA > 0 static CmdParm int_arg[] = { { PARM_INT, {} }, { -1, {} }, }; #endif /* NEISA > 0 */ static Cmd CmdList[] = { { "?", helpfunc, NULL }, /* ? (help) */ { "di", set_device_disable, dev_parms }, /* disable dev */ { "dr", set_device_drq, int_parms }, /* drq dev # */ #if NEISA > 0 { "ei", set_num_eisa_slots, int_arg }, /* # EISA slots */ #endif /* NEISA > 0 */ { "en", set_device_enable, dev_parms }, /* enable dev */ { "ex", quitfunc, NULL }, /* exit (quit) */ { "f", set_device_flags, int_parms }, /* flags dev mask */ { "h", helpfunc, NULL }, /* help */ #if defined(INTRO_USERCONFIG) { "intro", introfunc, NULL }, /* intro screen */ #endif { "iom", set_device_mem, addr_parms }, /* iomem dev addr */ { "ios", set_device_iosize, int_parms }, /* iosize dev size */ { "ir", set_device_irq, int_parms }, /* irq dev # */ { "l", list_devices, NULL }, /* ls, list */ #if NPNP > 0 { "pn", set_pnp_parms, string_arg }, /* pnp ... */ #endif { "po", set_device_ioaddr, int_parms }, /* port dev addr */ { "res", (CmdFunc)cpu_reset, NULL }, /* reset CPU */ { "q", quitfunc, NULL }, /* quit */ #if 0 { "s", list_scsi, NULL }, /* scsi */ #endif #ifdef VISUAL_USERCONFIG { "v", (CmdFunc)visuserconfig, NULL }, /* visual mode */ #endif { NULL, NULL, NULL }, }; void userconfig(void) { static char banner = 1; char input[80]; int rval; Cmd *cmd; load_devtab(); init_config_script(); while (1) { /* Only display signon banner if we are about to go interactive */ if (!has_config_script()) { if (!(boothowto & RB_CONFIG)) #ifdef INTRO_USERCONFIG banner = 0; #else return; #endif if (banner) { banner = 0; printf("FreeBSD Kernel Configuration Utility - Version 1.2\n" " Type \"help\" for help" #ifdef VISUAL_USERCONFIG " or \"visual\" to go to the visual\n" " configuration interface (requires MGA/VGA display or\n" " serial terminal capable of displaying ANSI graphics)" #endif ".\n"); } } printf("config> "); cngets(input, 80); if (input[0] == '\0') continue; cmd = parse_cmd(input); if (!cmd) { printf("Invalid command or syntax. Type `?' for help.\n"); continue; } rval = (*cmd->handler)(cmd->parms); if (rval) { free_devtab(); return; } } } static Cmd * parse_cmd(char *cmd) { Cmd *cp; for (cp = CmdList; cp->name; cp++) { int len = strlen(cp->name); if (!strncmp(cp->name, cmd, len)) { while (*cmd && *cmd != ' ' && *cmd != '\t') ++cmd; if (parse_args(cmd, cp->parms)) return NULL; else return cp; } } return NULL; } static int parse_args(const char *cmd, CmdParm *parms) { while (1) { char *ptr; if (*cmd == ' ' || *cmd == '\t') { ++cmd; continue; } if (parms == NULL || parms->type == -1) { if (*cmd == '\0') return 0; printf("Extra arg(s): %s\n", cmd); return 1; } if (parms->type == PARM_DEVSPEC) { int i = 0; char devname[64]; int unit = 0; while (*cmd && !(*cmd == ' ' || *cmd == '\t' || (*cmd >= '0' && *cmd <= '9'))) devname[i++] = *(cmd++); devname[i] = '\0'; if (*cmd >= '0' && *cmd <= '9') { unit = strtoul(cmd, &ptr, 10); if (cmd == ptr) { printf("Invalid device number\n"); /* XXX should print invalid token here and elsewhere. */ return 1; } /* XXX else should require end of token. */ cmd = ptr; } if ((parms->parm.dparm = find_device(devname, unit)) == NULL) { printf("No such device: %s%d\n", devname, unit); return 1; } ++parms; continue; } if (parms->type == PARM_INT) { parms->parm.iparm = strtoul(cmd, &ptr, 0); if (cmd == ptr) { printf("Invalid numeric argument\n"); return 1; } cmd = ptr; ++parms; continue; } if (parms->type == PARM_ADDR) { parms->parm.u.aparm = (void *)(uintptr_t)strtoul(cmd, &ptr, 0); if (cmd == ptr) { printf("Invalid address argument\n"); return 1; } cmd = ptr; ++parms; continue; } if (parms->type == PARM_STRING) { parms->parm.u.sparm = cmd; return 0; } } return 0; } static int list_devices(CmdParm *parms) { lineno = 0; if (lsdevtab(uc_devtab)) return 0; #if NPNP > 0 if (lspnp()) return 0; #endif #if NEISA > 0 printf("\nNumber of EISA slots to probe: %d\n", num_eisa_slots); #endif /* NEISA > 0 */ return 0; } static int set_device_ioaddr(CmdParm *parms) { parms[0].parm.dparm->id_iobase = parms[1].parm.iparm; save_dev(parms[0].parm.dparm); return 0; } static int set_device_irq(CmdParm *parms) { unsigned irq; irq = parms[1].parm.iparm; if (irq == 2) { printf("Warning: Remapping IRQ 2 to IRQ 9\n"); irq = 9; } else if (irq != -1 && irq > 15) { printf("An IRQ > 15 would be invalid.\n"); return 0; } parms[0].parm.dparm->id_irq = (irq < 16 ? 1 << irq : 0); save_dev(parms[0].parm.dparm); return 0; } static int set_device_drq(CmdParm *parms) { unsigned drq; /* * The bounds checking is just to ensure that the value can be printed * in 5 characters. 32768 gets converted to -32768 and doesn't fit. */ drq = parms[1].parm.iparm; parms[0].parm.dparm->id_drq = (drq < 32768 ? drq : -1); save_dev(parms[0].parm.dparm); return 0; } static int set_device_iosize(CmdParm *parms) { parms[0].parm.dparm->id_msize = parms[1].parm.iparm; save_dev(parms[0].parm.dparm); return 0; } static int set_device_mem(CmdParm *parms) { parms[0].parm.dparm->id_maddr = parms[1].parm.u.aparm; save_dev(parms[0].parm.dparm); return 0; } static int set_device_flags(CmdParm *parms) { parms[0].parm.dparm->id_flags = parms[1].parm.iparm; save_dev(parms[0].parm.dparm); return 0; } static int set_device_enable(CmdParm *parms) { parms[0].parm.dparm->id_enabled = TRUE; save_dev(parms[0].parm.dparm); return 0; } static int set_device_disable(CmdParm *parms) { parms[0].parm.dparm->id_enabled = FALSE; save_dev(parms[0].parm.dparm); return 0; } #if NPNP > 0 static int -sysctl_machdep_uc_pnplist (SYSCTL_HANDLER_ARGS) +sysctl_machdep_uc_pnplist(SYSCTL_HANDLER_ARGS) { int error=0; if(!req->oldptr) { /* Only sizing */ return(SYSCTL_OUT(req,0,sizeof(struct pnp_cinfo)*MAX_PNP_LDN)); } else { /* * Output the pnp_ldn_overrides[] table. */ error=sysctl_handle_opaque(oidp,&pnp_ldn_overrides, sizeof(struct pnp_cinfo)*MAX_PNP_LDN,req); if(error) return(error); return(0); } } SYSCTL_PROC( _machdep, OID_AUTO, uc_pnplist, CTLFLAG_RD, 0, 0, sysctl_machdep_uc_pnplist, "A", "List of PnP overrides changed in UserConfig"); /* * this function sets the kernel table to override bios PnP * configuration. */ static int set_pnp_parms(CmdParm *parms) { u_long idx, val, ldn, csn; int i; char *q; const char *p = parms[0].parm.u.sparm; struct pnp_cinfo d; csn=strtoul(p,&q, 0); ldn=strtoul(q,&q, 0); for (p=q; *p && (*p==' ' || *p=='\t'); p++) ; if (csn < 1 || csn > MAX_PNP_CARDS || ldn >= MAX_PNP_LDN) { printf("bad csn/ldn %ld:%ld\n", csn, ldn); return 0; } for (i=0; i < MAX_PNP_LDN; i++) { if (pnp_ldn_overrides[i].csn == csn && pnp_ldn_overrides[i].ldn == ldn) break; } if (i==MAX_PNP_LDN) { for (i=0; i < MAX_PNP_LDN; i++) { if (pnp_ldn_overrides[i].csn <1 || pnp_ldn_overrides[i].csn > MAX_PNP_CARDS) break; } } if (i==MAX_PNP_LDN) { printf("sorry, no PnP entries available, try delete one\n"); return 0 ; } d = pnp_ldn_overrides[i] ; d.csn = csn; d.ldn = ldn ; while (*p) { idx = 0; val = 0; if (!strncmp(p,"irq",3)) { idx=strtoul(p+3,&q, 0); val=strtoul(q,&q, 0); if (idx >=0 && idx < 2) d.irq[idx] = val; } else if (!strncmp(p,"flags",5)) { idx=strtoul(p+5,&q, 0); d.flags = idx; } else if (!strncmp(p,"drq",3)) { idx=strtoul(p+3,&q, 0); val=strtoul(q,&q, 0); if (idx >=0 && idx < 2) d.drq[idx] = val; } else if (!strncmp(p,"port",4)) { idx=strtoul(p+4,&q, 0); val=strtoul(q,&q, 0); if (idx >=0 && idx < 8) d.port[idx] = val; } else if (!strncmp(p,"mem",3)) { idx=strtoul(p+3,&q, 0); val=strtoul(q,&q, 0); if (idx >=0 && idx < 4) d.mem[idx].base = val; } else if (!strncmp(p,"bios",4)) { q = p+ 4; d.override = 0 ; } else if (!strncmp(p,"os",2)) { q = p+2 ; d.override = 1 ; } else if (!strncmp(p,"disable",7)) { q = p+7 ; d.enable = 0 ; } else if (!strncmp(p,"enable",6)) { q = p+6; d.enable = 1 ; } else if (!strncmp(p,"delete",6)) { bzero(&pnp_ldn_overrides[i], sizeof (pnp_ldn_overrides[i])); if (i==0) pnp_ldn_overrides[i].csn = 255;/* not reinit */ return 0; } else { printf("unknown command <%s>\n", p); break; } for (p=q; *p && (*p==' ' || *p=='\t'); p++) ; } pnp_ldn_overrides[i] = d ; return 0; } #endif /* NPNP */ #if NEISA > 0 static int set_num_eisa_slots(CmdParm *parms) { int num_slots; num_slots = parms[0].parm.iparm; num_eisa_slots = (num_slots <= 16 ? num_slots : 10); return 0; } #endif /* NEISA > 0 */ static int quitfunc(CmdParm *parms) { /* * If kernel config supplied, and we are parsing it, and -c also supplied, * ignore a quit command, This provides a safety mechanism to allow * recovery from a damaged/buggy kernel config. */ if ((boothowto & RB_CONFIG) && userconfig_boot_parsing) return 0; return 1; } static int helpfunc(CmdParm *parms) { printf( "Command\t\t\tDescription\n" "-------\t\t\t-----------\n" "ls\t\t\tList currently configured devices\n" "port \tSet device port (i/o address)\n" "irq \tSet device irq\n" "drq \tSet device drq\n" "iomem \tSet device maddr (memory address)\n" "iosize \tSet device memory size\n" "flags \tSet device flags\n" "enable \tEnable device\n" "disable \tDisable device (will not be probed)\n"); #if NPNP > 0 printf( "pnp [enable|disable]\tenable/disable device\n" "pnp [os|bios]\tset parameters using FreeBSD or BIOS\n" "pnp [portX ]\tset addr for port X (0..7)\n" "pnp [memX ]\tset addr for memory range X (0..3)\n" "pnp [irqX ]\tset irq X (0..1) to number, 0=unused\n" "pnp [drqX ]\tset drq X (0..1) to number, 4=unused\n"); #endif #if NEISA > 0 printf("eisa \t\tSet the number of EISA slots to probe\n"); #endif /* NEISA > 0 */ printf( "quit\t\t\tExit this configuration utility\n" "reset\t\t\tReset CPU\n"); #ifdef VISUAL_USERCONFIG printf("visual\t\t\tGo to fullscreen mode.\n"); #endif printf( "help\t\t\tThis message\n\n" "Commands may be abbreviated to a unique prefix\n"); return 0; } #if defined(INTRO_USERCONFIG) #if defined (VISUAL_USERCONFIG) static void center(int y, char *str) { putxy((80 - strlen(str)) / 2, y, str); } #endif static int introfunc(CmdParm *parms) { #if defined (VISUAL_USERCONFIG) int curr_item, first_time, extended = 0; static char *choices[] = { " Skip kernel configuration and continue with installation ", " Start kernel configuration in full-screen visual mode ", " Start kernel configuration in CLI mode ", }; clear(); center(2, "!bKernel Configuration Menu!n"); curr_item = 0; first_time = 1; while (1) { char tmp[80]; int c, i; if (!extended) { for (i = 0; i < 3; i++) { tmp[0] = '\0'; if (curr_item == i) strcpy(tmp, "!i"); strcat(tmp, choices[i]); if (curr_item == i) strcat(tmp, "!n"); putxy(10, 5 + i, tmp); } if (first_time) { putxy(2, 10, "Here you have the chance to go into kernel configuration mode, making"); putxy(2, 11, "any changes which may be necessary to properly adjust the kernel to"); putxy(2, 12, "match your hardware configuration."); putxy(2, 14, "If you are installing FreeBSD for the first time, select Visual Mode"); putxy(2, 15, "(press Down-Arrow then ENTER)."); putxy(2, 17, "If you need to do more specialized kernel configuration and are an"); putxy(2, 18, "experienced FreeBSD user, select CLI mode."); putxy(2, 20, "If you are !icertain!n that you do not need to configure your kernel"); putxy(2, 21, "then simply press ENTER or Q now."); first_time = 0; } move(0, 0); /* move the cursor out of the way */ } c = getchar(); if ((extended == 2) || (c == 588) || (c == 596)) { /* console gives "alternative" codes */ extended = 0; /* no longer */ switch (c) { case 588: case 'A': /* up */ if (curr_item > 0) --curr_item; break; case 596: case 'B': /* down */ if (curr_item < 2) ++curr_item; break; } } else { switch(c) { case '\033': extended = 1; break; case '[': /* cheat : always preceeds cursor move */ case 'O': /* ANSI application key mode */ if (extended == 1) extended = 2; else extended = 0; break; case -1: case 'Q': case 'q': clear(); return 1; /* user requests exit */ case '1': /* select an item */ case 'S': case 's': curr_item = 0; break; case '2': case 'V': case 'v': curr_item = 1; break; case '3': case 'C': case 'c': curr_item = 2; break; case 'U': /* up */ case 'u': case 'P': case 'p': if (curr_item > 0) --curr_item; break; case 'D': /* down */ case 'd': case 'N': case 'n': if (curr_item < 2) ++curr_item; break; case '\r': case '\n': clear(); if (!curr_item) return 1; else if (curr_item == 1) return visuserconfig(); else { putxy(0, 1, "Type \"help\" for help or \"quit\" to exit."); /* enable quitfunc */ userconfig_boot_parsing=0; move (0, 3); boothowto |= RB_CONFIG; /* force -c */ return 0; } break; } } } #endif } #endif #if NPNP > 0 static int lspnp () { struct pnp_cinfo *c; int i, first = 1; for (i=0; i< MAX_PNP_LDN; i++) { c = &pnp_ldn_overrides[i]; if (c->csn >0 && c->csn != 255) { int pmax, mmax; static char pfmt[] = "port 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x "; static char mfmt[] = "mem 0x%x 0x%x 0x%x 0x%x"; char buf[256]; if (lineno >= 23) { if (!userconfig_boot_parsing) { printf(" "); if (getchar() == 'q') { printf("quit\n"); return (1); } printf("\n"); } lineno = 0; } if (lineno == 0 || first) printf("CSN LDN conf en irqs drqs others (PnP devices)\n"); first = 0 ; printf("%3d %3d %4s %2s %2d %-2d %2d %-2d ", c->csn, c->ldn, c->override ? "OS ":"BIOS", c->enable ? "Y":"N", c->irq[0], c->irq[1], c->drq[0], c->drq[1]); if (c->flags) printf("flags 0x%08lx ",c->flags); for (pmax = 7; pmax >=0 ; pmax--) if (c->port[pmax]!=0) break; for (mmax = 3; mmax >=0 ; mmax--) if (c->mem[mmax].base!=0) break; if (pmax>=0) { strcpy(buf, pfmt); buf[10 + 5*pmax]='\0'; printf(buf, c->port[0], c->port[1], c->port[2], c->port[3], c->port[4], c->port[5], c->port[6], c->port[7]); } if (mmax>=0) { strcpy(buf, mfmt); buf[8 + 5*mmax]='\0'; printf(buf, c->mem[0].base, c->mem[1].base, c->mem[2].base, c->mem[3].base); } printf("\n"); } } return 0 ; } #endif /* NPNP */ static int lsdevtab(struct uc_device *dt) { for (; dt->id_id != 0; dt++) { char dname[80]; if (lineno >= 23) { printf(" "); if (!userconfig_boot_parsing) { if (getchar() == 'q') { printf("quit\n"); return (1); } printf("\n"); } lineno = 0; } if (lineno == 0) { printf( "Device port irq drq iomem iosize unit flags enab\n" ); ++lineno; } sprintf(dname, "%s%d", dt->id_name, dt->id_unit); printf("%-9.9s%-#11x%-6d%-6d%-8p%-9d%-6d%-#11x%-5s\n", dname, /* dt->id_id, dt->id_driver(by name), */ dt->id_iobase, ffs(dt->id_irq) - 1, dt->id_drq, dt->id_maddr, dt->id_msize, /* dt->id_intr(by name), */ dt->id_unit, dt->id_flags, dt->id_enabled ? "Yes" : "No"); ++lineno; } return(0); } static void load_devtab(void) { int i, val; int count = resource_count(); int id = 1; int dt; char *name; int unit; uc_devtab = malloc(sizeof(struct uc_device)*(count + 1),M_DEVL,M_WAITOK); bzero(uc_devtab, sizeof(struct uc_device) * (count + 1)); dt = 0; for (i = 0; i < count; i++) { name = resource_query_name(i); unit = resource_query_unit(i); if (unit < 0) continue; /* skip wildcards */ uc_devtab[dt].id_id = id++; resource_int_value(name, unit, "port", &uc_devtab[dt].id_iobase); val = 0; resource_int_value(name, unit, "irq", &val); uc_devtab[dt].id_irq = (1 << val); resource_int_value(name, unit, "drq", &uc_devtab[dt].id_drq); resource_int_value(name, unit, "maddr",(int *)&uc_devtab[dt].id_maddr); resource_int_value(name, unit, "msize", &uc_devtab[dt].id_msize); uc_devtab[dt].id_unit = unit; resource_int_value(name, unit, "flags", &uc_devtab[dt].id_flags); val = 0; resource_int_value(name, unit, "disabled", &val); uc_devtab[dt].id_enabled = !val; uc_devtab[dt].id_name = malloc(strlen(name) + 1, M_DEVL,M_WAITOK); strcpy(uc_devtab[dt].id_name, name); dt++; } } static void free_devtab(void) { int i; int count = resource_count(); for (i = 0; i < count; i++) if (uc_devtab[i].id_name) free(uc_devtab[i].id_name, M_DEVL); free(uc_devtab, M_DEVL); } static struct uc_device * find_device(char *devname, int unit) { struct uc_device *ret; if ((ret = search_devtable(uc_devtab, devname, unit)) != NULL) return ret; return NULL; } static struct uc_device * search_devtable(struct uc_device *dt, char *devname, int unit) { int i; for (i = 0; dt->id_id != 0; dt++) if (!strcmp(dt->id_name, devname) && dt->id_unit == unit) return dt; return NULL; } static void cngets(char *input, int maxin) { int c, nchars = 0; while (1) { c = getchar(); /* Treat ^H or ^? as backspace */ if ((c == '\010' || c == '\177')) { if (nchars) { printf("\010 \010"); *--input = '\0', --nchars; } continue; } /* Treat ^U or ^X as kill line */ else if ((c == '\025' || c == '\030')) { while (nchars) { printf("\010 \010"); *--input = '\0', --nchars; } continue; } printf("%c", c); if ((++nchars == maxin) || (c == '\n') || (c == '\r') || ( c == -1)) { *input = '\0'; break; } *input++ = (u_char)c; } } #if 0 /* scsi: Support for displaying configured SCSI devices. * There is no way to edit them, and this is inconsistent * with the ISA method. This is here as a basis for further work. */ static char * type_text(char *name) /* XXX: This is bogus */ { if (strcmp(name, "sd") == 0) return "disk"; if (strcmp(name, "st") == 0) return "tape"; return "device"; } id_put(char *desc, int id) { if (id != SCCONF_UNSPEC) { if (desc) printf("%s", desc); if (id == SCCONF_ANY) printf("?"); else printf("%d", id); } } static void lsscsi(void) { int i; printf("scsi: (can't be edited):\n"); for (i = 0; scsi_cinit[i].driver; i++) { id_put("controller scbus", scsi_cinit[i].scbus); if (scsi_cinit[i].unit != -1) { printf(" at "); id_put(scsi_cinit[i].driver, scsi_cinit[i].unit); } printf("\n"); } for (i = 0; scsi_dinit[i].name; i++) { printf("%s ", type_text(scsi_dinit[i].name)); id_put(scsi_dinit[i].name, scsi_dinit[i].unit); id_put(" at scbus", scsi_dinit[i].cunit); id_put(" target ", scsi_dinit[i].target); id_put(" lun ", scsi_dinit[i].lun); if (scsi_dinit[i].flags) printf(" flags 0x%x\n", scsi_dinit[i].flags); printf("\n"); } } static int list_scsi(CmdParm *parms) { lineno = 0; lsscsi(); return 0; } #endif static void save_resource(struct uc_device *idev) { char *name; int unit; name = idev->id_name; unit = idev->id_unit; resource_set_int(name, unit, "port", idev->id_iobase); resource_set_int(name, unit, "irq", ffs(idev->id_irq) - 1); resource_set_int(name, unit, "drq", idev->id_drq); resource_set_int(name, unit, "maddr", (int)idev->id_maddr); resource_set_int(name, unit, "msize", idev->id_msize); resource_set_int(name, unit, "flags", idev->id_flags); resource_set_int(name, unit, "disabled", !idev->id_enabled); } static int save_dev(idev) struct uc_device *idev; { struct uc_device *id_p,*id_pn; char *name = idev->id_name; for (id_p = uc_devlist; id_p; id_p = id_p->id_next) { if (id_p->id_id == idev->id_id) { id_pn = id_p->id_next; if (id_p->id_name) free(id_p->id_name, M_DEVL); bcopy(idev,id_p,sizeof(struct uc_device)); save_resource(idev); id_p->id_name = malloc(strlen(name)+1, M_DEVL,M_WAITOK); strcpy(id_p->id_name, name); id_p->id_next = id_pn; return 1; } } id_pn = malloc(sizeof(struct uc_device),M_DEVL,M_WAITOK); bcopy(idev,id_pn,sizeof(struct uc_device)); save_resource(idev); id_pn->id_name = malloc(strlen(name) + 1, M_DEVL,M_WAITOK); strcpy(id_pn->id_name, name); id_pn->id_next = uc_devlist; uc_devlist = id_pn; return 0; } Index: head/sys/i386/isa/clock.c =================================================================== --- head/sys/i386/isa/clock.c (revision 62572) +++ head/sys/i386/isa/clock.c (revision 62573) @@ -1,1279 +1,1279 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Routines to handle clock hardware. */ /* * inittodr, settodr and support routines written * by Christoph Robitschko * * reintroduced and updated by Chris Stenton 8/10/94 */ #include "opt_clock.h" #include "apm.h" #include #include #include #include #include #include #ifndef SMP #include #endif #include #include #include #ifdef CLK_CALIBRATION_LOOP #endif #include #include #include #include #include #include #ifdef APIC_IO #include #endif #if defined(SMP) || defined(APIC_IO) #include #endif /* SMP || APIC_IO */ #include #include #include #include #include #include #include #include "mca.h" #if NMCA > 0 #include #endif #ifdef SMP #define disable_intr() CLOCK_DISABLE_INTR() #define enable_intr() CLOCK_ENABLE_INTR() #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif #endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) #define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x)) /* * Time in timer cycles that it takes for microtime() to disable interrupts * and latch the count. microtime() currently uses "cli; outb ..." so it * normally takes less than 2 timer cycles. Add a few for cache misses. * Add a few more to allow for latency in bogus calls to microtime() with * interrupts already disabled. */ #define TIMER0_LATCH_COUNT 20 /* * Maximum frequency that we are willing to allow for timer0. Must be * low enough to guarantee that the timer interrupt handler returns * before the next timer interrupt. */ #define TIMER0_MAX_FREQ 20000 int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int disable_rtc_set; /* disable resettodr() if != 0 */ volatile u_int idelayed; int statclock_disable; u_int stat_imask = SWI_LOW_MASK; #ifndef TIMER_FREQ #define TIMER_FREQ 1193182 #endif u_int timer_freq = TIMER_FREQ; int timer0_max_count; u_int tsc_freq; int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; static u_int clk_imask = HWI_MASK | SWI_MASK; static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; /* * XXX new_function and timer_func should not handle clockframes, but * timer_func currently needs to hold hardclock to handle the * timer0_state == 0 case. We should use inthand_add()/inthand_remove() * to switch between clkintr() and a slightly different timerintr(). */ static void (*new_function) __P((struct clockframe *frame)); static u_int new_rate; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; static u_int timer0_prescaler_count; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer0_state; static u_char timer2_state; static void (*timer_func) __P((struct clockframe *frame)) = hardclock; static u_int tsc_present; static unsigned i8254_get_timecount __P((struct timecounter *tc)); static unsigned tsc_get_timecount __P((struct timecounter *tc)); static void set_timer_freq(u_int freq, int intr_freq); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "TSC" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, &tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", ""); static struct timecounter i8254_timecounter = { i8254_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "i8254" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", ""); static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { disable_intr(); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += timer0_max_count; i8254_lastcount = 0; } clkintr_pending = 0; enable_intr(); } timer_func(&frame); switch (timer0_state) { case RELEASED: setdelayed(); break; case ACQUIRED: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); setdelayed(); } break; case ACQUIRE_PENDING: disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); break; case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); setdelayed(); } break; } #if NMCA > 0 /* Reset clock interrupt by asserting bit 7 of port 0x61 */ if (MCA_system) outb(0x61, inb(0x61) | 0x80); #endif } /* * The acquire and release functions must be called at ipl >= splclock(). */ int acquire_timer0(int rate, void (*function) __P((struct clockframe *frame))) { static int old_rate; if (rate <= 0 || rate > TIMER0_MAX_FREQ) return (-1); switch (timer0_state) { case RELEASED: timer0_state = ACQUIRE_PENDING; break; case RELEASE_PENDING: if (rate != old_rate) return (-1); /* * The timer has been released recently, but is being * re-acquired before the release completed. In this * case, we simply reclaim it as if it had not been * released at all. */ timer0_state = ACQUIRED; break; default: return (-1); /* busy */ } new_function = function; old_rate = new_rate = rate; return (0); } int acquire_timer2(int mode) { if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); return (0); } int release_timer0() { switch (timer0_state) { case ACQUIRED: timer0_state = RELEASE_PENDING; break; case ACQUIRE_PENDING: /* Nothing happened yet, release quickly. */ timer0_state = RELEASED; break; default: return (-1); } return (0); } int release_timer2() { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); return (0); } /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static void rtcintr(struct clockframe frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) statclock(&frame); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ static int getit(void) { u_long ef; int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); write_eflags(ef); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Guard against the timer being uninitialized if we are called * early for console i/o. */ if (timer0_max_count == 0) set_timer_freq(timer_freq, hz); /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += timer0_max_count; /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void sysbeepstop(void *chan) { outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ release_timer2(); beeping = 0; } int sysbeep(int pitch, int period) { int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } splx(x); return (0); } /* * RTC support routines */ int rtcin(reg) int reg; { int s; u_char val; s = splhigh(); outb(IO_RTC, reg); inb(0x84); val = inb(IO_RTC + 1); inb(0x84); splx(s); return (val); } static __inline void writertc(u_char reg, u_char val) { int s; s = splhigh(); inb(0x84); outb(IO_RTC, reg); inb(0x84); outb(IO_RTC + 1, val); inb(0x84); /* XXX work around wrong order in rtcin() */ splx(s); } static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } static u_int calibrate_clocks(void) { u_int64_t old_tsc; u_int count, prev_count, tot_count; int sec, start_sec, timeout; if (bootverbose) printf("Calibrating clock(s) ... "); if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto fail; timeout = 100000000; /* Read the mc146818A seconds counter. */ for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); break; } if (--timeout == 0) goto fail; } /* Wait for the mC146818A seconds counter to change. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); if (sec != start_sec) break; } if (--timeout == 0) goto fail; } /* Start keeping track of the i8254 counter. */ prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) old_tsc = rdtsc(); else old_tsc = 0; /* shut up gcc */ /* * Wait for the mc146818A seconds counter to change. Read the i8254 * counter for each iteration since this is convenient and only * costs a few usec of inaccuracy. The timing of the final reads * of the counters almost matches the timing of the initial reads, * so the main cause of inaccuracy is the varying latency from * inside getit() or rtcin(RTC_STATUSA) to the beginning of the * rtcin(RTC_SEC) that returns a changed seconds count. The * maximum inaccuracy from this cause is < 10 usec on 486's. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) sec = rtcin(RTC_SEC); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if (sec != start_sec) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc() - old_tsc; if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } static void set_timer_freq(u_int freq, int intr_freq) { u_long ef; int new_timer0_max_count; ef = read_eflags(); disable_intr(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { timer0_max_count = new_timer0_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); write_eflags(ef); } /* * i8254_restore is called from apm_default_resume() to reload * the countdown register. * this should not be necessary but there are broken laptops that * do not restore the countdown register on resume. * when it happnes, it messes up the hardclock interval and system clock, * which leads to the infamous "calcru: negative time" problem. */ void i8254_restore(void) { u_long ef; ef = read_eflags(); disable_intr(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); write_eflags(ef); } /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. */ void startrtclock() { u_int delta, freq; if (cpu_feature & CPUID_TSC) tsc_present = 1; else tsc_present = 0; writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); set_timer_freq(timer_freq, hz); freq = calibrate_clocks(); #ifdef CLK_CALIBRATION_LOOP if (bootverbose) { printf( "Press a key on the console to abort clock calibration\n"); while (cncheckc() == -1) calibrate_clocks(); } #endif /* * Use the calibrated i8254 frequency if it seems reasonable. * Otherwise use the default, and don't use the calibrated i586 * frequency. */ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq; if (delta < timer_freq / 100) { #ifndef CLK_USE_I8254_CALIBRATION if (bootverbose) printf( "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); freq = timer_freq; #endif timer_freq = freq; } else { if (bootverbose) printf( "%d Hz differs from default of %d Hz by more than 1%%\n", freq, timer_freq); tsc_freq = 0; } set_timer_freq(timer_freq, hz); i8254_timecounter.tc_frequency = timer_freq; tc_init(&i8254_timecounter); #ifndef CLK_USE_TSC_CALIBRATION if (tsc_freq != 0) { if (bootverbose) printf( "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); tsc_freq = 0; } #endif if (tsc_present && tsc_freq == 0) { /* * Calibration of the i586 clock relative to the mc146818A * clock failed. Do a less accurate calibration relative * to the i8254 clock. */ u_int64_t old_tsc = rdtsc(); DELAY(1000000); tsc_freq = rdtsc() - old_tsc; #ifdef CLK_USE_TSC_CALIBRATION if (bootverbose) printf("TSC clock: %u Hz (Method B)\n", tsc_freq); #endif } #if !defined(SMP) /* * We can not use the TSC in SMP mode, until we figure out a * cheap (impossible), reliable and precise (yeah right!) way * to synchronize the TSCs of all the CPUs. * Curse Intel for leaving the counter out of the I/O APIC. */ #if NAPM > 0 /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 looses too of course * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ return; #endif /* NAPM > 0 */ if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } #endif /* !defined(SMP) */ } /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { unsigned long sec, days; int yd; int year, month; int y, m, s; struct timespec ts; if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; yd = days; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; if (disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); } /* * Start both clocks running. */ void cpu_initclocks() { int diag; #ifdef APIC_IO int apic_8254_trial; struct intrec *clkdesc; #endif /* APIC_IO */ if (statclock_disable) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt * flag which would normally cause the RTC to generate * interrupts. */ stat_imask = HWI_MASK | SWI_MASK; rtc_statusb = RTCSB_24HR; } else { /* Setting stathz to nonzero early helps avoid races. */ stathz = RTC_NOPROFRATE; profhz = RTC_PROFRATE; } /* Finish initializing 8253 timer 0. */ #ifdef APIC_IO apic_8254_intr = isa_apic_irq(0); apic_8254_trial = 0; if (apic_8254_intr >= 0 ) { if (apic_int_type(0, 0) == 3) apic_8254_trial = 1; } else { /* look for ExtInt on pin 0 */ if (apic_int_type(0, 0) == 3) { apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); #else /* APIC_IO */ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(IRQ0); #endif /* APIC_IO */ /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ if (statclock_disable) return; diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); #endif /* APIC_IO */ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, INTR_EXCL); #ifdef APIC_IO INTREN(APIC_IRQ8); #else INTREN(IRQ8); #endif /* APIC_IO */ writertc(RTC_STATUSB, rtc_statusb); #ifdef APIC_IO if (apic_8254_trial) { printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ if (read_intr_count(apic_8254_intr) < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ INTRDIS(1 << apic_8254_intr); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); /* * Revoke current ISA IRQ 0 assignment and * configure a fallback interrupt routing from * the 8254 Timer via the 8259 PIC to the * an ExtInt interrupt line on IOAPIC #0 intpin 0. * We reuse the low level interrupt handler number. */ if (apic_irq(0, 0) < 0) { revoke_apic_irq(apic_8254_intr); assign_apic_irq(0, 0, apic_8254_intr); } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); } } if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); else printf("APIC_IO: " "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); #endif } #ifdef APIC_IO static u_long read_intr_count(int vec) { u_long *up; up = intr_countp[vec]; if (up) return *up; return 0UL; } static void setup_8254_mixed_mode() { /* * Allow 8254 timer to INTerrupt 8259: * re-initialize master 8259: * reset; prog 4 bytes, single ICU, edge triggered */ outb(IO_ICU1, 0x13); outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 1, 0x00); /* ignore slave */ outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ /* program IO APIC for type 3 INT on INT0 */ if (ext_int_setup(0, 0) < 0) panic("8254 redirect via APIC pin0 impossible!"); } #endif void setstatclockrate(int newhz) { if (newhz == RTC_PROFRATE) rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; else rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); } static int -sysctl_machdep_i8254_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is is too generic. Should use it everywhere. */ freq = timer_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { if (timer0_state != RELEASED) return (EBUSY); /* too much trouble to handle */ set_timer_freq(freq, hz); i8254_timecounter.tc_frequency = freq; tc_update(&i8254_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", ""); static int -sysctl_machdep_tsc_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (tsc_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = tsc_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { tsc_freq = freq; tsc_timecounter.tc_frequency = tsc_freq; tc_update(&tsc_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", ""); static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; u_long ef; u_int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. */ (lapic_irr1 & (1 << apic_8254_intr)))) #else (inb(IO_ICU1) & 1))) #endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; } i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); write_eflags(ef); return (count); } static unsigned tsc_get_timecount(struct timecounter *tc) { return (rdtsc()); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int attimer_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids)) <= 0) device_quiet(dev); return(result); } static int attimer_attach(device_t dev) { return(0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX restart statclock? */ { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, 1, /* no softc */ }; static devclass_t attimer_devclass; DRIVER_MODULE(attimer, isa, attimer_driver, attimer_devclass, 0, 0); Index: head/sys/i386/isa/prof_machdep.c =================================================================== --- head/sys/i386/isa/prof_machdep.c (revision 62572) +++ head/sys/i386/isa/prof_machdep.c (revision 62573) @@ -1,355 +1,355 @@ /*- * Copyright (c) 1996 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifdef GUPROF #include "opt_i586_guprof.h" #include "opt_perfmon.h" #include #include #include #include #include #include #include #include #undef MCOUNT #endif #include #ifdef PC98 #include #else #include #endif #include #ifdef GUPROF #define CPUTIME_CLOCK_UNINITIALIZED 0 #define CPUTIME_CLOCK_I8254 1 #define CPUTIME_CLOCK_TSC 2 #define CPUTIME_CLOCK_I586_PMC 3 #define CPUTIME_CLOCK_I8254_SHIFT 7 int cputime_bias = 1; /* initialize for locality of reference */ static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; #ifdef I586_PMC_GUPROF static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; static int cputime_clock_pmc_init; static struct gmonparam saved_gmp; #endif #endif /* GUPROF */ #ifdef __GNUC__ __asm(" \n\ GM_STATE = 0 \n\ GMON_PROF_OFF = 3 \n\ \n\ .text \n\ .p2align 4,0x90 \n\ .globl __mcount \n\ .type __mcount,@function \n\ __mcount: \n\ # \n\ # Check that we are profiling. Do it early for speed. \n\ # \n\ cmpl $GMON_PROF_OFF," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ je .mcount_exit \n\ # \n\ # __mcount is the same as [.]mcount except the caller \n\ # hasn't changed the stack except to call here, so the \n\ # caller's raddr is above our raddr. \n\ # \n\ movl 4(%esp),%edx \n\ jmp .got_frompc \n\ \n\ .p2align 4,0x90 \n\ .globl " __XSTRING(HIDENAME(mcount)) " \n\ " __XSTRING(HIDENAME(mcount)) ": \n\ cmpl $GMON_PROF_OFF," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ je .mcount_exit \n\ # \n\ # The caller's stack frame has already been built, so \n\ # %ebp is the caller's frame pointer. The caller's \n\ # raddr is in the caller's frame following the caller's \n\ # caller's frame pointer. \n\ # \n\ movl 4(%ebp),%edx \n\ .got_frompc: \n\ # \n\ # Our raddr is the caller's pc. \n\ # \n\ movl (%esp),%eax \n\ \n\ pushfl \n\ pushl %eax \n\ pushl %edx \n\ cli \n\ call " __XSTRING(CNAME(mcount)) " \n\ addl $8,%esp \n\ popfl \n\ .mcount_exit: \n\ ret \n\ "); #else /* !__GNUC__ */ #error #endif /* __GNUC__ */ #ifdef GUPROF /* * [.]mexitcount saves the return register(s), loads selfpc and calls * mexitcount(selfpc) to do the work. Someday it should be in a machine * dependent file together with cputime(), __mcount and [.]mcount. cputime() * can't just be put in machdep.c because it has to be compiled without -pg. */ #ifdef __GNUC__ __asm(" \n\ .text \n\ # \n\ # Dummy label to be seen when gprof -u hides [.]mexitcount. \n\ # \n\ .p2align 4,0x90 \n\ .globl __mexitcount \n\ .type __mexitcount,@function \n\ __mexitcount: \n\ nop \n\ \n\ GMON_PROF_HIRES = 4 \n\ \n\ .p2align 4,0x90 \n\ .globl " __XSTRING(HIDENAME(mexitcount)) " \n\ " __XSTRING(HIDENAME(mexitcount)) ": \n\ cmpl $GMON_PROF_HIRES," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ jne .mexitcount_exit \n\ pushl %edx \n\ pushl %eax \n\ movl 8(%esp),%eax \n\ pushfl \n\ pushl %eax \n\ cli \n\ call " __XSTRING(CNAME(mexitcount)) " \n\ addl $4,%esp \n\ popfl \n\ popl %eax \n\ popl %edx \n\ .mexitcount_exit: \n\ ret \n\ "); #else /* !__GNUC__ */ #error #endif /* __GNUC__ */ /* * Return the time elapsed since the last call. The units are machine- * dependent. */ int cputime() { u_int count; int delta; #if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) && \ defined(PERFMON) && defined(I586_PMC_GUPROF) u_quad_t event_count; #endif u_char high, low; static u_int prev_count; #if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) if (cputime_clock == CPUTIME_CLOCK_TSC) { count = (u_int)rdtsc(); delta = (int)(count - prev_count); prev_count = count; return (delta); } #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { /* * XXX permon_read() should be inlined so that the * perfmon module doesn't need to be compiled with * profiling disabled and so that it is fast. */ perfmon_read(0, &event_count); count = (u_int)event_count; delta = (int)(count - prev_count); prev_count = count; return (delta); } #endif /* PERFMON && I586_PMC_GUPROF */ #endif /* (I586_CPU || I686_CPU) && !SMP */ /* * Read the current value of the 8254 timer counter 0. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; /* * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. * While profiling is enabled, this routine is called at least twice * per timer reset (for mcounting and mexitcounting hardclock()), * so at most one reset has occurred since the last call, and one * has occurred iff the current count is larger than the previous * count. This allows counter underflow to be detected faster * than in microtime(). */ delta = prev_count - count; prev_count = count; if ((int) delta <= 0) return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT)); return (delta); } static int -sysctl_machdep_cputime_clock (SYSCTL_HANDLER_ARGS) +sysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) { int clock; int error; #if defined(PERFMON) && defined(I586_PMC_GUPROF) int event; struct pmc pmc; #endif clock = cputime_clock; #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (clock == CPUTIME_CLOCK_I586_PMC) { pmc.pmc_val = cputime_clock_pmc_conf; clock += pmc.pmc_event; } #endif error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); if (error == 0 && req->newptr != NULL) { #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (clock >= CPUTIME_CLOCK_I586_PMC) { event = clock - CPUTIME_CLOCK_I586_PMC; if (event >= 256) return (EINVAL); pmc.pmc_num = 0; pmc.pmc_event = event; pmc.pmc_unit = 0; pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; pmc.pmc_mask = 0; cputime_clock_pmc_conf = pmc.pmc_val; cputime_clock = CPUTIME_CLOCK_I586_PMC; } else #endif { if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) return (EINVAL); cputime_clock = clock; } } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); /* * The start and stop routines need not be here since we turn off profiling * before calling them. They are here for convenience. */ void startguprof(gp) struct gmonparam *gp; { if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { cputime_clock = CPUTIME_CLOCK_I8254; #if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) if (tsc_freq != 0) cputime_clock = CPUTIME_CLOCK_TSC; #endif } gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; #if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) if (cputime_clock == CPUTIME_CLOCK_TSC) gp->profrate = tsc_freq; #if defined(PERFMON) && defined(I586_PMC_GUPROF) else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { if (perfmon_avail() && perfmon_setup(0, cputime_clock_pmc_conf) == 0) { if (perfmon_start(0) != 0) perfmon_fini(0); else { /* XXX 1 event == 1 us. */ gp->profrate = 1000000; saved_gmp = *gp; /* Zap overheads. They are invalid. */ gp->cputime_overhead = 0; gp->mcount_overhead = 0; gp->mcount_post_overhead = 0; gp->mcount_pre_overhead = 0; gp->mexitcount_overhead = 0; gp->mexitcount_post_overhead = 0; gp->mexitcount_pre_overhead = 0; cputime_clock_pmc_init = TRUE; } } } #endif /* PERFMON && I586_PMC_GUPROF */ #endif /* (I586_CPU || I686_CPU) && !SMP */ cputime_bias = 0; cputime(); } void stopguprof(gp) struct gmonparam *gp; { #if defined(PERFMON) && defined(I586_PMC_GUPROF) if (cputime_clock_pmc_init) { *gp = saved_gmp; perfmon_fini(0); cputime_clock_pmc_init = FALSE; } #endif } #else /* !GUPROF */ #ifdef __GNUC__ __asm(" \n\ .text \n\ .p2align 4,0x90 \n\ .globl " __XSTRING(HIDENAME(mexitcount)) " \n\ " __XSTRING(HIDENAME(mexitcount)) ": \n\ ret \n\ "); #else /* !__GNUC__ */ #error #endif /* __GNUC__ */ #endif /* GUPROF */ Index: head/sys/i386/linux/linux_mib.c =================================================================== --- head/sys/i386/linux/linux_mib.c (revision 62572) +++ head/sys/i386/linux/linux_mib.c (revision 62573) @@ -1,231 +1,231 @@ /*- * Copyright (c) 1999 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include struct linux_prison { char pr_osname[LINUX_MAX_UTSNAME]; char pr_osrelease[LINUX_MAX_UTSNAME]; int pr_oss_version; }; SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0, "Linux mode"); static char linux_osname[LINUX_MAX_UTSNAME] = "Linux"; static int -linux_sysctl_osname (SYSCTL_HANDLER_ARGS) +linux_sysctl_osname(SYSCTL_HANDLER_ARGS) { char osname[LINUX_MAX_UTSNAME]; int error; strcpy(osname, linux_get_osname(req->p)); error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); if (error || req->newptr == NULL) return (error); error = linux_set_osname(req->p, osname); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, osname, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, 0, 0, linux_sysctl_osname, "A", "Linux kernel OS name"); static char linux_osrelease[LINUX_MAX_UTSNAME] = "2.2.12"; static int -linux_sysctl_osrelease (SYSCTL_HANDLER_ARGS) +linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) { char osrelease[LINUX_MAX_UTSNAME]; int error; strcpy(osrelease, linux_get_osrelease(req->p)); error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); if (error || req->newptr == NULL) return (error); error = linux_set_osrelease(req->p, osrelease); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, 0, 0, linux_sysctl_osrelease, "A", "Linux kernel OS release"); static int linux_oss_version = 0x030600; static int -linux_sysctl_oss_version (SYSCTL_HANDLER_ARGS) +linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) { int oss_version; int error; oss_version = linux_get_oss_version(req->p); error = sysctl_handle_int(oidp, &oss_version, 0, req); if (error || req->newptr == NULL) return (error); error = linux_set_oss_version(req->p, oss_version); return (error); } SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON, 0, 0, linux_sysctl_oss_version, "I", "Linux OSS version"); static struct linux_prison * get_prison(struct proc *p) { register struct prison *pr; register struct linux_prison *lpr; pr = p->p_prison; if (pr == NULL) return (NULL); if (pr->pr_linux == NULL) { MALLOC(lpr, struct linux_prison *, sizeof *lpr, M_PRISON, M_WAITOK); bzero((caddr_t)lpr, sizeof *lpr); pr->pr_linux = lpr; } return (pr->pr_linux); } char * linux_get_osname(p) struct proc *p; { register struct prison *pr; register struct linux_prison *lpr; pr = p->p_prison; if (pr != NULL && pr->pr_linux != NULL) { lpr = pr->pr_linux; if (lpr->pr_osname[0]) return (lpr->pr_osname); } return (linux_osname); } int linux_set_osname(p, osname) struct proc *p; char *osname; { register struct linux_prison *lpr; lpr = get_prison(p); if (lpr != NULL) strcpy(lpr->pr_osname, osname); else strcpy(linux_osname, osname); return (0); } char * linux_get_osrelease(p) struct proc *p; { register struct prison *pr; register struct linux_prison *lpr; pr = p->p_prison; if (pr != NULL && pr->pr_linux != NULL) { lpr = pr->pr_linux; if (lpr->pr_osrelease[0]) return (lpr->pr_osrelease); } return (linux_osrelease); } int linux_set_osrelease(p, osrelease) struct proc *p; char *osrelease; { register struct linux_prison *lpr; lpr = get_prison(p); if (lpr != NULL) strcpy(lpr->pr_osrelease, osrelease); else strcpy(linux_osrelease, osrelease); return (0); } int linux_get_oss_version(p) struct proc *p; { register struct prison *pr; register struct linux_prison *lpr; pr = p->p_prison; if (pr != NULL && pr->pr_linux != NULL) { lpr = pr->pr_linux; if (lpr->pr_oss_version) return (lpr->pr_oss_version); } return (linux_oss_version); } int linux_set_oss_version(p, oss_version) struct proc *p; int oss_version; { register struct linux_prison *lpr; lpr = get_prison(p); if (lpr != NULL) lpr->pr_oss_version = oss_version; else linux_oss_version = oss_version; return (0); } Index: head/sys/isa/atrtc.c =================================================================== --- head/sys/isa/atrtc.c (revision 62572) +++ head/sys/isa/atrtc.c (revision 62573) @@ -1,1279 +1,1279 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Routines to handle clock hardware. */ /* * inittodr, settodr and support routines written * by Christoph Robitschko * * reintroduced and updated by Chris Stenton 8/10/94 */ #include "opt_clock.h" #include "apm.h" #include #include #include #include #include #include #ifndef SMP #include #endif #include #include #include #ifdef CLK_CALIBRATION_LOOP #endif #include #include #include #include #include #include #ifdef APIC_IO #include #endif #if defined(SMP) || defined(APIC_IO) #include #endif /* SMP || APIC_IO */ #include #include #include #include #include #include #include #include "mca.h" #if NMCA > 0 #include #endif #ifdef SMP #define disable_intr() CLOCK_DISABLE_INTR() #define enable_intr() CLOCK_ENABLE_INTR() #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif #endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) #define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x)) /* * Time in timer cycles that it takes for microtime() to disable interrupts * and latch the count. microtime() currently uses "cli; outb ..." so it * normally takes less than 2 timer cycles. Add a few for cache misses. * Add a few more to allow for latency in bogus calls to microtime() with * interrupts already disabled. */ #define TIMER0_LATCH_COUNT 20 /* * Maximum frequency that we are willing to allow for timer0. Must be * low enough to guarantee that the timer interrupt handler returns * before the next timer interrupt. */ #define TIMER0_MAX_FREQ 20000 int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int disable_rtc_set; /* disable resettodr() if != 0 */ volatile u_int idelayed; int statclock_disable; u_int stat_imask = SWI_LOW_MASK; #ifndef TIMER_FREQ #define TIMER_FREQ 1193182 #endif u_int timer_freq = TIMER_FREQ; int timer0_max_count; u_int tsc_freq; int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; static u_int clk_imask = HWI_MASK | SWI_MASK; static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; /* * XXX new_function and timer_func should not handle clockframes, but * timer_func currently needs to hold hardclock to handle the * timer0_state == 0 case. We should use inthand_add()/inthand_remove() * to switch between clkintr() and a slightly different timerintr(). */ static void (*new_function) __P((struct clockframe *frame)); static u_int new_rate; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; static u_int timer0_prescaler_count; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer0_state; static u_char timer2_state; static void (*timer_func) __P((struct clockframe *frame)) = hardclock; static u_int tsc_present; static unsigned i8254_get_timecount __P((struct timecounter *tc)); static unsigned tsc_get_timecount __P((struct timecounter *tc)); static void set_timer_freq(u_int freq, int intr_freq); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "TSC" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, &tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", ""); static struct timecounter i8254_timecounter = { i8254_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "i8254" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", ""); static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { disable_intr(); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += timer0_max_count; i8254_lastcount = 0; } clkintr_pending = 0; enable_intr(); } timer_func(&frame); switch (timer0_state) { case RELEASED: setdelayed(); break; case ACQUIRED: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); setdelayed(); } break; case ACQUIRE_PENDING: disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); break; case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); setdelayed(); } break; } #if NMCA > 0 /* Reset clock interrupt by asserting bit 7 of port 0x61 */ if (MCA_system) outb(0x61, inb(0x61) | 0x80); #endif } /* * The acquire and release functions must be called at ipl >= splclock(). */ int acquire_timer0(int rate, void (*function) __P((struct clockframe *frame))) { static int old_rate; if (rate <= 0 || rate > TIMER0_MAX_FREQ) return (-1); switch (timer0_state) { case RELEASED: timer0_state = ACQUIRE_PENDING; break; case RELEASE_PENDING: if (rate != old_rate) return (-1); /* * The timer has been released recently, but is being * re-acquired before the release completed. In this * case, we simply reclaim it as if it had not been * released at all. */ timer0_state = ACQUIRED; break; default: return (-1); /* busy */ } new_function = function; old_rate = new_rate = rate; return (0); } int acquire_timer2(int mode) { if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); return (0); } int release_timer0() { switch (timer0_state) { case ACQUIRED: timer0_state = RELEASE_PENDING; break; case ACQUIRE_PENDING: /* Nothing happened yet, release quickly. */ timer0_state = RELEASED; break; default: return (-1); } return (0); } int release_timer2() { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); return (0); } /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static void rtcintr(struct clockframe frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) statclock(&frame); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ static int getit(void) { u_long ef; int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); write_eflags(ef); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Guard against the timer being uninitialized if we are called * early for console i/o. */ if (timer0_max_count == 0) set_timer_freq(timer_freq, hz); /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += timer0_max_count; /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void sysbeepstop(void *chan) { outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ release_timer2(); beeping = 0; } int sysbeep(int pitch, int period) { int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } splx(x); return (0); } /* * RTC support routines */ int rtcin(reg) int reg; { int s; u_char val; s = splhigh(); outb(IO_RTC, reg); inb(0x84); val = inb(IO_RTC + 1); inb(0x84); splx(s); return (val); } static __inline void writertc(u_char reg, u_char val) { int s; s = splhigh(); inb(0x84); outb(IO_RTC, reg); inb(0x84); outb(IO_RTC + 1, val); inb(0x84); /* XXX work around wrong order in rtcin() */ splx(s); } static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } static u_int calibrate_clocks(void) { u_int64_t old_tsc; u_int count, prev_count, tot_count; int sec, start_sec, timeout; if (bootverbose) printf("Calibrating clock(s) ... "); if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto fail; timeout = 100000000; /* Read the mc146818A seconds counter. */ for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); break; } if (--timeout == 0) goto fail; } /* Wait for the mC146818A seconds counter to change. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); if (sec != start_sec) break; } if (--timeout == 0) goto fail; } /* Start keeping track of the i8254 counter. */ prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) old_tsc = rdtsc(); else old_tsc = 0; /* shut up gcc */ /* * Wait for the mc146818A seconds counter to change. Read the i8254 * counter for each iteration since this is convenient and only * costs a few usec of inaccuracy. The timing of the final reads * of the counters almost matches the timing of the initial reads, * so the main cause of inaccuracy is the varying latency from * inside getit() or rtcin(RTC_STATUSA) to the beginning of the * rtcin(RTC_SEC) that returns a changed seconds count. The * maximum inaccuracy from this cause is < 10 usec on 486's. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) sec = rtcin(RTC_SEC); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if (sec != start_sec) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc() - old_tsc; if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } static void set_timer_freq(u_int freq, int intr_freq) { u_long ef; int new_timer0_max_count; ef = read_eflags(); disable_intr(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { timer0_max_count = new_timer0_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); write_eflags(ef); } /* * i8254_restore is called from apm_default_resume() to reload * the countdown register. * this should not be necessary but there are broken laptops that * do not restore the countdown register on resume. * when it happnes, it messes up the hardclock interval and system clock, * which leads to the infamous "calcru: negative time" problem. */ void i8254_restore(void) { u_long ef; ef = read_eflags(); disable_intr(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); write_eflags(ef); } /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. */ void startrtclock() { u_int delta, freq; if (cpu_feature & CPUID_TSC) tsc_present = 1; else tsc_present = 0; writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); set_timer_freq(timer_freq, hz); freq = calibrate_clocks(); #ifdef CLK_CALIBRATION_LOOP if (bootverbose) { printf( "Press a key on the console to abort clock calibration\n"); while (cncheckc() == -1) calibrate_clocks(); } #endif /* * Use the calibrated i8254 frequency if it seems reasonable. * Otherwise use the default, and don't use the calibrated i586 * frequency. */ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq; if (delta < timer_freq / 100) { #ifndef CLK_USE_I8254_CALIBRATION if (bootverbose) printf( "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); freq = timer_freq; #endif timer_freq = freq; } else { if (bootverbose) printf( "%d Hz differs from default of %d Hz by more than 1%%\n", freq, timer_freq); tsc_freq = 0; } set_timer_freq(timer_freq, hz); i8254_timecounter.tc_frequency = timer_freq; tc_init(&i8254_timecounter); #ifndef CLK_USE_TSC_CALIBRATION if (tsc_freq != 0) { if (bootverbose) printf( "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); tsc_freq = 0; } #endif if (tsc_present && tsc_freq == 0) { /* * Calibration of the i586 clock relative to the mc146818A * clock failed. Do a less accurate calibration relative * to the i8254 clock. */ u_int64_t old_tsc = rdtsc(); DELAY(1000000); tsc_freq = rdtsc() - old_tsc; #ifdef CLK_USE_TSC_CALIBRATION if (bootverbose) printf("TSC clock: %u Hz (Method B)\n", tsc_freq); #endif } #if !defined(SMP) /* * We can not use the TSC in SMP mode, until we figure out a * cheap (impossible), reliable and precise (yeah right!) way * to synchronize the TSCs of all the CPUs. * Curse Intel for leaving the counter out of the I/O APIC. */ #if NAPM > 0 /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 looses too of course * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ return; #endif /* NAPM > 0 */ if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } #endif /* !defined(SMP) */ } /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { unsigned long sec, days; int yd; int year, month; int y, m, s; struct timespec ts; if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; yd = days; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; if (disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); } /* * Start both clocks running. */ void cpu_initclocks() { int diag; #ifdef APIC_IO int apic_8254_trial; struct intrec *clkdesc; #endif /* APIC_IO */ if (statclock_disable) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt * flag which would normally cause the RTC to generate * interrupts. */ stat_imask = HWI_MASK | SWI_MASK; rtc_statusb = RTCSB_24HR; } else { /* Setting stathz to nonzero early helps avoid races. */ stathz = RTC_NOPROFRATE; profhz = RTC_PROFRATE; } /* Finish initializing 8253 timer 0. */ #ifdef APIC_IO apic_8254_intr = isa_apic_irq(0); apic_8254_trial = 0; if (apic_8254_intr >= 0 ) { if (apic_int_type(0, 0) == 3) apic_8254_trial = 1; } else { /* look for ExtInt on pin 0 */ if (apic_int_type(0, 0) == 3) { apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); #else /* APIC_IO */ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(IRQ0); #endif /* APIC_IO */ /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ if (statclock_disable) return; diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); #endif /* APIC_IO */ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, INTR_EXCL); #ifdef APIC_IO INTREN(APIC_IRQ8); #else INTREN(IRQ8); #endif /* APIC_IO */ writertc(RTC_STATUSB, rtc_statusb); #ifdef APIC_IO if (apic_8254_trial) { printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ if (read_intr_count(apic_8254_intr) < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ INTRDIS(1 << apic_8254_intr); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); /* * Revoke current ISA IRQ 0 assignment and * configure a fallback interrupt routing from * the 8254 Timer via the 8259 PIC to the * an ExtInt interrupt line on IOAPIC #0 intpin 0. * We reuse the low level interrupt handler number. */ if (apic_irq(0, 0) < 0) { revoke_apic_irq(apic_8254_intr); assign_apic_irq(0, 0, apic_8254_intr); } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); } } if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); else printf("APIC_IO: " "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); #endif } #ifdef APIC_IO static u_long read_intr_count(int vec) { u_long *up; up = intr_countp[vec]; if (up) return *up; return 0UL; } static void setup_8254_mixed_mode() { /* * Allow 8254 timer to INTerrupt 8259: * re-initialize master 8259: * reset; prog 4 bytes, single ICU, edge triggered */ outb(IO_ICU1, 0x13); outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 1, 0x00); /* ignore slave */ outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ /* program IO APIC for type 3 INT on INT0 */ if (ext_int_setup(0, 0) < 0) panic("8254 redirect via APIC pin0 impossible!"); } #endif void setstatclockrate(int newhz) { if (newhz == RTC_PROFRATE) rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; else rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); } static int -sysctl_machdep_i8254_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is is too generic. Should use it everywhere. */ freq = timer_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { if (timer0_state != RELEASED) return (EBUSY); /* too much trouble to handle */ set_timer_freq(freq, hz); i8254_timecounter.tc_frequency = freq; tc_update(&i8254_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", ""); static int -sysctl_machdep_tsc_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (tsc_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = tsc_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { tsc_freq = freq; tsc_timecounter.tc_frequency = tsc_freq; tc_update(&tsc_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", ""); static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; u_long ef; u_int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. */ (lapic_irr1 & (1 << apic_8254_intr)))) #else (inb(IO_ICU1) & 1))) #endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; } i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); write_eflags(ef); return (count); } static unsigned tsc_get_timecount(struct timecounter *tc) { return (rdtsc()); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int attimer_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids)) <= 0) device_quiet(dev); return(result); } static int attimer_attach(device_t dev) { return(0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX restart statclock? */ { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, 1, /* no softc */ }; static devclass_t attimer_devclass; DRIVER_MODULE(attimer, isa, attimer_driver, attimer_devclass, 0, 0); Index: head/sys/isa/sio.c =================================================================== --- head/sys/isa/sio.c (revision 62572) +++ head/sys/isa/sio.c (revision 62573) @@ -1,3292 +1,3292 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * from: @(#)com.c 7.5 (Berkeley) 5/16/91 * from: i386/isa sio.c,v 1.234 */ #include "opt_comconsole.h" #include "opt_compat.h" #include "opt_ddb.h" #include "opt_sio.h" #include "card.h" #include "pci.h" #include "sio.h" /* * Serial driver, based on 386BSD-0.1 com driver. * Mostly rewritten to use pseudo-DMA. * Works for National Semiconductor NS8250-NS16550AF UARTs. * COM driver, based on HP dca driver. * * Changes for PC-Card integration: * - Added PC-Card driver table and handlers */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if NPCI > 0 #include #include #endif #include #include #include #ifndef SMP #include #endif #include #include #ifdef COM_ESP #include #endif #include #ifndef __i386__ #define disable_intr() #define enable_intr() #endif #ifdef SMP #define disable_intr() COM_DISABLE_INTR() #define enable_intr() COM_ENABLE_INTR() #endif /* SMP */ #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ #define CALLOUT_MASK 0x80 #define CONTROL_MASK 0x60 #define CONTROL_INIT_STATE 0x20 #define CONTROL_LOCK_STATE 0x40 #define DEV_TO_UNIT(dev) (MINOR_TO_UNIT(minor(dev))) #define MINOR_MAGIC_MASK (CALLOUT_MASK | CONTROL_MASK) #define MINOR_TO_UNIT(mynor) ((mynor) & ~MINOR_MAGIC_MASK) #ifdef COM_MULTIPORT /* checks in flags for multiport and which is multiport "master chip" * for a given card */ #define COM_ISMULTIPORT(flags) ((flags) & 0x01) #define COM_MPMASTER(flags) (((flags) >> 8) & 0x0ff) #define COM_NOTAST4(flags) ((flags) & 0x04) #endif /* COM_MULTIPORT */ #define COM_CONSOLE(flags) ((flags) & 0x10) #define COM_FORCECONSOLE(flags) ((flags) & 0x20) #define COM_LLCONSOLE(flags) ((flags) & 0x40) #define COM_DEBUGGER(flags) ((flags) & 0x80) #define COM_LOSESOUTINTS(flags) ((flags) & 0x08) #define COM_NOFIFO(flags) ((flags) & 0x02) #define COM_ST16650A(flags) ((flags) & 0x20000) #define COM_C_NOPROBE (0x40000) #define COM_NOPROBE(flags) ((flags) & COM_C_NOPROBE) #define COM_C_IIR_TXRDYBUG (0x80000) #define COM_IIR_TXRDYBUG(flags) ((flags) & COM_C_IIR_TXRDYBUG) #define COM_FIFOSIZE(flags) (((flags) & 0xff000000) >> 24) #define com_scr 7 /* scratch register for 16450-16550 (R/W) */ #define sio_getreg(com, off) \ (bus_space_read_1((com)->bst, (com)->bsh, (off))) #define sio_setreg(com, off, value) \ (bus_space_write_1((com)->bst, (com)->bsh, (off), (value))) /* * com state bits. * (CS_BUSY | CS_TTGO) and (CS_BUSY | CS_TTGO | CS_ODEVREADY) must be higher * than the other bits so that they can be tested as a group without masking * off the low bits. * * The following com and tty flags correspond closely: * CS_BUSY = TS_BUSY (maintained by comstart(), siopoll() and * comstop()) * CS_TTGO = ~TS_TTSTOP (maintained by comparam() and comstart()) * CS_CTS_OFLOW = CCTS_OFLOW (maintained by comparam()) * CS_RTS_IFLOW = CRTS_IFLOW (maintained by comparam()) * TS_FLUSH is not used. * XXX I think TIOCSETA doesn't clear TS_TTSTOP when it clears IXON. * XXX CS_*FLOW should be CF_*FLOW in com->flags (control flags not state). */ #define CS_BUSY 0x80 /* output in progress */ #define CS_TTGO 0x40 /* output not stopped by XOFF */ #define CS_ODEVREADY 0x20 /* external device h/w ready (CTS) */ #define CS_CHECKMSR 1 /* check of MSR scheduled */ #define CS_CTS_OFLOW 2 /* use CTS output flow control */ #define CS_DTR_OFF 0x10 /* DTR held off */ #define CS_ODONE 4 /* output completed */ #define CS_RTS_IFLOW 8 /* use RTS input flow control */ #define CSE_BUSYCHECK 1 /* siobusycheck() scheduled */ static char const * const error_desc[] = { #define CE_OVERRUN 0 "silo overflow", #define CE_INTERRUPT_BUF_OVERFLOW 1 "interrupt-level buffer overflow", #define CE_TTY_BUF_OVERFLOW 2 "tty-level buffer overflow", }; #define CE_NTYPES 3 #define CE_RECORD(com, errnum) (++(com)->delta_error_counts[errnum]) /* types. XXX - should be elsewhere */ typedef u_int Port_t; /* hardware port */ typedef u_char bool_t; /* boolean */ /* queue of linear buffers */ struct lbq { u_char *l_head; /* next char to process */ u_char *l_tail; /* one past the last char to process */ struct lbq *l_next; /* next in queue */ bool_t l_queued; /* nonzero if queued */ }; /* com device structure */ struct com_s { u_int flags; /* Copy isa device flags */ u_char state; /* miscellaneous flag bits */ bool_t active_out; /* nonzero if the callout device is open */ u_char cfcr_image; /* copy of value written to CFCR */ #ifdef COM_ESP bool_t esp; /* is this unit a hayes esp board? */ #endif u_char extra_state; /* more flag bits, separate for order trick */ u_char fifo_image; /* copy of value written to FIFO */ bool_t hasfifo; /* nonzero for 16550 UARTs */ bool_t st16650a; /* Is a Startech 16650A or RTS/CTS compat */ bool_t loses_outints; /* nonzero if device loses output interrupts */ u_char mcr_image; /* copy of value written to MCR */ #ifdef COM_MULTIPORT bool_t multiport; /* is this unit part of a multiport device? */ #endif /* COM_MULTIPORT */ bool_t no_irq; /* nonzero if irq is not attached */ bool_t gone; /* hardware disappeared */ bool_t poll; /* nonzero if polling is required */ bool_t poll_output; /* nonzero if polling for output is required */ int unit; /* unit number */ int dtr_wait; /* time to hold DTR down on close (* 1/hz) */ u_int tx_fifo_size; u_int wopeners; /* # processes waiting for DCD in open() */ /* * The high level of the driver never reads status registers directly * because there would be too many side effects to handle conveniently. * Instead, it reads copies of the registers stored here by the * interrupt handler. */ u_char last_modem_status; /* last MSR read by intr handler */ u_char prev_modem_status; /* last MSR handled by high level */ u_char hotchar; /* ldisc-specific char to be handled ASAP */ u_char *ibuf; /* start of input buffer */ u_char *ibufend; /* end of input buffer */ u_char *ibufold; /* old input buffer, to be freed */ u_char *ihighwater; /* threshold in input buffer */ u_char *iptr; /* next free spot in input buffer */ int ibufsize; /* size of ibuf (not include error bytes) */ int ierroff; /* offset of error bytes in ibuf */ struct lbq obufq; /* head of queue of output buffers */ struct lbq obufs[2]; /* output buffers */ bus_space_tag_t bst; bus_space_handle_t bsh; Port_t data_port; /* i/o ports */ #ifdef COM_ESP Port_t esp_port; #endif Port_t int_id_port; Port_t modem_ctl_port; Port_t line_status_port; Port_t modem_status_port; Port_t intr_ctl_port; /* Ports of IIR register */ struct tty *tp; /* cross reference */ /* Initial state. */ struct termios it_in; /* should be in struct tty */ struct termios it_out; /* Lock state. */ struct termios lt_in; /* should be in struct tty */ struct termios lt_out; bool_t do_timestamp; bool_t do_dcd_timestamp; struct timeval timestamp; struct timeval dcd_timestamp; struct pps_state pps; u_long bytes_in; /* statistics */ u_long bytes_out; u_int delta_error_counts[CE_NTYPES]; u_long error_counts[CE_NTYPES]; struct resource *irqres; struct resource *ioportres; void *cookie; /* * Data area for output buffers. Someday we should build the output * buffer queue without copying data. */ u_char obuf1[256]; u_char obuf2[256]; }; #ifdef COM_ESP static int espattach __P((struct com_s *com, Port_t esp_port)); #endif static int sioattach __P((device_t dev, int rid)); static int sio_isa_attach __P((device_t dev)); static timeout_t siobusycheck; static timeout_t siodtrwakeup; static void comhardclose __P((struct com_s *com)); static void sioinput __P((struct com_s *com)); static void siointr1 __P((struct com_s *com)); static void siointr __P((void *arg)); static int commctl __P((struct com_s *com, int bits, int how)); static int comparam __P((struct tty *tp, struct termios *t)); static swihand_t siopoll; static int sioprobe __P((device_t dev, int xrid)); static int sio_isa_probe __P((device_t dev)); static void siosettimeout __P((void)); static int siosetwater __P((struct com_s *com, speed_t speed)); static void comstart __P((struct tty *tp)); static void comstop __P((struct tty *tp, int rw)); static timeout_t comwakeup; static void disc_optim __P((struct tty *tp, struct termios *t, struct com_s *com)); #if NCARD > 0 static int sio_pccard_attach __P((device_t dev)); static int sio_pccard_detach __P((device_t dev)); static int sio_pccard_probe __P((device_t dev)); #endif /* NCARD > 0 */ #if NPCI > 0 static int sio_pci_attach __P((device_t dev)); static void sio_pci_kludge_unit __P((device_t dev)); static int sio_pci_probe __P((device_t dev)); #endif /* NPCI > 0 */ static char driver_name[] = "sio"; /* table and macro for fast conversion from a unit number to its com struct */ static devclass_t sio_devclass; #define com_addr(unit) ((struct com_s *) \ devclass_get_softc(sio_devclass, unit)) static device_method_t sio_isa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_isa_probe), DEVMETHOD(device_attach, sio_isa_attach), { 0, 0 } }; static driver_t sio_isa_driver = { driver_name, sio_isa_methods, sizeof(struct com_s), }; #if NCARD > 0 static device_method_t sio_pccard_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_pccard_probe), DEVMETHOD(device_attach, sio_pccard_attach), DEVMETHOD(device_detach, sio_pccard_detach), { 0, 0 } }; static driver_t sio_pccard_driver = { driver_name, sio_pccard_methods, sizeof(struct com_s), }; #endif /* NCARD > 0 */ #if NPCI > 0 static device_method_t sio_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_pci_probe), DEVMETHOD(device_attach, sio_pci_attach), { 0, 0 } }; static driver_t sio_pci_driver = { driver_name, sio_pci_methods, sizeof(struct com_s), }; #endif /* NPCI > 0 */ static d_open_t sioopen; static d_close_t sioclose; static d_read_t sioread; static d_write_t siowrite; static d_ioctl_t sioioctl; #define CDEV_MAJOR 28 static struct cdevsw sio_cdevsw = { /* open */ sioopen, /* close */ sioclose, /* read */ sioread, /* write */ siowrite, /* ioctl */ sioioctl, /* poll */ ttypoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ driver_name, /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ D_TTY, /* bmaj */ -1 }; int comconsole = -1; static volatile speed_t comdefaultrate = CONSPEED; #ifdef __alpha__ static volatile speed_t gdbdefaultrate = CONSPEED; #endif static u_int com_events; /* input chars + weighted output completions */ static Port_t siocniobase; static int siocnunit; static Port_t siogdbiobase; static int siogdbunit = -1; static bool_t sio_registered; static int sio_timeout; static int sio_timeouts_until_log; static struct callout_handle sio_timeout_handle = CALLOUT_HANDLE_INITIALIZER(&sio_timeout_handle); static int sio_numunits; static struct speedtab comspeedtab[] = { { 0, 0 }, { 50, COMBRD(50) }, { 75, COMBRD(75) }, { 110, COMBRD(110) }, { 134, COMBRD(134) }, { 150, COMBRD(150) }, { 200, COMBRD(200) }, { 300, COMBRD(300) }, { 600, COMBRD(600) }, { 1200, COMBRD(1200) }, { 1800, COMBRD(1800) }, { 2400, COMBRD(2400) }, { 4800, COMBRD(4800) }, { 9600, COMBRD(9600) }, { 19200, COMBRD(19200) }, { 38400, COMBRD(38400) }, { 57600, COMBRD(57600) }, { 115200, COMBRD(115200) }, { -1, -1 } }; #ifdef COM_ESP /* XXX configure this properly. */ static Port_t likely_com_ports[] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, }; static Port_t likely_esp_ports[] = { 0x140, 0x180, 0x280, 0 }; #endif /* * handle sysctl read/write requests for console speed * * In addition to setting comdefaultrate for I/O through /dev/console, * also set the initial and lock values for the /dev/ttyXX device * if there is one associated with the console. Finally, if the /dev/tty * device has already been open, change the speed on the open running port * itself. */ static int -sysctl_machdep_comdefaultrate (SYSCTL_HANDLER_ARGS) +sysctl_machdep_comdefaultrate(SYSCTL_HANDLER_ARGS) { int error, s; speed_t newspeed; struct com_s *com; struct tty *tp; newspeed = comdefaultrate; error = sysctl_handle_opaque(oidp, &newspeed, sizeof newspeed, req); if (error || !req->newptr) return (error); comdefaultrate = newspeed; if (comconsole < 0) /* serial console not selected? */ return (0); com = com_addr(comconsole); if (com == NULL) return (ENXIO); /* * set the initial and lock rates for /dev/ttydXX and /dev/cuaXX * (note, the lock rates really are boolean -- if non-zero, disallow * speed changes) */ com->it_in.c_ispeed = com->it_in.c_ospeed = com->lt_in.c_ispeed = com->lt_in.c_ospeed = com->it_out.c_ispeed = com->it_out.c_ospeed = com->lt_out.c_ispeed = com->lt_out.c_ospeed = comdefaultrate; /* * if we're open, change the running rate too */ tp = com->tp; if (tp && (tp->t_state & TS_ISOPEN)) { tp->t_termios.c_ispeed = tp->t_termios.c_ospeed = comdefaultrate; s = spltty(); error = comparam(tp, &tp->t_termios); splx(s); } return error; } SYSCTL_PROC(_machdep, OID_AUTO, conspeed, CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_machdep_comdefaultrate, "I", ""); #define SET_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) | (bit)) #define CLR_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) & ~(bit)) #if NCARD > 0 static int sio_pccard_probe(dev) device_t dev; { /* Do not probe IRQ - pccard doesn't turn on the interrupt line */ /* until bus_setup_intr */ SET_FLAG(dev, COM_C_NOPROBE); return (sioprobe(dev, 0)); } static int sio_pccard_attach(dev) device_t dev; { return (sioattach(dev, 0)); } /* * sio_detach - unload the driver and clear the table. * XXX TODO: * This is usually called when the card is ejected, but * can be caused by a modunload of a controller driver. * The idea is to reset the driver's view of the device * and ensure that any driver entry points such as * read and write do not hang. */ static int sio_pccard_detach(dev) device_t dev; { struct com_s *com; com = (struct com_s *) device_get_softc(dev); if (com == NULL) { device_printf(dev, "NULL com in siounload\n"); return (0); } com->gone = 1; if (com->irqres) { bus_teardown_intr(dev, com->irqres, com->cookie); bus_release_resource(dev, SYS_RES_IRQ, 0, com->irqres); } if (com->ioportres) bus_release_resource(dev, SYS_RES_IOPORT, 0, com->ioportres); if (com->tp && (com->tp->t_state & TS_ISOPEN)) { device_printf(dev, "still open, forcing close\n"); com->tp->t_gen++; ttyclose(com->tp); ttwakeup(com->tp); ttwwakeup(com->tp); } else { if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); } device_printf(dev, "unloaded\n"); return (0); } #endif /* NCARD > 0 */ #if NPCI > 0 struct pci_ids { u_int32_t type; const char *desc; int rid; }; static struct pci_ids pci_ids[] = { { 0x100812b9, "3COM PCI FaxModem", 0x10 }, { 0x048011c1, "ActionTec 56k FAX PCI Modem", 0x14 }, { 0x00000000, NULL, 0 } }; static int sio_pci_attach(dev) device_t dev; { u_int32_t type; struct pci_ids *id; type = pci_get_devid(dev); id = pci_ids; while (id->type && id->type != type) id++; if (id->desc == NULL) return (ENXIO); sio_pci_kludge_unit(dev); return (sioattach(dev, id->rid)); } /* * Don't cut and paste this to other drivers. It is a horrible kludge * which will fail to work and also be unnecessary in future versions. */ static void sio_pci_kludge_unit(dev) device_t dev; { devclass_t dc; int err; int start; int unit; unit = 0; start = 0; while (resource_int_value("sio", unit, "port", &start) == 0 && start > 0) unit++; if (device_get_unit(dev) < unit) { dc = device_get_devclass(dev); while (devclass_get_device(dc, unit)) unit++; device_printf(dev, "moving to sio%d\n", unit); err = device_set_unit(dev, unit); /* EVIL DO NOT COPY */ if (err) device_printf(dev, "error moving device %d\n", err); } } static int sio_pci_probe(dev) device_t dev; { u_int32_t type; struct pci_ids *id; type = pci_get_devid(dev); id = pci_ids; while (id->type && id->type != type) id++; if (id->desc == NULL) return (ENXIO); device_set_desc(dev, id->desc); return (sioprobe(dev, id->rid)); } #endif /* NPCI > 0 */ static struct isa_pnp_id sio_ids[] = { {0x0005d041, "Standard PC COM port"}, /* PNP0500 */ {0x0105d041, "16550A-compatible COM port"}, /* PNP0501 */ {0x0205d041, "Multiport serial device (non-intelligent 16550)"}, /* PNP0502 */ {0x1005d041, "Generic IRDA-compatible device"}, /* PNP0510 */ {0x1105d041, "Generic IRDA-compatible device"}, /* PNP0511 */ /* Devices that do not have a compatid */ {0x12206804, NULL}, /* ACH2012 - 5634BTS 56K Video Ready Modem */ {0x7602a904, NULL}, /* AEI0276 - 56K v.90 Fax Modem (LKT) */ {0x00007905, NULL}, /* AKY0000 - 56K Plug&Play Modem */ {0x01405407, NULL}, /* AZT4001 - AZT3000 PnP SOUND DEVICE, MODEM */ {0x56039008, NULL}, /* BDP0356 - Best Data 56x2 */ {0x36339008, NULL}, /* BDP3336 - Best Data Prods. 336F */ {0x0014490a, NULL}, /* BRI1400 - Boca 33.6 PnP */ {0x0015490a, NULL}, /* BRI1500 - Internal Fax Data */ {0x0034490a, NULL}, /* BRI3400 - Internal ACF Modem */ {0x00b4490a, NULL}, /* BRIB400 - Boca 56k PnP */ {0x0030320d, NULL}, /* CIR3000 - Cirrus Logic V43 */ {0x0100440e, NULL}, /* CRD0001 - Cardinal MVP288IV ? */ {0x36033610, NULL}, /* DAV0336 - DAVICOM 336PNP MODEM */ {0x0000aa1a, NULL}, /* FUJ0000 - FUJITSU Modem 33600 PNP/I2 */ {0x1200c31e, NULL}, /* GVC0012 - VF1128HV-R9 (win modem?) */ {0x0303c31e, NULL}, /* GVC0303 - MaxTech 33.6 PnP D/F/V */ {0x0505c31e, NULL}, /* GVC0505 - GVC 56k Faxmodem */ {0x0050c31e, NULL}, /* GVC5000 - some GVC modem */ {0x3800f91e, NULL}, /* GWY0038 - Telepath with v.90 */ {0x9062f91e, NULL}, /* GWY6290 - Telepath with x2 Technology */ {0x0000f435, NULL}, /* MOT0000 - Motorola ModemSURFR 33.6 Intern */ {0x5015f435, NULL}, /* MOT1550 - Motorola ModemSURFR 56K Modem */ {0xf015f435, NULL}, /* MOT15F0 - Motorola VoiceSURFR 56K Modem */ {0x6045f435, NULL}, /* MOT4560 - Motorola ? */ {0x61e7a338, NULL}, /* NECE761 - 33.6Modem */ {0x0f804f3f, NULL}, /* OZO800f - Zoom 2812 (56k Modem) */ {0x39804f3f, NULL}, /* OZO8039 - Zoom 56k flex */ {0x3024a341, NULL}, /* PMC2430 - Pace 56 Voice Internal Modem */ {0x1000eb49, NULL}, /* ROK0010 - Rockwell ? */ {0x5002734a, NULL}, /* RSS0250 - 5614Jx3(G) Internal Modem */ {0x6202734a, NULL}, /* RSS0262 - 5614Jx3[G] V90+K56Flex Modem */ {0xc100ad4d, NULL}, /* SMM00C1 - Leopard 56k PnP */ {0x9012b04e, NULL}, /* SUP1290 - Supra ? */ {0x1013b04e, NULL}, /* SUP1310 - SupraExpress 336i PnP */ {0x8013b04e, NULL}, /* SUP1380 - SupraExpress 288i PnP Voice */ {0x8113b04e, NULL}, /* SUP1381 - SupraExpress 336i PnP Voice */ {0x5016b04e, NULL}, /* SUP1650 - Supra 336i Sp Intl */ {0x7016b04e, NULL}, /* SUP1670 - Supra 336i V+ Intl */ {0x7420b04e, NULL}, /* SUP2070 - Supra ? */ {0x8020b04e, NULL}, /* SUP2080 - Supra ? */ {0x8420b04e, NULL}, /* SUP2084 - SupraExpress 56i PnP */ {0x7121b04e, NULL}, /* SUP2171 - SupraExpress 56i Sp? */ {0x8024b04e, NULL}, /* SUP2480 - Supra ? */ {0x01007256, NULL}, /* USR0001 - U.S. Robotics Inc., Sportster W */ {0x02007256, NULL}, /* USR0002 - U.S. Robotics Inc. Sportster 33. */ {0x04007256, NULL}, /* USR0004 - USR Sportster 14.4k */ {0x06007256, NULL}, /* USR0006 - USR Sportster 33.6k */ {0x11007256, NULL}, /* USR0011 - USR ? */ {0x01017256, NULL}, /* USR0101 - USR ? */ {0x30207256, NULL}, /* USR2030 - U.S.Robotics Inc. Sportster 560 */ {0x50207256, NULL}, /* USR2050 - U.S.Robotics Inc. Sportster 33. */ {0x70207256, NULL}, /* USR2070 - U.S.Robotics Inc. Sportster 560 */ {0x30307256, NULL}, /* USR3030 - U.S. Robotics 56K FAX INT */ {0x31307256, NULL}, /* USR3031 - U.S. Robotics 56K FAX INT */ {0x50307256, NULL}, /* USR3050 - U.S. Robotics 56K FAX INT */ {0x70307256, NULL}, /* USR3070 - U.S. Robotics 56K Voice INT */ {0x90307256, NULL}, /* USR3090 - USR ? */ {0x90917256, NULL}, /* USR9190 - USR 56k Voice INT */ {0x0300695c, NULL}, /* WCI0003 - Fax/Voice/Modem/Speakphone/Asvd */ {0x61f7896a, NULL}, /* ZTIF761 - Zoom ComStar 33.6 */ {0} }; static int sio_isa_probe(dev) device_t dev; { /* Check isapnp ids */ if (ISA_PNP_PROBE(device_get_parent(dev), dev, sio_ids) == ENXIO) return (ENXIO); return (sioprobe(dev, 0)); } static int sioprobe(dev, xrid) device_t dev; int xrid; { #if 0 static bool_t already_init; device_t xdev; #endif struct com_s *com; bool_t failures[10]; int fn; device_t idev; Port_t iobase; intrmask_t irqmap[4]; intrmask_t irqs; u_char mcr_image; int result; u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; rid = xrid; port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); if (!port) return (ENXIO); com = device_get_softc(dev); com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); #if 0 /* * XXX this is broken - when we are first called, there are no * previously configured IO ports. We could hard code * 0x3f8, 0x2f8, 0x3e8, 0x2e8 etc but that's probably worse. * This code has been doing nothing since the conversion since * "count" is zero the first time around. */ if (!already_init) { /* * Turn off MCR_IENABLE for all likely serial ports. An unused * port with its MCR_IENABLE gate open will inhibit interrupts * from any used port that shares the interrupt vector. * XXX the gate enable is elsewhere for some multiports. */ device_t *devs; int count, i, xioport; devclass_get_devices(sio_devclass, &devs, &count); for (i = 0; i < count; i++) { xdev = devs[i]; if (device_is_enabled(xdev) && bus_get_resource(xdev, SYS_RES_IOPORT, 0, &xioport, NULL) == 0) outb(xioport + com_mcr, 0); } free(devs, M_TEMP); already_init = TRUE; } #endif if (COM_LLCONSOLE(flags)) { printf("sio%d: reserved for low-level i/o\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } /* * If the device is on a multiport card and has an AST/4 * compatible interrupt control register, initialize this * register and prepare to leave MCR_IENABLE clear in the mcr. * Otherwise, prepare to set MCR_IENABLE in the mcr. * Point idev to the device struct giving the correct id_irq. * This is the struct for the master device if there is one. */ idev = dev; mcr_image = MCR_IENABLE; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { Port_t xiobase; u_long io; idev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); if (idev == NULL) { printf("sio%d: master device %d not configured\n", device_get_unit(dev), COM_MPMASTER(flags)); idev = dev; } if (!COM_NOTAST4(flags)) { if (bus_get_resource(idev, SYS_RES_IOPORT, 0, &io, NULL) == 0) { xiobase = io; if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) == 0) outb(xiobase + com_scr, 0x80); else outb(xiobase + com_scr, 0); } mcr_image = 0; } } #endif /* COM_MULTIPORT */ if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) mcr_image = 0; bzero(failures, sizeof failures); iobase = rman_get_start(port); /* * We don't want to get actual interrupts, just masked ones. * Interrupts from this line should already be masked in the ICU, * but mask them in the processor as well in case there are some * (misconfigured) shared interrupts. */ disable_intr(); /* EXTRA DELAY? */ /* * Initialize the speed and the word size and wait long enough to * drain the maximum of 16 bytes of junk in device output queues. * The speed is undefined after a master reset and must be set * before relying on anything related to output. There may be * junk after a (very fast) soft reboot and (apparently) after * master reset. * XXX what about the UART bug avoided by waiting in comparam()? * We don't want to to wait long enough to drain at 2 bps. */ if (iobase == siocniobase) DELAY((16 + 1) * 1000000 / (comdefaultrate / 10)); else { sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); sio_setreg(com, com_dlbl, COMBRD(SIO_TEST_SPEED) & 0xff); sio_setreg(com, com_dlbh, (u_int) COMBRD(SIO_TEST_SPEED) >> 8); sio_setreg(com, com_cfcr, CFCR_8BITS); DELAY((16 + 1) * 1000000 / (SIO_TEST_SPEED / 10)); } /* * Enable the interrupt gate and disable device interupts. This * should leave the device driving the interrupt line low and * guarantee an edge trigger if an interrupt can be generated. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ irqmap[0] = isa_irq_pending(); /* * Attempt to set loopback mode so that we can send a null byte * without annoying any external device. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image | MCR_LOOPBACK); /* * Attempt to generate an output interrupt. On 8250's, setting * IER_ETXRDY generates an interrupt independent of the current * setting and independent of whether the THR is empty. On 16450's, * setting IER_ETXRDY generates an interrupt independent of the * current setting. On 16550A's, setting IER_ETXRDY only * generates an interrupt when IER_ETXRDY is not already set. */ sio_setreg(com, com_ier, IER_ETXRDY); /* * On some 16x50 incompatibles, setting IER_ETXRDY doesn't generate * an interrupt. They'd better generate one for actually doing * output. Loopback may be broken on the same incompatibles but * it's unlikely to do more than allow the null byte out. */ sio_setreg(com, com_data, 0); DELAY((1 + 2) * 1000000 / (SIO_TEST_SPEED / 10)); /* * Turn off loopback mode so that the interrupt gate works again * (MCR_IENABLE was hidden). This should leave the device driving * an interrupt line high. It doesn't matter if the interrupt * line oscillates while we are not looking at it, since interrupts * are disabled. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); /* * Some pcmcia cards have the "TXRDY bug", so we check everyone * for IIR_TXRDY implementation ( Palido 321s, DC-1S... ) */ if (COM_NOPROBE(flags)) { /* Reading IIR register twice */ for (fn = 0; fn < 2; fn ++) { DELAY(10000); failures[6] = sio_getreg(com, com_iir); } /* Check IIR_TXRDY clear ? */ result = 0; if (failures[6] & IIR_TXRDY) { /* Nop, Double check with clearing IER */ sio_setreg(com, com_ier, 0); if (sio_getreg(com, com_iir) & IIR_NOPEND) { /* Ok. we're familia this gang */ SET_FLAG(dev, COM_C_IIR_TXRDYBUG); } else { /* Unknown, Just omit this chip.. XXX */ result = ENXIO; } } else { /* OK. this is well-known guys */ CLR_FLAG(dev, COM_C_IIR_TXRDYBUG); } sio_setreg(com, com_cfcr, CFCR_8BITS); enable_intr(); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 0 : result); } /* * Check that * o the CFCR, IER and MCR in UART hold the values written to them * (the values happen to be all distinct - this is good for * avoiding false positive tests from bus echoes). * o an output interrupt is generated and its vector is correct. * o the interrupt goes away when the IIR in the UART is read. */ /* EXTRA DELAY? */ failures[0] = sio_getreg(com, com_cfcr) - CFCR_8BITS; failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ irqmap[1] = isa_irq_pending(); failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; DELAY(1000); /* XXX */ irqmap[2] = isa_irq_pending(); failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; /* * Turn off all device interrupts and check that they go off properly. * Leave MCR_IENABLE alone. For ports without a master port, it gates * the OUT2 output of the UART to * the ICU input. Closing the gate would give a floating ICU input * (unless there is another device driving it) and spurious interrupts. * (On the system that this was first tested on, the input floats high * and gives a (masked) interrupt as soon as the gate is closed.) */ sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); /* dummy to avoid bus echo */ failures[7] = sio_getreg(com, com_ier); DELAY(1000); /* XXX */ irqmap[3] = isa_irq_pending(); failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; enable_intr(); irqs = irqmap[1] & ~irqmap[0]; if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && ((1 << xirq) & irqs) == 0) printf( "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", device_get_unit(dev), xirq, irqs); if (bootverbose) printf("sio%d: irq maps: %#x %#x %#x %#x\n", device_get_unit(dev), irqmap[0], irqmap[1], irqmap[2], irqmap[3]); result = 0; for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) { sio_setreg(com, com_mcr, 0); result = ENXIO; if (bootverbose) { printf("sio%d: probe failed test(s):", device_get_unit(dev)); for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) printf(" %d", fn); printf("\n"); } break; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 0 : result); } #ifdef COM_ESP static int espattach(com, esp_port) struct com_s *com; Port_t esp_port; { u_char dips; u_char val; /* * Check the ESP-specific I/O port to see if we're an ESP * card. If not, return failure immediately. */ if ((inb(esp_port) & 0xf3) == 0) { printf(" port 0x%x is not an ESP board?\n", esp_port); return (0); } /* * We've got something that claims to be a Hayes ESP card. * Let's hope so. */ /* Get the dip-switch configuration */ outb(esp_port + ESP_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP_STATUS1); /* * Bits 0,1 of dips say which COM port we are. */ if (rman_get_start(com->ioportres) == likely_com_ports[dips & 0x03]) printf(" : ESP"); else { printf(" esp_port has com %d\n", dips & 0x03); return (0); } /* * Check for ESP version 2.0 or later: bits 4,5,6 = 010. */ outb(esp_port + ESP_CMD1, ESP_GETTEST); val = inb(esp_port + ESP_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP_STATUS2); if ((val & 0x70) < 0x20) { printf("-old (%o)", val & 0x70); return (0); } /* * Check for ability to emulate 16550: bit 7 == 1 */ if ((dips & 0x80) == 0) { printf(" slave"); return (0); } /* * Okay, we seem to be a Hayes ESP card. Whee. */ com->esp = TRUE; com->esp_port = esp_port; return (1); } #endif /* COM_ESP */ static int sio_isa_attach(dev) device_t dev; { return (sioattach(dev, 0)); } static int sioattach(dev, xrid) device_t dev; int xrid; { struct com_s *com; #ifdef COM_ESP Port_t *espp; #endif Port_t iobase; int unit; u_int flags; int rid; struct resource *port; int ret; rid = xrid; port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); if (!port) return (ENXIO); iobase = rman_get_start(port); unit = device_get_unit(dev); com = device_get_softc(dev); flags = device_get_flags(dev); if (unit >= sio_numunits) sio_numunits = unit + 1; /* * sioprobe() has initialized the device registers as follows: * o cfcr = CFCR_8BITS. * It is most important that CFCR_DLAB is off, so that the * data port is not hidden when we enable interrupts. * o ier = 0. * Interrupts are only enabled when the line is open. * o mcr = MCR_IENABLE, or 0 if the port has AST/4 compatible * interrupt control register or the config specifies no irq. * Keeping MCR_DTR and MCR_RTS off might stop the external * device from sending before we are ready. */ bzero(com, sizeof *com); com->unit = unit; com->ioportres = port; com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); com->cfcr_image = CFCR_8BITS; com->dtr_wait = 3 * hz; com->loses_outints = COM_LOSESOUTINTS(flags) != 0; com->no_irq = bus_get_resource(dev, SYS_RES_IRQ, 0, NULL, NULL) != 0; com->tx_fifo_size = 1; com->obufs[0].l_head = com->obuf1; com->obufs[1].l_head = com->obuf2; com->data_port = iobase + com_data; com->int_id_port = iobase + com_iir; com->modem_ctl_port = iobase + com_mcr; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + com_lsr; com->modem_status_port = iobase + com_msr; com->intr_ctl_port = iobase + com_ier; /* * We don't use all the flags from since they * are only relevant for logins. It's important to have echo off * initially so that the line doesn't start blathering before the * echo flag can be turned off. */ com->it_in.c_iflag = 0; com->it_in.c_oflag = 0; com->it_in.c_cflag = TTYDEF_CFLAG; com->it_in.c_lflag = 0; if (unit == comconsole) { com->it_in.c_iflag = TTYDEF_IFLAG; com->it_in.c_oflag = TTYDEF_OFLAG; com->it_in.c_cflag = TTYDEF_CFLAG | CLOCAL; com->it_in.c_lflag = TTYDEF_LFLAG; com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL; com->lt_out.c_ispeed = com->lt_out.c_ospeed = com->lt_in.c_ispeed = com->lt_in.c_ospeed = com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate; } else com->it_in.c_ispeed = com->it_in.c_ospeed = TTYDEF_SPEED; if (siosetwater(com, com->it_in.c_ispeed) != 0) { enable_intr(); /* * Leave i/o resources allocated if this is a `cn'-level * console, so that other devices can't snarf them. */ if (iobase != siocniobase) bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } enable_intr(); termioschars(&com->it_in); com->it_out = com->it_in; /* attempt to determine UART type */ printf("sio%d: type", unit); #ifdef COM_MULTIPORT if (!COM_ISMULTIPORT(flags) && !COM_IIR_TXRDYBUG(flags)) #else if (!COM_IIR_TXRDYBUG(flags)) #endif { u_char scr; u_char scr1; u_char scr2; scr = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0xa5); scr1 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0x5a); scr2 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, scr); if (scr1 != 0xa5 || scr2 != 0x5a) { printf(" 8250"); goto determined_type; } } sio_setreg(com, com_fifo, FIFO_ENABLE | FIFO_RX_HIGH); DELAY(100); com->st16650a = 0; switch (inb(com->int_id_port) & IIR_FIFO_MASK) { case FIFO_RX_LOW: printf(" 16450"); break; case FIFO_RX_MEDL: printf(" 16450?"); break; case FIFO_RX_MEDH: printf(" 16550?"); break; case FIFO_RX_HIGH: if (COM_NOFIFO(flags)) { printf(" 16550A fifo disabled"); } else { com->hasfifo = TRUE; if (COM_ST16650A(flags)) { com->st16650a = 1; com->tx_fifo_size = 32; printf(" ST16650A"); } else { com->tx_fifo_size = COM_FIFOSIZE(flags); printf(" 16550A"); } } #ifdef COM_ESP for (espp = likely_esp_ports; *espp != 0; espp++) if (espattach(com, *espp)) { com->tx_fifo_size = 1024; break; } #endif if (!com->st16650a) { if (!com->tx_fifo_size) com->tx_fifo_size = 16; else printf(" lookalike with %d bytes FIFO", com->tx_fifo_size); } break; } #ifdef COM_ESP if (com->esp) { /* * Set 16550 compatibility mode. * We don't use the ESP_MODE_SCALE bit to increase the * fifo trigger levels because we can't handle large * bursts of input. * XXX flow control should be set in comparam(), not here. */ outb(com->esp_port + ESP_CMD1, ESP_SETMODE); outb(com->esp_port + ESP_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); /* Set RTS/CTS flow control. */ outb(com->esp_port + ESP_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP_CMD2, ESP_FLOW_CTS); /* Set flow-control levels. */ outb(com->esp_port + ESP_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP_CMD2, HIBYTE(768)); outb(com->esp_port + ESP_CMD2, LOBYTE(768)); outb(com->esp_port + ESP_CMD2, HIBYTE(512)); outb(com->esp_port + ESP_CMD2, LOBYTE(512)); } #endif /* COM_ESP */ sio_setreg(com, com_fifo, 0); determined_type: ; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { device_t masterdev; com->multiport = TRUE; printf(" (multiport"); if (unit == COM_MPMASTER(flags)) printf(" master"); printf(")"); masterdev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); com->no_irq = (masterdev == NULL || bus_get_resource(masterdev, SYS_RES_IRQ, 0, NULL, NULL) != 0); } #endif /* COM_MULTIPORT */ if (unit == comconsole) printf(", console"); if (COM_IIR_TXRDYBUG(flags)) printf(" with a bogus IIR_TXRDY register"); printf("\n"); if (!sio_registered) { register_swi(SWI_TTY, siopoll); sio_registered = TRUE; } make_dev(&sio_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, "ttyd%r", unit); make_dev(&sio_cdevsw, unit | CONTROL_INIT_STATE, UID_ROOT, GID_WHEEL, 0600, "ttyid%r", unit); make_dev(&sio_cdevsw, unit | CONTROL_LOCK_STATE, UID_ROOT, GID_WHEEL, 0600, "ttyld%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK, UID_UUCP, GID_DIALER, 0660, "cuaa%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK | CONTROL_INIT_STATE, UID_UUCP, GID_DIALER, 0660, "cuaia%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK | CONTROL_LOCK_STATE, UID_UUCP, GID_DIALER, 0660, "cuala%r", unit); com->flags = flags; com->pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR; pps_init(&com->pps); rid = 0; com->irqres = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0ul, ~0ul, 1, RF_ACTIVE); if (com->irqres) { ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres, INTR_TYPE_TTY | INTR_TYPE_FAST, siointr, com, &com->cookie); if (ret) { ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres, INTR_TYPE_TTY, siointr, com, &com->cookie); if (ret == 0) device_printf(dev, "unable to activate interrupt in fast mode - using normal mode"); } if (ret) device_printf(dev, "could not activate interrupt\n"); } return (0); } static int sioopen(dev, flag, mode, p) dev_t dev; int flag; int mode; struct proc *p; { struct com_s *com; int error; int mynor; int s; struct tty *tp; int unit; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); if (com == NULL) return (ENXIO); if (com->gone) return (ENXIO); if (mynor & CONTROL_MASK) return (0); tp = dev->si_tty = com->tp = ttymalloc(com->tp); s = spltty(); /* * We jump to this label after all non-interrupted sleeps to pick * up any changes of the device state. */ open_top: while (com->state & CS_DTR_OFF) { error = tsleep(&com->dtr_wait, TTIPRI | PCATCH, "siodtr", 0); if (com_addr(unit) == NULL) return (ENXIO); if (error != 0 || com->gone) goto out; } if (tp->t_state & TS_ISOPEN) { /* * The device is open, so everything has been initialized. * Handle conflicts. */ if (mynor & CALLOUT_MASK) { if (!com->active_out) { error = EBUSY; goto out; } } else { if (com->active_out) { if (flag & O_NONBLOCK) { error = EBUSY; goto out; } error = tsleep(&com->active_out, TTIPRI | PCATCH, "siobi", 0); if (com_addr(unit) == NULL) return (ENXIO); if (error != 0 || com->gone) goto out; goto open_top; } } if (tp->t_state & TS_XCLUDE && suser(p)) { error = EBUSY; goto out; } } else { /* * The device isn't open, so there are no conflicts. * Initialize it. Initialization is done twice in many * cases: to preempt sleeping callin opens if we are * callout, and to complete a callin open after DCD rises. */ tp->t_oproc = comstart; tp->t_param = comparam; tp->t_stop = comstop; tp->t_dev = dev; tp->t_termios = mynor & CALLOUT_MASK ? com->it_out : com->it_in; (void)commctl(com, TIOCM_DTR | TIOCM_RTS, DMSET); com->poll = com->no_irq; com->poll_output = com->loses_outints; ++com->wopeners; error = comparam(tp, &tp->t_termios); --com->wopeners; if (error != 0) goto out; /* * XXX we should goto open_top if comparam() slept. */ if (com->hasfifo) { /* * (Re)enable and drain fifos. * * Certain SMC chips cause problems if the fifos * are enabled while input is ready. Turn off the * fifo if necessary to clear the input. We test * the input ready bit after enabling the fifos * since we've already enabled them in comparam() * and to handle races between enabling and fresh * input. */ while (TRUE) { sio_setreg(com, com_fifo, FIFO_RCV_RST | FIFO_XMT_RST | com->fifo_image); /* * XXX the delays are for superstitious * historical reasons. It must be less than * the character time at the maximum * supported speed (87 usec at 115200 bps * 8N1). Otherwise we might loop endlessly * if data is streaming in. We used to use * delays of 100. That usually worked * because DELAY(100) used to usually delay * for about 85 usec instead of 100. */ DELAY(50); if (!(inb(com->line_status_port) & LSR_RXRDY)) break; sio_setreg(com, com_fifo, 0); DELAY(50); (void) inb(com->data_port); } } disable_intr(); (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status = inb(com->modem_status_port); if (COM_IIR_TXRDYBUG(com->flags)) { outb(com->intr_ctl_port, IER_ERXRDY | IER_ERLS | IER_EMSC); } else { outb(com->intr_ctl_port, IER_ERXRDY | IER_ETXRDY | IER_ERLS | IER_EMSC); } enable_intr(); /* * Handle initial DCD. Callout devices get a fake initial * DCD (trapdoor DCD). If we are callout, then any sleeping * callin opens get woken up and resume sleeping on "siobi" * instead of "siodcd". */ /* * XXX `mynor & CALLOUT_MASK' should be * `tp->t_cflag & (SOFT_CARRIER | TRAPDOOR_CARRIER) where * TRAPDOOR_CARRIER is the default initial state for callout * devices and SOFT_CARRIER is like CLOCAL except it hides * the true carrier. */ if (com->prev_modem_status & MSR_DCD || mynor & CALLOUT_MASK) (*linesw[tp->t_line].l_modem)(tp, 1); } /* * Wait for DCD if necessary. */ if (!(tp->t_state & TS_CARR_ON) && !(mynor & CALLOUT_MASK) && !(tp->t_cflag & CLOCAL) && !(flag & O_NONBLOCK)) { ++com->wopeners; error = tsleep(TSA_CARR_ON(tp), TTIPRI | PCATCH, "siodcd", 0); if (com_addr(unit) == NULL) return (ENXIO); --com->wopeners; if (error != 0 || com->gone) goto out; goto open_top; } error = (*linesw[tp->t_line].l_open)(dev, tp); disc_optim(tp, &tp->t_termios, com); if (tp->t_state & TS_ISOPEN && mynor & CALLOUT_MASK) com->active_out = TRUE; siosettimeout(); out: splx(s); if (!(tp->t_state & TS_ISOPEN) && com->wopeners == 0) comhardclose(com); return (error); } static int sioclose(dev, flag, mode, p) dev_t dev; int flag; int mode; struct proc *p; { struct com_s *com; int mynor; int s; struct tty *tp; mynor = minor(dev); if (mynor & CONTROL_MASK) return (0); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL) return (ENODEV); tp = com->tp; s = spltty(); (*linesw[tp->t_line].l_close)(tp, flag); disc_optim(tp, &tp->t_termios, com); comstop(tp, FREAD | FWRITE); comhardclose(com); ttyclose(tp); siosettimeout(); splx(s); if (com->gone) { printf("sio%d: gone\n", com->unit); s = spltty(); if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); bzero(tp, sizeof *tp); splx(s); } return (0); } static void comhardclose(com) struct com_s *com; { int s; struct tty *tp; int unit; unit = com->unit; s = spltty(); com->poll = FALSE; com->poll_output = FALSE; com->do_timestamp = FALSE; com->do_dcd_timestamp = FALSE; com->pps.ppsparam.mode = 0; sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); { sio_setreg(com, com_ier, 0); tp = com->tp; if (tp->t_cflag & HUPCL /* * XXX we will miss any carrier drop between here and the * next open. Perhaps we should watch DCD even when the * port is closed; it is not sufficient to check it at * the next open because it might go up and down while * we're not watching. */ || (!com->active_out && !(com->prev_modem_status & MSR_DCD) && !(com->it_in.c_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN)) { (void)commctl(com, TIOCM_DTR, DMBIC); if (com->dtr_wait != 0 && !(com->state & CS_DTR_OFF)) { timeout(siodtrwakeup, com, com->dtr_wait); com->state |= CS_DTR_OFF; } } } if (com->hasfifo) { /* * Disable fifos so that they are off after controlled * reboots. Some BIOSes fail to detect 16550s when the * fifos are enabled. */ sio_setreg(com, com_fifo, 0); } com->active_out = FALSE; wakeup(&com->active_out); wakeup(TSA_CARR_ON(tp)); /* restart any wopeners */ splx(s); } static int sioread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int mynor; struct com_s *com; mynor = minor(dev); if (mynor & CONTROL_MASK) return (ENODEV); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL || com->gone) return (ENODEV); return ((*linesw[com->tp->t_line].l_read)(com->tp, uio, flag)); } static int siowrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int mynor; struct com_s *com; int unit; mynor = minor(dev); if (mynor & CONTROL_MASK) return (ENODEV); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); if (com == NULL || com->gone) return (ENODEV); /* * (XXX) We disallow virtual consoles if the physical console is * a serial port. This is in case there is a display attached that * is not the console. In that situation we don't need/want the X * server taking over the console. */ if (constty != NULL && unit == comconsole) constty = NULL; return ((*linesw[com->tp->t_line].l_write)(com->tp, uio, flag)); } static void siobusycheck(chan) void *chan; { struct com_s *com; int s; com = (struct com_s *)chan; /* * Clear TS_BUSY if low-level output is complete. * spl locking is sufficient because siointr1() does not set CS_BUSY. * If siointr1() clears CS_BUSY after we look at it, then we'll get * called again. Reading the line status port outside of siointr1() * is safe because CS_BUSY is clear so there are no output interrupts * to lose. */ s = spltty(); if (com->state & CS_BUSY) com->extra_state &= ~CSE_BUSYCHECK; /* False alarm. */ else if ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY)) { com->tp->t_state &= ~TS_BUSY; ttwwakeup(com->tp); com->extra_state &= ~CSE_BUSYCHECK; } else timeout(siobusycheck, com, hz / 100); splx(s); } static void siodtrwakeup(chan) void *chan; { struct com_s *com; com = (struct com_s *)chan; com->state &= ~CS_DTR_OFF; wakeup(&com->dtr_wait); } static void sioinput(com) struct com_s *com; { u_char *buf; int incc; u_char line_status; int recv_data; struct tty *tp; buf = com->ibuf; tp = com->tp; if (!(tp->t_state & TS_ISOPEN) || !(tp->t_cflag & CREAD)) { com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; return; } if (tp->t_state & TS_CAN_BYPASS_L_RINT) { /* * Avoid the grotesquely inefficient lineswitch routine * (ttyinput) in "raw" mode. It usually takes about 450 * instructions (that's without canonical processing or echo!). * slinput is reasonably fast (usually 40 instructions plus * call overhead). */ do { enable_intr(); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat && (com->state & CS_RTS_IFLOW || tp->t_iflag & IXOFF) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); com->delta_error_counts[CE_TTY_BUF_OVERFLOW] += b_to_q((char *)buf, incc, &tp->t_rawq); buf += incc; tk_nin += incc; tk_rawcc += incc; tp->t_rawcc += incc; ttwakeup(tp); if (tp->t_state & TS_TTSTOP && (tp->t_iflag & IXANY || tp->t_cc[VSTART] == tp->t_cc[VSTOP])) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; comstart(tp); } disable_intr(); } while (buf < com->iptr); } else { do { enable_intr(); line_status = buf[com->ierroff]; recv_data = *buf++; if (line_status & (LSR_BI | LSR_FE | LSR_OE | LSR_PE)) { if (line_status & LSR_BI) recv_data |= TTY_BI; if (line_status & LSR_FE) recv_data |= TTY_FE; if (line_status & LSR_OE) recv_data |= TTY_OE; if (line_status & LSR_PE) recv_data |= TTY_PE; } (*linesw[tp->t_line].l_rint)(recv_data, tp); disable_intr(); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; /* * There is now room for another low-level buffer full of input, * so enable RTS if it is now disabled and there is room in the * high-level buffer. */ if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } void siointr(arg) void *arg; { #ifndef COM_MULTIPORT COM_LOCK(); siointr1((struct com_s *) arg); COM_UNLOCK(); #else /* COM_MULTIPORT */ bool_t possibly_more_intrs; int unit; struct com_s *com; /* * Loop until there is no activity on any port. This is necessary * to get an interrupt edge more than to avoid another interrupt. * If the IRQ signal is just an OR of the IRQ signals from several * devices, then the edge from one may be lost because another is * on. */ COM_LOCK(); do { possibly_more_intrs = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); /* * XXX COM_LOCK(); * would it work here, or be counter-productive? */ if (com != NULL && !com->gone && (inb(com->int_id_port) & IIR_IMASK) != IIR_NOPEND) { siointr1(com); possibly_more_intrs = TRUE; } /* XXX COM_UNLOCK(); */ } } while (possibly_more_intrs); COM_UNLOCK(); #endif /* COM_MULTIPORT */ } static void siointr1(com) struct com_s *com; { u_char line_status; u_char modem_status; u_char *ioptr; u_char recv_data; u_char int_ctl; u_char int_ctl_new; struct timecounter *tc; u_int count; int_ctl = inb(com->intr_ctl_port); int_ctl_new = int_ctl; while (!com->gone) { if (com->pps.ppsparam.mode & PPS_CAPTUREBOTH) { modem_status = inb(com->modem_status_port); if ((modem_status ^ com->last_modem_status) & MSR_DCD) { tc = timecounter; count = tc->tc_get_timecount(tc); pps_event(&com->pps, tc, count, (modem_status & MSR_DCD) ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR); } } line_status = inb(com->line_status_port); /* input event? (check first to help avoid overruns) */ while (line_status & LSR_RCV_MASK) { /* break/unnattached error bits or real input? */ if (!(line_status & LSR_RXRDY)) recv_data = 0; else recv_data = inb(com->data_port); #if defined(DDB) && defined(ALT_BREAK_TO_DEBUGGER) /* * Solaris implements a new BREAK which is initiated * by a character sequence CR ~ ^b which is similar * to a familiar pattern used on Sun servers by the * Remote Console. */ #define KEY_CRTLB 2 /* ^B */ #define KEY_CR 13 /* CR '\r' */ #define KEY_TILDE 126 /* ~ */ if (com->unit == comconsole) { static int brk_state1 = 0, brk_state2 = 0; if (recv_data == KEY_CR) { brk_state1 = recv_data; brk_state2 = 0; } else if (brk_state1 == KEY_CR && (recv_data == KEY_TILDE || recv_data == KEY_CRTLB)) { if (recv_data == KEY_TILDE) brk_state2 = recv_data; else if (brk_state2 == KEY_TILDE && recv_data == KEY_CRTLB) { breakpoint(); brk_state1 = brk_state2 = 0; goto cont; } else brk_state2 = 0; } else brk_state1 = 0; } #endif if (line_status & (LSR_BI | LSR_FE | LSR_PE)) { /* * Don't store BI if IGNBRK or FE/PE if IGNPAR. * Otherwise, push the work to a higher level * (to handle PARMRK) if we're bypassing. * Otherwise, convert BI/FE and PE+INPCK to 0. * * This makes bypassing work right in the * usual "raw" case (IGNBRK set, and IGNPAR * and INPCK clear). * * Note: BI together with FE/PE means just BI. */ if (line_status & LSR_BI) { #if defined(DDB) && defined(BREAK_TO_DEBUGGER) if (com->unit == comconsole) { breakpoint(); goto cont; } #endif if (com->tp == NULL || com->tp->t_iflag & IGNBRK) goto cont; } else { if (com->tp == NULL || com->tp->t_iflag & IGNPAR) goto cont; } if (com->tp->t_state & TS_CAN_BYPASS_L_RINT && (line_status & (LSR_BI | LSR_FE) || com->tp->t_iflag & INPCK)) recv_data = 0; } ++com->bytes_in; if (com->hotchar != 0 && recv_data == com->hotchar) setsofttty(); ioptr = com->iptr; if (ioptr >= com->ibufend) CE_RECORD(com, CE_INTERRUPT_BUF_OVERFLOW); else { if (com->do_timestamp) microtime(&com->timestamp); ++com_events; schedsofttty(); #if 0 /* for testing input latency vs efficiency */ if (com->iptr - com->ibuf == 8) setsofttty(); #endif ioptr[0] = recv_data; ioptr[com->ierroff] = line_status; com->iptr = ++ioptr; if (ioptr == com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); if (line_status & LSR_OE) CE_RECORD(com, CE_OVERRUN); } cont: /* * "& 0x7F" is to avoid the gcc-1.40 generating a slow * jump from the top of the loop to here */ line_status = inb(com->line_status_port) & 0x7F; } /* modem status change? (always check before doing output) */ modem_status = inb(com->modem_status_port); if (modem_status != com->last_modem_status) { if (com->do_dcd_timestamp && !(com->last_modem_status & MSR_DCD) && modem_status & MSR_DCD) microtime(&com->dcd_timestamp); /* * Schedule high level to handle DCD changes. Note * that we don't use the delta bits anywhere. Some * UARTs mess them up, and it's easy to remember the * previous bits and calculate the delta. */ com->last_modem_status = modem_status; if (!(com->state & CS_CHECKMSR)) { com_events += LOTS_OF_EVENTS; com->state |= CS_CHECKMSR; setsofttty(); } /* handle CTS change immediately for crisp flow ctl */ if (com->state & CS_CTS_OFLOW) { if (modem_status & MSR_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } } /* output queued and everything ready? */ if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { ioptr = com->obufq.l_head; if (com->tx_fifo_size > 1) { u_int ocount; ocount = com->obufq.l_tail - ioptr; if (ocount > com->tx_fifo_size) ocount = com->tx_fifo_size; com->bytes_out += ocount; do outb(com->data_port, *ioptr++); while (--ocount != 0); } else { outb(com->data_port, *ioptr++); ++com->bytes_out; } com->obufq.l_head = ioptr; if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl_new = int_ctl | IER_ETXRDY; } if (ioptr >= com->obufq.l_tail) { struct lbq *qp; qp = com->obufq.l_next; qp->l_queued = FALSE; qp = qp->l_next; if (qp != NULL) { com->obufq.l_head = qp->l_head; com->obufq.l_tail = qp->l_tail; com->obufq.l_next = qp; } else { /* output just completed */ if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl_new = int_ctl & ~IER_ETXRDY; } com->state &= ~CS_BUSY; } if (!(com->state & CS_ODONE)) { com_events += LOTS_OF_EVENTS; com->state |= CS_ODONE; setsofttty(); /* handle at high level ASAP */ } } if (COM_IIR_TXRDYBUG(com->flags) && (int_ctl != int_ctl_new)) { outb(com->intr_ctl_port, int_ctl_new); } } /* finished? */ #ifndef COM_MULTIPORT if ((inb(com->int_id_port) & IIR_IMASK) == IIR_NOPEND) #endif /* COM_MULTIPORT */ return; } } static int sioioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { struct com_s *com; int error; int mynor; int s; struct tty *tp; #if defined(COMPAT_43) || defined(COMPAT_SUNOS) u_long oldcmd; struct termios term; #endif mynor = minor(dev); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL || com->gone) return (ENODEV); if (mynor & CONTROL_MASK) { struct termios *ct; switch (mynor & CONTROL_MASK) { case CONTROL_INIT_STATE: ct = mynor & CALLOUT_MASK ? &com->it_out : &com->it_in; break; case CONTROL_LOCK_STATE: ct = mynor & CALLOUT_MASK ? &com->lt_out : &com->lt_in; break; default: return (ENODEV); /* /dev/nodev */ } switch (cmd) { case TIOCSETA: error = suser(p); if (error != 0) return (error); *ct = *(struct termios *)data; return (0); case TIOCGETA: *(struct termios *)data = *ct; return (0); case TIOCGETD: *(int *)data = TTYDISC; return (0); case TIOCGWINSZ: bzero(data, sizeof(struct winsize)); return (0); default: return (ENOTTY); } } tp = com->tp; #if defined(COMPAT_43) || defined(COMPAT_SUNOS) term = tp->t_termios; oldcmd = cmd; error = ttsetcompat(tp, &cmd, data, &term); if (error != 0) return (error); if (cmd != oldcmd) data = (caddr_t)&term; #endif if (cmd == TIOCSETA || cmd == TIOCSETAW || cmd == TIOCSETAF) { int cc; struct termios *dt = (struct termios *)data; struct termios *lt = mynor & CALLOUT_MASK ? &com->lt_out : &com->lt_in; dt->c_iflag = (tp->t_iflag & lt->c_iflag) | (dt->c_iflag & ~lt->c_iflag); dt->c_oflag = (tp->t_oflag & lt->c_oflag) | (dt->c_oflag & ~lt->c_oflag); dt->c_cflag = (tp->t_cflag & lt->c_cflag) | (dt->c_cflag & ~lt->c_cflag); dt->c_lflag = (tp->t_lflag & lt->c_lflag) | (dt->c_lflag & ~lt->c_lflag); for (cc = 0; cc < NCCS; ++cc) if (lt->c_cc[cc] != 0) dt->c_cc[cc] = tp->t_cc[cc]; if (lt->c_ispeed != 0) dt->c_ispeed = tp->t_ispeed; if (lt->c_ospeed != 0) dt->c_ospeed = tp->t_ospeed; } error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); if (error != ENOIOCTL) return (error); s = spltty(); error = ttioctl(tp, cmd, data, flag); disc_optim(tp, &tp->t_termios, com); if (error != ENOIOCTL) { splx(s); return (error); } switch (cmd) { case TIOCSBRK: sio_setreg(com, com_cfcr, com->cfcr_image |= CFCR_SBREAK); break; case TIOCCBRK: sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); break; case TIOCSDTR: (void)commctl(com, TIOCM_DTR, DMBIS); break; case TIOCCDTR: (void)commctl(com, TIOCM_DTR, DMBIC); break; /* * XXX should disallow changing MCR_RTS if CS_RTS_IFLOW is set. The * changes get undone on the next call to comparam(). */ case TIOCMSET: (void)commctl(com, *(int *)data, DMSET); break; case TIOCMBIS: (void)commctl(com, *(int *)data, DMBIS); break; case TIOCMBIC: (void)commctl(com, *(int *)data, DMBIC); break; case TIOCMGET: *(int *)data = commctl(com, 0, DMGET); break; case TIOCMSDTRWAIT: /* must be root since the wait applies to following logins */ error = suser(p); if (error != 0) { splx(s); return (error); } com->dtr_wait = *(int *)data * hz / 100; break; case TIOCMGDTRWAIT: *(int *)data = com->dtr_wait * 100 / hz; break; case TIOCTIMESTAMP: com->do_timestamp = TRUE; *(struct timeval *)data = com->timestamp; break; case TIOCDCDTIMESTAMP: com->do_dcd_timestamp = TRUE; *(struct timeval *)data = com->dcd_timestamp; break; default: splx(s); error = pps_ioctl(cmd, data, &com->pps); if (error == ENODEV) error = ENOTTY; return (error); } splx(s); return (0); } static void siopoll() { int unit; if (com_events == 0) return; repeat: for (unit = 0; unit < sio_numunits; ++unit) { struct com_s *com; int incc; struct tty *tp; com = com_addr(unit); if (com == NULL) continue; tp = com->tp; if (tp == NULL || com->gone) { /* * Discard any events related to never-opened or * going-away devices. */ disable_intr(); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { incc += LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; } com_events -= incc; enable_intr(); continue; } if (com->iptr != com->ibuf) { disable_intr(); sioinput(com); enable_intr(); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; disable_intr(); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; enable_intr(); if (delta_modem_status & MSR_DCD) (*linesw[tp->t_line].l_modem) (tp, com->prev_modem_status & MSR_DCD); } if (com->state & CS_ODONE) { disable_intr(); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; enable_intr(); if (!(com->state & CS_BUSY) && !(com->extra_state & CSE_BUSYCHECK)) { timeout(siobusycheck, com, hz / 100); com->extra_state |= CSE_BUSYCHECK; } (*linesw[tp->t_line].l_start)(tp); } if (com_events == 0) break; } if (com_events >= LOTS_OF_EVENTS) goto repeat; } static int comparam(tp, t) struct tty *tp; struct termios *t; { u_int cfcr; int cflag; struct com_s *com; int divisor; u_char dlbh; u_char dlbl; int s; int unit; /* do historical conversions */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; /* check requested parameters */ divisor = ttspeedtab(t->c_ospeed, comspeedtab); if (divisor < 0 || (divisor > 0 && t->c_ispeed != t->c_ospeed)) return (EINVAL); /* parameters are OK, convert them to the com struct and the device */ unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); if (com == NULL) return (ENODEV); s = spltty(); if (divisor == 0) (void)commctl(com, TIOCM_DTR, DMBIC); /* hang up line */ else (void)commctl(com, TIOCM_DTR, DMBIS); cflag = t->c_cflag; switch (cflag & CSIZE) { case CS5: cfcr = CFCR_5BITS; break; case CS6: cfcr = CFCR_6BITS; break; case CS7: cfcr = CFCR_7BITS; break; default: cfcr = CFCR_8BITS; break; } if (cflag & PARENB) { cfcr |= CFCR_PENAB; if (!(cflag & PARODD)) cfcr |= CFCR_PEVEN; } if (cflag & CSTOPB) cfcr |= CFCR_STOPB; if (com->hasfifo && divisor != 0) { /* * Use a fifo trigger level low enough so that the input * latency from the fifo is less than about 16 msec and * the total latency is less than about 30 msec. These * latencies are reasonable for humans. Serial comms * protocols shouldn't expect anything better since modem * latencies are larger. */ com->fifo_image = t->c_ospeed <= 4800 ? FIFO_ENABLE : FIFO_ENABLE | FIFO_RX_HIGH; #ifdef COM_ESP /* * The Hayes ESP card needs the fifo DMA mode bit set * in compatibility mode. If not, it will interrupt * for each character received. */ if (com->esp) com->fifo_image |= FIFO_DMA_MODE; #endif sio_setreg(com, com_fifo, com->fifo_image); } /* * This returns with interrupts disabled so that we can complete * the speed change atomically. Keeping interrupts disabled is * especially important while com_data is hidden. */ (void) siosetwater(com, t->c_ispeed); if (divisor != 0) { sio_setreg(com, com_cfcr, cfcr | CFCR_DLAB); /* * Only set the divisor registers if they would change, * since on some 16550 incompatibles (UMC8669F), setting * them while input is arriving them loses sync until * data stops arriving. */ dlbl = divisor & 0xFF; if (sio_getreg(com, com_dlbl) != dlbl) sio_setreg(com, com_dlbl, dlbl); dlbh = (u_int) divisor >> 8; if (sio_getreg(com, com_dlbh) != dlbh) sio_setreg(com, com_dlbh, dlbh); } sio_setreg(com, com_cfcr, com->cfcr_image = cfcr); if (!(tp->t_state & TS_TTSTOP)) com->state |= CS_TTGO; if (cflag & CRTS_IFLOW) { if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) | 0x40); } com->state |= CS_RTS_IFLOW; /* * If CS_RTS_IFLOW just changed from off to on, the change * needs to be propagated to MCR_RTS. This isn't urgent, * so do it later by calling comstart() instead of repeating * a lot of code from comstart() here. */ } else if (com->state & CS_RTS_IFLOW) { com->state &= ~CS_RTS_IFLOW; /* * CS_RTS_IFLOW just changed from on to off. Force MCR_RTS * on here, since comstart() won't do it later. */ outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) & ~0x40); } } /* * Set up state to handle output flow control. * XXX - worth handling MDMBUF (DCD) flow control at the lowest level? * Now has 10+ msec latency, while CTS flow has 50- usec latency. */ com->state |= CS_ODEVREADY; com->state &= ~CS_CTS_OFLOW; if (cflag & CCTS_OFLOW) { com->state |= CS_CTS_OFLOW; if (!(com->last_modem_status & MSR_CTS)) com->state &= ~CS_ODEVREADY; if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) | 0x80); } } else { if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) & ~0x80); } } sio_setreg(com, com_cfcr, com->cfcr_image); /* XXX shouldn't call functions while intrs are disabled. */ disc_optim(tp, t, com); /* * Recover from fiddling with CS_TTGO. We used to call siointr1() * unconditionally, but that defeated the careful discarding of * stale input in sioopen(). */ if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); enable_intr(); splx(s); comstart(tp); if (com->ibufold != NULL) { free(com->ibufold, M_DEVBUF); com->ibufold = NULL; } return (0); } static int siosetwater(com, speed) struct com_s *com; speed_t speed; { int cp4ticks; u_char *ibuf; int ibufsize; struct tty *tp; /* * Make the buffer size large enough to handle a softtty interrupt * latency of about 2 ticks without loss of throughput or data * (about 3 ticks if input flow control is not used or not honoured, * but a bit less for CS5-CS7 modes). */ cp4ticks = speed / 10 / hz * 4; for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; if (ibufsize == com->ibufsize) { disable_intr(); return (0); } /* * Allocate input buffer. The extra factor of 2 in the size is * to allow for an error byte for each input byte. */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { disable_intr(); return (ENOMEM); } /* Initialize non-critical variables. */ com->ibufold = com->ibuf; com->ibufsize = ibufsize; tp = com->tp; if (tp != NULL) { tp->t_ififosize = 2 * ibufsize; tp->t_ispeedwat = (speed_t)-1; tp->t_ospeedwat = (speed_t)-1; } /* * Read current input buffer, if any. Continue with interrupts * disabled. */ disable_intr(); if (com->iptr != com->ibuf) sioinput(com); /*- * Initialize critical variables, including input buffer watermarks. * The external device is asked to stop sending when the buffer * exactly reaches high water, or when the high level requests it. * The high level is notified immediately (rather than at a later * clock tick) when this watermark is reached. * The buffer size is chosen so the watermark should almost never * be reached. * The low watermark is invisibly 0 since the buffer is always * emptied all at once. */ com->iptr = com->ibuf = ibuf; com->ibufend = ibuf + ibufsize; com->ierroff = ibufsize; com->ihighwater = ibuf + 3 * ibufsize / 4; return (0); } static void comstart(tp) struct tty *tp; { struct com_s *com; int s; int unit; unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); if (com == NULL) return; s = spltty(); disable_intr(); if (tp->t_state & TS_TTSTOP) com->state &= ~CS_TTGO; else com->state |= CS_TTGO; if (tp->t_state & TS_TBLOCK) { if (com->mcr_image & MCR_RTS && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); } else { if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } enable_intr(); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); return; } if (tp->t_outq.c_cc != 0) { struct lbq *qp; struct lbq *next; if (!com->obufs[0].l_queued) { com->obufs[0].l_tail = com->obuf1 + q_to_b(&tp->t_outq, com->obuf1, sizeof com->obuf1); com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; disable_intr(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[0]; } else { com->obufq.l_head = com->obufs[0].l_head; com->obufq.l_tail = com->obufs[0].l_tail; com->obufq.l_next = &com->obufs[0]; com->state |= CS_BUSY; } enable_intr(); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { com->obufs[1].l_tail = com->obuf2 + q_to_b(&tp->t_outq, com->obuf2, sizeof com->obuf2); com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; disable_intr(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[1]; } else { com->obufq.l_head = com->obufs[1].l_head; com->obufq.l_tail = com->obufs[1].l_tail; com->obufq.l_next = &com->obufs[1]; com->state |= CS_BUSY; } enable_intr(); } tp->t_state |= TS_BUSY; } disable_intr(); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ enable_intr(); ttwwakeup(tp); splx(s); } static void comstop(tp, rw) struct tty *tp; int rw; { struct com_s *com; com = com_addr(DEV_TO_UNIT(tp->t_dev)); if (com == NULL || com->gone) return; disable_intr(); if (rw & FWRITE) { if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); com->obufs[0].l_queued = FALSE; com->obufs[1].l_queued = FALSE; if (com->state & CS_ODONE) com_events -= LOTS_OF_EVENTS; com->state &= ~(CS_ODONE | CS_BUSY); com->tp->t_state &= ~TS_BUSY; } if (rw & FREAD) { if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_RCV_RST | com->fifo_image); com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } enable_intr(); comstart(tp); } static int commctl(com, bits, how) struct com_s *com; int bits; int how; { int mcr; int msr; if (how == DMGET) { bits = TIOCM_LE; /* XXX - always enabled while open */ mcr = com->mcr_image; if (mcr & MCR_DTR) bits |= TIOCM_DTR; if (mcr & MCR_RTS) bits |= TIOCM_RTS; msr = com->prev_modem_status; if (msr & MSR_CTS) bits |= TIOCM_CTS; if (msr & MSR_DCD) bits |= TIOCM_CD; if (msr & MSR_DSR) bits |= TIOCM_DSR; /* * XXX - MSR_RI is naturally volatile, and we make MSR_TERI * more volatile by reading the modem status a lot. Perhaps * we should latch both bits until the status is read here. */ if (msr & (MSR_RI | MSR_TERI)) bits |= TIOCM_RI; return (bits); } mcr = 0; if (bits & TIOCM_DTR) mcr |= MCR_DTR; if (bits & TIOCM_RTS) mcr |= MCR_RTS; if (com->gone) return(0); disable_intr(); switch (how) { case DMSET: outb(com->modem_ctl_port, com->mcr_image = mcr | (com->mcr_image & MCR_IENABLE)); break; case DMBIS: outb(com->modem_ctl_port, com->mcr_image |= mcr); break; case DMBIC: outb(com->modem_ctl_port, com->mcr_image &= ~mcr); break; } enable_intr(); return (0); } static void siosettimeout() { struct com_s *com; bool_t someopen; int unit; /* * Set our timeout period to 1 second if no polled devices are open. * Otherwise set it to max(1/200, 1/hz). * Enable timeouts iff some device is open. */ untimeout(comwakeup, (void *)NULL, sio_timeout_handle); sio_timeout = hz; someopen = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && com->tp != NULL && com->tp->t_state & TS_ISOPEN && !com->gone) { someopen = TRUE; if (com->poll || com->poll_output) { sio_timeout = hz > 200 ? hz / 200 : 1; break; } } } if (someopen) { sio_timeouts_until_log = hz / sio_timeout; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); } else { /* Flush error messages, if any. */ sio_timeouts_until_log = 1; comwakeup((void *)NULL); untimeout(comwakeup, (void *)NULL, sio_timeout_handle); } } static void comwakeup(chan) void *chan; { struct com_s *com; int unit; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); /* * Recover from lost output interrupts. * Poll any lines that don't use interrupts. */ for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && !com->gone && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { disable_intr(); siointr1(com); enable_intr(); } } /* * Check for and log errors, but not too often. */ if (--sio_timeouts_until_log > 0) return; sio_timeouts_until_log = hz / sio_timeout; for (unit = 0; unit < sio_numunits; ++unit) { int errnum; com = com_addr(unit); if (com == NULL) continue; if (com->gone) continue; for (errnum = 0; errnum < CE_NTYPES; ++errnum) { u_int delta; u_long total; disable_intr(); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; enable_intr(); if (delta == 0) continue; total = com->error_counts[errnum] += delta; log(LOG_ERR, "sio%d: %u more %s%s (total %lu)\n", unit, delta, error_desc[errnum], delta == 1 ? "" : "s", total); } } } static void disc_optim(tp, t, com) struct tty *tp; struct termios *t; struct com_s *com; { if (!(t->c_iflag & (ICRNL | IGNCR | IMAXBEL | INLCR | ISTRIP | IXON)) && (!(t->c_iflag & BRKINT) || (t->c_iflag & IGNBRK)) && (!(t->c_iflag & PARMRK) || (t->c_iflag & (IGNPAR | IGNBRK)) == (IGNPAR | IGNBRK)) && !(t->c_lflag & (ECHO | ICANON | IEXTEN | ISIG | PENDIN)) && linesw[tp->t_line].l_rint == ttyinput) tp->t_state |= TS_CAN_BYPASS_L_RINT; else tp->t_state &= ~TS_CAN_BYPASS_L_RINT; com->hotchar = linesw[tp->t_line].l_hotchar; } /* * Following are all routines needed for SIO to act as console */ #include struct siocnstate { u_char dlbl; u_char dlbh; u_char ier; u_char cfcr; u_char mcr; }; static speed_t siocngetspeed __P((Port_t, struct speedtab *)); static void siocnclose __P((struct siocnstate *sp, Port_t iobase)); static void siocnopen __P((struct siocnstate *sp, Port_t iobase, int speed)); static void siocntxwait __P((Port_t iobase)); static cn_probe_t siocnprobe; static cn_init_t siocninit; static cn_checkc_t siocncheckc; static cn_getc_t siocngetc; static cn_putc_t siocnputc; #ifdef __i386__ CONS_DRIVER(sio, siocnprobe, siocninit, NULL, siocngetc, siocncheckc, siocnputc, NULL); #endif /* To get the GDB related variables */ #if DDB > 0 #include #endif static void siocntxwait(iobase) Port_t iobase; { int timo; /* * Wait for any pending transmission to finish. Required to avoid * the UART lockup bug when the speed is changed, and for normal * transmits. */ timo = 100000; while ((inb(iobase + com_lsr) & (LSR_TSRE | LSR_TXRDY)) != (LSR_TSRE | LSR_TXRDY) && --timo != 0) ; } /* * Read the serial port specified and try to figure out what speed * it's currently running at. We're assuming the serial port has * been initialized and is basicly idle. This routine is only intended * to be run at system startup. * * If the value read from the serial port doesn't make sense, return 0. */ static speed_t siocngetspeed(iobase, table) Port_t iobase; struct speedtab *table; { int code; u_char dlbh; u_char dlbl; u_char cfcr; cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); dlbl = inb(iobase + com_dlbl); dlbh = inb(iobase + com_dlbh); outb(iobase + com_cfcr, cfcr); code = dlbh << 8 | dlbl; for (; table->sp_speed != -1; table++) if (table->sp_code == code) return (table->sp_speed); return (0); /* didn't match anything sane */ } static void siocnopen(sp, iobase, speed) struct siocnstate *sp; Port_t iobase; int speed; { int divisor; u_char dlbh; u_char dlbl; /* * Save all the device control registers except the fifo register * and set our default ones (cs8 -parenb speed=comdefaultrate). * We can't save the fifo register since it is read-only. */ sp->ier = inb(iobase + com_ier); outb(iobase + com_ier, 0); /* spltty() doesn't stop siointr() */ siocntxwait(iobase); sp->cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); sp->dlbl = inb(iobase + com_dlbl); sp->dlbh = inb(iobase + com_dlbh); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (Startech), setting them clears the * data input register. This also reduces the effects of the * UMC8669F bug. */ divisor = ttspeedtab(speed, comspeedtab); dlbl = divisor & 0xFF; if (sp->dlbl != dlbl) outb(iobase + com_dlbl, dlbl); dlbh = (u_int) divisor >> 8; if (sp->dlbh != dlbh) outb(iobase + com_dlbh, dlbh); outb(iobase + com_cfcr, CFCR_8BITS); sp->mcr = inb(iobase + com_mcr); /* * We don't want interrupts, but must be careful not to "disable" * them by clearing the MCR_IENABLE bit, since that might cause * an interrupt by floating the IRQ line. */ outb(iobase + com_mcr, (sp->mcr & MCR_IENABLE) | MCR_DTR | MCR_RTS); } static void siocnclose(sp, iobase) struct siocnstate *sp; Port_t iobase; { /* * Restore the device control registers. */ siocntxwait(iobase); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); if (sp->dlbl != inb(iobase + com_dlbl)) outb(iobase + com_dlbl, sp->dlbl); if (sp->dlbh != inb(iobase + com_dlbh)) outb(iobase + com_dlbh, sp->dlbh); outb(iobase + com_cfcr, sp->cfcr); /* * XXX damp oscillations of MCR_DTR and MCR_RTS by not restoring them. */ outb(iobase + com_mcr, sp->mcr | MCR_DTR | MCR_RTS); outb(iobase + com_ier, sp->ier); } static void siocnprobe(cp) struct consdev *cp; { speed_t boot_speed; u_char cfcr; int s, unit; struct siocnstate sp; /* * Find our first enabled console, if any. If it is a high-level * console device, then initialize it and return successfully. * If it is a low-level console device, then initialize it and * return unsuccessfully. It must be initialized in both cases * for early use by console drivers and debuggers. Initializing * the hardware is not necessary in all cases, since the i/o * routines initialize it on the fly, but it is necessary if * input might arrive while the hardware is switched back to an * uninitialized state. We can't handle multiple console devices * yet because our low-level routines don't take a device arg. * We trust the user to set the console flags properly so that we * don't need to probe. */ cp->cn_pri = CN_DEAD; for (unit = 0; unit < 16; unit++) { /* XXX need to know how many */ int flags; int disabled; if (resource_int_value("sio", unit, "disabled", &disabled) == 0) { if (disabled) continue; } if (resource_int_value("sio", unit, "flags", &flags)) continue; if (COM_CONSOLE(flags) || COM_DEBUGGER(flags)) { int port; Port_t iobase; if (resource_int_value("sio", unit, "port", &port)) continue; iobase = port; s = spltty(); if (boothowto & RB_SERIAL) { boot_speed = siocngetspeed(iobase, comspeedtab); if (boot_speed) comdefaultrate = boot_speed; } /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); outb(iobase + com_dlbl, COMBRD(comdefaultrate) & 0xff); outb(iobase + com_dlbh, (u_int) COMBRD(comdefaultrate) >> 8); outb(iobase + com_cfcr, cfcr); siocnopen(&sp, iobase, comdefaultrate); splx(s); if (COM_CONSOLE(flags) && !COM_LLCONSOLE(flags)) { cp->cn_dev = makedev(CDEV_MAJOR, unit); cp->cn_pri = COM_FORCECONSOLE(flags) || boothowto & RB_SERIAL ? CN_REMOTE : CN_NORMAL; siocniobase = iobase; siocnunit = unit; } if (COM_DEBUGGER(flags)) { printf("sio%d: gdb debugging port\n", unit); siogdbiobase = iobase; siogdbunit = unit; #if DDB > 0 gdbdev = makedev(CDEV_MAJOR, unit); gdb_getc = siocngetc; gdb_putc = siocnputc; #endif } } } #ifdef __i386__ #if DDB > 0 /* * XXX Ugly Compatability. * If no gdb port has been specified, set it to be the console * as some configuration files don't specify the gdb port. */ if (gdbdev == NODEV && (boothowto & RB_GDB)) { printf("Warning: no GDB port specified. Defaulting to sio%d.\n", siocnunit); printf("Set flag 0x80 on desired GDB port in your\n"); printf("configuration file (currently sio only).\n"); siogdbiobase = siocniobase; siogdbunit = siocnunit; gdbdev = makedev(CDEV_MAJOR, siocnunit); gdb_getc = siocngetc; gdb_putc = siocnputc; } #endif #endif } #ifdef __alpha__ CONS_DRIVER(sio, NULL, NULL, NULL, siocngetc, siocncheckc, siocnputc, NULL); int siocnattach(port, speed) int port; int speed; { int s; u_char cfcr; struct siocnstate sp; siocniobase = port; comdefaultrate = speed; sio_consdev.cn_pri = CN_NORMAL; sio_consdev.cn_dev = makedev(CDEV_MAJOR, 0); s = spltty(); /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(siocniobase + com_cfcr); outb(siocniobase + com_cfcr, CFCR_DLAB | cfcr); outb(siocniobase + com_dlbl, COMBRD(comdefaultrate) & 0xff); outb(siocniobase + com_dlbh, (u_int) COMBRD(comdefaultrate) >> 8); outb(siocniobase + com_cfcr, cfcr); siocnopen(&sp, siocniobase, comdefaultrate); splx(s); cn_tab = &sio_consdev; return (0); } int siogdbattach(port, speed) int port; int speed; { int s; u_char cfcr; struct siocnstate sp; siogdbiobase = port; gdbdefaultrate = speed; s = spltty(); /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(siogdbiobase + com_cfcr); outb(siogdbiobase + com_cfcr, CFCR_DLAB | cfcr); outb(siogdbiobase + com_dlbl, COMBRD(gdbdefaultrate) & 0xff); outb(siogdbiobase + com_dlbh, (u_int) COMBRD(gdbdefaultrate) >> 8); outb(siogdbiobase + com_cfcr, cfcr); siocnopen(&sp, siogdbiobase, gdbdefaultrate); splx(s); return (0); } #endif static void siocninit(cp) struct consdev *cp; { comconsole = DEV_TO_UNIT(cp->cn_dev); } static int siocncheckc(dev) dev_t dev; { int c; Port_t iobase; int s; struct siocnstate sp; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); if (inb(iobase + com_lsr) & LSR_RXRDY) c = inb(iobase + com_data); else c = -1; siocnclose(&sp, iobase); splx(s); return (c); } int siocngetc(dev) dev_t dev; { int c; Port_t iobase; int s; struct siocnstate sp; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); while (!(inb(iobase + com_lsr) & LSR_RXRDY)) ; c = inb(iobase + com_data); siocnclose(&sp, iobase); splx(s); return (c); } void siocnputc(dev, c) dev_t dev; int c; { int s; struct siocnstate sp; Port_t iobase; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); siocntxwait(iobase); outb(iobase + com_data, c); siocnclose(&sp, iobase); splx(s); } #ifdef __alpha__ int siogdbgetc() { int c; Port_t iobase; int s; struct siocnstate sp; iobase = siogdbiobase; s = spltty(); siocnopen(&sp, iobase, gdbdefaultrate); while (!(inb(iobase + com_lsr) & LSR_RXRDY)) ; c = inb(iobase + com_data); siocnclose(&sp, iobase); splx(s); return (c); } void siogdbputc(c) int c; { int s; struct siocnstate sp; s = spltty(); siocnopen(&sp, siogdbiobase, gdbdefaultrate); siocntxwait(siogdbiobase); outb(siogdbiobase + com_data, c); siocnclose(&sp, siogdbiobase); splx(s); } #endif DRIVER_MODULE(sio, isa, sio_isa_driver, sio_devclass, 0, 0); #if NCARD > 0 DRIVER_MODULE(sio, pccard, sio_pccard_driver, sio_devclass, 0, 0); #endif #if NPCI > 0 DRIVER_MODULE(sio, pci, sio_pci_driver, sio_devclass, 0, 0); #endif Index: head/sys/kern/kern_clock.c =================================================================== --- head/sys/kern/kern_clock.c (revision 62572) +++ head/sys/kern/kern_clock.c (revision 62573) @@ -1,431 +1,431 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 * $FreeBSD$ */ #include "opt_ntp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef GPROF #include #endif static void initclocks __P((void *dummy)); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) /* Some of these don't belong here, but it's easiest to concentrate them. */ #if defined(SMP) && defined(BETTER_CLOCK) long cp_time[CPUSTATES]; #else static long cp_time[CPUSTATES]; #endif long tk_cancc; long tk_nin; long tk_nout; long tk_rawcc; /* * Clock handling routines. * * This code is written to operate with two timers that run independently of * each other. * * The main timer, running hz times per second, is used to trigger interval * timers, timeouts and rescheduling as needed. * * The second timer handles kernel and user profiling, * and does resource use estimation. If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the cpu * just before its quantum expires. Otherwise, it would never accumulate * cpu ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. * * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. (For profiling, every tick counts.) * * Time-of-day is maintained using a "timecounter", which may or may * not be related to the hardware generating the above mentioned * interrupts. */ int stathz; int profhz; static int profprocs; int ticks; static int psdiv, pscnt; /* prof => stat divider */ int psratio; /* ratio: prof / stat */ /* * Initialize clock frequencies and start both clocks running. */ /* ARGSUSED*/ static void initclocks(dummy) void *dummy; { register int i; /* * Set divisors to 1 (normal case) and let the machine-specific * code do its bit. */ psdiv = pscnt = 1; cpu_initclocks(); /* * Compute profhz/stathz, and fix profhz if needed. */ i = stathz ? stathz : hz; if (profhz == 0) profhz = i; psratio = profhz / i; } /* * The real-time timer, interrupting hz times per second. */ void hardclock(frame) register struct clockframe *frame; { register struct proc *p; p = curproc; if (p) { register struct pstats *pstats; /* * Run current process's virtual and profile time, as needed. */ pstats = p->p_stats; if (CLKF_USERMODE(frame) && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) psignal(p, SIGVTALRM); if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) psignal(p, SIGPROF); } #if defined(SMP) && defined(BETTER_CLOCK) forward_hardclock(pscnt); #endif /* * If no separate statistics clock is available, run it from here. */ if (stathz == 0) statclock(frame); tc_windup(); ticks++; /* * Process callouts at a very low cpu priority, so we don't keep the * relatively high clock interrupt priority any longer than necessary. */ if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { if (CLKF_BASEPRI(frame)) { /* * Save the overhead of a software interrupt; * it will happen as soon as we return, so do it now. */ (void)splsoftclock(); softclock(); } else setsoftclock(); } else if (softticks + 1 == ticks) ++softticks; } /* * Compute number of ticks in the specified amount of time. */ int tvtohz(tv) struct timeval *tv; { register unsigned long ticks; register long sec, usec; /* * If the number of usecs in the whole seconds part of the time * difference fits in a long, then the total number of usecs will * fit in an unsigned long. Compute the total and convert it to * ticks, rounding up and adding 1 to allow for the current tick * to expire. Rounding also depends on unsigned long arithmetic * to avoid overflow. * * Otherwise, if the number of ticks in the whole seconds part of * the time difference fits in a long, then convert the parts to * ticks separately and add, using similar rounding methods and * overflow avoidance. This method would work in the previous * case but it is slightly slower and assumes that hz is integral. * * Otherwise, round the time difference down to the maximum * representable value. * * If ints have 32 bits, then the maximum value for any timeout in * 10ms ticks is 248 days. */ sec = tv->tv_sec; usec = tv->tv_usec; if (usec < 0) { sec--; usec += 1000000; } if (sec < 0) { #ifdef DIAGNOSTIC if (usec > 0) { sec++; usec -= 1000000; } printf("tvotohz: negative time difference %ld sec %ld usec\n", sec, usec); #endif ticks = 1; } else if (sec <= LONG_MAX / 1000000) ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) / tick + 1; else if (sec <= LONG_MAX / hz) ticks = sec * hz + ((unsigned long)usec + (tick - 1)) / tick + 1; else ticks = LONG_MAX; if (ticks > INT_MAX) ticks = INT_MAX; return ((int)ticks); } /* * Start profiling on a process. * * Kernel profiling passes proc0 which never exits and hence * keeps the profile clock running constantly. */ void startprofclock(p) register struct proc *p; { int s; if ((p->p_flag & P_PROFIL) == 0) { p->p_flag |= P_PROFIL; if (++profprocs == 1 && stathz != 0) { s = splstatclock(); psdiv = pscnt = psratio; setstatclockrate(profhz); splx(s); } } } /* * Stop profiling on a process. */ void stopprofclock(p) register struct proc *p; { int s; if (p->p_flag & P_PROFIL) { p->p_flag &= ~P_PROFIL; if (--profprocs == 0 && stathz != 0) { s = splstatclock(); psdiv = pscnt = 1; setstatclockrate(stathz); splx(s); } } } /* * Statistics clock. Grab profile sample, and if divider reaches 0, * do process and kernel statistics. Most of the statistics are only * used by user-level statistics programs. The main exceptions are * p->p_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu. */ void statclock(frame) register struct clockframe *frame; { #ifdef GPROF register struct gmonparam *g; int i; #endif register struct proc *p; struct pstats *pstats; long rss; struct rusage *ru; struct vmspace *vm; if (curproc != NULL && CLKF_USERMODE(frame)) { /* * Came from user mode; CPU was in user state. * If this process is being profiled, record the tick. */ p = curproc; if (p->p_flag & P_PROFIL) addupc_intr(p, CLKF_PC(frame), 1); #if defined(SMP) && defined(BETTER_CLOCK) if (stathz != 0) forward_statclock(pscnt); #endif if (--pscnt > 0) return; /* * Charge the time as appropriate. */ p->p_uticks++; if (p->p_nice > NZERO) cp_time[CP_NICE]++; else cp_time[CP_USER]++; } else { #ifdef GPROF /* * Kernel statistics are just like addupc_intr, only easier. */ g = &_gmonparam; if (g->state == GMON_PROF_ON) { i = CLKF_PC(frame) - g->lowpc; if (i < g->textsize) { i /= HISTFRACTION * sizeof(*g->kcount); g->kcount[i]++; } } #endif #if defined(SMP) && defined(BETTER_CLOCK) if (stathz != 0) forward_statclock(pscnt); #endif if (--pscnt > 0) return; /* * Came from kernel mode, so we were: * - handling an interrupt, * - doing syscall or trap work on behalf of the current * user process, or * - spinning in the idle loop. * Whichever it is, charge the time as appropriate. * Note that we charge interrupts to the current process, * regardless of whether they are ``for'' that process, * so that we know how much of its real time was spent * in ``non-process'' (i.e., interrupt) work. */ p = curproc; if (CLKF_INTR(frame)) { if (p != NULL) p->p_iticks++; cp_time[CP_INTR]++; } else if (p != NULL) { p->p_sticks++; cp_time[CP_SYS]++; } else cp_time[CP_IDLE]++; } pscnt = psdiv; if (p != NULL) { schedclock(p); /* Update resource usage integrals and maximums. */ if ((pstats = p->p_stats) != NULL && (ru = &pstats->p_ru) != NULL && (vm = p->p_vmspace) != NULL) { ru->ru_ixrss += pgtok(vm->vm_tsize); ru->ru_idrss += pgtok(vm->vm_dsize); ru->ru_isrss += pgtok(vm->vm_ssize); rss = pgtok(vmspace_resident_count(vm)); if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; } } } /* * Return information about system clocks. */ static int -sysctl_kern_clockrate (SYSCTL_HANDLER_ARGS) +sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) { struct clockinfo clkinfo; /* * Construct clockinfo structure. */ clkinfo.hz = hz; clkinfo.tick = tick; clkinfo.tickadj = tickadj; clkinfo.profhz = profhz; clkinfo.stathz = stathz ? stathz : hz; return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); } SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, 0, 0, sysctl_kern_clockrate, "S,clockinfo",""); Index: head/sys/kern/kern_descrip.c =================================================================== --- head/sys/kern/kern_descrip.c (revision 62572) +++ head/sys/kern/kern_descrip.c (revision 62573) @@ -1,1385 +1,1385 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 * $FreeBSD$ */ #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); MALLOC_DEFINE(M_FILE, "file", "Open file structure"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); static d_open_t fdopen; #define NUMFDESC 64 #define CDEV_MAJOR 22 static struct cdevsw fildesc_cdevsw = { /* open */ fdopen, /* close */ noclose, /* read */ noread, /* write */ nowrite, /* ioctl */ noioctl, /* poll */ nopoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ "FD", /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ 0, /* bmaj */ -1 }; static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval)); static int badfo_readwrite __P((struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct proc *p)); static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data, struct proc *p)); static int badfo_poll __P((struct file *fp, int events, struct ucred *cred, struct proc *p)); static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p)); static int badfo_close __P((struct file *fp, struct proc *p)); /* * Descriptor management. */ struct filelist filehead; /* head of list of open files */ int nfiles; /* actual number of open files */ extern int cmask; /* * System calls on descriptors. */ #ifndef _SYS_SYSPROTO_H_ struct getdtablesize_args { int dummy; }; #endif /* ARGSUSED */ int getdtablesize(p, uap) struct proc *p; struct getdtablesize_args *uap; { p->p_retval[0] = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); return (0); } /* * Duplicate a file descriptor to a particular value. */ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { u_int from; u_int to; }; #endif /* ARGSUSED */ int dup2(p, uap) struct proc *p; struct dup2_args *uap; { register struct filedesc *fdp = p->p_fd; register u_int old = uap->from, new = uap->to; int i, error; if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || new >= maxfilesperproc) return (EBADF); if (old == new) { p->p_retval[0] = new; return (0); } if (new >= fdp->fd_nfiles) { if ((error = fdalloc(p, new, &i))) return (error); if (new != i) panic("dup2: fdalloc"); } else if (fdp->fd_ofiles[new]) { if (fdp->fd_ofileflags[new] & UF_MAPPED) (void) munmapfd(p, new); /* * dup2() must succeed even if the close has an error. */ (void) closef(fdp->fd_ofiles[new], p); } return (finishdup(fdp, (int)old, (int)new, p->p_retval)); } /* * Duplicate a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct dup_args { u_int fd; }; #endif /* ARGSUSED */ int dup(p, uap) struct proc *p; struct dup_args *uap; { register struct filedesc *fdp; u_int old; int new, error; old = uap->fd; fdp = p->p_fd; if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) return (EBADF); if ((error = fdalloc(p, 0, &new))) return (error); return (finishdup(fdp, (int)old, new, p->p_retval)); } /* * The file control system call. */ #ifndef _SYS_SYSPROTO_H_ struct fcntl_args { int fd; int cmd; long arg; }; #endif /* ARGSUSED */ int fcntl(p, uap) struct proc *p; register struct fcntl_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register char *pop; struct vnode *vp; int i, tmp, error, flg = F_POSIX; struct flock fl; u_int newmin; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); pop = &fdp->fd_ofileflags[uap->fd]; switch (uap->cmd) { case F_DUPFD: newmin = uap->arg; if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || newmin >= maxfilesperproc) return (EINVAL); if ((error = fdalloc(p, newmin, &i))) return (error); return (finishdup(fdp, uap->fd, i, p->p_retval)); case F_GETFD: p->p_retval[0] = *pop & 1; return (0); case F_SETFD: *pop = (*pop &~ 1) | (uap->arg & 1); return (0); case F_GETFL: p->p_retval[0] = OFLAGS(fp->f_flag); return (0); case F_SETFL: fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); if (error) return (error); tmp = fp->f_flag & FASYNC; error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); if (!error) return (0); fp->f_flag &= ~FNONBLOCK; tmp = 0; (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); return (error); case F_GETOWN: return (fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p)); case F_SETOWN: return (fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p)); case F_SETLKW: flg |= F_WAIT; /* Fall into F_SETLK */ case F_SETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); if (error) return (error); if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; switch (fl.l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) return (EBADF); p->p_flag |= P_ADVLOCK; return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg)); case F_WRLCK: if ((fp->f_flag & FWRITE) == 0) return (EBADF); p->p_flag |= P_ADVLOCK; return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg)); case F_UNLCK: return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &fl, F_POSIX)); default: return (EINVAL); } case F_GETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); if (error) return (error); if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK && fl.l_type != F_UNLCK) return (EINVAL); if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; if ((error = VOP_ADVLOCK(vp,(caddr_t)p->p_leader,F_GETLK,&fl,F_POSIX))) return (error); return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg, sizeof(fl))); default: return (EINVAL); } /* NOTREACHED */ } /* * Common code for dup, dup2, and fcntl(F_DUPFD). */ static int finishdup(fdp, old, new, retval) register struct filedesc *fdp; register int old, new; register_t *retval; { register struct file *fp; fp = fdp->fd_ofiles[old]; fdp->fd_ofiles[new] = fp; fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; fhold(fp); if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; return (0); } /* * If sigio is on the list associated with a process or process group, * disable signalling from the device, remove sigio from the list and * free sigio. */ void funsetown(sigio) struct sigio *sigio; { int s; if (sigio == NULL) return; s = splhigh(); *(sigio->sio_myref) = NULL; splx(s); if (sigio->sio_pgid < 0) { SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, sigio, sio_pgsigio); } else /* if ((*sigiop)->sio_pgid > 0) */ { SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, sigio, sio_pgsigio); } crfree(sigio->sio_ucred); FREE(sigio, M_SIGIO); } /* Free a list of sigio structures. */ void funsetownlst(sigiolst) struct sigiolst *sigiolst; { struct sigio *sigio; while ((sigio = SLIST_FIRST(sigiolst)) != NULL) funsetown(sigio); } /* * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). * * After permission checking, add a sigio structure to the sigio list for * the process or process group. */ int fsetown(pgid, sigiop) pid_t pgid; struct sigio **sigiop; { struct proc *proc; struct pgrp *pgrp; struct sigio *sigio; int s; if (pgid == 0) { funsetown(*sigiop); return (0); } if (pgid > 0) { proc = pfind(pgid); if (proc == NULL) return (ESRCH); /* * Policy - Don't allow a process to FSETOWN a process * in another session. * * Remove this test to allow maximum flexibility or * restrict FSETOWN to the current process or process * group for maximum safety. */ if (proc->p_session != curproc->p_session) return (EPERM); pgrp = NULL; } else /* if (pgid < 0) */ { pgrp = pgfind(-pgid); if (pgrp == NULL) return (ESRCH); /* * Policy - Don't allow a process to FSETOWN a process * in another session. * * Remove this test to allow maximum flexibility or * restrict FSETOWN to the current process or process * group for maximum safety. */ if (pgrp->pg_session != curproc->p_session) return (EPERM); proc = NULL; } funsetown(*sigiop); MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); if (pgid > 0) { SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); sigio->sio_proc = proc; } else { SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); sigio->sio_pgrp = pgrp; } sigio->sio_pgid = pgid; crhold(curproc->p_ucred); sigio->sio_ucred = curproc->p_ucred; /* It would be convenient if p_ruid was in ucred. */ sigio->sio_ruid = curproc->p_cred->p_ruid; sigio->sio_myref = sigiop; s = splhigh(); *sigiop = sigio; splx(s); return (0); } /* * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). */ pid_t fgetown(sigio) struct sigio *sigio; { return (sigio != NULL ? sigio->sio_pgid : 0); } /* * Close a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct close_args { int fd; }; #endif /* ARGSUSED */ int close(p, uap) struct proc *p; struct close_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register int fd = uap->fd; register u_char *pf; if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); pf = (u_char *)&fdp->fd_ofileflags[fd]; if (*pf & UF_MAPPED) (void) munmapfd(p, fd); fdp->fd_ofiles[fd] = NULL; while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; *pf = 0; if (fd < fdp->fd_knlistsize) knote_fdclose(p, fd); return (closef(fp, p)); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ofstat_args { int fd; struct ostat *sb; }; #endif /* ARGSUSED */ int ofstat(p, uap) struct proc *p; register struct ofstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; struct ostat oub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); error = fo_stat(fp, &ub, p); if (error == 0) { cvtstat(&ub, &oub); error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub)); } return (error); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fstat_args { int fd; struct stat *sb; }; #endif /* ARGSUSED */ int fstat(p, uap) struct proc *p; register struct fstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); error = fo_stat(fp, &ub, p); if (error == 0) error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub)); return (error); } /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct nfstat_args { int fd; struct nstat *sb; }; #endif /* ARGSUSED */ int nfstat(p, uap) struct proc *p; register struct nfstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; struct nstat nub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); error = fo_stat(fp, &ub, p); if (error == 0) { cvtnstat(&ub, &nub); error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub)); } return (error); } /* * Return pathconf information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fpathconf_args { int fd; int name; }; #endif /* ARGSUSED */ int fpathconf(p, uap) struct proc *p; register struct fpathconf_args *uap; { struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_PIPE: case DTYPE_SOCKET: if (uap->name != _PC_PIPE_BUF) return (EINVAL); p->p_retval[0] = PIPE_BUF; return (0); case DTYPE_FIFO: case DTYPE_VNODE: vp = (struct vnode *)fp->f_data; return (VOP_PATHCONF(vp, uap->name, p->p_retval)); default: return (EOPNOTSUPP); } /*NOTREACHED*/ } /* * Allocate a file descriptor for the process. */ static int fdexpand; SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, ""); int fdalloc(p, want, result) struct proc *p; int want; int *result; { register struct filedesc *fdp = p->p_fd; register int i; int lim, last, nfiles; struct file **newofile; char *newofileflags; /* * Search for a free descriptor starting at the higher * of want or fd_freefile. If that fails, consider * expanding the ofile array. */ lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); for (;;) { last = min(fdp->fd_nfiles, lim); if ((i = want) < fdp->fd_freefile) i = fdp->fd_freefile; for (; i < last; i++) { if (fdp->fd_ofiles[i] == NULL) { fdp->fd_ofileflags[i] = 0; if (i > fdp->fd_lastfile) fdp->fd_lastfile = i; if (want <= fdp->fd_freefile) fdp->fd_freefile = i; *result = i; return (0); } } /* * No space in current array. Expand? */ if (fdp->fd_nfiles >= lim) return (EMFILE); if (fdp->fd_nfiles < NDEXTENT) nfiles = NDEXTENT; else nfiles = 2 * fdp->fd_nfiles; MALLOC(newofile, struct file **, nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); newofileflags = (char *) &newofile[nfiles]; /* * Copy the existing ofile and ofileflags arrays * and zero the new portion of each array. */ bcopy(fdp->fd_ofiles, newofile, (i = sizeof(struct file *) * fdp->fd_nfiles)); bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i); bcopy(fdp->fd_ofileflags, newofileflags, (i = sizeof(char) * fdp->fd_nfiles)); bzero(newofileflags + i, nfiles * sizeof(char) - i); if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); fdp->fd_ofiles = newofile; fdp->fd_ofileflags = newofileflags; fdp->fd_nfiles = nfiles; fdexpand++; } return (0); } /* * Check to see whether n user file descriptors * are available to the process p. */ int fdavail(p, n) struct proc *p; register int n; { register struct filedesc *fdp = p->p_fd; register struct file **fpp; register int i, lim, last; lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) return (1); last = min(fdp->fd_nfiles, lim); fpp = &fdp->fd_ofiles[fdp->fd_freefile]; for (i = last - fdp->fd_freefile; --i >= 0; fpp++) if (*fpp == NULL && --n <= 0) return (1); return (0); } /* * Create a new open file structure and allocate * a file decriptor for the process that refers to it. */ int falloc(p, resultfp, resultfd) register struct proc *p; struct file **resultfp; int *resultfd; { register struct file *fp, *fq; int error, i; if ((error = fdalloc(p, 0, &i))) return (error); if (nfiles >= maxfiles) { tablefull("file"); return (ENFILE); } /* * Allocate a new file descriptor. * If the process has file descriptor zero open, add to the list * of open files at that point, otherwise put it at the front of * the list of open files. */ nfiles++; MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK); bzero(fp, sizeof(struct file)); fp->f_count = 1; fp->f_cred = p->p_ucred; fp->f_ops = &badfileops; fp->f_seqcount = 1; crhold(fp->f_cred); if ((fq = p->p_fd->fd_ofiles[0])) { LIST_INSERT_AFTER(fq, fp, f_list); } else { LIST_INSERT_HEAD(&filehead, fp, f_list); } p->p_fd->fd_ofiles[i] = fp; if (resultfp) *resultfp = fp; if (resultfd) *resultfd = i; return (0); } /* * Free a file descriptor. */ void ffree(fp) register struct file *fp; { LIST_REMOVE(fp, f_list); crfree(fp->f_cred); #if defined(DIAGNOSTIC) || defined(INVARIANTS) fp->f_count = 0; #endif nfiles--; FREE(fp, M_FILE); } /* * Build a new filedesc structure. */ struct filedesc * fdinit(p) struct proc *p; { register struct filedesc0 *newfdp; register struct filedesc *fdp = p->p_fd; MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); bzero(newfdp, sizeof(struct filedesc0)); newfdp->fd_fd.fd_cdir = fdp->fd_cdir; VREF(newfdp->fd_fd.fd_cdir); newfdp->fd_fd.fd_rdir = fdp->fd_rdir; VREF(newfdp->fd_fd.fd_rdir); newfdp->fd_fd.fd_jdir = fdp->fd_jdir; if (newfdp->fd_fd.fd_jdir) VREF(newfdp->fd_fd.fd_jdir); /* Create the file descriptor table. */ newfdp->fd_fd.fd_refcnt = 1; newfdp->fd_fd.fd_cmask = cmask; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_knlistsize = -1; return (&newfdp->fd_fd); } /* * Share a filedesc structure. */ struct filedesc * fdshare(p) struct proc *p; { p->p_fd->fd_refcnt++; return (p->p_fd); } /* * Copy a filedesc structure. */ struct filedesc * fdcopy(p) struct proc *p; { register struct filedesc *newfdp, *fdp = p->p_fd; register struct file **fpp; register int i; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return (NULL); MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); bcopy(fdp, newfdp, sizeof(struct filedesc)); VREF(newfdp->fd_cdir); VREF(newfdp->fd_rdir); if (newfdp->fd_jdir) VREF(newfdp->fd_jdir); newfdp->fd_refcnt = 1; /* * If the number of open files fits in the internal arrays * of the open file structure, use them, otherwise allocate * additional memory for the number of descriptors currently * in use. */ if (newfdp->fd_lastfile < NDFILE) { newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; newfdp->fd_ofileflags = ((struct filedesc0 *) newfdp)->fd_dfileflags; i = NDFILE; } else { /* * Compute the smallest multiple of NDEXTENT needed * for the file descriptors currently in use, * allowing the table to shrink. */ i = newfdp->fd_nfiles; while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) i /= 2; MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE, M_FILEDESC, M_WAITOK); newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; } newfdp->fd_nfiles = i; bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **)); bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char)); /* * kq descriptors cannot be copied. */ if (newfdp->fd_knlistsize != -1) { fpp = newfdp->fd_ofiles; for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) *fpp = NULL; newfdp->fd_knlist = NULL; newfdp->fd_knlistsize = -1; newfdp->fd_knhash = NULL; newfdp->fd_knhashmask = 0; } fpp = newfdp->fd_ofiles; for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp != NULL) fhold(*fpp); return (newfdp); } /* * Release a filedesc structure. */ void fdfree(p) struct proc *p; { register struct filedesc *fdp = p->p_fd; struct file **fpp; register int i; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return; if (--fdp->fd_refcnt > 0) return; fpp = fdp->fd_ofiles; for (i = fdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp) (void) closef(*fpp, p); if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); vrele(fdp->fd_cdir); vrele(fdp->fd_rdir); if (fdp->fd_jdir) vrele(fdp->fd_jdir); if (fdp->fd_knlist) FREE(fdp->fd_knlist, M_TEMP); if (fdp->fd_knhash) FREE(fdp->fd_knhash, M_TEMP); FREE(fdp, M_FILEDESC); } /* * For setugid programs, we don't want to people to use that setugidness * to generate error messages which write to a file which otherwise would * otherwise be off-limits to the process. * * This is a gross hack to plug the hole. A better solution would involve * a special vop or other form of generalized access control mechanism. We * go ahead and just reject all procfs file systems accesses as dangerous. * * Since setugidsafety calls this only for fd 0, 1 and 2, this check is * sufficient. We also don't for check setugidness since we know we are. */ static int is_unsafe(struct file *fp) { if (fp->f_type == DTYPE_VNODE && ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS) return (1); return (0); } /* * Make this setguid thing safe, if at all possible. */ void setugidsafety(p) struct proc *p; { struct filedesc *fdp = p->p_fd; struct file **fpp; char *fdfp; register int i; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return; fpp = fdp->fd_ofiles; fdfp = fdp->fd_ofileflags; for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) { if (i > 2) break; if (*fpp != NULL && is_unsafe(*fpp)) { if ((*fdfp & UF_MAPPED) != 0) (void) munmapfd(p, i); (void) closef(*fpp, p); *fpp = NULL; *fdfp = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; } } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; } /* * Close any files on exec? */ void fdcloseexec(p) struct proc *p; { struct filedesc *fdp = p->p_fd; struct file **fpp; char *fdfp; register int i; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return; fpp = fdp->fd_ofiles; fdfp = fdp->fd_ofileflags; for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) { if (*fdfp & UF_MAPPED) (void) munmapfd(p, i); (void) closef(*fpp, p); *fpp = NULL; *fdfp = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; } /* * Internal form of close. * Decrement reference count on file structure. * Note: p may be NULL when closing a file * that was being passed in a message. */ int closef(fp, p) register struct file *fp; register struct proc *p; { struct vnode *vp; struct flock lf; if (fp == NULL) return (0); /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. */ if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX); } return (fdrop(fp, p)); } int fdrop(fp, p) struct file *fp; struct proc *p; { struct flock lf; struct vnode *vp; int error; if (--fp->f_count > 0) return (0); if (fp->f_count < 0) panic("fdrop: count < 0"); if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); } if (fp->f_ops != &badfileops) error = fo_close(fp, p); else error = 0; ffree(fp); return (error); } /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). */ #ifndef _SYS_SYSPROTO_H_ struct flock_args { int fd; int how; }; #endif /* ARGSUSED */ int flock(p, uap) struct proc *p; register struct flock_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vnode *vp; struct flock lf; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EOPNOTSUPP); vp = (struct vnode *)fp->f_data; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; fp->f_flag &= ~FHASLOCK; return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK)); } if (uap->how & LOCK_EX) lf.l_type = F_WRLCK; else if (uap->how & LOCK_SH) lf.l_type = F_RDLCK; else return (EBADF); fp->f_flag |= FHASLOCK; if (uap->how & LOCK_NB) return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK)); return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT)); } /* * File Descriptor pseudo-device driver (/dev/fd/). * * Opening minor device N dup()s the file (if any) connected to file * descriptor N belonging to the calling process. Note that this driver * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. */ /* ARGSUSED */ static int fdopen(dev, mode, type, p) dev_t dev; int mode, type; struct proc *p; { /* * XXX Kludge: set curproc->p_dupfd to contain the value of the * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN * will simply report the error. */ p->p_dupfd = minor(dev); return (ENODEV); } /* * Duplicate the specified descriptor to a free descriptor. */ int dupfdopen(fdp, indx, dfd, mode, error) register struct filedesc *fdp; register int indx, dfd; int mode; int error; { register struct file *wfp; struct file *fp; /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been * closed, reject. Note, check for new == old is necessary as * falloc could allocate an already closed to-be-dup'd descriptor * as the new descriptor. */ fp = fdp->fd_ofiles[indx]; if ((u_int)dfd >= fdp->fd_nfiles || (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp) return (EBADF); /* * There are two cases of interest here. * * For ENODEV simply dup (dfd) to file descriptor * (indx) and return. * * For ENXIO steal away the file structure from (dfd) and * store it in (indx). (dfd) is effectively closed by * this operation. * * Any other error code is just returned. */ switch (error) { case ENODEV: /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) return (EACCES); fdp->fd_ofiles[indx] = wfp; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fhold(wfp); if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; return (0); case ENXIO: /* * Steal away the file pointer from dfd, and stuff it into indx. */ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; fdp->fd_ofiles[dfd] = NULL; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fdp->fd_ofileflags[dfd] = 0; /* * Complete the clean up of the filedesc structure by * recomputing the various hints. */ if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; else while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (dfd < fdp->fd_freefile) fdp->fd_freefile = dfd; return (0); default: return (error); } /* NOTREACHED */ } /* * Get file structures. */ static int -sysctl_kern_file (SYSCTL_HANDLER_ARGS) +sysctl_kern_file(SYSCTL_HANDLER_ARGS) { int error; struct file *fp; if (!req->oldptr) { /* * overestimate by 10 files */ return (SYSCTL_OUT(req, 0, sizeof(filehead) + (nfiles + 10) * sizeof(struct file))); } error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead)); if (error) return (error); /* * followed by an array of file structures */ LIST_FOREACH(fp, &filehead, f_list) { error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file)); if (error) return (error); } return (0); } SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_kern_file, "S,file", "Entire file table"); SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, &maxfilesperproc, 0, "Maximum files allowed open per process"); SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "Maximum number of files"); static void fildesc_drvinit(void *unused) { int fd; for (fd = 0; fd < NUMFDESC; fd++) make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666, "fd/%d", fd); make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "stdin"); make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "stdout"); make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "stderr"); } struct fileops badfileops = { badfo_readwrite, badfo_readwrite, badfo_ioctl, badfo_poll, badfo_stat, badfo_close }; static int badfo_readwrite(fp, uio, cred, flags, p) struct file *fp; struct uio *uio; struct ucred *cred; struct proc *p; int flags; { return (EBADF); } static int badfo_ioctl(fp, com, data, p) struct file *fp; u_long com; caddr_t data; struct proc *p; { return (EBADF); } static int badfo_poll(fp, events, cred, p) struct file *fp; int events; struct ucred *cred; struct proc *p; { return (0); } static int badfo_stat(fp, sb, p) struct file *fp; struct stat *sb; struct proc *p; { return (EBADF); } static int badfo_close(fp, p) struct file *fp; struct proc *p; { return (EBADF); } SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR, fildesc_drvinit,NULL) Index: head/sys/kern/kern_environment.c =================================================================== --- head/sys/kern/kern_environment.c (revision 62572) +++ head/sys/kern/kern_environment.c (revision 62573) @@ -1,153 +1,153 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * The unified bootloader passes us a pointer to a preserved copy of * bootstrap/kernel environment variables. * We make these available using sysctl for both in-kernel and * out-of-kernel consumers. * * Note that the current sysctl infrastructure doesn't allow * dynamic insertion or traversal through handled spaces. Grr. */ #include #include #include #include #include char *kern_envp; static char *kernenv_next(char *cp); char * getenv(char *name) { char *cp, *ep; int len; for (cp = kern_envp; cp != NULL; cp = kernenv_next(cp)) { for (ep = cp; (*ep != '=') && (*ep != 0); ep++) ; len = ep - cp; if (*ep == '=') ep++; if (!strncmp(name, cp, len)) return(ep); } return(NULL); } /* * Return an integer value from an environment variable. */ int getenv_int(char *name, int *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) { *data = (int) tmp; } return (rval); } /* * Return a quad_t value from an environment variable. */ quad_t getenv_quad(char *name, quad_t *data) { const char *value; char *vtp; quad_t iv; if ((value = getenv(name)) == NULL) return(0); iv = strtoq(value, &vtp, 0); if ((vtp == value) || (*vtp != '\0')) return(0); *data = iv; return(1); } static int -sysctl_kernenv (SYSCTL_HANDLER_ARGS) +sysctl_kernenv(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; char *cp; int i, error; if (kern_envp == NULL) return(ENOENT); name++; namelen--; if (namelen != 1) return(EINVAL); cp = kern_envp; for (i = 0; i < name[0]; i++) { cp = kernenv_next(cp); if (cp == NULL) break; } if (cp == NULL) return(ENOENT); error = SYSCTL_OUT(req, cp, strlen(cp) + 1); return (error); } SYSCTL_NODE(_kern, OID_AUTO, environment, CTLFLAG_RD, sysctl_kernenv, "kernel environment space"); /* * Find the next entry after the one which (cp) falls within, return a * pointer to its start or NULL if there are no more. */ static char * kernenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return(cp); } Index: head/sys/kern/kern_fork.c =================================================================== --- head/sys/kern/kern_fork.c (revision 62572) +++ head/sys/kern/kern_fork.c (revision 62573) @@ -1,580 +1,580 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 * $FreeBSD$ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback"); static int fast_vfork = 1; SYSCTL_INT(_kern, OID_AUTO, fast_vfork, CTLFLAG_RW, &fast_vfork, 0, "flag to indicate whether we have a fast vfork()"); /* * These are the stuctures used to create a callout list for things to do * when forking a process */ struct forklist { forklist_fn function; TAILQ_ENTRY(forklist) next; }; TAILQ_HEAD(forklist_head, forklist); static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list); #ifndef _SYS_SYSPROTO_H_ struct fork_args { int dummy; }; #endif /* ARGSUSED */ int fork(p, uap) struct proc *p; struct fork_args *uap; { int error; struct proc *p2; error = fork1(p, RFFDG | RFPROC, &p2); if (error == 0) { p->p_retval[0] = p2->p_pid; p->p_retval[1] = 0; } return error; } /* ARGSUSED */ int vfork(p, uap) struct proc *p; struct vfork_args *uap; { int error; struct proc *p2; error = fork1(p, RFFDG | RFPROC | RFPPWAIT | RFMEM, &p2); if (error == 0) { p->p_retval[0] = p2->p_pid; p->p_retval[1] = 0; } return error; } int rfork(p, uap) struct proc *p; struct rfork_args *uap; { int error; struct proc *p2; error = fork1(p, uap->flags, &p2); if (error == 0) { p->p_retval[0] = p2 ? p2->p_pid : 0; p->p_retval[1] = 0; } return error; } int nprocs = 1; /* process 0 */ static int nextpid = 0; /* * Random component to nextpid generation. We mix in a random factor to make * it a little harder to predict. We sanity check the modulus value to avoid * doing it in critical paths. Don't let it be too small or we pointlessly * waste randomness entropy, and don't let it be impossibly large. Using a * modulus that is too big causes a LOT more process table scans and slows * down fork processing as the pidchecked caching is defeated. */ static int randompid = 0; static int -sysctl_kern_randompid (SYSCTL_HANDLER_ARGS) +sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) { int error, pid; pid = randompid; error = sysctl_handle_int(oidp, &pid, 0, req); if (error || !req->newptr) return (error); if (pid < 0 || pid > PID_MAX - 100) /* out of range */ pid = PID_MAX - 100; else if (pid < 2) /* NOP */ pid = 0; else if (pid < 100) /* Make it reasonable */ pid = 100; randompid = pid; return (error); } SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); int fork1(p1, flags, procp) struct proc *p1; int flags; struct proc **procp; { struct proc *p2, *pptr; uid_t uid; struct proc *newproc; int ok; static int pidchecked = 0; struct forklist *ep; if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) return (EINVAL); /* * Here we don't create a new process, but we divorce * certain parts of a process from itself. */ if ((flags & RFPROC) == 0) { vm_fork(p1, 0, flags); /* * Close all file descriptors. */ if (flags & RFCFDG) { struct filedesc *fdtmp; fdtmp = fdinit(p1); fdfree(p1); p1->p_fd = fdtmp; } /* * Unshare file descriptors (from parent.) */ if (flags & RFFDG) { if (p1->p_fd->fd_refcnt > 1) { struct filedesc *newfd; newfd = fdcopy(p1); fdfree(p1); p1->p_fd = newfd; } } *procp = NULL; return (0); } /* * Although process entries are dynamically created, we still keep * a global limit on the maximum number we will create. Don't allow * a nonprivileged user to use the last process; don't let root * exceed the limit. The variable nprocs is the current number of * processes, maxproc is the limit. */ uid = p1->p_cred->p_ruid; if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { tablefull("proc"); return (EAGAIN); } /* * Increment the nprocs resource before blocking can occur. There * are hard-limits as to the number of processes that can run. */ nprocs++; /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit. */ ok = chgproccnt(uid, 1, p1->p_rlimit[RLIMIT_NPROC].rlim_cur); if (uid != 0 && !ok) { /* * Back out the process count */ nprocs--; return (EAGAIN); } /* Allocate new proc. */ newproc = zalloc(proc_zone); /* * Setup linkage for kernel based threading */ if((flags & RFTHREAD) != 0) { newproc->p_peers = p1->p_peers; p1->p_peers = newproc; newproc->p_leader = p1->p_leader; } else { newproc->p_peers = 0; newproc->p_leader = newproc; } newproc->p_wakeup = 0; newproc->p_vmspace = NULL; /* * Find an unused process ID. We remember a range of unused IDs * ready to use (from nextpid+1 through pidchecked-1). */ nextpid++; if (randompid) nextpid += arc4random() % randompid; retry: /* * If the process ID prototype has wrapped around, * restart somewhat above 0, as the low-numbered procs * tend to include daemons that don't exit. */ if (nextpid >= PID_MAX) { nextpid = nextpid % PID_MAX; if (nextpid < 100) nextpid += 100; pidchecked = 0; } if (nextpid >= pidchecked) { int doingzomb = 0; pidchecked = PID_MAX; /* * Scan the active and zombie procs to check whether this pid * is in use. Remember the lowest pid that's greater * than nextpid, so we can avoid checking for a while. */ p2 = LIST_FIRST(&allproc); again: for (; p2 != 0; p2 = LIST_NEXT(p2, p_list)) { while (p2->p_pid == nextpid || p2->p_pgrp->pg_id == nextpid || p2->p_session->s_sid == nextpid) { nextpid++; if (nextpid >= pidchecked) goto retry; } if (p2->p_pid > nextpid && pidchecked > p2->p_pid) pidchecked = p2->p_pid; if (p2->p_pgrp->pg_id > nextpid && pidchecked > p2->p_pgrp->pg_id) pidchecked = p2->p_pgrp->pg_id; if (p2->p_session->s_sid > nextpid && pidchecked > p2->p_session->s_sid) pidchecked = p2->p_session->s_sid; } if (!doingzomb) { doingzomb = 1; p2 = LIST_FIRST(&zombproc); goto again; } } p2 = newproc; p2->p_stat = SIDL; /* protect against others */ p2->p_pid = nextpid; LIST_INSERT_HEAD(&allproc, p2, p_list); LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ bzero(&p2->p_startzero, (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero)); bcopy(&p1->p_startcopy, &p2->p_startcopy, (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy)); p2->p_aioinfo = NULL; /* * Duplicate sub-structures as needed. * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ p2->p_flag = P_INMEM; if (p1->p_flag & P_PROFIL) startprofclock(p2); MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred), M_SUBPROC, M_WAITOK); bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred)); p2->p_cred->p_refcnt = 1; crhold(p1->p_ucred); if (p2->p_prison) { p2->p_prison->pr_ref++; p2->p_flag |= P_JAILED; } if (p2->p_args) p2->p_args->ar_ref++; if (flags & RFSIGSHARE) { p2->p_procsig = p1->p_procsig; p2->p_procsig->ps_refcnt++; if (p1->p_sigacts == &p1->p_addr->u_sigacts) { struct sigacts *newsigacts; int s; /* Create the shared sigacts structure */ MALLOC(newsigacts, struct sigacts *, sizeof(struct sigacts), M_SUBPROC, M_WAITOK); s = splhigh(); /* * Set p_sigacts to the new shared structure. * Note that this is updating p1->p_sigacts at the * same time, since p_sigacts is just a pointer to * the shared p_procsig->ps_sigacts. */ p2->p_sigacts = newsigacts; bcopy(&p1->p_addr->u_sigacts, p2->p_sigacts, sizeof(*p2->p_sigacts)); *p2->p_sigacts = p1->p_addr->u_sigacts; splx(s); } } else { MALLOC(p2->p_procsig, struct procsig *, sizeof(struct procsig), M_SUBPROC, M_WAITOK); bcopy(p1->p_procsig, p2->p_procsig, sizeof(*p2->p_procsig)); p2->p_procsig->ps_refcnt = 1; p2->p_sigacts = NULL; /* finished in vm_fork() */ } if (flags & RFLINUXTHPN) p2->p_sigparent = SIGUSR1; else p2->p_sigparent = SIGCHLD; /* bump references to the text vnode (for procfs) */ p2->p_textvp = p1->p_textvp; if (p2->p_textvp) VREF(p2->p_textvp); if (flags & RFCFDG) p2->p_fd = fdinit(p1); else if (flags & RFFDG) p2->p_fd = fdcopy(p1); else p2->p_fd = fdshare(p1); /* * If p_limit is still copy-on-write, bump refcnt, * otherwise get a copy that won't be modified. * (If PL_SHAREMOD is clear, the structure is shared * copy-on-write.) */ if (p1->p_limit->p_lflags & PL_SHAREMOD) p2->p_limit = limcopy(p1->p_limit); else { p2->p_limit = p1->p_limit; p2->p_limit->p_refcnt++; } /* * Preserve some more flags in subprocess. P_PROFIL has already * been preserved. */ p2->p_flag |= p1->p_flag & P_SUGID; if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) p2->p_flag |= P_CONTROLT; if (flags & RFPPWAIT) p2->p_flag |= P_PPWAIT; LIST_INSERT_AFTER(p1, p2, p_pglist); /* * Attach the new process to its parent. * * If RFNOWAIT is set, the newly created process becomes a child * of init. This effectively disassociates the child from the * parent. */ if (flags & RFNOWAIT) pptr = initproc; else pptr = p1; p2->p_pptr = pptr; LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); LIST_INIT(&p2->p_children); #ifdef KTRACE /* * Copy traceflag and tracefile if enabled. * If not inherited, these were zeroed above. */ if (p1->p_traceflag&KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; if ((p2->p_tracep = p1->p_tracep) != NULL) VREF(p2->p_tracep); } #endif /* * set priority of child to be that of parent */ p2->p_estcpu = p1->p_estcpu; /* * This begins the section where we must prevent the parent * from being swapped. */ PHOLD(p1); /* * Finish creating the child process. It will return via a different * execution path later. (ie: directly into user mode) */ vm_fork(p1, p2, flags); /* * Both processes are set up, now check if any loadable modules want * to adjust anything. * What if they have an error? XXX */ TAILQ_FOREACH(ep, &fork_list, next) { (*ep->function)(p1, p2, flags); } /* * Make child runnable and add to run queue. */ microtime(&(p2->p_stats->p_start)); p2->p_acflag = AFORK; (void) splhigh(); p2->p_stat = SRUN; setrunqueue(p2); (void) spl0(); /* * Now can be swapped. */ PRELE(p1); /* * tell any interested parties about the new process */ KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid); /* * Preserve synchronization semantics of vfork. If waiting for * child to exec or exit, set P_PPWAIT on child, and sleep on our * proc (in case of exit). */ while (p2->p_flag & P_PPWAIT) tsleep(p1, PWAIT, "ppwait", 0); /* * Return child proc pointer to parent. */ *procp = p2; return (0); } /* * The next two functionms are general routines to handle adding/deleting * items on the fork callout list. * * at_fork(): * Take the arguments given and put them onto the fork callout list, * However first make sure that it's not already there. * Returns 0 on success or a standard error number. */ int at_fork(function) forklist_fn function; { struct forklist *ep; #ifdef INVARIANTS /* let the programmer know if he's been stupid */ if (rm_at_fork(function)) printf("WARNING: fork callout entry (%p) already present\n", function); #endif ep = malloc(sizeof(*ep), M_ATFORK, M_NOWAIT); if (ep == NULL) return (ENOMEM); ep->function = function; TAILQ_INSERT_TAIL(&fork_list, ep, next); return (0); } /* * Scan the exit callout list for the given item and remove it.. * Returns the number of items removed (0 or 1) */ int rm_at_fork(function) forklist_fn function; { struct forklist *ep; TAILQ_FOREACH(ep, &fork_list, next) { if (ep->function == function) { TAILQ_REMOVE(&fork_list, ep, next); free(ep, M_ATFORK); return(1); } } return (0); } Index: head/sys/kern/kern_mib.c =================================================================== --- head/sys/kern/kern_mib.c (revision 62572) +++ head/sys/kern/kern_mib.c (revision 62573) @@ -1,255 +1,255 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD * project, to make these variables more userfriendly. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 * $FreeBSD$ */ #include #include #include #include #include #include #include SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic"); SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW, 0, "High kernel, proc, limits &c"); SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0, "Virtual memory"); SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW, 0, "File system"); SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW, 0, "Network, (see socket.h)"); SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW, 0, "Debugging"); SYSCTL_NODE(_debug, OID_AUTO, sizeof, CTLFLAG_RW, 0, "Sizeof various things"); SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW, 0, "hardware"); SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW, 0, "machine dependent"); SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0, "user-level"); SYSCTL_NODE(, CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0, "p1003_1b, (see p1003_1b.h)"); SYSCTL_NODE(, OID_AUTO, compat, CTLFLAG_RW, 0, "Compatibility code"); SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD, osrelease, 0, "Operating system type"); SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD, 0, BSD, "Operating system revision"); SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD, version, 0, "Kernel version"); SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD, ostype, 0, "Operating system type"); extern int osreldate; SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD, &osreldate, 0, "Operating system release date"); SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RD, &maxproc, 0, "Maximum number of processes"); SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, &maxprocperuid, 0, "Maximum processes allowed per userid"); SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD, 0, ARG_MAX, "Maximum bytes of argument to execve(2)"); SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD, 0, _KPOSIX_VERSION, "Version of POSIX attempting to comply to"); SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RD, 0, NGROUPS_MAX, "Maximum number of groups a user can belong to"); SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD, 0, 1, "Whether job control is available"); #ifdef _POSIX_SAVED_IDS SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, 0, 1, "Whether saved set-group/user ID is available"); #else SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, 0, 0, "Whether saved set-group/user ID is available"); #endif char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */ SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW, kernelname, sizeof kernelname, "Name of kernel file booted"); #ifdef SMP SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD, &mp_ncpus, 0, "Number of active CPUs"); #else SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD, 0, 1, "Number of active CPUs"); #endif SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD, 0, BYTE_ORDER, "System byte order"); SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD, 0, PAGE_SIZE, "System memory page size"); static char machine_arch[] = MACHINE_ARCH; SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD, machine_arch, 0, "System architecture"); char hostname[MAXHOSTNAMELEN]; static int -sysctl_hostname (SYSCTL_HANDLER_ARGS) +sysctl_hostname(SYSCTL_HANDLER_ARGS) { int error; if (req->p->p_prison) { if (!jail_set_hostname_allowed && req->newptr) return(EPERM); error = sysctl_handle_string(oidp, req->p->p_prison->pr_host, sizeof req->p->p_prison->pr_host, req); } else error = sysctl_handle_string(oidp, hostname, sizeof hostname, req); return (error); } SYSCTL_PROC(_kern, KERN_HOSTNAME, hostname, CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, sysctl_hostname, "A", "Hostname"); int securelevel = -1; static int -sysctl_kern_securelvl (SYSCTL_HANDLER_ARGS) +sysctl_kern_securelvl(SYSCTL_HANDLER_ARGS) { int error, level; level = securelevel; error = sysctl_handle_int(oidp, &level, 0, req); if (error || !req->newptr) return (error); if (level < securelevel) return (EPERM); securelevel = level; return (error); } SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_securelvl, "I", "Current secure level"); char domainname[MAXHOSTNAMELEN]; SYSCTL_STRING(_kern, KERN_NISDOMAINNAME, domainname, CTLFLAG_RW, &domainname, sizeof(domainname), "Name of the current YP/NIS domain"); long hostid; /* Some trouble here, if sizeof (int) != sizeof (long) */ SYSCTL_INT(_kern, KERN_HOSTID, hostid, CTLFLAG_RW, &hostid, 0, "Host ID"); /* * This is really cheating. These actually live in the libc, something * which I'm not quite sure is a good idea anyway, but in order for * getnext and friends to actually work, we define dummies here. */ SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD, "", 0, "PATH that finds all the standard utilities"); SYSCTL_INT(_user, USER_BC_BASE_MAX, bc_base_max, CTLFLAG_RD, 0, 0, "Max ibase/obase values in bc(1)"); SYSCTL_INT(_user, USER_BC_DIM_MAX, bc_dim_max, CTLFLAG_RD, 0, 0, "Max array size in bc(1)"); SYSCTL_INT(_user, USER_BC_SCALE_MAX, bc_scale_max, CTLFLAG_RD, 0, 0, "Max scale value in bc(1)"); SYSCTL_INT(_user, USER_BC_STRING_MAX, bc_string_max, CTLFLAG_RD, 0, 0, "Max string length in bc(1)"); SYSCTL_INT(_user, USER_COLL_WEIGHTS_MAX, coll_weights_max, CTLFLAG_RD, 0, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry"); SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD, 0, 0, ""); SYSCTL_INT(_user, USER_LINE_MAX, line_max, CTLFLAG_RD, 0, 0, "Max length (bytes) of a text-processing utility's input line"); SYSCTL_INT(_user, USER_RE_DUP_MAX, re_dup_max, CTLFLAG_RD, 0, 0, "Maximum number of repeats of a regexp permitted"); SYSCTL_INT(_user, USER_POSIX2_VERSION, posix2_version, CTLFLAG_RD, 0, 0, "The version of POSIX 1003.2 with which the system attempts to comply"); SYSCTL_INT(_user, USER_POSIX2_C_BIND, posix2_c_bind, CTLFLAG_RD, 0, 0, "Whether C development supports the C bindings option"); SYSCTL_INT(_user, USER_POSIX2_C_DEV, posix2_c_dev, CTLFLAG_RD, 0, 0, "Whether system supports the C development utilities option"); SYSCTL_INT(_user, USER_POSIX2_CHAR_TERM, posix2_char_term, CTLFLAG_RD, 0, 0, ""); SYSCTL_INT(_user, USER_POSIX2_FORT_DEV, posix2_fort_dev, CTLFLAG_RD, 0, 0, "Whether system supports FORTRAN development utilities"); SYSCTL_INT(_user, USER_POSIX2_FORT_RUN, posix2_fort_run, CTLFLAG_RD, 0, 0, "Whether system supports FORTRAN runtime utilities"); SYSCTL_INT(_user, USER_POSIX2_LOCALEDEF, posix2_localedef, CTLFLAG_RD, 0, 0, "Whether system supports creation of locales"); SYSCTL_INT(_user, USER_POSIX2_SW_DEV, posix2_sw_dev, CTLFLAG_RD, 0, 0, "Whether system supports software development utilities"); SYSCTL_INT(_user, USER_POSIX2_UPE, posix2_upe, CTLFLAG_RD, 0, 0, "Whether system supports the user portability utilities"); SYSCTL_INT(_user, USER_STREAM_MAX, stream_max, CTLFLAG_RD, 0, 0, "Min Maximum number of streams a process may have open at one time"); SYSCTL_INT(_user, USER_TZNAME_MAX, tzname_max, CTLFLAG_RD, 0, 0, "Min Maximum number of types supported for timezone names"); #include SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD, 0, sizeof(struct vnode), "sizeof(struct vnode)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD, 0, sizeof(struct proc), "sizeof(struct proc)"); #include SYSCTL_INT(_debug_sizeof, OID_AUTO, specinfo, CTLFLAG_RD, 0, sizeof(struct specinfo), "sizeof(struct specinfo)"); #include #include SYSCTL_INT(_debug_sizeof, OID_AUTO, bio, CTLFLAG_RD, 0, sizeof(struct bio), "sizeof(struct bio)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, buf, CTLFLAG_RD, 0, sizeof(struct buf), "sizeof(struct buf)"); Index: head/sys/kern/kern_ntptime.c =================================================================== --- head/sys/kern/kern_ntptime.c (revision 62572) +++ head/sys/kern/kern_ntptime.c (revision 62573) @@ -1,859 +1,859 @@ /*********************************************************************** * * * Copyright (c) David L. Mills 1993-1999 * * * * Permission to use, copy, modify, and distribute this software and * * its documentation for any purpose and without fee is hereby * * granted, provided that the above copyright notice appears in all * * copies and that both the copyright notice and this permission * * notice appear in supporting documentation, and that the name * * University of Delaware not be used in advertising or publicity * * pertaining to distribution of the software without specific, * * written prior permission. The University of Delaware makes no * * representations about the suitability this software for any * * purpose. It is provided "as is" without express or implied * * warranty. * * * **********************************************************************/ /* * Adapted from the original sources for FreeBSD and timecounters by: * Poul-Henning Kamp . * * The 32bit version of the "LP" macros seems a bit past its "sell by" * date so I have retained only the 64bit version and included it directly * in this file. * * Only minor changes done to interface with the timecounters over in * sys/kern/kern_clock.c. Some of the comments below may be (even more) * confusing and/or plain wrong in that context. * * $FreeBSD$ */ #include "opt_ntp.h" #include #include #include #include #include #include #include #include #include #include /* * Single-precision macros for 64-bit machines */ typedef long long l_fp; #define L_ADD(v, u) ((v) += (u)) #define L_SUB(v, u) ((v) -= (u)) #define L_ADDHI(v, a) ((v) += (long long)(a) << 32) #define L_NEG(v) ((v) = -(v)) #define L_RSHIFT(v, n) \ do { \ if ((v) < 0) \ (v) = -(-(v) >> (n)); \ else \ (v) = (v) >> (n); \ } while (0) #define L_MPY(v, a) ((v) *= (a)) #define L_CLR(v) ((v) = 0) #define L_ISNEG(v) ((v) < 0) #define L_LINT(v, a) ((v) = (long long)(a) << 32) #define L_GINT(v) ((v) < 0 ? -(-(v) >> 32) : (v) >> 32) /* * Generic NTP kernel interface * * These routines constitute the Network Time Protocol (NTP) interfaces * for user and daemon application programs. The ntp_gettime() routine * provides the time, maximum error (synch distance) and estimated error * (dispersion) to client user application programs. The ntp_adjtime() * routine is used by the NTP daemon to adjust the system clock to an * externally derived time. The time offset and related variables set by * this routine are used by other routines in this module to adjust the * phase and frequency of the clock discipline loop which controls the * system clock. * * When the kernel time is reckoned directly in nanoseconds (NTP_NANO * defined), the time at each tick interrupt is derived directly from * the kernel time variable. When the kernel time is reckoned in * microseconds, (NTP_NANO undefined), the time is derived from the * kernel time variable together with a variable representing the * leftover nanoseconds at the last tick interrupt. In either case, the * current nanosecond time is reckoned from these values plus an * interpolated value derived by the clock routines in another * architecture-specific module. The interpolation can use either a * dedicated counter or a processor cycle counter (PCC) implemented in * some architectures. * * Note that all routines must run at priority splclock or higher. */ /* * Phase/frequency-lock loop (PLL/FLL) definitions * * The nanosecond clock discipline uses two variable types, time * variables and frequency variables. Both types are represented as 64- * bit fixed-point quantities with the decimal point between two 32-bit * halves. On a 32-bit machine, each half is represented as a single * word and mathematical operations are done using multiple-precision * arithmetic. On a 64-bit machine, ordinary computer arithmetic is * used. * * A time variable is a signed 64-bit fixed-point number in ns and * fraction. It represents the remaining time offset to be amortized * over succeeding tick interrupts. The maximum time offset is about * 0.5 s and the resolution is about 2.3e-10 ns. * * 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |s s s| ns | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | fraction | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * A frequency variable is a signed 64-bit fixed-point number in ns/s * and fraction. It represents the ns and fraction to be added to the * kernel time variable at each second. The maximum frequency offset is * about +-500000 ns/s and the resolution is about 2.3e-10 ns/s. * * 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |s s s s s s s s s s s s s| ns/s | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | fraction | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ /* * The following variables establish the state of the PLL/FLL and the * residual time and frequency offset of the local clock. */ #define SHIFT_PLL 4 /* PLL loop gain (shift) */ #define SHIFT_FLL 2 /* FLL loop gain (shift) */ static int time_state = TIME_OK; /* clock state */ static int time_status = STA_UNSYNC; /* clock status bits */ static long time_constant; /* poll interval (shift) (s) */ static long time_precision = 1; /* clock precision (ns) */ static long time_maxerror = MAXPHASE / 1000; /* maximum error (us) */ static long time_esterror = MAXPHASE / 1000; /* estimated error (us) */ static long time_reftime; /* time at last adjustment (s) */ static long time_tick; /* nanoseconds per tick (ns) */ static l_fp time_offset; /* time offset (ns) */ static l_fp time_freq; /* frequency offset (ns/s) */ static l_fp time_adj; /* resulting adjustment */ #ifdef PPS_SYNC /* * The following variables are used when a pulse-per-second (PPS) signal * is available and connected via a modem control lead. They establish * the engineering parameters of the clock discipline loop when * controlled by the PPS signal. */ #define PPS_FAVG 2 /* min freq avg interval (s) (shift) */ #define PPS_FAVGDEF 7 /* default freq avg int (s) (shift) */ #define PPS_FAVGMAX 15 /* max freq avg interval (s) (shift) */ #define PPS_PAVG 4 /* phase avg interval (s) (shift) */ #define PPS_VALID 120 /* PPS signal watchdog max (s) */ #define PPS_MAXWANDER 100000 /* max PPS wander (ns/s) */ #define PPS_POPCORN 2 /* popcorn spike threshold (shift) */ static struct timespec pps_tf[3]; /* phase median filter */ static l_fp pps_offset; /* time offset (ns) */ static l_fp pps_freq; /* scaled frequency offset (ns/s) */ static long pps_fcount; /* frequency accumulator */ static long pps_jitter; /* nominal jitter (ns) */ static long pps_stabil; /* nominal stability (scaled ns/s) */ static long pps_lastsec; /* time at last calibration (s) */ static int pps_valid; /* signal watchdog counter */ static int pps_shift = PPS_FAVG; /* interval duration (s) (shift) */ static int pps_shiftmax = PPS_FAVGDEF; /* max interval duration (s) (shift) */ static int pps_intcnt; /* wander counter */ static int pps_letgo; /* PLL frequency hold-off */ /* * PPS signal quality monitors */ static long pps_calcnt; /* calibration intervals */ static long pps_jitcnt; /* jitter limit exceeded */ static long pps_stbcnt; /* stability limit exceeded */ static long pps_errcnt; /* calibration errors */ #endif /* PPS_SYNC */ /* * End of phase/frequency-lock loop (PLL/FLL) definitions */ static void ntp_init(void); static void hardupdate(long offset); /* * ntp_gettime() - NTP user application interface * * See the timex.h header file for synopsis and API description. */ static int -ntp_sysctl (SYSCTL_HANDLER_ARGS) +ntp_sysctl(SYSCTL_HANDLER_ARGS) { struct ntptimeval ntv; /* temporary structure */ struct timespec atv; /* nanosecond time */ nanotime(&atv); ntv.time.tv_sec = atv.tv_sec; ntv.time.tv_nsec = atv.tv_nsec; ntv.maxerror = time_maxerror; ntv.esterror = time_esterror; ntv.time_state = time_state; /* * Status word error decode. If any of these conditions occur, * an error is returned, instead of the status word. Most * applications will care only about the fact the system clock * may not be trusted, not about the details. * * Hardware or software error */ if ((time_status & (STA_UNSYNC | STA_CLOCKERR)) || /* * PPS signal lost when either time or frequency synchronization * requested */ (time_status & (STA_PPSFREQ | STA_PPSTIME) && !(time_status & STA_PPSSIGNAL)) || /* * PPS jitter exceeded when time synchronization requested */ (time_status & STA_PPSTIME && time_status & STA_PPSJITTER) || /* * PPS wander exceeded or calibration error when frequency * synchronization requested */ (time_status & STA_PPSFREQ && time_status & (STA_PPSWANDER | STA_PPSERROR))) ntv.time_state = TIME_ERROR; return (sysctl_handle_opaque(oidp, &ntv, sizeof ntv, req)); } SYSCTL_NODE(_kern, OID_AUTO, ntp_pll, CTLFLAG_RW, 0, ""); SYSCTL_PROC(_kern_ntp_pll, OID_AUTO, gettime, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, sizeof(struct ntptimeval) , ntp_sysctl, "S,ntptimeval", ""); #ifdef PPS_SYNC SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shiftmax, CTLFLAG_RW, &pps_shiftmax, 0, ""); SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shift, CTLFLAG_RW, &pps_shift, 0, ""); SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD, &pps_freq, sizeof(pps_freq), "I", ""); SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD, &time_freq, sizeof(time_freq), "I", ""); SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, pps_offset, CTLFLAG_RD, &pps_offset, sizeof(pps_offset), "I", ""); #endif /* * ntp_adjtime() - NTP daemon application interface * * See the timex.h header file for synopsis and API description. */ #ifndef _SYS_SYSPROTO_H_ struct ntp_adjtime_args { struct timex *tp; }; #endif int ntp_adjtime(struct proc *p, struct ntp_adjtime_args *uap) { struct timex ntv; /* temporary structure */ long freq; /* frequency ns/s) */ int modes; /* mode bits from structure */ int s; /* caller priority */ int error; error = copyin((caddr_t)uap->tp, (caddr_t)&ntv, sizeof(ntv)); if (error) return(error); /* * Update selected clock variables - only the superuser can * change anything. Note that there is no error checking here on * the assumption the superuser should know what it is doing. */ modes = ntv.modes; if (modes) error = suser(p); if (error) return (error); s = splclock(); if (modes & MOD_FREQUENCY) { freq = (ntv.freq * 1000LL) >> 16; if (freq > MAXFREQ) L_LINT(time_freq, MAXFREQ); else if (freq < -MAXFREQ) L_LINT(time_freq, -MAXFREQ); else L_LINT(time_freq, freq); #ifdef PPS_SYNC pps_freq = time_freq; #endif /* PPS_SYNC */ } if (modes & MOD_MAXERROR) time_maxerror = ntv.maxerror; if (modes & MOD_ESTERROR) time_esterror = ntv.esterror; if (modes & MOD_STATUS) { time_status &= STA_RONLY; time_status |= ntv.status & ~STA_RONLY; } if (modes & MOD_TIMECONST) { if (ntv.constant < 0) time_constant = 0; else if (ntv.constant > MAXTC) time_constant = MAXTC; else time_constant = ntv.constant; } #ifdef PPS_SYNC if (modes & MOD_PPSMAX) { if (ntv.shift < PPS_FAVG) pps_shiftmax = PPS_FAVG; else if (ntv.shift > PPS_FAVGMAX) pps_shiftmax = PPS_FAVGMAX; else pps_shiftmax = ntv.shift; } #endif /* PPS_SYNC */ if (modes & MOD_NANO) time_status |= STA_NANO; if (modes & MOD_MICRO) time_status &= ~STA_NANO; if (modes & MOD_CLKB) time_status |= STA_CLK; if (modes & MOD_CLKA) time_status &= ~STA_CLK; if (modes & MOD_OFFSET) { if (time_status & STA_NANO) hardupdate(ntv.offset); else hardupdate(ntv.offset * 1000); } /* * Retrieve all clock variables */ if (time_status & STA_NANO) ntv.offset = L_GINT(time_offset); else ntv.offset = L_GINT(time_offset) / 1000; ntv.freq = L_GINT((time_freq / 1000LL) << 16); ntv.maxerror = time_maxerror; ntv.esterror = time_esterror; ntv.status = time_status; ntv.constant = time_constant; if (time_status & STA_NANO) ntv.precision = time_precision; else ntv.precision = time_precision / 1000; ntv.tolerance = MAXFREQ * SCALE_PPM; #ifdef PPS_SYNC ntv.shift = pps_shift; ntv.ppsfreq = L_GINT((pps_freq / 1000LL) << 16); if (time_status & STA_NANO) ntv.jitter = pps_jitter; else ntv.jitter = pps_jitter / 1000; ntv.stabil = pps_stabil; ntv.calcnt = pps_calcnt; ntv.errcnt = pps_errcnt; ntv.jitcnt = pps_jitcnt; ntv.stbcnt = pps_stbcnt; #endif /* PPS_SYNC */ splx(s); error = copyout((caddr_t)&ntv, (caddr_t)uap->tp, sizeof(ntv)); if (error) return (error); /* * Status word error decode. See comments in * ntp_gettime() routine. */ if ((time_status & (STA_UNSYNC | STA_CLOCKERR)) || (time_status & (STA_PPSFREQ | STA_PPSTIME) && !(time_status & STA_PPSSIGNAL)) || (time_status & STA_PPSTIME && time_status & STA_PPSJITTER) || (time_status & STA_PPSFREQ && time_status & (STA_PPSWANDER | STA_PPSERROR))) p->p_retval[0] = TIME_ERROR; else p->p_retval[0] = time_state; return (error); } /* * second_overflow() - called after ntp_tick_adjust() * * This routine is ordinarily called immediately following the above * routine ntp_tick_adjust(). While these two routines are normally * combined, they are separated here only for the purposes of * simulation. */ void ntp_update_second(struct timecounter *tcp) { u_int32_t *newsec; newsec = &tcp->tc_offset_sec; /* * On rollover of the second both the nanosecond and microsecond * clocks are updated and the state machine cranked as * necessary. The phase adjustment to be used for the next * second is calculated and the maximum error is increased by * the tolerance. */ time_maxerror += MAXFREQ / 1000; /* * Leap second processing. If in leap-insert state at * the end of the day, the system clock is set back one * second; if in leap-delete state, the system clock is * set ahead one second. The nano_time() routine or * external clock driver will insure that reported time * is always monotonic. */ switch (time_state) { /* * No warning. */ case TIME_OK: if (time_status & STA_INS) time_state = TIME_INS; else if (time_status & STA_DEL) time_state = TIME_DEL; break; /* * Insert second 23:59:60 following second * 23:59:59. */ case TIME_INS: if (!(time_status & STA_INS)) time_state = TIME_OK; else if ((*newsec) % 86400 == 0) { (*newsec)--; time_state = TIME_OOP; } break; /* * Delete second 23:59:59. */ case TIME_DEL: if (!(time_status & STA_DEL)) time_state = TIME_OK; else if (((*newsec) + 1) % 86400 == 0) { (*newsec)++; time_state = TIME_WAIT; } break; /* * Insert second in progress. */ case TIME_OOP: time_state = TIME_WAIT; break; /* * Wait for status bits to clear. */ case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; } /* * Compute the total time adjustment for the next second * in ns. The offset is reduced by a factor depending on * whether the PPS signal is operating. Note that the * value is in effect scaled by the clock frequency, * since the adjustment is added at each tick interrupt. */ #ifdef PPS_SYNC /* XXX even if signal dies we should finish adjustment ? */ if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL) { time_adj = pps_offset; L_RSHIFT(time_adj, pps_shift); L_SUB(pps_offset, time_adj); } else { time_adj = time_offset; L_RSHIFT(time_adj, SHIFT_PLL + time_constant); L_SUB(time_offset, time_adj); } #else time_adj = time_offset; L_RSHIFT(time_adj, SHIFT_PLL + time_constant); L_SUB(time_offset, time_adj); #endif /* PPS_SYNC */ L_ADD(time_adj, time_freq); tcp->tc_adjustment = time_adj; #ifdef PPS_SYNC if (pps_valid > 0) pps_valid--; else time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); #endif /* PPS_SYNC */ } /* * ntp_init() - initialize variables and structures * * This routine must be called after the kernel variables hz and tick * are set or changed and before the next tick interrupt. In this * particular implementation, these values are assumed set elsewhere in * the kernel. The design allows the clock frequency and tick interval * to be changed while the system is running. So, this routine should * probably be integrated with the code that does that. */ static void ntp_init() { /* * The following variable must be initialized any time the * kernel variable hz is changed. */ time_tick = NANOSECOND / hz; /* * The following variables are initialized only at startup. Only * those structures not cleared by the compiler need to be * initialized, and these only in the simulator. In the actual * kernel, any nonzero values here will quickly evaporate. */ L_CLR(time_offset); L_CLR(time_freq); #ifdef PPS_SYNC pps_tf[0].tv_sec = pps_tf[0].tv_nsec = 0; pps_tf[1].tv_sec = pps_tf[1].tv_nsec = 0; pps_tf[2].tv_sec = pps_tf[2].tv_nsec = 0; pps_fcount = 0; L_CLR(pps_freq); #endif /* PPS_SYNC */ } SYSINIT(ntpclocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, ntp_init, NULL) /* * hardupdate() - local clock update * * This routine is called by ntp_adjtime() to update the local clock * phase and frequency. The implementation is of an adaptive-parameter, * hybrid phase/frequency-lock loop (PLL/FLL). The routine computes new * time and frequency offset estimates for each call. If the kernel PPS * discipline code is configured (PPS_SYNC), the PPS signal itself * determines the new time offset, instead of the calling argument. * Presumably, calls to ntp_adjtime() occur only when the caller * believes the local clock is valid within some bound (+-128 ms with * NTP). If the caller's time is far different than the PPS time, an * argument will ensue, and it's not clear who will lose. * * For uncompensated quartz crystal oscillators and nominal update * intervals less than 256 s, operation should be in phase-lock mode, * where the loop is disciplined to phase. For update intervals greater * than 1024 s, operation should be in frequency-lock mode, where the * loop is disciplined to frequency. Between 256 s and 1024 s, the mode * is selected by the STA_MODE status bit. */ static void hardupdate(offset) long offset; /* clock offset (ns) */ { long ltemp, mtemp; l_fp ftemp; /* * Select how the phase is to be controlled and from which * source. If the PPS signal is present and enabled to * discipline the time, the PPS offset is used; otherwise, the * argument offset is used. */ if (!(time_status & STA_PLL)) return; ltemp = offset; if (ltemp > MAXPHASE) ltemp = MAXPHASE; else if (ltemp < -MAXPHASE) ltemp = -MAXPHASE; if (!(time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)) L_LINT(time_offset, ltemp); /* * Select how the frequency is to be controlled and in which * mode (PLL or FLL). If the PPS signal is present and enabled * to discipline the frequency, the PPS frequency is used; * otherwise, the argument offset is used to compute it. */ if (time_status & STA_PPSFREQ && time_status & STA_PPSSIGNAL) { time_reftime = time_second; return; } if (time_status & STA_FREQHOLD || time_reftime == 0) time_reftime = time_second; mtemp = time_second - time_reftime; L_LINT(ftemp, ltemp); L_RSHIFT(ftemp, (SHIFT_PLL + 2 + time_constant) << 1); L_MPY(ftemp, mtemp); L_ADD(time_freq, ftemp); time_status &= ~STA_MODE; if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { L_LINT(ftemp, (ltemp << 4) / mtemp); L_RSHIFT(ftemp, SHIFT_FLL + 4); L_ADD(time_freq, ftemp); time_status |= STA_MODE; } time_reftime = time_second; if (L_GINT(time_freq) > MAXFREQ) L_LINT(time_freq, MAXFREQ); else if (L_GINT(time_freq) < -MAXFREQ) L_LINT(time_freq, -MAXFREQ); } #ifdef PPS_SYNC /* * hardpps() - discipline CPU clock oscillator to external PPS signal * * This routine is called at each PPS interrupt in order to discipline * the CPU clock oscillator to the PPS signal. It measures the PPS phase * and leaves it in a handy spot for the hardclock() routine. It * integrates successive PPS phase differences and calculates the * frequency offset. This is used in hardclock() to discipline the CPU * clock oscillator so that the intrinsic frequency error is cancelled * out. The code requires the caller to capture the time and * architecture-dependent hardware counter values in nanoseconds at the * on-time PPS signal transition. * * Note that, on some Unix systems this routine runs at an interrupt * priority level higher than the timer interrupt routine hardclock(). * Therefore, the variables used are distinct from the hardclock() * variables, except for the actual time and frequency variables, which * are determined by this routine and updated atomically. */ void hardpps(tsp, nsec) struct timespec *tsp; /* time at PPS */ long nsec; /* hardware counter at PPS */ { long u_sec, u_nsec, v_nsec, w_nsec; /* temps */ l_fp ftemp; /* * The signal is first processed by a frequency discriminator * which rejects noise and input signals with frequencies * outside the range 1 +-MAXFREQ PPS. If two hits occur in the * same second, we ignore the later hit; if not and a hit occurs * outside the range gate, keep the later hit but do not * process it. */ time_status |= STA_PPSSIGNAL | STA_PPSJITTER; time_status &= ~(STA_PPSWANDER | STA_PPSERROR); pps_valid = PPS_VALID; u_sec = tsp->tv_sec; u_nsec = tsp->tv_nsec; if (u_nsec >= (NANOSECOND >> 1)) { u_nsec -= NANOSECOND; u_sec++; } v_nsec = u_nsec - pps_tf[0].tv_nsec; if (u_sec == pps_tf[0].tv_sec && v_nsec < -MAXFREQ) { return; } pps_tf[2] = pps_tf[1]; pps_tf[1] = pps_tf[0]; pps_tf[0].tv_sec = u_sec; pps_tf[0].tv_nsec = u_nsec; /* * Compute the difference between the current and previous * counter values. If the difference exceeds 0.5 s, assume it * has wrapped around, so correct 1.0 s. If the result exceeds * the tick interval, the sample point has crossed a tick * boundary during the last second, so correct the tick. Very * intricate. */ u_nsec = nsec; if (u_nsec > (NANOSECOND >> 1)) u_nsec -= NANOSECOND; else if (u_nsec < -(NANOSECOND >> 1)) u_nsec += NANOSECOND; pps_fcount += u_nsec; if (v_nsec > MAXFREQ || v_nsec < -MAXFREQ) { return; } time_status &= ~STA_PPSJITTER; /* * A three-stage median filter is used to help denoise the PPS * time. The median sample becomes the time offset estimate; the * difference between the other two samples becomes the time * dispersion (jitter) estimate. */ if (pps_tf[0].tv_nsec > pps_tf[1].tv_nsec) { if (pps_tf[1].tv_nsec > pps_tf[2].tv_nsec) { v_nsec = pps_tf[1].tv_nsec; /* 0 1 2 */ u_nsec = pps_tf[0].tv_nsec - pps_tf[2].tv_nsec; } else if (pps_tf[2].tv_nsec > pps_tf[0].tv_nsec) { v_nsec = pps_tf[0].tv_nsec; /* 2 0 1 */ u_nsec = pps_tf[2].tv_nsec - pps_tf[1].tv_nsec; } else { v_nsec = pps_tf[2].tv_nsec; /* 0 2 1 */ u_nsec = pps_tf[0].tv_nsec - pps_tf[1].tv_nsec; } } else { if (pps_tf[1].tv_nsec < pps_tf[2].tv_nsec) { v_nsec = pps_tf[1].tv_nsec; /* 2 1 0 */ u_nsec = pps_tf[2].tv_nsec - pps_tf[0].tv_nsec; } else if (pps_tf[2].tv_nsec < pps_tf[0].tv_nsec) { v_nsec = pps_tf[0].tv_nsec; /* 1 0 2 */ u_nsec = pps_tf[1].tv_nsec - pps_tf[2].tv_nsec; } else { v_nsec = pps_tf[2].tv_nsec; /* 1 2 0 */ u_nsec = pps_tf[1].tv_nsec - pps_tf[0].tv_nsec; } } /* * Nominal jitter is due to PPS signal noise and interrupt * latency. If it exceeds the popcorn threshold, * the sample is discarded. otherwise, if so enabled, the time * offset is updated. We can tolerate a modest loss of data here * without degrading time accuracy. */ if (u_nsec > (pps_jitter << PPS_POPCORN)) { time_status |= STA_PPSJITTER; pps_jitcnt++; } else if (time_status & STA_PPSTIME) { L_LINT(time_offset, -v_nsec); L_LINT(pps_offset, -v_nsec); if (pps_letgo >= 2) { L_LINT(ftemp, -v_nsec); L_RSHIFT(ftemp, (pps_shift * 2)); L_ADD(ftemp, time_freq); w_nsec = L_GINT(ftemp); if (w_nsec > MAXFREQ) L_LINT(ftemp, MAXFREQ); else if (w_nsec < -MAXFREQ) L_LINT(ftemp, -MAXFREQ); time_freq = ftemp; } } pps_jitter += (u_nsec - pps_jitter) >> PPS_FAVG; u_sec = pps_tf[0].tv_sec - pps_lastsec; if (u_sec < (1 << pps_shift)) return; /* * At the end of the calibration interval the difference between * the first and last counter values becomes the scaled * frequency. It will later be divided by the length of the * interval to determine the frequency update. If the frequency * exceeds a sanity threshold, or if the actual calibration * interval is not equal to the expected length, the data are * discarded. We can tolerate a modest loss of data here without * degrading frequency ccuracy. */ pps_calcnt++; v_nsec = -pps_fcount; pps_lastsec = pps_tf[0].tv_sec; pps_fcount = 0; u_nsec = MAXFREQ << pps_shift; if (v_nsec > u_nsec || v_nsec < -u_nsec || u_sec != (1 << pps_shift)) { time_status |= STA_PPSERROR; pps_errcnt++; return; } /* * Here the raw frequency offset and wander (stability) is * calculated. If the wander is less than the wander threshold * for four consecutive averaging intervals, the interval is * doubled; if it is greater than the threshold for four * consecutive intervals, the interval is halved. The scaled * frequency offset is converted to frequency offset. The * stability metric is calculated as the average of recent * frequency changes, but is used only for performance * monitoring. */ L_LINT(ftemp, v_nsec); L_RSHIFT(ftemp, pps_shift); L_SUB(ftemp, pps_freq); u_nsec = L_GINT(ftemp); if (u_nsec > PPS_MAXWANDER) { L_LINT(ftemp, PPS_MAXWANDER); pps_intcnt--; time_status |= STA_PPSWANDER; pps_stbcnt++; } else if (u_nsec < -PPS_MAXWANDER) { L_LINT(ftemp, -PPS_MAXWANDER); pps_intcnt--; time_status |= STA_PPSWANDER; pps_stbcnt++; } else { pps_intcnt++; } if (!(time_status & STA_PPSFREQ)) { pps_intcnt = 0; pps_shift = PPS_FAVG; } else if (pps_shift > pps_shiftmax) { /* If we lowered pps_shiftmax */ pps_shift = pps_shiftmax; pps_intcnt = 0; } else if (pps_intcnt >= 4) { pps_intcnt = 4; if (pps_shift < pps_shiftmax) { pps_shift++; pps_intcnt = 0; } } else if (pps_intcnt <= -4) { pps_intcnt = -4; if (pps_shift > PPS_FAVG) { pps_shift--; pps_intcnt = 0; } } if (u_nsec < 0) u_nsec = -u_nsec; pps_stabil += (u_nsec * SCALE_PPM - pps_stabil) >> PPS_FAVG; /* * The PPS frequency is recalculated and clamped to the maximum * MAXFREQ. If enabled, the system clock frequency is updated as * well. */ L_ADD(pps_freq, ftemp); u_nsec = L_GINT(pps_freq); if (u_nsec > MAXFREQ) L_LINT(pps_freq, MAXFREQ); else if (u_nsec < -MAXFREQ) L_LINT(pps_freq, -MAXFREQ); if ((time_status & (STA_PPSFREQ | STA_PPSTIME)) == STA_PPSFREQ) { pps_letgo = 0; time_freq = pps_freq; } else if (time_status & STA_PPSTIME) { if (pps_letgo < 2) pps_letgo++; } } #endif /* PPS_SYNC */ Index: head/sys/kern/kern_proc.c =================================================================== --- head/sys/kern/kern_proc.c (revision 62572) +++ head/sys/kern/kern_proc.c (revision 62573) @@ -1,725 +1,725 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PGRP, "pgrp", "process group header"); MALLOC_DEFINE(M_SESSION, "session", "session header"); static MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); static void pgdelete __P((struct pgrp *)); /* * Structure associated with user cacheing. */ struct uidinfo { LIST_ENTRY(uidinfo) ui_hash; uid_t ui_uid; long ui_proccnt; rlim_t ui_sbsize; }; #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void orphanpg __P((struct pgrp *pg)); static struct uidinfo *uifind(uid_t uid); static struct uidinfo *uicreate(uid_t uid); static int uifree(struct uidinfo *uip); /* * Other process lists */ struct pidhashhead *pidhashtbl; u_long pidhash; struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct proclist zombproc; vm_zone_t proc_zone; /* * Initialize global process hashing structures. */ void procinit() { LIST_INIT(&allproc); LIST_INIT(&zombproc); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash); proc_zone = zinit("PROC", sizeof (struct proc), 0, 0, 5); } /* * find/create a uidinfo struct for the uid passed in */ static struct uidinfo * uifind(uid) uid_t uid; { struct uihashhead *uipp; struct uidinfo *uip; uipp = UIHASH(uid); LIST_FOREACH(uip, uipp, ui_hash) if (uip->ui_uid == uid) break; return (uip); } static struct uidinfo * uicreate(uid) uid_t uid; { struct uidinfo *uip, *norace; MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_NOWAIT); if (uip == NULL) { MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK); /* * if we M_WAITOK we must look afterwards or risk * redundant entries */ norace = uifind(uid); if (norace != NULL) { FREE(uip, M_PROC); return (norace); } } LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash); uip->ui_uid = uid; uip->ui_proccnt = 0; uip->ui_sbsize = 0; return (uip); } static int uifree(uip) struct uidinfo *uip; { if (uip->ui_sbsize == 0 && uip->ui_proccnt == 0) { LIST_REMOVE(uip, ui_hash); FREE(uip, M_PROC); return (1); } return (0); } /* * Change the count associated with number of processes * a given user is using. When 'max' is 0, don't enforce a limit */ int chgproccnt(uid, diff, max) uid_t uid; int diff; int max; { struct uidinfo *uip; uip = uifind(uid); if (diff < 0) KASSERT(uip != NULL, ("reducing proccnt: lost count, uid = %d", uid)); if (uip == NULL) uip = uicreate(uid); /* don't allow them to exceed max, but allow subtraction */ if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) { (void)uifree(uip); return (0); } uip->ui_proccnt += diff; (void)uifree(uip); return (1); } /* * Change the total socket buffer size a user has used. */ int chgsbsize(uid, diff, max) uid_t uid; rlim_t diff; rlim_t max; { struct uidinfo *uip; uip = uifind(uid); if (diff < 0) KASSERT(uip != NULL, ("reducing sbsize: lost count, uid = %d", uid)); if (uip == NULL) uip = uicreate(uid); /* don't allow them to exceed max, but allow subtraction */ if (diff > 0 && uip->ui_sbsize + diff > max) { (void)uifree(uip); return (0); } uip->ui_sbsize += diff; (void)uifree(uip); return (1); } /* * Is p an inferior of the current process? */ int inferior(p) register struct proc *p; { for (; p != curproc; p = p->p_pptr) if (p->p_pid == 0) return (0); return (1); } /* * Locate a process by number */ struct proc * pfind(pid) register pid_t pid; { register struct proc *p; LIST_FOREACH(p, PIDHASH(pid), p_hash) if (p->p_pid == pid) return (p); return (NULL); } /* * Locate a process group by number */ struct pgrp * pgfind(pgid) register pid_t pgid; { register struct pgrp *pgrp; LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) if (pgrp->pg_id == pgid) return (pgrp); return (NULL); } /* * Move p to a new or existing process group (and session) */ int enterpgrp(p, pgid, mksess) register struct proc *p; pid_t pgid; int mksess; { register struct pgrp *pgrp = pgfind(pgid); KASSERT(pgrp == NULL || !mksess, ("enterpgrp: setsid into non-empty pgrp")); KASSERT(!SESS_LEADER(p), ("enterpgrp: session leader attempted setpgrp")); if (pgrp == NULL) { pid_t savepid = p->p_pid; struct proc *np; /* * new process group */ KASSERT(p->p_pid == pgid, ("enterpgrp: new pgrp and pid != pgid")); MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, M_WAITOK); if ((np = pfind(savepid)) == NULL || np != p) return (ESRCH); if (mksess) { register struct session *sess; /* * new session */ MALLOC(sess, struct session *, sizeof(struct session), M_SESSION, M_WAITOK); sess->s_leader = p; sess->s_sid = p->p_pid; sess->s_count = 1; sess->s_ttyvp = NULL; sess->s_ttyp = NULL; bcopy(p->p_session->s_login, sess->s_login, sizeof(sess->s_login)); p->p_flag &= ~P_CONTROLT; pgrp->pg_session = sess; KASSERT(p == curproc, ("enterpgrp: mksession and p != curproc")); } else { pgrp->pg_session = p->p_session; pgrp->pg_session->s_count++; } pgrp->pg_id = pgid; LIST_INIT(&pgrp->pg_members); LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); pgrp->pg_jobc = 0; SLIST_INIT(&pgrp->pg_sigiolst); } else if (pgrp == p->p_pgrp) return (0); /* * Adjust eligibility of affected pgrps to participate in job control. * Increment eligibility counts before decrementing, otherwise we * could reach 0 spuriously during the first call. */ fixjobc(p, pgrp, 1); fixjobc(p, p->p_pgrp, 0); LIST_REMOVE(p, p_pglist); if (LIST_EMPTY(&p->p_pgrp->pg_members)) pgdelete(p->p_pgrp); p->p_pgrp = pgrp; LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); return (0); } /* * remove process from process group */ int leavepgrp(p) register struct proc *p; { LIST_REMOVE(p, p_pglist); if (LIST_EMPTY(&p->p_pgrp->pg_members)) pgdelete(p->p_pgrp); p->p_pgrp = 0; return (0); } /* * delete a process group */ static void pgdelete(pgrp) register struct pgrp *pgrp; { /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pgid. */ funsetownlst(&pgrp->pg_sigiolst); if (pgrp->pg_session->s_ttyp != NULL && pgrp->pg_session->s_ttyp->t_pgrp == pgrp) pgrp->pg_session->s_ttyp->t_pgrp = NULL; LIST_REMOVE(pgrp, pg_hash); if (--pgrp->pg_session->s_count == 0) FREE(pgrp->pg_session, M_SESSION); FREE(pgrp, M_PGRP); } /* * Adjust pgrp jobc counters when specified process changes process group. * We count the number of processes in each process group that "qualify" * the group for terminal job control (those with a parent in a different * process group of the same session). If that count reaches zero, the * process group becomes orphaned. Check both the specified process' * process group and that of its children. * entering == 0 => p is leaving specified group. * entering == 1 => p is entering specified group. */ void fixjobc(p, pgrp, entering) register struct proc *p; register struct pgrp *pgrp; int entering; { register struct pgrp *hispgrp; register struct session *mysession = pgrp->pg_session; /* * Check p's parent to see whether p qualifies its own process * group; if so, adjust count for p's process group. */ if ((hispgrp = p->p_pptr->p_pgrp) != pgrp && hispgrp->pg_session == mysession) { if (entering) pgrp->pg_jobc++; else if (--pgrp->pg_jobc == 0) orphanpg(pgrp); } /* * Check this process' children to see whether they qualify * their process groups; if so, adjust counts for children's * process groups. */ LIST_FOREACH(p, &p->p_children, p_sibling) if ((hispgrp = p->p_pgrp) != pgrp && hispgrp->pg_session == mysession && p->p_stat != SZOMB) { if (entering) hispgrp->pg_jobc++; else if (--hispgrp->pg_jobc == 0) orphanpg(hispgrp); } } /* * A process group has become orphaned; * if there are any stopped processes in the group, * hang-up all process in that group. */ static void orphanpg(pg) struct pgrp *pg; { register struct proc *p; LIST_FOREACH(p, &pg->pg_members, p_pglist) { if (p->p_stat == SSTOP) { LIST_FOREACH(p, &pg->pg_members, p_pglist) { psignal(p, SIGHUP); psignal(p, SIGCONT); } return; } } } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(pgrpdump, pgrpdump) { register struct pgrp *pgrp; register struct proc *p; register int i; for (i = 0; i <= pgrphash; i++) { if (!LIST_EMPTY(&pgrphashtbl[i])) { printf("\tindx %d\n", i); LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) { printf( "\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n", (void *)pgrp, (long)pgrp->pg_id, (void *)pgrp->pg_session, pgrp->pg_session->s_count, (void *)LIST_FIRST(&pgrp->pg_members)); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { printf("\t\tpid %ld addr %p pgrp %p\n", (long)p->p_pid, (void *)p, (void *)p->p_pgrp); } } } } } #endif /* DDB */ /* * Fill in an eproc structure for the specified process. */ void fill_eproc(p, ep) register struct proc *p; register struct eproc *ep; { register struct tty *tp; bzero(ep, sizeof(*ep)); ep->e_paddr = p; if (p->p_cred) { ep->e_pcred = *p->p_cred; if (p->p_ucred) ep->e_ucred = *p->p_ucred; } if (p->p_procsig) { ep->e_procsig = *p->p_procsig; } if (p->p_stat != SIDL && p->p_stat != SZOMB && p->p_vmspace != NULL) { register struct vmspace *vm = p->p_vmspace; ep->e_vm = *vm; ep->e_vm.vm_rssize = vmspace_resident_count(vm); /*XXX*/ } if ((p->p_flag & P_INMEM) && p->p_stats) ep->e_stats = *p->p_stats; if (p->p_pptr) ep->e_ppid = p->p_pptr->p_pid; if (p->p_pgrp) { ep->e_pgid = p->p_pgrp->pg_id; ep->e_jobc = p->p_pgrp->pg_jobc; ep->e_sess = p->p_pgrp->pg_session; if (ep->e_sess) { bcopy(ep->e_sess->s_login, ep->e_login, sizeof(ep->e_login)); if (ep->e_sess->s_ttyvp) ep->e_flag = EPROC_CTTY; if (p->p_session && SESS_LEADER(p)) ep->e_flag |= EPROC_SLEADER; } } if ((p->p_flag & P_CONTROLT) && (ep->e_sess != NULL) && ((tp = ep->e_sess->s_ttyp) != NULL)) { ep->e_tdev = dev2udev(tp->t_dev); ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; ep->e_tsess = tp->t_session; } else ep->e_tdev = NOUDEV; if (p->p_wmesg) { strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN); ep->e_wmesg[WMESGLEN] = 0; } } static struct proc * zpfind(pid_t pid) { struct proc *p; LIST_FOREACH(p, &zombproc, p_list) if (p->p_pid == pid) return (p); return (NULL); } static int sysctl_out_proc(struct proc *p, struct sysctl_req *req, int doingzomb) { struct eproc eproc; int error; pid_t pid = p->p_pid; fill_eproc(p, &eproc); error = SYSCTL_OUT(req,(caddr_t)p, sizeof(struct proc)); if (error) return (error); error = SYSCTL_OUT(req,(caddr_t)&eproc, sizeof(eproc)); if (error) return (error); if (!doingzomb && pid && (pfind(pid) != p)) return EAGAIN; if (doingzomb && zpfind(pid) != p) return EAGAIN; return (0); } static int -sysctl_kern_proc (SYSCTL_HANDLER_ARGS) +sysctl_kern_proc(SYSCTL_HANDLER_ARGS) { int *name = (int*) arg1; u_int namelen = arg2; struct proc *p; int doingzomb; int error = 0; if (oidp->oid_number == KERN_PROC_PID) { if (namelen != 1) return (EINVAL); p = pfind((pid_t)name[0]); if (!p) return (0); if (!PRISON_CHECK(curproc, p)) return (0); error = sysctl_out_proc(p, req, 0); return (error); } if (oidp->oid_number == KERN_PROC_ALL && !namelen) ; else if (oidp->oid_number != KERN_PROC_ALL && namelen == 1) ; else return (EINVAL); if (!req->oldptr) { /* overestimate by 5 procs */ error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5); if (error) return (error); } for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) { if (!doingzomb) p = LIST_FIRST(&allproc); else p = LIST_FIRST(&zombproc); for (; p != 0; p = LIST_NEXT(p, p_list)) { /* * Skip embryonic processes. */ if (p->p_stat == SIDL) continue; /* * TODO - make more efficient (see notes below). * do by session. */ switch (oidp->oid_number) { case KERN_PROC_PGRP: /* could do this by traversing pgrp */ if (p->p_pgrp == NULL || p->p_pgrp->pg_id != (pid_t)name[0]) continue; break; case KERN_PROC_TTY: if ((p->p_flag & P_CONTROLT) == 0 || p->p_session == NULL || p->p_session->s_ttyp == NULL || dev2udev(p->p_session->s_ttyp->t_dev) != (udev_t)name[0]) continue; break; case KERN_PROC_UID: if (p->p_ucred == NULL || p->p_ucred->cr_uid != (uid_t)name[0]) continue; break; case KERN_PROC_RUID: if (p->p_ucred == NULL || p->p_cred->p_ruid != (uid_t)name[0]) continue; break; } if (!PRISON_CHECK(curproc, p)) continue; error = sysctl_out_proc(p, req, doingzomb); if (error) return (error); } } return (0); } /* * This sysctl allows a process to retrieve the argument list or process * title for another process without groping around in the address space * of the other process. It also allow a process to set its own "process * title to a string of its own choice. */ static int -sysctl_kern_proc_args (SYSCTL_HANDLER_ARGS) +sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS) { int *name = (int*) arg1; u_int namelen = arg2; struct proc *p; struct pargs *pa; int error = 0; if (namelen != 1) return (EINVAL); p = pfind((pid_t)name[0]); if (!p) return (0); if ((!ps_argsopen) && p_trespass(curproc, p)) return (0); if (req->newptr && curproc != p) return (EPERM); if (req->oldptr && p->p_args != NULL) error = SYSCTL_OUT(req, p->p_args->ar_args, p->p_args->ar_length); if (req->newptr == NULL) return (error); if (p->p_args && --p->p_args->ar_ref == 0) FREE(p->p_args, M_PARGS); p->p_args = NULL; if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit) return (error); MALLOC(pa, struct pargs *, sizeof(struct pargs) + req->newlen, M_PARGS, M_WAITOK); pa->ar_ref = 1; pa->ar_length = req->newlen; error = SYSCTL_IN(req, pa->ar_args, req->newlen); if (!error) p->p_args = pa; else FREE(pa, M_PARGS); return (error); } SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT, 0, 0, sysctl_kern_proc, "S,proc", "Return entire process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_ANYBODY, sysctl_kern_proc_args, "Process argument list"); Index: head/sys/kern/kern_shutdown.c =================================================================== --- head/sys/kern/kern_shutdown.c (revision 62572) +++ head/sys/kern/kern_shutdown.c (revision 62573) @@ -1,604 +1,604 @@ /*- * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #include "opt_ddb.h" #include "opt_hw_wdog.h" #include "opt_panic.h" #include "opt_show_busybufs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* smp_active, cpuid */ #include #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. */ #include #ifdef DDB #ifdef DDB_UNATTENDED int debugger_on_panic = 0; #else int debugger_on_panic = 1; #endif SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW, &debugger_on_panic, 0, "Run debugger on kernel panic"); #endif SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment"); #ifdef HW_WDOG /* * If there is a hardware watchdog, point this at the function needed to * hold it off. * It's needed when the kernel needs to do some lengthy operations. * e.g. in wd.c when dumping core.. It's most annoying to have * your precious core-dump only half written because the wdog kicked in. */ watchdog_tickle_fn wdog_tickler = NULL; #endif /* HW_WDOG */ /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. */ const char *panicstr; static void boot __P((int)) __dead2; static void dumpsys __P((void)); static int setdumpdev __P((dev_t dev)); static void poweroff_wait __P((void *, int)); static void print_uptime __P((void)); static void shutdown_halt __P((void *junk, int howto)); static void shutdown_panic __P((void *junk, int howto)); static void shutdown_reset __P((void *junk, int howto)); /* register various local shutdown events */ static void shutdown_conf(void *unused) { EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST); EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200); } SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL) /* ARGSUSED */ /* * The system call that results in a reboot */ int reboot(p, uap) struct proc *p; struct reboot_args *uap; { int error; if ((error = suser(p))) return (error); boot(uap->opt); return (0); } /* * Called by events that want to shut down.. e.g on a PC */ void shutdown_nice() { /* Send a signal to init(8) and have it shutdown the world */ if (initproc != NULL) { psignal(initproc, SIGINT); } else { /* No init(8) running, so simply reboot */ boot(RB_NOSYNC); } return; } static int waittime = -1; static struct pcb dumppcb; static void print_uptime() { int f; struct timespec ts; getnanouptime(&ts); printf("Uptime: "); f = 0; if (ts.tv_sec >= 86400) { printf("%ldd", ts.tv_sec / 86400); ts.tv_sec %= 86400; f = 1; } if (f || ts.tv_sec >= 3600) { printf("%ldh", ts.tv_sec / 3600); ts.tv_sec %= 3600; f = 1; } if (f || ts.tv_sec >= 60) { printf("%ldm", ts.tv_sec / 60); ts.tv_sec %= 60; f = 1; } printf("%lds\n", ts.tv_sec); } /* * Go through the rigmarole of shutting down.. * this used to be in machdep.c but I'll be dammned if I could see * anything machine dependant in it. */ static void boot(howto) int howto; { #ifdef SMP if (smp_active) { printf("boot() called on cpu#%d\n", cpuid); } #endif /* * Do any callouts that should be done BEFORE syncing the filesystems. */ EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); /* * Now sync filesystems */ if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { register struct buf *bp; int iter, nbusy; waittime = 0; printf("\nsyncing disks... "); sync(&proc0, NULL); /* * With soft updates, some buffers that are * written will be remarked as dirty until other * buffers are written. */ for (iter = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if ((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp) > 0) { nbusy++; } else if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { /* bawrite(bp);*/ nbusy++; } } if (nbusy == 0) break; printf("%d ", nbusy); sync(&proc0, NULL); DELAY(50000 * iter); } printf("\n"); /* * Count only busy local buffers to prevent forcing * a fsck if we're just a client of a wedged NFS server */ nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) || ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) { if (bp->b_dev == NODEV) { TAILQ_REMOVE(&mountlist, bp->b_vp->v_mount, mnt_list); continue; } nbusy++; #if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC) printf( "%d: dev:%s, flags:%08lx, blkno:%ld, lblkno:%ld\n", nbusy, devtoname(bp->b_dev), bp->b_flags, (long)bp->b_blkno, (long)bp->b_lblkno); #endif } } if (nbusy) { /* * Failed to sync all blocks. Indicate this and don't * unmount filesystems (thus forcing an fsck on reboot). */ printf("giving up on %d buffers\n", nbusy); DELAY(5000000); /* 5 seconds */ } else { printf("done\n"); /* * Unmount filesystems */ if (panicstr == 0) vfs_unmountall(); } DELAY(100000); /* wait for console output to finish */ } print_uptime(); /* * Ok, now do things that assume all filesystem activity has * been completed. */ EVENTHANDLER_INVOKE(shutdown_post_sync, howto); splhigh(); if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold) { savectx(&dumppcb); #ifdef __i386__ dumppcb.pcb_cr3 = rcr3(); #endif dumpsys(); } /* Now that we're going to really halt the system... */ EVENTHANDLER_INVOKE(shutdown_final, howto); for(;;) ; /* safety against shutdown_reset not working */ /* NOTREACHED */ } /* * If the shutdown was a clean halt, behave accordingly. */ static void shutdown_halt(void *junk, int howto) { if (howto & RB_HALT) { printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); switch (cngetc()) { case -1: /* No console, just die */ cpu_halt(); /* NOTREACHED */ default: howto &= ~RB_HALT; break; } } } /* * Check to see if the system paniced, pause and then reboot * according to the specified delay. */ static void shutdown_panic(void *junk, int howto) { int loop; if (howto & RB_DUMP) { if (PANIC_REBOOT_WAIT_TIME != 0) { if (PANIC_REBOOT_WAIT_TIME != -1) { printf("Automatic reboot in %d seconds - " "press a key on the console to abort\n", PANIC_REBOOT_WAIT_TIME); for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? */ if (cncheckc() != -1) break; } if (!loop) return; } } else { /* zero time specified - reboot NOW */ return; } printf("--> Press a key on the console to reboot <--\n"); cngetc(); } } /* * Everything done, now reset */ static void shutdown_reset(void *junk, int howto) { printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); /* NOTREACHED */ /* assuming reset worked */ } /* * Magic number for savecore * * exported (symorder) and used at least by savecore(8) * */ static u_long const dumpmag = 0x8fca0101UL; static int dumpsize = 0; /* also for savecore */ static int dodump = 1; SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, "Try to perform coredump on kernel panic"); static int setdumpdev(dev) dev_t dev; { int psize; long newdumplo; if (dev == NODEV) { dumpdev = dev; return (0); } if (devsw(dev) == NULL) return (ENXIO); /* XXX is this right? */ if (devsw(dev)->d_psize == NULL) return (ENXIO); /* XXX should be ENODEV ? */ psize = devsw(dev)->d_psize(dev); if (psize == -1) return (ENXIO); /* XXX should be ENODEV ? */ /* * XXX should clean up checking in dumpsys() to be more like this. */ newdumplo = psize - Maxmem * PAGE_SIZE / DEV_BSIZE; if (newdumplo < 0) return (ENOSPC); dumpdev = dev; dumplo = newdumplo; return (0); } /* ARGSUSED */ static void dump_conf __P((void *dummy)); static void dump_conf(dummy) void *dummy; { if (setdumpdev(dumpdev) != 0) dumpdev = NODEV; } SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL) static int -sysctl_kern_dumpdev (SYSCTL_HANDLER_ARGS) +sysctl_kern_dumpdev(SYSCTL_HANDLER_ARGS) { int error; udev_t ndumpdev; ndumpdev = dev2udev(dumpdev); error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req); if (error == 0 && req->newptr != NULL) error = setdumpdev(udev2dev(ndumpdev, 0)); return (error); } SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, sizeof dumpdev, sysctl_kern_dumpdev, "T,dev_t", ""); /* * Doadump comes here after turning off memory management and * getting on the dump stack, either when called above, or by * the auto-restart code. */ static void dumpsys(void) { int error; static int dumping; if (dumping++) { printf("Dump already in progress, bailing...\n"); return; } if (!dodump) return; if (dumpdev == NODEV) return; if (!(devsw(dumpdev))) return; if (!(devsw(dumpdev)->d_dump)) return; dumpsize = Maxmem; printf("\ndumping to dev %s, offset %ld\n", devtoname(dumpdev), dumplo); printf("dump "); error = (*devsw(dumpdev)->d_dump)(dumpdev); if (error == 0) { printf("succeeded\n"); return; } printf("failed, reason: "); switch (error) { case ENODEV: printf("device doesn't support a dump routine\n"); break; case ENXIO: printf("device bad\n"); break; case EFAULT: printf("device not ready\n"); break; case EINVAL: printf("area improper\n"); break; case EIO: printf("i/o error\n"); break; case EINTR: printf("aborted from console\n"); break; default: printf("unknown, error = %d\n", error); break; } } /* * Panic is called on unresolvable fatal errors. It prints "panic: mesg", * and then reboots. If we are called twice, then we avoid trying to sync * the disks as this often leads to recursive panics. */ void panic(const char *fmt, ...) { int bootopt; va_list ap; static char buf[256]; bootopt = RB_AUTOBOOT | RB_DUMP; if (panicstr) bootopt |= RB_NOSYNC; else panicstr = fmt; va_start(ap, fmt); (void)vsnprintf(buf, sizeof(buf), fmt, ap); if (panicstr == fmt) panicstr = buf; va_end(ap); printf("panic: %s\n", buf); #ifdef SMP /* three seperate prints in case of an unmapped page and trap */ printf("mp_lock = %08x; ", mp_lock); printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif #if defined(DDB) if (debugger_on_panic) Debugger ("panic"); #endif boot(bootopt); } /* * Support for poweroff delay. */ #ifndef POWEROFF_DELAY # define POWEROFF_DELAY 5000 #endif static int poweroff_delay = POWEROFF_DELAY; SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, &poweroff_delay, 0, ""); static void poweroff_wait(void *junk, int howto) { if(!(howto & RB_POWEROFF) || poweroff_delay <= 0) return; DELAY(poweroff_delay * 1000); } /* * Some system processes (e.g. syncer) need to be stopped at appropriate * points in their main loops prior to a system shutdown, so that they * won't interfere with the shutdown process (e.g. by holding a disk buf * to cause sync to fail). For each of these system processes, register * shutdown_kproc() as a handler for one of shutdown events. */ static int kproc_shutdown_wait = 60; SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, &kproc_shutdown_wait, 0, ""); void shutdown_kproc(void *arg, int howto) { struct proc *p; int error; if (panicstr) return; p = (struct proc *)arg; printf("Waiting (max %d seconds) for system process `%s' to stop...", kproc_shutdown_wait, p->p_comm); error = suspend_kproc(p, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("stopped\n"); } Index: head/sys/kern/kern_synch.c =================================================================== --- head/sys/kern/kern_synch.c (revision 62572) +++ head/sys/kern/kern_synch.c (revision 62573) @@ -1,962 +1,962 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 * $FreeBSD$ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #include #include #include static void sched_setup __P((void *dummy)); SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) u_char curpriority; int hogticks; int lbolt; int sched_quantum; /* Roundrobin scheduling quantum in ticks. */ static int curpriority_cmp __P((struct proc *p)); static void endtsleep __P((void *)); static void maybe_resched __P((struct proc *chk)); static void roundrobin __P((void *arg)); static void schedcpu __P((void *arg)); static void updatepri __P((struct proc *p)); static int -sysctl_kern_quantum (SYSCTL_HANDLER_ARGS) +sysctl_kern_quantum(SYSCTL_HANDLER_ARGS) { int error, new_val; new_val = sched_quantum * tick; error = sysctl_handle_int(oidp, &new_val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (new_val < tick) return (EINVAL); sched_quantum = new_val / tick; hogticks = 2 * sched_quantum; return (0); } SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof sched_quantum, sysctl_kern_quantum, "I", ""); /*- * Compare priorities. Return: * <0: priority of p < current priority * 0: priority of p == current priority * >0: priority of p > current priority * The priorities are the normal priorities or the normal realtime priorities * if p is on the same scheduler as curproc. Otherwise the process on the * more realtimeish scheduler has lowest priority. As usual, a higher * priority really means a lower priority. */ static int curpriority_cmp(p) struct proc *p; { int c_class, p_class; c_class = RTP_PRIO_BASE(curproc->p_rtprio.type); p_class = RTP_PRIO_BASE(p->p_rtprio.type); if (p_class != c_class) return (p_class - c_class); if (p_class == RTP_PRIO_NORMAL) return (((int)p->p_priority - (int)curpriority) / PPQ); return ((int)p->p_rtprio.prio - (int)curproc->p_rtprio.prio); } /* * Arrange to reschedule if necessary, taking the priorities and * schedulers into account. */ static void maybe_resched(chk) struct proc *chk; { struct proc *p = curproc; /* XXX */ /* * XXX idle scheduler still broken because proccess stays on idle * scheduler during waits (such as when getting FS locks). If a * standard process becomes runaway cpu-bound, the system can lockup * due to idle-scheduler processes in wakeup never getting any cpu. */ if (p == NULL) { #if 0 need_resched(); #endif } else if (chk == p) { /* We may need to yield if our priority has been raised. */ if (curpriority_cmp(chk) > 0) need_resched(); } else if (curpriority_cmp(chk) < 0) need_resched(); } int roundrobin_interval(void) { return (sched_quantum); } /* * Force switch among equal priority processes every 100ms. */ /* ARGSUSED */ static void roundrobin(arg) void *arg; { #ifndef SMP struct proc *p = curproc; /* XXX */ #endif #ifdef SMP need_resched(); forward_roundrobin(); #else if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type)) need_resched(); #endif timeout(roundrobin, NULL, sched_quantum); } /* * Constants for digital decay and forget: * 90% of (p_estcpu) usage in 5 * loadav time * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) * Note that, as ps(1) mentions, this can let percentages * total over 100% (I've seen 137.9% for 3 processes). * * Note that schedclock() updates p_estcpu and p_cpticks asynchronously. * * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. * That is, the system wants to compute a value of decay such * that the following for loop: * for (i = 0; i < (5 * loadavg); i++) * p_estcpu *= decay; * will compute * p_estcpu *= 0.1; * for all values of loadavg: * * Mathematically this loop can be expressed by saying: * decay ** (5 * loadavg) ~= .1 * * The system computes decay as: * decay = (2 * loadavg) / (2 * loadavg + 1) * * We wish to prove that the system's computation of decay * will always fulfill the equation: * decay ** (5 * loadavg) ~= .1 * * If we compute b as: * b = 2 * loadavg * then * decay = b / (b + 1) * * We now need to prove two things: * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) * * Facts: * For x close to zero, exp(x) =~ 1 + x, since * exp(x) = 0! + x**1/1! + x**2/2! + ... . * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. * For x close to zero, ln(1+x) =~ x, since * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). * ln(.1) =~ -2.30 * * Proof of (1): * Solve (factor)**(power) =~ .1 given power (5*loadav): * solving for factor, * ln(factor) =~ (-2.30/5*loadav), or * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED * * Proof of (2): * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): * solving for power, * power*ln(b/(b+1)) =~ -2.30, or * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED * * Actual power values for the implemented algorithm are as follows: * loadav: 1 2 3 4 * power: 5.68 10.32 14.94 19.55 */ /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ #define loadfactor(loadav) (2 * (loadav)) #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ static int fscale __unused = FSCALE; SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); /* * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). * * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). * * If you don't want to bother with the faster/more-accurate formula, you * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate * (more general) method of calculating the %age of CPU used by a process. */ #define CCPU_SHIFT 11 /* * Recompute process priorities, every hz ticks. */ /* ARGSUSED */ static void schedcpu(arg) void *arg; { register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); register struct proc *p; register int realstathz, s; realstathz = stathz ? stathz : hz; LIST_FOREACH(p, &allproc, p_list) { /* * Increment time in/out of memory and sleep time * (if sleeping). We ignore overflow; with 16-bit int's * (remember them?) overflow takes 45 days. */ p->p_swtime++; if (p->p_stat == SSLEEP || p->p_stat == SSTOP) p->p_slptime++; p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; /* * If the process has slept the entire second, * stop recalculating its priority until it wakes up. */ if (p->p_slptime > 1) continue; s = splhigh(); /* prevent state changes and protect run queue */ /* * p_pctcpu is only for ps. */ #if (FSHIFT >= CCPU_SHIFT) p->p_pctcpu += (realstathz == 100)? ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 100 * (((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT)) / realstathz; #else p->p_pctcpu += ((FSCALE - ccpu) * (p->p_cpticks * FSCALE / realstathz)) >> FSHIFT; #endif p->p_cpticks = 0; p->p_estcpu = decay_cpu(loadfac, p->p_estcpu); resetpriority(p); if (p->p_priority >= PUSER) { if ((p != curproc) && #ifdef SMP p->p_oncpu == 0xff && /* idle */ #endif p->p_stat == SRUN && (p->p_flag & P_INMEM) && (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { remrunqueue(p); p->p_priority = p->p_usrpri; setrunqueue(p); } else p->p_priority = p->p_usrpri; } splx(s); } vmmeter(); wakeup((caddr_t)&lbolt); timeout(schedcpu, (void *)0, hz); } /* * Recalculate the priority of a process after it has slept for a while. * For all load averages >= 1 and max p_estcpu of 255, sleeping for at * least six times the loadfactor will decay p_estcpu to zero. */ static void updatepri(p) register struct proc *p; { register unsigned int newcpu = p->p_estcpu; register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); if (p->p_slptime > 5 * loadfac) p->p_estcpu = 0; else { p->p_slptime--; /* the first time was done in schedcpu */ while (newcpu && --p->p_slptime) newcpu = decay_cpu(loadfac, newcpu); p->p_estcpu = newcpu; } resetpriority(p); } /* * We're only looking at 7 bits of the address; everything is * aligned to 4, lots of things are aligned to greater powers * of 2. Shift right by 8, i.e. drop the bottom 256 worth. */ #define TABLESIZE 128 static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE]; #define LOOKUP(x) (((intptr_t)(x) >> 8) & (TABLESIZE - 1)) /* * During autoconfiguration or after a panic, a sleep will simply * lower the priority briefly to allow interrupts, then return. * The priority to be used (safepri) is machine-dependent, thus this * value is initialized and maintained in the machine-dependent layers. * This priority will typically be 0, or the lowest priority * that is safe for use on the interrupt stack; it can be made * higher to block network software interrupts after panics. */ int safepri; void sleepinit(void) { int i; sched_quantum = hz/10; hogticks = 2 * sched_quantum; for (i = 0; i < TABLESIZE; i++) TAILQ_INIT(&slpque[i]); } /* * General sleep call. Suspends the current process until a wakeup is * performed on the specified identifier. The process will then be made * runnable with the specified priority. Sleeps at most timo/hz seconds * (0 means no timeout). If pri includes PCATCH flag, signals are checked * before and after sleeping, else signals are not checked. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal needs to be delivered, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). */ int tsleep(ident, priority, wmesg, timo) void *ident; int priority, timo; const char *wmesg; { struct proc *p = curproc; int s, sig, catch = priority & PCATCH; struct callout_handle thandle; #ifdef KTRACE if (p && KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 1, 0); #endif s = splhigh(); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, * just give interrupts a chance, then just return; * don't run any other procs or panic below, * in case this is the idle process and already asleep. */ splx(safepri); splx(s); return (0); } KASSERT(p != NULL, ("tsleep1")); KASSERT(ident != NULL && p->p_stat == SRUN, ("tsleep")); /* * Process may be sitting on a slpque if asleep() was called, remove * it before re-adding. */ if (p->p_wchan != NULL) unsleep(p); p->p_wchan = ident; p->p_wmesg = wmesg; p->p_slptime = 0; p->p_priority = priority & PRIMASK; TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq); if (timo) thandle = timeout(endtsleep, (void *)p, timo); /* * We put ourselves on the sleep queue and start our timeout * before calling CURSIG, as we could stop there, and a wakeup * or a SIGCONT (or both) could occur while we were stopped. * A SIGCONT would cause us to be marked as SSLEEP * without resuming us, thus we must be ready for sleep * when CURSIG is called. If the wakeup happens while we're * stopped, p->p_wchan will be 0 upon return from CURSIG. */ if (catch) { p->p_flag |= P_SINTR; if ((sig = CURSIG(p))) { if (p->p_wchan) unsleep(p); p->p_stat = SRUN; goto resume; } if (p->p_wchan == 0) { catch = 0; goto resume; } } else sig = 0; p->p_stat = SSLEEP; p->p_stats->p_ru.ru_nvcsw++; mi_switch(); resume: curpriority = p->p_usrpri; splx(s); p->p_flag &= ~P_SINTR; if (p->p_flag & P_TIMEOUT) { p->p_flag &= ~P_TIMEOUT; if (sig == 0) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (EWOULDBLOCK); } } else if (timo) untimeout(endtsleep, (void *)p, thandle); if (catch && (sig != 0 || (sig = CURSIG(p)))) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) return (EINTR); return (ERESTART); } #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (0); } /* * asleep() - async sleep call. Place process on wait queue and return * immediately without blocking. The process stays runnable until await() * is called. If ident is NULL, remove process from wait queue if it is still * on one. * * Only the most recent sleep condition is effective when making successive * calls to asleep() or when calling tsleep(). * * The timeout, if any, is not initiated until await() is called. The sleep * priority, signal, and timeout is specified in the asleep() call but may be * overriden in the await() call. * * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>> */ int asleep(void *ident, int priority, const char *wmesg, int timo) { struct proc *p = curproc; int s; /* * splhigh() while manipulating sleep structures and slpque. * * Remove preexisting wait condition (if any) and place process * on appropriate slpque, but do not put process to sleep. */ s = splhigh(); if (p->p_wchan != NULL) unsleep(p); if (ident) { p->p_wchan = ident; p->p_wmesg = wmesg; p->p_slptime = 0; p->p_asleep.as_priority = priority; p->p_asleep.as_timo = timo; TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq); } splx(s); return(0); } /* * await() - wait for async condition to occur. The process blocks until * wakeup() is called on the most recent asleep() address. If wakeup is called * priority to await(), await() winds up being a NOP. * * If await() is called more then once (without an intervening asleep() call), * await() is still effectively a NOP but it calls mi_switch() to give other * processes some cpu before returning. The process is left runnable. * * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>> */ int await(int priority, int timo) { struct proc *p = curproc; int s; s = splhigh(); if (p->p_wchan != NULL) { struct callout_handle thandle; int sig; int catch; /* * The call to await() can override defaults specified in * the original asleep(). */ if (priority < 0) priority = p->p_asleep.as_priority; if (timo < 0) timo = p->p_asleep.as_timo; /* * Install timeout */ if (timo) thandle = timeout(endtsleep, (void *)p, timo); sig = 0; catch = priority & PCATCH; if (catch) { p->p_flag |= P_SINTR; if ((sig = CURSIG(p))) { if (p->p_wchan) unsleep(p); p->p_stat = SRUN; goto resume; } if (p->p_wchan == NULL) { catch = 0; goto resume; } } p->p_stat = SSLEEP; p->p_stats->p_ru.ru_nvcsw++; mi_switch(); resume: curpriority = p->p_usrpri; splx(s); p->p_flag &= ~P_SINTR; if (p->p_flag & P_TIMEOUT) { p->p_flag &= ~P_TIMEOUT; if (sig == 0) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (EWOULDBLOCK); } } else if (timo) untimeout(endtsleep, (void *)p, thandle); if (catch && (sig != 0 || (sig = CURSIG(p)))) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) return (EINTR); return (ERESTART); } #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif } else { /* * If as_priority is 0, await() has been called without an * intervening asleep(). We are still effectively a NOP, * but we call mi_switch() for safety. */ if (p->p_asleep.as_priority == 0) { p->p_stats->p_ru.ru_nvcsw++; mi_switch(); } splx(s); } /* * clear p_asleep.as_priority as an indication that await() has been * called. If await() is called again without an intervening asleep(), * await() is still effectively a NOP but the above mi_switch() code * is triggered as a safety. */ p->p_asleep.as_priority = 0; return (0); } /* * Implement timeout for tsleep or asleep()/await() * * If process hasn't been awakened (wchan non-zero), * set timeout flag and undo the sleep. If proc * is stopped, just unsleep so it will remain stopped. */ static void endtsleep(arg) void *arg; { register struct proc *p; int s; p = (struct proc *)arg; s = splhigh(); if (p->p_wchan) { if (p->p_stat == SSLEEP) setrunnable(p); else unsleep(p); p->p_flag |= P_TIMEOUT; } splx(s); } /* * Remove a process from its wait queue */ void unsleep(p) register struct proc *p; { int s; s = splhigh(); if (p->p_wchan) { TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq); p->p_wchan = 0; } splx(s); } /* * Make all processes sleeping on the specified identifier runnable. */ void wakeup(ident) register void *ident; { register struct slpquehead *qp; register struct proc *p; int s; s = splhigh(); qp = &slpque[LOOKUP(ident)]; restart: TAILQ_FOREACH(p, qp, p_procq) { if (p->p_wchan == ident) { TAILQ_REMOVE(qp, p, p_procq); p->p_wchan = 0; if (p->p_stat == SSLEEP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; p->p_stat = SRUN; if (p->p_flag & P_INMEM) { setrunqueue(p); maybe_resched(p); } else { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ goto restart; } } } splx(s); } /* * Make a process sleeping on the specified identifier runnable. * May wake more than one process if a target process is currently * swapped out. */ void wakeup_one(ident) register void *ident; { register struct slpquehead *qp; register struct proc *p; int s; s = splhigh(); qp = &slpque[LOOKUP(ident)]; TAILQ_FOREACH(p, qp, p_procq) { if (p->p_wchan == ident) { TAILQ_REMOVE(qp, p, p_procq); p->p_wchan = 0; if (p->p_stat == SSLEEP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; p->p_stat = SRUN; if (p->p_flag & P_INMEM) { setrunqueue(p); maybe_resched(p); break; } else { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ } } } splx(s); } /* * The machine independent parts of mi_switch(). * Must be called at splstatclock() or higher. */ void mi_switch() { struct timeval new_switchtime; register struct proc *p = curproc; /* XXX */ register struct rlimit *rlim; int x; /* * XXX this spl is almost unnecessary. It is partly to allow for * sloppy callers that don't do it (issignal() via CURSIG() is the * main offender). It is partly to work around a bug in the i386 * cpu_switch() (the ipl is not preserved). We ran for years * without it. I think there was only a interrupt latency problem. * The main caller, tsleep(), does an splx() a couple of instructions * after calling here. The buggy caller, issignal(), usually calls * here at spl0() and sometimes returns at splhigh(). The process * then runs for a little too long at splhigh(). The ipl gets fixed * when the process returns to user mode (or earlier). * * It would probably be better to always call here at spl0(). Callers * are prepared to give up control to another process, so they must * be prepared to be interrupted. The clock stuff here may not * actually need splstatclock(). */ x = splstatclock(); #ifdef SIMPLELOCK_DEBUG if (p->p_simple_locks) printf("sleep: holding simple lock\n"); #endif /* * Compute the amount of time during which the current * process was running, and add that to its total so far. */ microuptime(&new_switchtime); if (timevalcmp(&new_switchtime, &switchtime, <)) { printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n", switchtime.tv_sec, switchtime.tv_usec, new_switchtime.tv_sec, new_switchtime.tv_usec); new_switchtime = switchtime; } else { p->p_runtime += (new_switchtime.tv_usec - switchtime.tv_usec) + (new_switchtime.tv_sec - switchtime.tv_sec) * (int64_t)1000000; } /* * Check if the process exceeds its cpu resource allocation. * If over max, kill it. */ if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY && p->p_runtime > p->p_limit->p_cpulimit) { rlim = &p->p_rlimit[RLIMIT_CPU]; if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) { killproc(p, "exceeded maximum CPU limit"); } else { psignal(p, SIGXCPU); if (rlim->rlim_cur < rlim->rlim_max) { /* XXX: we should make a private copy */ rlim->rlim_cur += 5; } } } /* * Pick a new current process and record its start time. */ cnt.v_swtch++; switchtime = new_switchtime; cpu_switch(p); if (switchtime.tv_sec == 0) microuptime(&switchtime); switchticks = ticks; splx(x); } /* * Change process state to be runnable, * placing it on the run queue if it is in memory, * and awakening the swapper if it isn't in memory. */ void setrunnable(p) register struct proc *p; { register int s; s = splhigh(); switch (p->p_stat) { case 0: case SRUN: case SZOMB: default: panic("setrunnable"); case SSTOP: case SSLEEP: unsleep(p); /* e.g. when sending signals */ break; case SIDL: break; } p->p_stat = SRUN; if (p->p_flag & P_INMEM) setrunqueue(p); splx(s); if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; if ((p->p_flag & P_INMEM) == 0) { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } else maybe_resched(p); } /* * Compute the priority of a process when running in user mode. * Arrange to reschedule if the resulting priority is better * than that of the current process. */ void resetpriority(p) register struct proc *p; { register unsigned int newpriority; if (p->p_rtprio.type == RTP_PRIO_NORMAL) { newpriority = PUSER + p->p_estcpu / INVERSE_ESTCPU_WEIGHT + NICE_WEIGHT * (p->p_nice - PRIO_MIN); newpriority = min(newpriority, MAXPRI); p->p_usrpri = newpriority; } maybe_resched(p); } /* ARGSUSED */ static void sched_setup(dummy) void *dummy; { /* Kick off timeout driven events by calling first time. */ roundrobin(NULL); schedcpu(NULL); } /* * We adjust the priority of the current process. The priority of * a process gets worse as it accumulates CPU time. The cpu usage * estimator (p_estcpu) is increased here. resetpriority() will * compute a different priority each time p_estcpu increases by * INVERSE_ESTCPU_WEIGHT * (until MAXPRI is reached). The cpu usage estimator ramps up * quite quickly when the process is running (linearly), and decays * away exponentially, at a rate which is proportionally slower when * the system is busy. The basic principle is that the system will * 90% forget that the process used a lot of CPU time in 5 * loadav * seconds. This causes the system to favor processes which haven't * run much recently, and to round-robin among other processes. */ void schedclock(p) struct proc *p; { p->p_cpticks++; p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { resetpriority(p); if (p->p_priority >= PUSER) p->p_priority = p->p_usrpri; } } Index: head/sys/kern/kern_sysctl.c =================================================================== --- head/sys/kern/kern_sysctl.c (revision 62572) +++ head/sys/kern/kern_sysctl.c (revision 62573) @@ -1,1102 +1,1102 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD * project, to make these variables more userfriendly. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 * $FreeBSD$ */ #include "opt_compat.h" #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); /* * Locking and stats */ static struct sysctl_lock { int sl_lock; int sl_want; int sl_locked; } memlock; -static int sysctl_root (SYSCTL_HANDLER_ARGS); +static int sysctl_root(SYSCTL_HANDLER_ARGS); struct sysctl_oid_list sysctl__children; /* root list */ /* * Initialization of the MIB tree. * * Order by number in each list. */ void sysctl_register_oid(struct sysctl_oid *oidp) { struct sysctl_oid_list *parent = oidp->oid_parent; struct sysctl_oid *p; struct sysctl_oid *q; int n; /* * If this oid has a number OID_AUTO, give it a number which * is greater than any current oid. Make sure it is at least * 100 to leave space for pre-assigned oid numbers. */ if (oidp->oid_number == OID_AUTO) { /* First, find the highest oid in the parent list >99 */ n = 99; SLIST_FOREACH(p, parent, oid_link) { if (p->oid_number > n) n = p->oid_number; } oidp->oid_number = n + 1; } /* * Insert the oid into the parent's list in order. */ q = NULL; SLIST_FOREACH(p, parent, oid_link) { if (oidp->oid_number < p->oid_number) break; q = p; } if (q) SLIST_INSERT_AFTER(q, oidp, oid_link); else SLIST_INSERT_HEAD(parent, oidp, oid_link); } void sysctl_unregister_oid(struct sysctl_oid *oidp) { SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link); } /* * Bulk-register all the oids in a linker_set. */ void sysctl_register_set(struct linker_set *lsp) { int count = lsp->ls_length; int i; for (i = 0; i < count; i++) sysctl_register_oid((struct sysctl_oid *) lsp->ls_items[i]); } void sysctl_unregister_set(struct linker_set *lsp) { int count = lsp->ls_length; int i; for (i = 0; i < count; i++) sysctl_unregister_oid((struct sysctl_oid *) lsp->ls_items[i]); } /* * Register the kernel's oids on startup. */ extern struct linker_set sysctl_set; static void sysctl_register_all(void *arg) { sysctl_register_set(&sysctl_set); } SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0); /* * "Staff-functions" * * These functions implement a presently undocumented interface * used by the sysctl program to walk the tree, and get the type * so it can print the value. * This interface is under work and consideration, and should probably * be killed with a big axe by the first person who can find the time. * (be aware though, that the proper interface isn't as obvious as it * may seem, there are various conflicting requirements. * * {0,0} printf the entire MIB-tree. * {0,1,...} return the name of the "..." OID. * {0,2,...} return the next OID. * {0,3} return the OID of the name in "new" * {0,4,...} return the kind & format info for the "..." OID. */ static void sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) { int k; struct sysctl_oid *oidp; SLIST_FOREACH(oidp, l, oid_link) { for (k=0; koid_number, oidp->oid_name); printf("%c%c", oidp->oid_kind & CTLFLAG_RD ? 'R':' ', oidp->oid_kind & CTLFLAG_WR ? 'W':' '); if (oidp->oid_handler) printf(" *Handler"); switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_NODE: printf(" Node\n"); if (!oidp->oid_handler) { sysctl_sysctl_debug_dump_node( oidp->oid_arg1, i+2); } break; case CTLTYPE_INT: printf(" Int\n"); break; case CTLTYPE_STRING: printf(" String\n"); break; case CTLTYPE_QUAD: printf(" Quad\n"); break; case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; default: printf("\n"); } } } static int -sysctl_sysctl_debug (SYSCTL_HANDLER_ARGS) +sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) { sysctl_sysctl_debug_dump_node(&sysctl__children, 0); return ENOENT; } SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_sysctl_debug, "-", ""); static int -sysctl_sysctl_name (SYSCTL_HANDLER_ARGS) +sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; int error = 0; struct sysctl_oid *oid; struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; char buf[10]; while (namelen) { if (!lsp) { snprintf(buf,sizeof(buf),"%d",*name); if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) error = SYSCTL_OUT(req, buf, strlen(buf)); if (error) return (error); namelen--; name++; continue; } lsp2 = 0; SLIST_FOREACH(oid, lsp, oid_link) { if (oid->oid_number != *name) continue; if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) error = SYSCTL_OUT(req, oid->oid_name, strlen(oid->oid_name)); if (error) return (error); namelen--; name++; if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) break; if (oid->oid_handler) break; lsp2 = (struct sysctl_oid_list *)oid->oid_arg1; break; } lsp = lsp2; } return (SYSCTL_OUT(req, "", 1)); } SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, ""); static int sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, int *name, u_int namelen, int *next, int *len, int level, struct sysctl_oid **oidpp) { struct sysctl_oid *oidp; *len = level; SLIST_FOREACH(oidp, lsp, oid_link) { *next = oidp->oid_number; *oidpp = oidp; if (!namelen) { if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return 0; if (oidp->oid_handler) /* We really should call the handler here...*/ return 0; lsp = (struct sysctl_oid_list *)oidp->oid_arg1; if (!sysctl_sysctl_next_ls (lsp, 0, 0, next+1, len, level+1, oidpp)) return 0; goto next; } if (oidp->oid_number < *name) continue; if (oidp->oid_number > *name) { if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return 0; if (oidp->oid_handler) return 0; lsp = (struct sysctl_oid_list *)oidp->oid_arg1; if (!sysctl_sysctl_next_ls (lsp, name+1, namelen-1, next+1, len, level+1, oidpp)) return (0); goto next; } if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) continue; if (oidp->oid_handler) continue; lsp = (struct sysctl_oid_list *)oidp->oid_arg1; if (!sysctl_sysctl_next_ls (lsp, name+1, namelen-1, next+1, len, level+1, oidpp)) return (0); next: namelen = 1; *len = level; } return 1; } static int -sysctl_sysctl_next (SYSCTL_HANDLER_ARGS) +sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; int i, j, error; struct sysctl_oid *oid; struct sysctl_oid_list *lsp = &sysctl__children; int newoid[CTL_MAXNAME]; i = sysctl_sysctl_next_ls (lsp, name, namelen, newoid, &j, 1, &oid); if (i) return ENOENT; error = SYSCTL_OUT(req, newoid, j * sizeof (int)); return (error); } SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, ""); static int name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp) { int i; struct sysctl_oid *oidp; struct sysctl_oid_list *lsp = &sysctl__children; char *p; if (!*name) return ENOENT; p = name + strlen(name) - 1 ; if (*p == '.') *p = '\0'; *len = 0; for (p = name; *p && *p != '.'; p++) ; i = *p; if (i == '.') *p = '\0'; oidp = SLIST_FIRST(lsp); while (oidp && *len < CTL_MAXNAME) { if (strcmp(name, oidp->oid_name)) { oidp = SLIST_NEXT(oidp, oid_link); continue; } *oid++ = oidp->oid_number; (*len)++; if (!i) { if (oidpp) *oidpp = oidp; return (0); } if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) break; if (oidp->oid_handler) break; lsp = (struct sysctl_oid_list *)oidp->oid_arg1; oidp = SLIST_FIRST(lsp); name = p+1; for (p = name; *p && *p != '.'; p++) ; i = *p; if (i == '.') *p = '\0'; } return ENOENT; } static int -sysctl_sysctl_name2oid (SYSCTL_HANDLER_ARGS) +sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) { char *p; int error, oid[CTL_MAXNAME], len; struct sysctl_oid *op = 0; if (!req->newlen) return ENOENT; if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ return (ENAMETOOLONG); p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); error = SYSCTL_IN(req, p, req->newlen); if (error) { free(p, M_SYSCTL); return (error); } p [req->newlen] = '\0'; error = name2oid(p, oid, &len, &op); free(p, M_SYSCTL); if (error) return (error); error = SYSCTL_OUT(req, oid, len * sizeof *oid); return (error); } SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY, 0, 0, sysctl_sysctl_name2oid, "I", ""); static int -sysctl_sysctl_oidfmt (SYSCTL_HANDLER_ARGS) +sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; int error; error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) return (error); if (!oid->oid_fmt) return (ENOENT); error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); if (error) return (error); error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); return (error); } SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD, sysctl_sysctl_oidfmt, ""); /* * Default "handler" functions. */ /* * Handle an int, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int -sysctl_handle_int (SYSCTL_HANDLER_ARGS) +sysctl_handle_int(SYSCTL_HANDLER_ARGS) { int error = 0; if (arg1) error = SYSCTL_OUT(req, arg1, sizeof(int)); else error = SYSCTL_OUT(req, &arg2, sizeof(int)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(int)); return (error); } /* * Handle a long, signed or unsigned. arg1 points to it. */ int -sysctl_handle_long (SYSCTL_HANDLER_ARGS) +sysctl_handle_long(SYSCTL_HANDLER_ARGS) { int error = 0; if (!arg1) return (EINVAL); error = SYSCTL_OUT(req, arg1, sizeof(long)); if (error || !req->newptr) return (error); error = SYSCTL_IN(req, arg1, sizeof(long)); return (error); } /* * Handle our generic '\0' terminated 'C' string. * Two cases: * a variable string: point arg1 at it, arg2 is max length. * a constant string: point arg1 at it, arg2 is zero. */ int -sysctl_handle_string (SYSCTL_HANDLER_ARGS) +sysctl_handle_string(SYSCTL_HANDLER_ARGS) { int error=0; error = SYSCTL_OUT(req, arg1, strlen((char *)arg1)+1); if (error || !req->newptr) return (error); if ((req->newlen - req->newidx) >= arg2) { error = EINVAL; } else { arg2 = (req->newlen - req->newidx); error = SYSCTL_IN(req, arg1, arg2); ((char *)arg1)[arg2] = '\0'; } return (error); } /* * Handle any kind of opaque data. * arg1 points to it, arg2 is the size. */ int -sysctl_handle_opaque (SYSCTL_HANDLER_ARGS) +sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) { int error; error = SYSCTL_OUT(req, arg1, arg2); if (error || !req->newptr) return (error); error = SYSCTL_IN(req, arg1, arg2); return (error); } /* * Transfer functions to/from kernel space. * XXX: rather untested at this point */ static int sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) { size_t i = 0; if (req->oldptr) { i = l; if (i > req->oldlen - req->oldidx) i = req->oldlen - req->oldidx; if (i > 0) bcopy(p, (char *)req->oldptr + req->oldidx, i); } req->oldidx += l; if (req->oldptr && i != l) return (ENOMEM); return (0); } static int sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) { if (!req->newptr) return 0; if (req->newlen - req->newidx < l) return (EINVAL); bcopy((char *)req->newptr + req->newidx, p, l); req->newidx += l; return (0); } int kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval) { int error = 0; struct sysctl_req req; bzero(&req, sizeof req); req.p = p; if (oldlenp) { req.oldlen = *oldlenp; } if (old) { req.oldptr= old; } if (newlen) { req.newlen = newlen; req.newptr = new; } req.oldfunc = sysctl_old_kernel; req.newfunc = sysctl_new_kernel; req.lock = 1; /* XXX this should probably be done in a general way */ while (memlock.sl_lock) { memlock.sl_want = 1; (void) tsleep((caddr_t)&memlock, PRIBIO+1, "sysctl", 0); memlock.sl_locked++; } memlock.sl_lock = 1; error = sysctl_root(0, name, namelen, &req); if (req.lock == 2) vsunlock(req.oldptr, req.oldlen); memlock.sl_lock = 0; if (memlock.sl_want) { memlock.sl_want = 0; wakeup((caddr_t)&memlock); } if (error && error != ENOMEM) return (error); if (retval) { if (req.oldptr && req.oldidx > req.oldlen) *retval = req.oldlen; else *retval = req.oldidx; } return (error); } /* * Transfer function to/from user space. */ static int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) { int error = 0; size_t i = 0; if (req->lock == 1 && req->oldptr) { vslock(req->oldptr, req->oldlen); req->lock = 2; } if (req->oldptr) { i = l; if (i > req->oldlen - req->oldidx) i = req->oldlen - req->oldidx; if (i > 0) error = copyout(p, (char *)req->oldptr + req->oldidx, i); } req->oldidx += l; if (error) return (error); if (req->oldptr && i < l) return (ENOMEM); return (0); } static int sysctl_new_user(struct sysctl_req *req, void *p, size_t l) { int error; if (!req->newptr) return 0; if (req->newlen - req->newidx < l) return (EINVAL); error = copyin((char *)req->newptr + req->newidx, p, l); req->newidx += l; return (error); } int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req) { struct sysctl_oid *oid; int indx; oid = SLIST_FIRST(&sysctl__children); indx = 0; while (oid && indx < CTL_MAXNAME) { if (oid->oid_number == name[indx]) { indx++; if (oid->oid_kind & CTLFLAG_NOLOCK) req->lock = 0; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oid->oid_handler != NULL || indx == namelen) { *noid = oid; if (nindx != NULL) *nindx = indx; return (0); } oid = SLIST_FIRST( (struct sysctl_oid_list *)oid->oid_arg1); } else if (indx == namelen) { *noid = oid; if (nindx != NULL) *nindx = indx; return (0); } else { return (ENOTDIR); } } else { oid = SLIST_NEXT(oid, oid_link); } } return (ENOENT); } /* * Traverse our tree, and find the right node, execute whatever it points * to, and return the resulting error code. */ int -sysctl_root (SYSCTL_HANDLER_ARGS) +sysctl_root(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; int error, indx; error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); if (error) return (error); if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { /* * You can't call a sysctl when it's a node, but has * no handler. Inform the user that it's a node. * The indx may or may not be the same as namelen. */ if (oid->oid_handler == NULL) return (EISDIR); } /* If writing isn't allowed */ if (req->newptr && (!(oid->oid_kind & CTLFLAG_WR) || ((oid->oid_kind & CTLFLAG_SECURE) && securelevel > 0))) return (EPERM); /* Most likely only root can write */ if (!(oid->oid_kind & CTLFLAG_ANYBODY) && req->newptr && req->p && (error = suser_xxx(0, req->p, (oid->oid_kind & CTLFLAG_PRISON) ? PRISON_ROOT : 0))) return (error); if (!oid->oid_handler) return EINVAL; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) error = oid->oid_handler(oid, (int *)arg1 + indx, arg2 - indx, req); else error = oid->oid_handler(oid, oid->oid_arg1, oid->oid_arg2, req); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sysctl_args { int *name; u_int namelen; void *old; size_t *oldlenp; void *new; size_t newlen; }; #endif int __sysctl(struct proc *p, struct sysctl_args *uap) { int error, i, name[CTL_MAXNAME]; size_t j; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, &name, uap->namelen * sizeof(int)); if (error) return (error); error = userland_sysctl(p, name, uap->namelen, uap->old, uap->oldlenp, 0, uap->new, uap->newlen, &j); if (error && error != ENOMEM) return (error); if (uap->oldlenp) { i = copyout(&j, uap->oldlenp, sizeof(j)); if (i) return (i); } return (error); } /* * This is used from various compatibility syscalls too. That's why name * must be in kernel space. */ int userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval) { int error = 0; struct sysctl_req req, req2; bzero(&req, sizeof req); req.p = p; if (oldlenp) { if (inkernel) { req.oldlen = *oldlenp; } else { error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); if (error) return (error); } } if (old) { if (!useracc(old, req.oldlen, VM_PROT_WRITE)) return (EFAULT); req.oldptr= old; } if (newlen) { if (!useracc(new, req.newlen, VM_PROT_READ)) return (EFAULT); req.newlen = newlen; req.newptr = new; } req.oldfunc = sysctl_old_user; req.newfunc = sysctl_new_user; req.lock = 1; /* XXX this should probably be done in a general way */ while (memlock.sl_lock) { memlock.sl_want = 1; (void) tsleep((caddr_t)&memlock, PRIBIO+1, "sysctl", 0); memlock.sl_locked++; } memlock.sl_lock = 1; do { req2 = req; error = sysctl_root(0, name, namelen, &req2); } while (error == EAGAIN); req = req2; if (req.lock == 2) vsunlock(req.oldptr, req.oldlen); memlock.sl_lock = 0; if (memlock.sl_want) { memlock.sl_want = 0; wakeup((caddr_t)&memlock); } if (error && error != ENOMEM) return (error); if (retval) { if (req.oldptr && req.oldidx > req.oldlen) *retval = req.oldlen; else *retval = req.oldidx; } return (error); } #ifdef COMPAT_43 #include #include #define KINFO_PROC (0<<8) #define KINFO_RT (1<<8) #define KINFO_VNODE (2<<8) #define KINFO_FILE (3<<8) #define KINFO_METER (4<<8) #define KINFO_LOADAVG (5<<8) #define KINFO_CLOCKRATE (6<<8) /* Non-standard BSDI extension - only present on their 4.3 net-2 releases */ #define KINFO_BSDI_SYSINFO (101<<8) /* * XXX this is bloat, but I hope it's better here than on the potentially * limited kernel stack... -Peter */ static struct { int bsdi_machine; /* "i386" on BSD/386 */ /* ^^^ this is an offset to the string, relative to the struct start */ char *pad0; long pad1; long pad2; long pad3; u_long pad4; u_long pad5; u_long pad6; int bsdi_ostype; /* "BSD/386" on BSD/386 */ int bsdi_osrelease; /* "1.1" on BSD/386 */ long pad7; long pad8; char *pad9; long pad10; long pad11; int pad12; long pad13; quad_t pad14; long pad15; struct timeval pad16; /* we dont set this, because BSDI's uname used gethostname() instead */ int bsdi_hostname; /* hostname on BSD/386 */ /* the actual string data is appended here */ } bsdi_si; /* * this data is appended to the end of the bsdi_si structure during copyout. * The "char *" offsets are relative to the base of the bsdi_si struct. * This contains "FreeBSD\02.0-BUILT-nnnnnn\0i386\0", and these strings * should not exceed the length of the buffer here... (or else!! :-) */ static char bsdi_strings[80]; /* It had better be less than this! */ #ifndef _SYS_SYSPROTO_H_ struct getkerninfo_args { int op; char *where; size_t *size; int arg; }; #endif int ogetkerninfo(struct proc *p, struct getkerninfo_args *uap) { int error, name[6]; size_t size; switch (uap->op & 0xff00) { case KINFO_RT: name[0] = CTL_NET; name[1] = PF_ROUTE; name[2] = 0; name[3] = (uap->op & 0xff0000) >> 16; name[4] = uap->op & 0xff; name[5] = uap->arg; error = userland_sysctl(p, name, 6, uap->where, uap->size, 0, 0, 0, &size); break; case KINFO_VNODE: name[0] = CTL_KERN; name[1] = KERN_VNODE; error = userland_sysctl(p, name, 2, uap->where, uap->size, 0, 0, 0, &size); break; case KINFO_PROC: name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = uap->op & 0xff; name[3] = uap->arg; error = userland_sysctl(p, name, 4, uap->where, uap->size, 0, 0, 0, &size); break; case KINFO_FILE: name[0] = CTL_KERN; name[1] = KERN_FILE; error = userland_sysctl(p, name, 2, uap->where, uap->size, 0, 0, 0, &size); break; case KINFO_METER: name[0] = CTL_VM; name[1] = VM_METER; error = userland_sysctl(p, name, 2, uap->where, uap->size, 0, 0, 0, &size); break; case KINFO_LOADAVG: name[0] = CTL_VM; name[1] = VM_LOADAVG; error = userland_sysctl(p, name, 2, uap->where, uap->size, 0, 0, 0, &size); break; case KINFO_CLOCKRATE: name[0] = CTL_KERN; name[1] = KERN_CLOCKRATE; error = userland_sysctl(p, name, 2, uap->where, uap->size, 0, 0, 0, &size); break; case KINFO_BSDI_SYSINFO: { /* * this is pretty crude, but it's just enough for uname() * from BSDI's 1.x libc to work. * * In particular, it doesn't return the same results when * the supplied buffer is too small. BSDI's version apparently * will return the amount copied, and set the *size to how * much was needed. The emulation framework here isn't capable * of that, so we just set both to the amount copied. * BSDI's 2.x product apparently fails with ENOMEM in this * scenario. */ u_int needed; u_int left; char *s; bzero((char *)&bsdi_si, sizeof(bsdi_si)); bzero(bsdi_strings, sizeof(bsdi_strings)); s = bsdi_strings; bsdi_si.bsdi_ostype = (s - bsdi_strings) + sizeof(bsdi_si); strcpy(s, ostype); s += strlen(s) + 1; bsdi_si.bsdi_osrelease = (s - bsdi_strings) + sizeof(bsdi_si); strcpy(s, osrelease); s += strlen(s) + 1; bsdi_si.bsdi_machine = (s - bsdi_strings) + sizeof(bsdi_si); strcpy(s, machine); s += strlen(s) + 1; needed = sizeof(bsdi_si) + (s - bsdi_strings); if (uap->where == NULL) { /* process is asking how much buffer to supply.. */ size = needed; error = 0; break; } /* if too much buffer supplied, trim it down */ if (size > needed) size = needed; /* how much of the buffer is remaining */ left = size; if ((error = copyout((char *)&bsdi_si, uap->where, left)) != 0) break; /* is there any point in continuing? */ if (left > sizeof(bsdi_si)) { left -= sizeof(bsdi_si); error = copyout(&bsdi_strings, uap->where + sizeof(bsdi_si), left); } break; } default: return (EOPNOTSUPP); } if (error) return (error); p->p_retval[0] = size; if (uap->size) error = copyout((caddr_t)&size, (caddr_t)uap->size, sizeof(size)); return (error); } #endif /* COMPAT_43 */ Index: head/sys/kern/kern_tc.c =================================================================== --- head/sys/kern/kern_tc.c (revision 62572) +++ head/sys/kern/kern_tc.c (revision 62573) @@ -1,581 +1,581 @@ /* * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * $FreeBSD$ */ #include "opt_ntp.h" #include #include #include #include #include #include #include #include /* * Number of timecounters used to implement stable storage */ #ifndef NTIMECOUNTER #define NTIMECOUNTER 5 #endif static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter", "Timecounter stable storage"); static void tco_setscales __P((struct timecounter *tc)); static __inline unsigned tco_delta __P((struct timecounter *tc)); time_t time_second; struct timeval boottime; SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, &boottime, timeval, "System boottime"); SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); static unsigned nmicrotime; static unsigned nnanotime; static unsigned ngetmicrotime; static unsigned ngetnanotime; static unsigned nmicrouptime; static unsigned nnanouptime; static unsigned ngetmicrouptime; static unsigned ngetnanouptime; SYSCTL_INT(_kern_timecounter, OID_AUTO, nmicrotime, CTLFLAG_RD, &nmicrotime, 0, ""); SYSCTL_INT(_kern_timecounter, OID_AUTO, nnanotime, CTLFLAG_RD, &nnanotime, 0, ""); SYSCTL_INT(_kern_timecounter, OID_AUTO, nmicrouptime, CTLFLAG_RD, &nmicrouptime, 0, ""); SYSCTL_INT(_kern_timecounter, OID_AUTO, nnanouptime, CTLFLAG_RD, &nnanouptime, 0, ""); SYSCTL_INT(_kern_timecounter, OID_AUTO, ngetmicrotime, CTLFLAG_RD, &ngetmicrotime, 0, ""); SYSCTL_INT(_kern_timecounter, OID_AUTO, ngetnanotime, CTLFLAG_RD, &ngetnanotime, 0, ""); SYSCTL_INT(_kern_timecounter, OID_AUTO, ngetmicrouptime, CTLFLAG_RD, &ngetmicrouptime, 0, ""); SYSCTL_INT(_kern_timecounter, OID_AUTO, ngetnanouptime, CTLFLAG_RD, &ngetnanouptime, 0, ""); /* * Implement a dummy timecounter which we can use until we get a real one * in the air. This allows the console and other early stuff to use * timeservices. */ static unsigned dummy_get_timecount(struct timecounter *tc) { static unsigned now; return (++now); } static struct timecounter dummy_timecounter = { dummy_get_timecount, 0, ~0u, 1000000, "dummy" }; struct timecounter *timecounter = &dummy_timecounter; static __inline unsigned tco_delta(struct timecounter *tc) { return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) & tc->tc_counter_mask); } /* * We have eight functions for looking at the clock, four for * microseconds and four for nanoseconds. For each there is fast * but less precise version "get{nano|micro}[up]time" which will * return a time which is up to 1/HZ previous to the call, whereas * the raw version "{nano|micro}[up]time" will return a timestamp * which is as precise as possible. The "up" variants return the * time relative to system boot, these are well suited for time * interval measurements. */ void getmicrotime(struct timeval *tvp) { struct timecounter *tc; ngetmicrotime++; tc = timecounter; *tvp = tc->tc_microtime; } void getnanotime(struct timespec *tsp) { struct timecounter *tc; ngetnanotime++; tc = timecounter; *tsp = tc->tc_nanotime; } void microtime(struct timeval *tv) { struct timecounter *tc; nmicrotime++; tc = timecounter; tv->tv_sec = tc->tc_offset_sec; tv->tv_usec = tc->tc_offset_micro; tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32; tv->tv_usec += boottime.tv_usec; tv->tv_sec += boottime.tv_sec; while (tv->tv_usec >= 1000000) { tv->tv_usec -= 1000000; tv->tv_sec++; } } void nanotime(struct timespec *ts) { unsigned count; u_int64_t delta; struct timecounter *tc; nnanotime++; tc = timecounter; ts->tv_sec = tc->tc_offset_sec; count = tco_delta(tc); delta = tc->tc_offset_nano; delta += ((u_int64_t)count * tc->tc_scale_nano_f); delta >>= 32; delta += ((u_int64_t)count * tc->tc_scale_nano_i); delta += boottime.tv_usec * 1000; ts->tv_sec += boottime.tv_sec; while (delta >= 1000000000) { delta -= 1000000000; ts->tv_sec++; } ts->tv_nsec = delta; } void getmicrouptime(struct timeval *tvp) { struct timecounter *tc; ngetmicrouptime++; tc = timecounter; tvp->tv_sec = tc->tc_offset_sec; tvp->tv_usec = tc->tc_offset_micro; } void getnanouptime(struct timespec *tsp) { struct timecounter *tc; ngetnanouptime++; tc = timecounter; tsp->tv_sec = tc->tc_offset_sec; tsp->tv_nsec = tc->tc_offset_nano >> 32; } void microuptime(struct timeval *tv) { struct timecounter *tc; nmicrouptime++; tc = timecounter; tv->tv_sec = tc->tc_offset_sec; tv->tv_usec = tc->tc_offset_micro; tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32; if (tv->tv_usec >= 1000000) { tv->tv_usec -= 1000000; tv->tv_sec++; } } void nanouptime(struct timespec *ts) { unsigned count; u_int64_t delta; struct timecounter *tc; nnanouptime++; tc = timecounter; ts->tv_sec = tc->tc_offset_sec; count = tco_delta(tc); delta = tc->tc_offset_nano; delta += ((u_int64_t)count * tc->tc_scale_nano_f); delta >>= 32; delta += ((u_int64_t)count * tc->tc_scale_nano_i); if (delta >= 1000000000) { delta -= 1000000000; ts->tv_sec++; } ts->tv_nsec = delta; } static void tco_setscales(struct timecounter *tc) { u_int64_t scale; scale = 1000000000LL << 32; scale += tc->tc_adjustment; scale /= tc->tc_tweak->tc_frequency; tc->tc_scale_micro = scale / 1000; tc->tc_scale_nano_f = scale & 0xffffffff; tc->tc_scale_nano_i = scale >> 32; } void tc_update(struct timecounter *tc) { tco_setscales(tc); } void tc_init(struct timecounter *tc) { struct timespec ts1; struct timecounter *t1, *t2, *t3; int i; tc->tc_adjustment = 0; tc->tc_tweak = tc; tco_setscales(tc); tc->tc_offset_count = tc->tc_get_timecount(tc); if (timecounter == &dummy_timecounter) tc->tc_avail = tc; else { tc->tc_avail = timecounter->tc_tweak->tc_avail; timecounter->tc_tweak->tc_avail = tc; } MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK); tc->tc_other = t1; *t1 = *tc; t2 = t1; for (i = 1; i < NTIMECOUNTER; i++) { MALLOC(t3, struct timecounter *, sizeof *t3, M_TIMECOUNTER, M_WAITOK); *t3 = *tc; t3->tc_other = t2; t2 = t3; } t1->tc_other = t3; tc = t1; printf("Timecounter \"%s\" frequency %lu Hz\n", tc->tc_name, (u_long)tc->tc_frequency); /* XXX: For now always start using the counter. */ tc->tc_offset_count = tc->tc_get_timecount(tc); nanouptime(&ts1); tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32; tc->tc_offset_micro = ts1.tv_nsec / 1000; tc->tc_offset_sec = ts1.tv_sec; timecounter = tc; } void tc_setclock(struct timespec *ts) { struct timespec ts2; nanouptime(&ts2); boottime.tv_sec = ts->tv_sec - ts2.tv_sec; boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000; if (boottime.tv_usec < 0) { boottime.tv_usec += 1000000; boottime.tv_sec--; } /* fiddle all the little crinkly bits around the fiords... */ tc_windup(); } static void switch_timecounter(struct timecounter *newtc) { int s; struct timecounter *tc; struct timespec ts; s = splclock(); tc = timecounter; if (newtc->tc_tweak == tc->tc_tweak) { splx(s); return; } newtc = newtc->tc_tweak->tc_other; nanouptime(&ts); newtc->tc_offset_sec = ts.tv_sec; newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32; newtc->tc_offset_micro = ts.tv_nsec / 1000; newtc->tc_offset_count = newtc->tc_get_timecount(newtc); tco_setscales(newtc); timecounter = newtc; splx(s); } static struct timecounter * sync_other_counter(void) { struct timecounter *tc, *tcn, *tco; unsigned delta; tco = timecounter; tc = tco->tc_other; tcn = tc->tc_other; *tc = *tco; tc->tc_other = tcn; delta = tco_delta(tc); tc->tc_offset_count += delta; tc->tc_offset_count &= tc->tc_counter_mask; tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f; tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32; return (tc); } void tc_windup(void) { struct timecounter *tc, *tco; struct timeval tvt; tco = timecounter; tc = sync_other_counter(); /* * We may be inducing a tiny error here, the tc_poll_pps() may * process a latched count which happens after the tco_delta() * in sync_other_counter(), which would extend the previous * counters parameters into the domain of this new one. * Since the timewindow is very small for this, the error is * going to be only a few weenieseconds (as Dave Mills would * say), so lets just not talk more about it, OK ? */ if (tco->tc_poll_pps) tco->tc_poll_pps(tco); if (timedelta != 0) { tvt = boottime; tvt.tv_usec += tickdelta; if (tvt.tv_usec >= 1000000) { tvt.tv_sec++; tvt.tv_usec -= 1000000; } else if (tvt.tv_usec < 0) { tvt.tv_sec--; tvt.tv_usec += 1000000; } boottime = tvt; timedelta -= tickdelta; } while (tc->tc_offset_nano >= 1000000000ULL << 32) { tc->tc_offset_nano -= 1000000000ULL << 32; tc->tc_offset_sec++; ntp_update_second(tc); /* XXX only needed if xntpd runs */ tco_setscales(tc); } tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32; /* Figure out the wall-clock time */ tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec; tc->tc_nanotime.tv_nsec = (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000; tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec; if (tc->tc_nanotime.tv_nsec >= 1000000000) { tc->tc_nanotime.tv_nsec -= 1000000000; tc->tc_microtime.tv_usec -= 1000000; tc->tc_nanotime.tv_sec++; } time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec; timecounter = tc; } static int -sysctl_kern_timecounter_hardware (SYSCTL_HANDLER_ARGS) +sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) { char newname[32]; struct timecounter *newtc, *tc; int error; tc = timecounter->tc_tweak; strncpy(newname, tc->tc_name, sizeof(newname)); error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); if (error == 0 && req->newptr != NULL && strcmp(newname, tc->tc_name) != 0) { for (newtc = tc->tc_avail; newtc != tc; newtc = newtc->tc_avail) { if (strcmp(newname, newtc->tc_name) == 0) { /* Warm up new timecounter. */ (void)newtc->tc_get_timecount(newtc); switch_timecounter(newtc); return (0); } } return (EINVAL); } return (error); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_kern_timecounter_hardware, "A", ""); int pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) { pps_params_t *app; struct pps_fetch_args *fapi; #ifdef PPS_SYNC struct pps_kcbind_args *kapi; #endif switch (cmd) { case PPS_IOC_CREATE: return (0); case PPS_IOC_DESTROY: return (0); case PPS_IOC_SETPARAMS: app = (pps_params_t *)data; if (app->mode & ~pps->ppscap) return (EINVAL); pps->ppsparam = *app; return (0); case PPS_IOC_GETPARAMS: app = (pps_params_t *)data; *app = pps->ppsparam; app->api_version = PPS_API_VERS_1; return (0); case PPS_IOC_GETCAP: *(int*)data = pps->ppscap; return (0); case PPS_IOC_FETCH: fapi = (struct pps_fetch_args *)data; if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) return (EOPNOTSUPP); pps->ppsinfo.current_mode = pps->ppsparam.mode; fapi->pps_info_buf = pps->ppsinfo; return (0); case PPS_IOC_KCBIND: #ifdef PPS_SYNC kapi = (struct pps_kcbind_args *)data; /* XXX Only root should be able to do this */ if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (kapi->kernel_consumer != PPS_KC_HARDPPS) return (EINVAL); if (kapi->edge & ~pps->ppscap) return (EINVAL); pps->kcmode = kapi->edge; return (0); #else return (EOPNOTSUPP); #endif default: return (ENOTTY); } } void pps_init(struct pps_state *pps) { pps->ppscap |= PPS_TSFMT_TSPEC; if (pps->ppscap & PPS_CAPTUREASSERT) pps->ppscap |= PPS_OFFSETASSERT; if (pps->ppscap & PPS_CAPTURECLEAR) pps->ppscap |= PPS_OFFSETCLEAR; } void pps_event(struct pps_state *pps, struct timecounter *tc, unsigned count, int event) { struct timespec ts, *tsp, *osp; u_int64_t delta; unsigned tcount, *pcount; int foff, fhard; pps_seq_t *pseq; /* Things would be easier with arrays... */ if (event == PPS_CAPTUREASSERT) { tsp = &pps->ppsinfo.assert_timestamp; osp = &pps->ppsparam.assert_offset; foff = pps->ppsparam.mode & PPS_OFFSETASSERT; fhard = pps->kcmode & PPS_CAPTUREASSERT; pcount = &pps->ppscount[0]; pseq = &pps->ppsinfo.assert_sequence; } else { tsp = &pps->ppsinfo.clear_timestamp; osp = &pps->ppsparam.clear_offset; foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; fhard = pps->kcmode & PPS_CAPTURECLEAR; pcount = &pps->ppscount[1]; pseq = &pps->ppsinfo.clear_sequence; } /* The timecounter changed: bail */ if (!pps->ppstc || pps->ppstc->tc_name != tc->tc_name || tc->tc_name != timecounter->tc_name) { pps->ppstc = tc; *pcount = count; return; } /* Nothing really happened */ if (*pcount == count) return; *pcount = count; /* Convert the count to timespec */ ts.tv_sec = tc->tc_offset_sec; tcount = count - tc->tc_offset_count; tcount &= tc->tc_counter_mask; delta = tc->tc_offset_nano; delta += ((u_int64_t)tcount * tc->tc_scale_nano_f); delta >>= 32; delta += ((u_int64_t)tcount * tc->tc_scale_nano_i); delta += boottime.tv_usec * 1000; ts.tv_sec += boottime.tv_sec; while (delta >= 1000000000) { delta -= 1000000000; ts.tv_sec++; } ts.tv_nsec = delta; (*pseq)++; *tsp = ts; if (foff) { timespecadd(tsp, osp); if (tsp->tv_nsec < 0) { tsp->tv_nsec += 1000000000; tsp->tv_sec -= 1; } } #ifdef PPS_SYNC if (fhard) { /* magic, at its best... */ tcount = count - pps->ppscount[2]; pps->ppscount[2] = count; tcount &= tc->tc_counter_mask; delta = ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_f); delta >>= 32; delta += ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_i); hardpps(tsp, delta); } #endif } Index: head/sys/kern/subr_bus.c =================================================================== --- head/sys/kern/subr_bus.c (revision 62572) +++ head/sys/kern/subr_bus.c (revision 62573) @@ -1,2422 +1,2422 @@ /*- * Copyright (c) 1997,1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_bus.h" #include #include #include #include #include #ifdef DEVICE_SYSCTLS #include #endif #include #include #include #include #include #include /* for device_printf() */ MALLOC_DEFINE(M_BUS, "bus", "Bus data structures"); #ifdef BUS_DEBUG #include static int bus_debug = 1; SYSCTL_INT(_debug, OID_AUTO, bus_debug, CTLFLAG_RW, &bus_debug, 0, "Debug bus code"); #define PDEBUG(a) if (bus_debug) {printf(__FUNCTION__ ":%d: ", __LINE__), printf a, printf("\n");} #define DEVICENAME(d) ((d)? device_get_name(d): "no device") #define DRIVERNAME(d) ((d)? d->name : "no driver") #define DEVCLANAME(d) ((d)? d->name : "no devclass") /* Produce the indenting, indent*2 spaces plus a '.' ahead of that to * prevent syslog from deleting initial spaces */ #define indentprintf(p) do { int iJ; printf("."); for (iJ=0; iJname, classname)) return dc; PDEBUG(("%s not found%s", classname, (create? ", creating": ""))); if (create) { dc = malloc(sizeof(struct devclass) + strlen(classname) + 1, M_BUS, M_NOWAIT); if (!dc) return NULL; bzero(dc, sizeof(struct devclass) + strlen(classname) + 1); dc->name = (char*) (dc + 1); strcpy(dc->name, classname); dc->devices = NULL; dc->maxunit = 0; TAILQ_INIT(&dc->drivers); TAILQ_INSERT_TAIL(&devclasses, dc, link); } return dc; } devclass_t devclass_create(const char *classname) { return devclass_find_internal(classname, TRUE); } devclass_t devclass_find(const char *classname) { return devclass_find_internal(classname, FALSE); } int devclass_add_driver(devclass_t dc, driver_t *driver) { driverlink_t dl; int i; PDEBUG(("%s", DRIVERNAME(driver))); dl = malloc(sizeof *dl, M_BUS, M_NOWAIT); if (!dl) return ENOMEM; bzero(dl, sizeof *dl); /* * Compile the driver's methods. Also increase the reference count * so that the class doesn't get freed when the last instance * goes. This means we can safely use static methods and avoids a * double-free in devclass_delete_driver. */ kobj_class_compile((kobj_class_t) driver); /* * Make sure the devclass which the driver is implementing exists. */ devclass_find_internal(driver->name, TRUE); dl->driver = driver; TAILQ_INSERT_TAIL(&dc->drivers, dl, link); driver->refs++; /* * Call BUS_DRIVER_ADDED for any existing busses in this class. */ for (i = 0; i < dc->maxunit; i++) if (dc->devices[i]) BUS_DRIVER_ADDED(dc->devices[i], driver); return 0; } int devclass_delete_driver(devclass_t busclass, driver_t *driver) { devclass_t dc = devclass_find(driver->name); driverlink_t dl; device_t dev; int i; int error; PDEBUG(("%s from devclass %s", driver->name, DEVCLANAME(busclass))); if (!dc) return 0; /* * Find the link structure in the bus' list of drivers. */ for (dl = TAILQ_FIRST(&busclass->drivers); dl; dl = TAILQ_NEXT(dl, link)) { if (dl->driver == driver) break; } if (!dl) { PDEBUG(("%s not found in %s list", driver->name, busclass->name)); return ENOENT; } /* * Disassociate from any devices. We iterate through all the * devices in the devclass of the driver and detach any which are * using the driver and which have a parent in the devclass which * we are deleting from. * * Note that since a driver can be in multiple devclasses, we * should not detach devices which are not children of devices in * the affected devclass. */ for (i = 0; i < dc->maxunit; i++) { if (dc->devices[i]) { dev = dc->devices[i]; if (dev->driver == driver && dev->parent && dev->parent->devclass == busclass) { if ((error = device_detach(dev)) != 0) return error; device_set_driver(dev, NULL); } } } TAILQ_REMOVE(&busclass->drivers, dl, link); free(dl, M_BUS); driver->refs--; if (driver->refs == 0) kobj_class_free((kobj_class_t) driver); return 0; } static driverlink_t devclass_find_driver_internal(devclass_t dc, const char *classname) { driverlink_t dl; PDEBUG(("%s in devclass %s", classname, DEVCLANAME(dc))); for (dl = TAILQ_FIRST(&dc->drivers); dl; dl = TAILQ_NEXT(dl, link)) { if (!strcmp(dl->driver->name, classname)) return dl; } PDEBUG(("not found")); return NULL; } driver_t * devclass_find_driver(devclass_t dc, const char *classname) { driverlink_t dl; dl = devclass_find_driver_internal(dc, classname); if (dl) return dl->driver; else return NULL; } const char * devclass_get_name(devclass_t dc) { return dc->name; } device_t devclass_get_device(devclass_t dc, int unit) { if (dc == NULL || unit < 0 || unit >= dc->maxunit) return NULL; return dc->devices[unit]; } void * devclass_get_softc(devclass_t dc, int unit) { device_t dev; dev = devclass_get_device(dc, unit); if (!dev) return (NULL); return (device_get_softc(dev)); } int devclass_get_devices(devclass_t dc, device_t **devlistp, int *devcountp) { int i; int count; device_t *list; count = 0; for (i = 0; i < dc->maxunit; i++) if (dc->devices[i]) count++; list = malloc(count * sizeof(device_t), M_TEMP, M_NOWAIT); if (!list) return ENOMEM; bzero(list, count * sizeof(device_t)); count = 0; for (i = 0; i < dc->maxunit; i++) if (dc->devices[i]) { list[count] = dc->devices[i]; count++; } *devlistp = list; *devcountp = count; return 0; } int devclass_get_maxunit(devclass_t dc) { return dc->maxunit; } static int devclass_alloc_unit(devclass_t dc, int *unitp) { int unit = *unitp; PDEBUG(("unit %d in devclass %s", unit, DEVCLANAME(dc))); /* If we have been given a wired unit number, check for existing device */ if (unit != -1) { if (unit >= 0 && unit < dc->maxunit && dc->devices[unit] != NULL) { if (bootverbose) printf("%s-: %s%d exists, using next available unit number\n", dc->name, dc->name, unit); /* find the next available slot */ while (++unit < dc->maxunit && dc->devices[unit] != NULL) ; } } else { /* Unwired device, find the next available slot for it */ unit = 0; while (unit < dc->maxunit && dc->devices[unit] != NULL) unit++; } /* * We've selected a unit beyond the length of the table, so let's extend * the table to make room for all units up to and including this one. */ if (unit >= dc->maxunit) { device_t *newlist; int newsize; newsize = roundup((unit + 1), MINALLOCSIZE / sizeof(device_t)); newlist = malloc(sizeof(device_t) * newsize, M_BUS, M_NOWAIT); if (!newlist) return ENOMEM; bcopy(dc->devices, newlist, sizeof(device_t) * dc->maxunit); bzero(newlist + dc->maxunit, sizeof(device_t) * (newsize - dc->maxunit)); if (dc->devices) free(dc->devices, M_BUS); dc->devices = newlist; dc->maxunit = newsize; } PDEBUG(("now: unit %d in devclass %s", unit, DEVCLANAME(dc))); *unitp = unit; return 0; } static int devclass_add_device(devclass_t dc, device_t dev) { int buflen, error; PDEBUG(("%s in devclass %s", DEVICENAME(dev), DEVCLANAME(dc))); buflen = strlen(dc->name) + 5; dev->nameunit = malloc(buflen, M_BUS, M_NOWAIT); if (!dev->nameunit) return ENOMEM; bzero(dev->nameunit, buflen); if ((error = devclass_alloc_unit(dc, &dev->unit)) != 0) { free(dev->nameunit, M_BUS); dev->nameunit = NULL; return error; } dc->devices[dev->unit] = dev; dev->devclass = dc; snprintf(dev->nameunit, buflen, "%s%d", dc->name, dev->unit); #ifdef DEVICE_SYSCTLS device_register_oids(dev); #endif return 0; } static int devclass_delete_device(devclass_t dc, device_t dev) { if (!dc || !dev) return 0; PDEBUG(("%s in devclass %s", DEVICENAME(dev), DEVCLANAME(dc))); if (dev->devclass != dc || dc->devices[dev->unit] != dev) panic("devclass_delete_device: inconsistent device class"); dc->devices[dev->unit] = NULL; if (dev->flags & DF_WILDCARD) dev->unit = -1; dev->devclass = NULL; free(dev->nameunit, M_BUS); dev->nameunit = NULL; #ifdef DEVICE_SYSCTLS device_unregister_oids(dev); #endif return 0; } static device_t make_device(device_t parent, const char *name, int unit) { device_t dev; devclass_t dc; PDEBUG(("%s at %s as unit %d", name, DEVICENAME(parent), unit)); if (name) { dc = devclass_find_internal(name, TRUE); if (!dc) { printf("make_device: can't find device class %s\n", name); return NULL; } } else dc = NULL; dev = malloc(sizeof(struct device), M_BUS, M_NOWAIT); if (!dev) return 0; bzero(dev, sizeof(struct device)); dev->parent = parent; TAILQ_INIT(&dev->children); kobj_init((kobj_t) dev, &null_class); dev->driver = NULL; dev->devclass = NULL; dev->unit = unit; dev->nameunit = NULL; dev->desc = NULL; dev->busy = 0; dev->devflags = 0; dev->flags = DF_ENABLED; dev->order = 0; if (unit == -1) dev->flags |= DF_WILDCARD; if (name) { dev->flags |= DF_FIXEDCLASS; devclass_add_device(dc, dev); } dev->ivars = NULL; dev->softc = NULL; dev->state = DS_NOTPRESENT; kobj_init((kobj_t) dev, &null_class); return dev; } static int device_print_child(device_t dev, device_t child) { int retval = 0; if (device_is_alive(child)) { retval += BUS_PRINT_CHILD(dev, child); } else retval += device_printf(child, " not found\n"); return (retval); } device_t device_add_child(device_t dev, const char *name, int unit) { return device_add_child_ordered(dev, 0, name, unit); } device_t device_add_child_ordered(device_t dev, int order, const char *name, int unit) { device_t child; device_t place; PDEBUG(("%s at %s with order %d as unit %d", name, DEVICENAME(dev), order, unit)); child = make_device(dev, name, unit); if (child == NULL) return child; child->order = order; TAILQ_FOREACH(place, &dev->children, link) if (place->order > order) break; if (place) { /* * The device 'place' is the first device whose order is * greater than the new child. */ TAILQ_INSERT_BEFORE(place, child, link); } else { /* * The new child's order is greater or equal to the order of * any existing device. Add the child to the tail of the list. */ TAILQ_INSERT_TAIL(&dev->children, child, link); } return child; } int device_delete_child(device_t dev, device_t child) { int error; device_t grandchild; PDEBUG(("%s from %s", DEVICENAME(child), DEVICENAME(dev))); /* remove children first */ while ( (grandchild = TAILQ_FIRST(&child->children)) ) { error = device_delete_child(child, grandchild); if (error) return error; } if ((error = device_detach(child)) != 0) return error; if (child->devclass) devclass_delete_device(child->devclass, child); TAILQ_REMOVE(&dev->children, child, link); device_set_desc(child, NULL); free(child, M_BUS); return 0; } /* * Find only devices attached to this bus. */ device_t device_find_child(device_t dev, const char *classname, int unit) { devclass_t dc; device_t child; dc = devclass_find(classname); if (!dc) return NULL; child = devclass_get_device(dc, unit); if (child && child->parent == dev) return child; return NULL; } static driverlink_t first_matching_driver(devclass_t dc, device_t dev) { if (dev->devclass) return devclass_find_driver_internal(dc, dev->devclass->name); else return TAILQ_FIRST(&dc->drivers); } static driverlink_t next_matching_driver(devclass_t dc, device_t dev, driverlink_t last) { if (dev->devclass) { driverlink_t dl; for (dl = TAILQ_NEXT(last, link); dl; dl = TAILQ_NEXT(dl, link)) if (!strcmp(dev->devclass->name, dl->driver->name)) return dl; return NULL; } else return TAILQ_NEXT(last, link); } static int device_probe_child(device_t dev, device_t child) { devclass_t dc; driverlink_t best = 0; driverlink_t dl; int result, pri = 0; int hasclass = (child->devclass != 0); dc = dev->devclass; if (!dc) panic("device_probe_child: parent device has no devclass"); if (child->state == DS_ALIVE) return 0; for (dl = first_matching_driver(dc, child); dl; dl = next_matching_driver(dc, child, dl)) { PDEBUG(("Trying %s", DRIVERNAME(dl->driver))); device_set_driver(child, dl->driver); if (!hasclass) device_set_devclass(child, dl->driver->name); result = DEVICE_PROBE(child); if (!hasclass) device_set_devclass(child, 0); /* * If the driver returns SUCCESS, there can be no higher match * for this device. */ if (result == 0) { best = dl; pri = 0; break; } /* * The driver returned an error so it certainly doesn't match. */ if (result > 0) { device_set_driver(child, 0); continue; } /* * A priority lower than SUCCESS, remember the best matching * driver. Initialise the value of pri for the first match. */ if (best == 0 || result > pri) { best = dl; pri = result; continue; } } /* * If we found a driver, change state and initialise the devclass. */ if (best) { if (!child->devclass) device_set_devclass(child, best->driver->name); device_set_driver(child, best->driver); if (pri < 0) { /* * A bit bogus. Call the probe method again to make sure * that we have the right description. */ DEVICE_PROBE(child); } child->state = DS_ALIVE; return 0; } return ENXIO; } device_t device_get_parent(device_t dev) { return dev->parent; } int device_get_children(device_t dev, device_t **devlistp, int *devcountp) { int count; device_t child; device_t *list; count = 0; for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) count++; list = malloc(count * sizeof(device_t), M_TEMP, M_NOWAIT); if (!list) return ENOMEM; bzero(list, count * sizeof(device_t)); count = 0; for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) { list[count] = child; count++; } *devlistp = list; *devcountp = count; return 0; } driver_t * device_get_driver(device_t dev) { return dev->driver; } devclass_t device_get_devclass(device_t dev) { return dev->devclass; } const char * device_get_name(device_t dev) { if (dev->devclass) return devclass_get_name(dev->devclass); return NULL; } const char * device_get_nameunit(device_t dev) { return dev->nameunit; } int device_get_unit(device_t dev) { return dev->unit; } const char * device_get_desc(device_t dev) { return dev->desc; } u_int32_t device_get_flags(device_t dev) { return dev->devflags; } int device_print_prettyname(device_t dev) { const char *name = device_get_name(dev); if (name == 0) return printf("unknown: "); else return printf("%s%d: ", name, device_get_unit(dev)); } int device_printf(device_t dev, const char * fmt, ...) { va_list ap; int retval; retval = device_print_prettyname(dev); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return retval; } static void device_set_desc_internal(device_t dev, const char* desc, int copy) { if (dev->desc && (dev->flags & DF_DESCMALLOCED)) { free(dev->desc, M_BUS); dev->flags &= ~DF_DESCMALLOCED; dev->desc = NULL; } if (copy && desc) { dev->desc = malloc(strlen(desc) + 1, M_BUS, M_NOWAIT); if (dev->desc) { strcpy(dev->desc, desc); dev->flags |= DF_DESCMALLOCED; } } else /* Avoid a -Wcast-qual warning */ dev->desc = (char *)(uintptr_t) desc; #ifdef DEVICE_SYSCTLS { struct sysctl_oid *oid = &dev->oid[1]; oid->oid_arg1 = dev->desc ? dev->desc : ""; oid->oid_arg2 = dev->desc ? strlen(dev->desc) : 0; } #endif } void device_set_desc(device_t dev, const char* desc) { device_set_desc_internal(dev, desc, FALSE); } void device_set_desc_copy(device_t dev, const char* desc) { device_set_desc_internal(dev, desc, TRUE); } void device_set_flags(device_t dev, u_int32_t flags) { dev->devflags = flags; } void * device_get_softc(device_t dev) { return dev->softc; } void device_set_softc(device_t dev, void *softc) { if (dev->softc && !(dev->flags & DF_EXTERNALSOFTC)) free(dev->softc, M_BUS); dev->softc = softc; if (dev->softc) dev->flags |= DF_EXTERNALSOFTC; else dev->flags &= ~DF_EXTERNALSOFTC; } void * device_get_ivars(device_t dev) { return dev->ivars; } void device_set_ivars(device_t dev, void * ivars) { if (!dev) return; dev->ivars = ivars; return; } device_state_t device_get_state(device_t dev) { return dev->state; } void device_enable(device_t dev) { dev->flags |= DF_ENABLED; } void device_disable(device_t dev) { dev->flags &= ~DF_ENABLED; } void device_busy(device_t dev) { if (dev->state < DS_ATTACHED) panic("device_busy: called for unattached device"); if (dev->busy == 0 && dev->parent) device_busy(dev->parent); dev->busy++; dev->state = DS_BUSY; } void device_unbusy(device_t dev) { if (dev->state != DS_BUSY) panic("device_unbusy: called for non-busy device"); dev->busy--; if (dev->busy == 0) { if (dev->parent) device_unbusy(dev->parent); dev->state = DS_ATTACHED; } } void device_quiet(device_t dev) { dev->flags |= DF_QUIET; } void device_verbose(device_t dev) { dev->flags &= ~DF_QUIET; } int device_is_quiet(device_t dev) { return (dev->flags & DF_QUIET) != 0; } int device_is_enabled(device_t dev) { return (dev->flags & DF_ENABLED) != 0; } int device_is_alive(device_t dev) { return dev->state >= DS_ALIVE; } int device_set_devclass(device_t dev, const char *classname) { devclass_t dc; if (!classname) { if (dev->devclass) devclass_delete_device(dev->devclass, dev); return 0; } if (dev->devclass) { printf("device_set_devclass: device class already set\n"); return EINVAL; } dc = devclass_find_internal(classname, TRUE); if (!dc) return ENOMEM; return devclass_add_device(dc, dev); } int device_set_driver(device_t dev, driver_t *driver) { if (dev->state >= DS_ATTACHED) return EBUSY; if (dev->driver == driver) return 0; if (dev->softc && !(dev->flags & DF_EXTERNALSOFTC)) { free(dev->softc, M_BUS); dev->softc = NULL; } kobj_delete((kobj_t) dev, 0); dev->driver = driver; if (driver) { kobj_init((kobj_t) dev, (kobj_class_t) driver); if (!(dev->flags & DF_EXTERNALSOFTC)) { dev->softc = malloc(driver->size, M_BUS, M_NOWAIT); if (!dev->softc) { kobj_init((kobj_t) dev, &null_class); dev->driver = NULL; return ENOMEM; } bzero(dev->softc, driver->size); } } else kobj_init((kobj_t) dev, &null_class); return 0; } int device_probe_and_attach(device_t dev) { device_t bus = dev->parent; int error = 0; if (dev->state >= DS_ALIVE) return 0; if (dev->flags & DF_ENABLED) { error = device_probe_child(bus, dev); if (!error) { if (!device_is_quiet(dev)) device_print_child(bus, dev); error = DEVICE_ATTACH(dev); if (!error) dev->state = DS_ATTACHED; else { printf("device_probe_and_attach: %s%d attach returned %d\n", dev->driver->name, dev->unit, error); device_set_driver(dev, NULL); dev->state = DS_NOTPRESENT; } } else { if (!(dev->flags & DF_DONENOMATCH)) { BUS_PROBE_NOMATCH(bus, dev); dev->flags |= DF_DONENOMATCH; } } } else { if (bootverbose) { device_print_prettyname(dev); printf("not probed (disabled)\n"); } } return error; } int device_detach(device_t dev) { int error; PDEBUG(("%s", DEVICENAME(dev))); if (dev->state == DS_BUSY) return EBUSY; if (dev->state != DS_ATTACHED) return 0; if ((error = DEVICE_DETACH(dev)) != 0) return error; device_printf(dev, "detached\n"); if (dev->parent) BUS_CHILD_DETACHED(dev->parent, dev); if (!(dev->flags & DF_FIXEDCLASS)) devclass_delete_device(dev->devclass, dev); dev->state = DS_NOTPRESENT; device_set_driver(dev, NULL); return 0; } int device_shutdown(device_t dev) { if (dev->state < DS_ATTACHED) return 0; return DEVICE_SHUTDOWN(dev); } int device_set_unit(device_t dev, int unit) { devclass_t dc; int err; dc = device_get_devclass(dev); if (unit < dc->maxunit && dc->devices[unit]) return EBUSY; err = devclass_delete_device(dc, dev); if (err) return err; dev->unit = unit; err = devclass_add_device(dc, dev); if (err) return err; return 0; } #ifdef DEVICE_SYSCTLS /* * Sysctl nodes for devices. */ SYSCTL_NODE(_hw, OID_AUTO, devices, CTLFLAG_RW, 0, "A list of all devices"); static int -sysctl_handle_children (SYSCTL_HANDLER_ARGS) +sysctl_handle_children(SYSCTL_HANDLER_ARGS) { device_t dev = arg1; device_t child; int first = 1, error = 0; for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) { if (child->nameunit) { if (!first) { error = SYSCTL_OUT(req, ",", 1); if (error) return error; } else { first = 0; } error = SYSCTL_OUT(req, child->nameunit, strlen(child->nameunit)); if (error) return error; } } error = SYSCTL_OUT(req, "", 1); return error; } static int -sysctl_handle_state (SYSCTL_HANDLER_ARGS) +sysctl_handle_state(SYSCTL_HANDLER_ARGS) { device_t dev = arg1; switch (dev->state) { case DS_NOTPRESENT: return SYSCTL_OUT(req, "notpresent", sizeof("notpresent")); case DS_ALIVE: return SYSCTL_OUT(req, "alive", sizeof("alive")); case DS_ATTACHED: return SYSCTL_OUT(req, "attached", sizeof("attached")); case DS_BUSY: return SYSCTL_OUT(req, "busy", sizeof("busy")); } return 0; } static void device_register_oids(device_t dev) { struct sysctl_oid* oid; oid = &dev->oid[0]; bzero(oid, sizeof(*oid)); oid->oid_parent = &sysctl__hw_devices_children; oid->oid_number = OID_AUTO; oid->oid_kind = CTLTYPE_NODE | CTLFLAG_RW; oid->oid_arg1 = &dev->oidlist[0]; oid->oid_arg2 = 0; oid->oid_name = dev->nameunit; oid->oid_handler = 0; oid->oid_fmt = "N"; SLIST_INIT(&dev->oidlist[0]); sysctl_register_oid(oid); oid = &dev->oid[1]; bzero(oid, sizeof(*oid)); oid->oid_parent = &dev->oidlist[0]; oid->oid_number = OID_AUTO; oid->oid_kind = CTLTYPE_STRING | CTLFLAG_RD; oid->oid_arg1 = dev->desc ? dev->desc : ""; oid->oid_arg2 = dev->desc ? strlen(dev->desc) : 0; oid->oid_name = "desc"; oid->oid_handler = sysctl_handle_string; oid->oid_fmt = "A"; sysctl_register_oid(oid); oid = &dev->oid[2]; bzero(oid, sizeof(*oid)); oid->oid_parent = &dev->oidlist[0]; oid->oid_number = OID_AUTO; oid->oid_kind = CTLTYPE_INT | CTLFLAG_RD; oid->oid_arg1 = dev; oid->oid_arg2 = 0; oid->oid_name = "children"; oid->oid_handler = sysctl_handle_children; oid->oid_fmt = "A"; sysctl_register_oid(oid); oid = &dev->oid[3]; bzero(oid, sizeof(*oid)); oid->oid_parent = &dev->oidlist[0]; oid->oid_number = OID_AUTO; oid->oid_kind = CTLTYPE_INT | CTLFLAG_RD; oid->oid_arg1 = dev; oid->oid_arg2 = 0; oid->oid_name = "state"; oid->oid_handler = sysctl_handle_state; oid->oid_fmt = "A"; sysctl_register_oid(oid); } static void device_unregister_oids(device_t dev) { sysctl_unregister_oid(&dev->oid[0]); sysctl_unregister_oid(&dev->oid[1]); sysctl_unregister_oid(&dev->oid[2]); } #endif /*======================================*/ /* * Access functions for device resources. */ /* Runtime version */ static struct config_device *devtab; static int devtab_count = 0; static int resource_new_name(const char *name, int unit) { struct config_device *new; new = malloc((devtab_count + 1) * sizeof(*new), M_TEMP, M_NOWAIT); if (new == NULL) return -1; if (devtab && devtab_count > 0) bcopy(devtab, new, devtab_count * sizeof(*new)); bzero(&new[devtab_count], sizeof(*new)); new[devtab_count].name = malloc(strlen(name) + 1, M_TEMP, M_NOWAIT); if (new[devtab_count].name == NULL) { free(new, M_TEMP); return -1; } strcpy(new[devtab_count].name, name); new[devtab_count].unit = unit; new[devtab_count].resource_count = 0; new[devtab_count].resources = NULL; devtab = new; return devtab_count++; } static int resource_new_resname(int j, const char *resname, resource_type type) { struct config_resource *new; int i; i = devtab[j].resource_count; new = malloc((i + 1) * sizeof(*new), M_TEMP, M_NOWAIT); if (new == NULL) return -1; if (devtab[j].resources && i > 0) bcopy(devtab[j].resources, new, i * sizeof(*new)); bzero(&new[i], sizeof(*new)); new[i].name = malloc(strlen(resname) + 1, M_TEMP, M_NOWAIT); if (new[i].name == NULL) { free(new, M_TEMP); return -1; } strcpy(new[i].name, resname); new[i].type = type; if (devtab[j].resources) free(devtab[j].resources, M_TEMP); devtab[j].resources = new; devtab[j].resource_count = i + 1; return i; } static int resource_match_string(int i, const char *resname, const char *value) { int j; struct config_resource *res; for (j = 0, res = devtab[i].resources; j < devtab[i].resource_count; j++, res++) if (!strcmp(res->name, resname) && res->type == RES_STRING && !strcmp(res->u.stringval, value)) return j; return -1; } static int resource_find_hard(const char *cp, const char *name, int unit, const char *resname, struct config_resource **result) { char match[256]; int matchlen; char *op; long val; snprintf(match, sizeof(match), "hint.%s.%d.%s=", name, unit, resname); matchlen = strlen(match); while (cp) { if (strncmp(match, cp, matchlen) == 0) break; while (*cp != '\0') cp++; cp++; if (*cp == '\0') { cp = NULL; break; } } if (cp) cp += matchlen; /* skip over name and '=' */ else return ENOENT; val = strtoul(cp, &op, 0); if (*cp != '\0' && *op == '\0') { (*result)->type = RES_INT; (*result)->u.intval = val; } else { (*result)->type = RES_STRING; (*result)->u.stringval = cp; } return 0; } static int resource_find(const char *name, int unit, const char *resname, struct config_resource **result) { int i, j; struct config_resource *res; if (!hints_loaded) { /* First specific, then generic. Dynamic over static. */ i = resource_find_hard(kern_envp, name, unit, resname, result); if (i == 0) return 0; i = resource_find_hard(static_hints, name, unit, resname, result); if (i == 0) return 0; i = resource_find_hard(kern_envp, name, -1, resname, result); if (i == 0) return 0; i = resource_find_hard(static_hints, name, -1, resname, result); return i; } /* * First check specific instances, then generic. */ for (i = 0; i < devtab_count; i++) { if (devtab[i].unit < 0) continue; if (!strcmp(devtab[i].name, name) && devtab[i].unit == unit) { res = devtab[i].resources; for (j = 0; j < devtab[i].resource_count; j++, res++) if (!strcmp(res->name, resname)) { *result = res; return 0; } } } for (i = 0; i < devtab_count; i++) { if (devtab[i].unit >= 0) continue; if (!strcmp(devtab[i].name, name)) { res = devtab[i].resources; for (j = 0; j < devtab[i].resource_count; j++, res++) if (!strcmp(res->name, resname)) { *result = res; return 0; } } } return ENOENT; } int resource_int_value(const char *name, int unit, const char *resname, int *result) { int error; struct config_resource *res; if ((error = resource_find(name, unit, resname, &res)) != 0) return error; if (res->type != RES_INT) return EFTYPE; *result = res->u.intval; return 0; } int resource_long_value(const char *name, int unit, const char *resname, long *result) { int error; struct config_resource *res; if ((error = resource_find(name, unit, resname, &res)) != 0) return error; if (res->type != RES_LONG) return EFTYPE; *result = res->u.longval; return 0; } int resource_string_value(const char *name, int unit, const char *resname, char **result) { int error; struct config_resource *res; if ((error = resource_find(name, unit, resname, &res)) != 0) return error; if (res->type != RES_STRING) return EFTYPE; *result = res->u.stringval; return 0; } int resource_query_string(int i, const char *resname, const char *value) { if (i < 0) i = 0; else i = i + 1; for (; i < devtab_count; i++) if (resource_match_string(i, resname, value) >= 0) return i; return -1; } int resource_locate(int i, const char *resname) { if (i < 0) i = 0; else i = i + 1; for (; i < devtab_count; i++) if (!strcmp(devtab[i].name, resname)) return i; return -1; } int resource_count(void) { return devtab_count; } char * resource_query_name(int i) { return devtab[i].name; } int resource_query_unit(int i) { return devtab[i].unit; } static int resource_create(const char *name, int unit, const char *resname, resource_type type, struct config_resource **result) { int i, j; struct config_resource *res = NULL; for (i = 0; i < devtab_count; i++) { if (!strcmp(devtab[i].name, name) && devtab[i].unit == unit) { res = devtab[i].resources; break; } } if (res == NULL) { i = resource_new_name(name, unit); if (i < 0) return ENOMEM; res = devtab[i].resources; } for (j = 0; j < devtab[i].resource_count; j++, res++) { if (!strcmp(res->name, resname)) { *result = res; return 0; } } j = resource_new_resname(i, resname, type); if (j < 0) return ENOMEM; res = &devtab[i].resources[j]; *result = res; return 0; } int resource_set_int(const char *name, int unit, const char *resname, int value) { int error; struct config_resource *res; error = resource_create(name, unit, resname, RES_INT, &res); if (error) return error; if (res->type != RES_INT) return EFTYPE; res->u.intval = value; return 0; } int resource_set_long(const char *name, int unit, const char *resname, long value) { int error; struct config_resource *res; error = resource_create(name, unit, resname, RES_LONG, &res); if (error) return error; if (res->type != RES_LONG) return EFTYPE; res->u.longval = value; return 0; } int resource_set_string(const char *name, int unit, const char *resname, const char *value) { int error; struct config_resource *res; error = resource_create(name, unit, resname, RES_STRING, &res); if (error) return error; if (res->type != RES_STRING) return EFTYPE; if (res->u.stringval) free(res->u.stringval, M_TEMP); res->u.stringval = malloc(strlen(value) + 1, M_TEMP, M_NOWAIT); if (res->u.stringval == NULL) return ENOMEM; strcpy(res->u.stringval, value); return 0; } /* * We use the identify routine to get the hints for all the other devices. * Strings that are all digits or begin with 0x are integers. * * hint.aha.0.bus_speedup=1 * hint.aha.1.irq=10 * hint.wl.0.netid=PLUG * hint.wl.1.netid=XYZZY */ static void hint_load(char *cp) { char *ep, *op, *walker; int len; int val; char name[20]; int unit; char resname[255]; for (ep = cp; *ep != '=' && *ep != '\0'; ep++) ; len = ep - cp; if (*ep == '=') ep++; if (strncmp(cp, "hint.", 5) != 0) return; walker = cp; walker += 5; op = walker; while (*walker && *walker != '.') walker++; if (*walker != '.') return; if (walker - op > sizeof(name)) return; strncpy(name, op, walker - op); name[walker - op] = '\0'; walker++; op = walker; while (*walker && *walker != '.') walker++; if (*walker != '.') return; unit = strtoul(op, &walker, 0); if (*walker != '.') return; walker++; op = walker; while (*walker && *walker != '=') walker++; if (*walker != '=') return; if (walker - op > sizeof(resname)) return; strncpy(resname, op, walker - op); resname[walker - op] = '\0'; walker++; if (walker != ep) return; if (bootverbose) printf("Setting %s %d %s to ", name, unit, resname); val = strtoul(ep, &op, 0); if (*ep != '\0' && *op == '\0') { resource_set_int(name, unit, resname, val); if (bootverbose) printf("%d (int)\n", val); } else { resource_set_string(name, unit, resname, ep); if (bootverbose) printf("%s (string)\n", ep); } } static void hints_load(void *dummy __unused) { char *cp; cp = static_hints; while (cp) { hint_load(cp); while (*cp != '\0') cp++; cp++; if (*cp == '\0') break; } cp = kern_envp; while (cp) { hint_load(cp); while (*cp != '\0') cp++; cp++; if (*cp == '\0') break; } hints_loaded++; } SYSINIT(cfghints, SI_SUB_KMEM, SI_ORDER_ANY + 60, hints_load, 0) /*======================================*/ /* * Some useful method implementations to make life easier for bus drivers. */ void resource_list_init(struct resource_list *rl) { SLIST_INIT(rl); } void resource_list_free(struct resource_list *rl) { struct resource_list_entry *rle; while ((rle = SLIST_FIRST(rl)) != NULL) { if (rle->res) panic("resource_list_free: resource entry is busy"); SLIST_REMOVE_HEAD(rl, link); free(rle, M_BUS); } } void resource_list_add(struct resource_list *rl, int type, int rid, u_long start, u_long end, u_long count) { struct resource_list_entry *rle; rle = resource_list_find(rl, type, rid); if (!rle) { rle = malloc(sizeof(struct resource_list_entry), M_BUS, M_NOWAIT); if (!rle) panic("resource_list_add: can't record entry"); SLIST_INSERT_HEAD(rl, rle, link); rle->type = type; rle->rid = rid; rle->res = NULL; } if (rle->res) panic("resource_list_add: resource entry is busy"); rle->start = start; rle->end = end; rle->count = count; } struct resource_list_entry* resource_list_find(struct resource_list *rl, int type, int rid) { struct resource_list_entry *rle; SLIST_FOREACH(rle, rl, link) if (rle->type == type && rle->rid == rid) return rle; return NULL; } void resource_list_delete(struct resource_list *rl, int type, int rid) { struct resource_list_entry *rle = resource_list_find(rl, type, rid); if (rle) { SLIST_REMOVE(rl, rle, resource_list_entry, link); free(rle, M_BUS); } } struct resource * resource_list_alloc(struct resource_list *rl, device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct resource_list_entry *rle = 0; int passthrough = (device_get_parent(child) != bus); int isdefault = (start == 0UL && end == ~0UL); if (passthrough) { return BUS_ALLOC_RESOURCE(device_get_parent(bus), child, type, rid, start, end, count, flags); } rle = resource_list_find(rl, type, *rid); if (!rle) return 0; /* no resource of that type/rid */ if (rle->res) panic("resource_list_alloc: resource entry is busy"); if (isdefault) { start = rle->start; count = max(count, rle->count); end = max(rle->end, start + count - 1); } rle->res = BUS_ALLOC_RESOURCE(device_get_parent(bus), child, type, rid, start, end, count, flags); /* * Record the new range. */ if (rle->res) { rle->start = rman_get_start(rle->res); rle->end = rman_get_end(rle->res); rle->count = count; } return rle->res; } int resource_list_release(struct resource_list *rl, device_t bus, device_t child, int type, int rid, struct resource *res) { struct resource_list_entry *rle = 0; int passthrough = (device_get_parent(child) != bus); int error; if (passthrough) { return BUS_RELEASE_RESOURCE(device_get_parent(bus), child, type, rid, res); } rle = resource_list_find(rl, type, rid); if (!rle) panic("resource_list_release: can't find resource"); if (!rle->res) panic("resource_list_release: resource entry is not busy"); error = BUS_RELEASE_RESOURCE(device_get_parent(bus), child, type, rid, res); if (error) return error; rle->res = NULL; return 0; } /* * Call DEVICE_IDENTIFY for each driver. */ int bus_generic_probe(device_t dev) { devclass_t dc = dev->devclass; driverlink_t dl; for (dl = TAILQ_FIRST(&dc->drivers); dl; dl = TAILQ_NEXT(dl, link)) DEVICE_IDENTIFY(dl->driver, dev); return 0; } int bus_generic_attach(device_t dev) { device_t child; for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) device_probe_and_attach(child); return 0; } int bus_generic_detach(device_t dev) { device_t child; int error; if (dev->state != DS_ATTACHED) return EBUSY; for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) if ((error = device_detach(child)) != 0) return error; return 0; } int bus_generic_shutdown(device_t dev) { device_t child; for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) device_shutdown(child); return 0; } int bus_generic_suspend(device_t dev) { int error; device_t child, child2; for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) { error = DEVICE_SUSPEND(child); if (error) { for (child2 = TAILQ_FIRST(&dev->children); child2 && child2 != child; child2 = TAILQ_NEXT(child2, link)) DEVICE_RESUME(child2); return (error); } } return 0; } int bus_generic_resume(device_t dev) { device_t child; for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) { DEVICE_RESUME(child); /* if resume fails, there's nothing we can usefully do... */ } return 0; } int bus_print_child_header (device_t dev, device_t child) { int retval = 0; if (device_get_desc(child)) { retval += device_printf(child, "<%s>", device_get_desc(child)); } else { retval += printf("%s", device_get_nameunit(child)); } return (retval); } int bus_print_child_footer (device_t dev, device_t child) { return(printf(" on %s\n", device_get_nameunit(dev))); } int bus_generic_print_child(device_t dev, device_t child) { int retval = 0; retval += bus_print_child_header(dev, child); retval += bus_print_child_footer(dev, child); return (retval); } int bus_generic_read_ivar(device_t dev, device_t child, int index, uintptr_t * result) { return ENOENT; } int bus_generic_write_ivar(device_t dev, device_t child, int index, uintptr_t value) { return ENOENT; } void bus_generic_driver_added(device_t dev, driver_t *driver) { device_t child; DEVICE_IDENTIFY(driver, dev); for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) if (child->state == DS_NOTPRESENT) device_probe_and_attach(child); } int bus_generic_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_intr_t *intr, void *arg, void **cookiep) { /* Propagate up the bus hierarchy until someone handles it. */ if (dev->parent) return (BUS_SETUP_INTR(dev->parent, child, irq, flags, intr, arg, cookiep)); else return (EINVAL); } int bus_generic_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { /* Propagate up the bus hierarchy until someone handles it. */ if (dev->parent) return (BUS_TEARDOWN_INTR(dev->parent, child, irq, cookie)); else return (EINVAL); } struct resource * bus_generic_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { /* Propagate up the bus hierarchy until someone handles it. */ if (dev->parent) return (BUS_ALLOC_RESOURCE(dev->parent, child, type, rid, start, end, count, flags)); else return (NULL); } int bus_generic_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { /* Propagate up the bus hierarchy until someone handles it. */ if (dev->parent) return (BUS_RELEASE_RESOURCE(dev->parent, child, type, rid, r)); else return (EINVAL); } int bus_generic_activate_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { /* Propagate up the bus hierarchy until someone handles it. */ if (dev->parent) return (BUS_ACTIVATE_RESOURCE(dev->parent, child, type, rid, r)); else return (EINVAL); } int bus_generic_deactivate_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { /* Propagate up the bus hierarchy until someone handles it. */ if (dev->parent) return (BUS_DEACTIVATE_RESOURCE(dev->parent, child, type, rid, r)); else return (EINVAL); } /* * Some convenience functions to make it easier for drivers to use the * resource-management functions. All these really do is hide the * indirection through the parent's method table, making for slightly * less-wordy code. In the future, it might make sense for this code * to maintain some sort of a list of resources allocated by each device. */ struct resource * bus_alloc_resource(device_t dev, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { if (dev->parent == 0) return (0); return (BUS_ALLOC_RESOURCE(dev->parent, dev, type, rid, start, end, count, flags)); } int bus_activate_resource(device_t dev, int type, int rid, struct resource *r) { if (dev->parent == 0) return (EINVAL); return (BUS_ACTIVATE_RESOURCE(dev->parent, dev, type, rid, r)); } int bus_deactivate_resource(device_t dev, int type, int rid, struct resource *r) { if (dev->parent == 0) return (EINVAL); return (BUS_DEACTIVATE_RESOURCE(dev->parent, dev, type, rid, r)); } int bus_release_resource(device_t dev, int type, int rid, struct resource *r) { if (dev->parent == 0) return (EINVAL); return (BUS_RELEASE_RESOURCE(dev->parent, dev, type, rid, r)); } int bus_setup_intr(device_t dev, struct resource *r, int flags, driver_intr_t handler, void *arg, void **cookiep) { if (dev->parent == 0) return (EINVAL); return (BUS_SETUP_INTR(dev->parent, dev, r, flags, handler, arg, cookiep)); } int bus_teardown_intr(device_t dev, struct resource *r, void *cookie) { if (dev->parent == 0) return (EINVAL); return (BUS_TEARDOWN_INTR(dev->parent, dev, r, cookie)); } int bus_set_resource(device_t dev, int type, int rid, u_long start, u_long count) { return BUS_SET_RESOURCE(device_get_parent(dev), dev, type, rid, start, count); } int bus_get_resource(device_t dev, int type, int rid, u_long *startp, u_long *countp) { return BUS_GET_RESOURCE(device_get_parent(dev), dev, type, rid, startp, countp); } u_long bus_get_resource_start(device_t dev, int type, int rid) { u_long start, count; int error; error = BUS_GET_RESOURCE(device_get_parent(dev), dev, type, rid, &start, &count); if (error) return 0; return start; } u_long bus_get_resource_count(device_t dev, int type, int rid) { u_long start, count; int error; error = BUS_GET_RESOURCE(device_get_parent(dev), dev, type, rid, &start, &count); if (error) return 0; return count; } void bus_delete_resource(device_t dev, int type, int rid) { BUS_DELETE_RESOURCE(device_get_parent(dev), dev, type, rid); } static int root_print_child(device_t dev, device_t child) { return (0); } static int root_setup_intr(device_t dev, device_t child, driver_intr_t *intr, void *arg, void **cookiep) { /* * If an interrupt mapping gets to here something bad has happened. */ panic("root_setup_intr"); } static kobj_method_t root_methods[] = { /* Device interface */ KOBJMETHOD(device_shutdown, bus_generic_shutdown), KOBJMETHOD(device_suspend, bus_generic_suspend), KOBJMETHOD(device_resume, bus_generic_resume), /* Bus interface */ KOBJMETHOD(bus_print_child, root_print_child), KOBJMETHOD(bus_read_ivar, bus_generic_read_ivar), KOBJMETHOD(bus_write_ivar, bus_generic_write_ivar), KOBJMETHOD(bus_setup_intr, root_setup_intr), { 0, 0 } }; static driver_t root_driver = { "root", root_methods, 1, /* no softc */ }; device_t root_bus; devclass_t root_devclass; static int root_bus_module_handler(module_t mod, int what, void* arg) { switch (what) { case MOD_LOAD: kobj_class_compile((kobj_class_t) &root_driver); root_bus = make_device(NULL, "root", 0); root_bus->desc = "System root bus"; kobj_init((kobj_t) root_bus, (kobj_class_t) &root_driver); root_bus->driver = &root_driver; root_bus->state = DS_ATTACHED; root_devclass = devclass_find_internal("root", FALSE); return 0; case MOD_SHUTDOWN: device_shutdown(root_bus); return 0; } return 0; } static moduledata_t root_bus_mod = { "rootbus", root_bus_module_handler, 0 }; DECLARE_MODULE(rootbus, root_bus_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); void root_bus_configure(void) { device_t dev; PDEBUG((".")); for (dev = TAILQ_FIRST(&root_bus->children); dev; dev = TAILQ_NEXT(dev, link)) { device_probe_and_attach(dev); } } int driver_module_handler(module_t mod, int what, void *arg) { int error, i; struct driver_module_data *dmd; devclass_t bus_devclass; dmd = (struct driver_module_data *)arg; bus_devclass = devclass_find_internal(dmd->dmd_busname, TRUE); error = 0; switch (what) { case MOD_LOAD: if (dmd->dmd_chainevh) error = dmd->dmd_chainevh(mod,what,dmd->dmd_chainarg); for (i = 0; !error && i < dmd->dmd_ndrivers; i++) { PDEBUG(("Loading module: driver %s on bus %s", DRIVERNAME(dmd->dmd_drivers[i]), dmd->dmd_busname)); error = devclass_add_driver(bus_devclass, dmd->dmd_drivers[i]); } if (error) break; /* * The drivers loaded in this way are assumed to all * implement the same devclass. */ *dmd->dmd_devclass = devclass_find_internal(dmd->dmd_drivers[0]->name, TRUE); break; case MOD_UNLOAD: for (i = 0; !error && i < dmd->dmd_ndrivers; i++) { PDEBUG(("Unloading module: driver %s from bus %s", DRIVERNAME(dmd->dmd_drivers[i]), dmd->dmd_busname)); error = devclass_delete_driver(bus_devclass, dmd->dmd_drivers[i]); } if (!error && dmd->dmd_chainevh) error = dmd->dmd_chainevh(mod,what,dmd->dmd_chainarg); break; } return (error); } #ifdef BUS_DEBUG /* the _short versions avoid iteration by not calling anything that prints * more than oneliners. I love oneliners. */ static void print_device_short(device_t dev, int indent) { if (!dev) return; indentprintf(("device %d: <%s> %sparent,%schildren,%s%s%s%s,%sivars,%ssoftc,busy=%d\n", dev->unit, dev->desc, (dev->parent? "":"no "), (TAILQ_EMPTY(&dev->children)? "no ":""), (dev->flags&DF_ENABLED? "enabled,":"disabled,"), (dev->flags&DF_FIXEDCLASS? "fixed,":""), (dev->flags&DF_WILDCARD? "wildcard,":""), (dev->flags&DF_DESCMALLOCED? "descmalloced,":""), (dev->ivars? "":"no "), (dev->softc? "":"no "), dev->busy)); } static void print_device(device_t dev, int indent) { if (!dev) return; print_device_short(dev, indent); indentprintf(("Parent:\n")); print_device_short(dev->parent, indent+1); indentprintf(("Driver:\n")); print_driver_short(dev->driver, indent+1); indentprintf(("Devclass:\n")); print_devclass_short(dev->devclass, indent+1); } void print_device_tree_short(device_t dev, int indent) /* print the device and all its children (indented) */ { device_t child; if (!dev) return; print_device_short(dev, indent); for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) print_device_tree_short(child, indent+1); } void print_device_tree(device_t dev, int indent) /* print the device and all its children (indented) */ { device_t child; if (!dev) return; print_device(dev, indent); for (child = TAILQ_FIRST(&dev->children); child; child = TAILQ_NEXT(child, link)) print_device_tree(child, indent+1); } static void print_driver_short(driver_t *driver, int indent) { if (!driver) return; indentprintf(("driver %s: softc size = %d\n", driver->name, driver->size)); } static void print_driver(driver_t *driver, int indent) { if (!driver) return; print_driver_short(driver, indent); } static void print_driver_list(driver_list_t drivers, int indent) { driverlink_t driver; for (driver = TAILQ_FIRST(&drivers); driver; driver = TAILQ_NEXT(driver, link)) print_driver(driver->driver, indent); } static void print_devclass_short(devclass_t dc, int indent) { if ( !dc ) return; indentprintf(("devclass %s: max units = %d\n", dc->name, dc->maxunit)); } static void print_devclass(devclass_t dc, int indent) { int i; if ( !dc ) return; print_devclass_short(dc, indent); indentprintf(("Drivers:\n")); print_driver_list(dc->drivers, indent+1); indentprintf(("Devices:\n")); for (i = 0; i < dc->maxunit; i++) if (dc->devices[i]) print_device(dc->devices[i], indent+1); } void print_devclass_list_short(void) { devclass_t dc; printf("Short listing of devclasses, drivers & devices:\n"); for (dc = TAILQ_FIRST(&devclasses); dc; dc = TAILQ_NEXT(dc, link)) print_devclass_short(dc, 0); } void print_devclass_list(void) { devclass_t dc; printf("Full listing of devclasses, drivers & devices:\n"); for (dc = TAILQ_FIRST(&devclasses); dc; dc = TAILQ_NEXT(dc, link)) print_devclass(dc, 0); } #endif Index: head/sys/kern/subr_devstat.c =================================================================== --- head/sys/kern/subr_devstat.c (revision 62572) +++ head/sys/kern/subr_devstat.c (revision 62573) @@ -1,309 +1,309 @@ /* * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include static int devstat_num_devs; static long devstat_generation; static int devstat_version = DEVSTAT_VERSION; static int devstat_current_devnumber; static STAILQ_HEAD(devstatlist, devstat) device_statq; /* * Take a malloced and zeroed devstat structure given to us, fill it in * and add it to the queue of devices. */ void devstat_add_entry(struct devstat *ds, const char *dev_name, int unit_number, u_int32_t block_size, devstat_support_flags flags, devstat_type_flags device_type, devstat_priority priority) { struct devstatlist *devstat_head; struct devstat *ds_tmp; if (ds == NULL) return; if (devstat_num_devs == 0) STAILQ_INIT(&device_statq); devstat_generation++; devstat_num_devs++; devstat_head = &device_statq; /* * Priority sort. Each driver passes in its priority when it adds * its devstat entry. Drivers are sorted first by priority, and * then by probe order. * * For the first device, we just insert it, since the priority * doesn't really matter yet. Subsequent devices are inserted into * the list using the order outlined above. */ if (devstat_num_devs == 1) STAILQ_INSERT_TAIL(devstat_head, ds, dev_links); else { for (ds_tmp = STAILQ_FIRST(devstat_head); ds_tmp != NULL; ds_tmp = STAILQ_NEXT(ds_tmp, dev_links)) { struct devstat *ds_next; ds_next = STAILQ_NEXT(ds_tmp, dev_links); /* * If we find a break between higher and lower * priority items, and if this item fits in the * break, insert it. This also applies if the * "lower priority item" is the end of the list. */ if ((priority <= ds_tmp->priority) && ((ds_next == NULL) || (priority > ds_next->priority))) { STAILQ_INSERT_AFTER(devstat_head, ds_tmp, ds, dev_links); break; } else if (priority > ds_tmp->priority) { /* * If this is the case, we should be able * to insert ourselves at the head of the * list. If we can't, something is wrong. */ if (ds_tmp == STAILQ_FIRST(devstat_head)) { STAILQ_INSERT_HEAD(devstat_head, ds, dev_links); break; } else { STAILQ_INSERT_TAIL(devstat_head, ds, dev_links); printf("devstat_add_entry: HELP! " "sorting problem detected " "for %s%d\n", dev_name, unit_number); break; } } } } ds->device_number = devstat_current_devnumber++; ds->unit_number = unit_number; strncpy(ds->device_name, dev_name, DEVSTAT_NAME_LEN); ds->device_name[DEVSTAT_NAME_LEN - 1] = '\0'; ds->block_size = block_size; ds->flags = flags; ds->device_type = device_type; ds->priority = priority; getmicrotime(&ds->dev_creation_time); } /* * Remove a devstat structure from the list of devices. */ void devstat_remove_entry(struct devstat *ds) { struct devstatlist *devstat_head; if (ds == NULL) return; devstat_generation++; devstat_num_devs--; devstat_head = &device_statq; /* Remove this entry from the devstat queue */ STAILQ_REMOVE(devstat_head, ds, devstat, dev_links); } /* * Record a transaction start. */ void devstat_start_transaction(struct devstat *ds) { /* sanity check */ if (ds == NULL) return; /* * We only want to set the start time when we are going from idle * to busy. The start time is really the start of the latest busy * period. */ if (ds->busy_count == 0) getmicrouptime(&ds->start_time); ds->busy_count++; } /* * Record the ending of a transaction, and incrment the various counters. */ void devstat_end_transaction(struct devstat *ds, u_int32_t bytes, devstat_tag_type tag_type, devstat_trans_flags flags) { struct timeval busy_time; /* sanity check */ if (ds == NULL) return; getmicrouptime(&ds->last_comp_time); ds->busy_count--; /* * There might be some transactions (DEVSTAT_NO_DATA) that don't * transfer any data. */ if (flags == DEVSTAT_READ) { ds->bytes_read += bytes; ds->num_reads++; } else if (flags == DEVSTAT_WRITE) { ds->bytes_written += bytes; ds->num_writes++; } else if (flags == DEVSTAT_FREE) { ds->bytes_freed += bytes; ds->num_frees++; } else ds->num_other++; /* * Keep a count of the various tag types sent. */ if ((ds->flags & DEVSTAT_NO_ORDERED_TAGS) == 0 && tag_type != DEVSTAT_TAG_NONE) ds->tag_types[tag_type]++; /* * We only update the busy time when we go idle. Otherwise, this * calculation would require many more clock cycles. */ if (ds->busy_count == 0) { /* Calculate how long we were busy */ busy_time = ds->last_comp_time; timevalsub(&busy_time, &ds->start_time); /* Add our busy time to the total busy time. */ timevaladd(&ds->busy_time, &busy_time); } else if (ds->busy_count < 0) printf("devstat_end_transaction: HELP!! busy_count " "for %s%d is < 0 (%d)!\n", ds->device_name, ds->unit_number, ds->busy_count); } void devstat_end_transaction_bio(struct devstat *ds, struct bio *bp) { devstat_trans_flags flg; if (bp->bio_cmd == BIO_DELETE) flg = DEVSTAT_FREE; else if (bp->bio_cmd == BIO_READ) flg = DEVSTAT_READ; else flg = DEVSTAT_WRITE; devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid, (bp->bio_flags & BIO_ORDERED) ? DEVSTAT_TAG_ORDERED : DEVSTAT_TAG_SIMPLE, flg); } /* * This is the sysctl handler for the devstat package. The data pushed out * on the kern.devstat.all sysctl variable consists of the current devstat * generation number, and then an array of devstat structures, one for each * device in the system. * * I'm really not too fond of this method of doing things, but there really * aren't that many alternatives. We must have some method of making sure * that the generation number the user gets corresponds with the data the * user gets. If the user makes a separate sysctl call to get the * generation, and then a sysctl call to get the device statistics, the * device list could have changed in that brief period of time. By * supplying the generation number along with the statistics output, we can * guarantee that the generation number and the statistics match up. */ static int -sysctl_devstat (SYSCTL_HANDLER_ARGS) +sysctl_devstat(SYSCTL_HANDLER_ARGS) { int error, i; struct devstat *nds; struct devstatlist *devstat_head; if (devstat_num_devs == 0) return(EINVAL); error = 0; devstat_head = &device_statq; /* * First push out the generation number. */ error = SYSCTL_OUT(req, &devstat_generation, sizeof(long)); /* * Now push out all the devices. */ for (i = 0, nds = STAILQ_FIRST(devstat_head); (nds != NULL) && (i < devstat_num_devs) && (error == 0); nds = STAILQ_NEXT(nds, dev_links), i++) error = SYSCTL_OUT(req, nds, sizeof(struct devstat)); return(error); } /* * Sysctl entries for devstat. The first one is a node that all the rest * hang off of. */ SYSCTL_NODE(_kern, OID_AUTO, devstat, CTLFLAG_RD, 0, "Device Statistics"); SYSCTL_PROC(_kern_devstat, OID_AUTO, all, CTLFLAG_RD|CTLTYPE_OPAQUE, 0, 0, sysctl_devstat, "S,devstat", "All devices in the devstat list"); /* * Export the number of devices in the system so that userland utilities * can determine how much memory to allocate to hold all the devices. */ SYSCTL_INT(_kern_devstat, OID_AUTO, numdevs, CTLFLAG_RD, &devstat_num_devs, 0, "Number of devices in the devstat list"); SYSCTL_LONG(_kern_devstat, OID_AUTO, generation, CTLFLAG_RD, &devstat_generation, "Devstat list generation"); SYSCTL_INT(_kern_devstat, OID_AUTO, version, CTLFLAG_RD, &devstat_version, 0, "Devstat list version number"); Index: head/sys/kern/subr_disk.c =================================================================== --- head/sys/kern/subr_disk.c (revision 62572) +++ head/sys/kern/subr_disk.c (revision 62573) @@ -1,295 +1,295 @@ /* * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * $FreeBSD$ * */ #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_DISK, "disk", "disk data"); static d_strategy_t diskstrategy; static d_open_t diskopen; static d_close_t diskclose; static d_ioctl_t diskioctl; static d_psize_t diskpsize; static LIST_HEAD(, disk) disklist = LIST_HEAD_INITIALIZER(&disklist); dev_t disk_create(int unit, struct disk *dp, int flags, struct cdevsw *cdevsw, struct cdevsw *proto) { dev_t dev; bzero(dp, sizeof(*dp)); dev = makedev(cdevsw->d_maj, 0); if (!devsw(dev)) { *proto = *cdevsw; proto->d_open = diskopen; proto->d_close = diskclose; proto->d_ioctl = diskioctl; proto->d_strategy = diskstrategy; proto->d_psize = diskpsize; cdevsw_add(proto); } if (bootverbose) printf("Creating DISK %s%d\n", cdevsw->d_name, unit); dev = make_dev(proto, dkmakeminor(unit, WHOLE_DISK_SLICE, RAW_PART), 0, 0, 0, "%s%d", cdevsw->d_name, unit); dev->si_disk = dp; dp->d_dev = dev; dp->d_dsflags = flags; dp->d_devsw = cdevsw; LIST_INSERT_HEAD(&disklist, dp, d_list); return (dev); } int disk_dumpcheck(dev_t dev, u_int *count, u_int *blkno, u_int *secsize) { struct disk *dp; struct disklabel *dl; u_int boff; dp = dev->si_disk; if (!dp) return (ENXIO); if (!dp->d_slice) return (ENXIO); dl = dsgetlabel(dev, dp->d_slice); if (!dl) return (ENXIO); *count = (u_long)Maxmem * PAGE_SIZE / dl->d_secsize; if (dumplo < 0 || (dumplo + *count > dl->d_partitions[dkpart(dev)].p_size)) return (EINVAL); boff = dl->d_partitions[dkpart(dev)].p_offset + dp->d_slice->dss_slices[dkslice(dev)].ds_offset; *blkno = boff + dumplo; *secsize = dl->d_secsize; return (0); } void disk_invalidate (struct disk *disk) { if (disk->d_slice) dsgone(&disk->d_slice); } void disk_destroy(dev_t dev) { LIST_REMOVE(dev->si_disk, d_list); bzero(dev->si_disk, sizeof(*dev->si_disk)); dev->si_disk = NULL; destroy_dev(dev); return; } struct disk * disk_enumerate(struct disk *disk) { if (!disk) return (LIST_FIRST(&disklist)); else return (LIST_NEXT(disk, d_list)); } static int -sysctl_disks (SYSCTL_HANDLER_ARGS) +sysctl_disks(SYSCTL_HANDLER_ARGS) { struct disk *disk; int error, first; disk = NULL; first = 1; while ((disk = disk_enumerate(disk))) { if (!first) { error = SYSCTL_OUT(req, " ", 1); if (error) return error; } else { first = 0; } error = SYSCTL_OUT(req, disk->d_dev->si_name, strlen(disk->d_dev->si_name)); if (error) return error; } error = SYSCTL_OUT(req, "", 1); return error; } SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD, 0, NULL, sysctl_disks, "A", "names of available disks"); /* * The cdevsw functions */ static int diskopen(dev_t dev, int oflags, int devtype, struct proc *p) { dev_t pdev; struct disk *dp; int error; error = 0; pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); dp = pdev->si_disk; if (!dp) return (ENXIO); while (dp->d_flags & DISKFLAG_LOCK) { dp->d_flags |= DISKFLAG_WANTED; error = tsleep(dp, PRIBIO | PCATCH, "diskopen", hz); if (error) return (error); } dp->d_flags |= DISKFLAG_LOCK; if (!dsisopen(dp->d_slice)) { if (!pdev->si_iosize_max) pdev->si_iosize_max = dev->si_iosize_max; error = dp->d_devsw->d_open(pdev, oflags, devtype, p); } /* Inherit properties from the whole/raw dev_t */ dev->si_disk = pdev->si_disk; dev->si_drv1 = pdev->si_drv1; dev->si_drv2 = pdev->si_drv2; dev->si_iosize_max = pdev->si_iosize_max; dev->si_bsize_phys = pdev->si_bsize_phys; dev->si_bsize_best = pdev->si_bsize_best; if (error) goto out; error = dsopen(dev, devtype, dp->d_dsflags, &dp->d_slice, &dp->d_label); if (!dsisopen(dp->d_slice)) dp->d_devsw->d_close(pdev, oflags, devtype, p); out: dp->d_flags &= ~DISKFLAG_LOCK; if (dp->d_flags & DISKFLAG_WANTED) { dp->d_flags &= ~DISKFLAG_WANTED; wakeup(dp); } return(error); } static int diskclose(dev_t dev, int fflag, int devtype, struct proc *p) { struct disk *dp; int error; error = 0; dp = dev->si_disk; dsclose(dev, devtype, dp->d_slice); if (!dsisopen(dp->d_slice)) { error = dp->d_devsw->d_close(dp->d_dev, fflag, devtype, p); } return (error); } static void diskstrategy(struct bio *bp) { dev_t pdev; struct disk *dp; dp = bp->bio_dev->si_disk; if (!dp) { pdev = dkmodpart(dkmodslice(bp->bio_dev, WHOLE_DISK_SLICE), RAW_PART); dp = bp->bio_dev->si_disk = pdev->si_disk; bp->bio_dev->si_drv1 = pdev->si_drv1; bp->bio_dev->si_drv2 = pdev->si_drv2; bp->bio_dev->si_iosize_max = pdev->si_iosize_max; bp->bio_dev->si_bsize_phys = pdev->si_bsize_phys; bp->bio_dev->si_bsize_best = pdev->si_bsize_best; } if (!dp) { bp->bio_error = ENXIO; bp->bio_flags |= BIO_ERROR; biodone(bp); return; } if (dscheck(bp, dp->d_slice) <= 0) { biodone(bp); return; } KASSERT(dp->d_devsw != NULL, ("NULL devsw")); KASSERT(dp->d_devsw->d_strategy != NULL, ("NULL d_strategy")); dp->d_devsw->d_strategy(bp); return; } static int diskioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p) { struct disk *dp; int error; dp = dev->si_disk; error = dsioctl(dev, cmd, data, fflag, &dp->d_slice); if (error == ENOIOCTL) error = dp->d_devsw->d_ioctl(dev, cmd, data, fflag, p); return (error); } static int diskpsize(dev_t dev) { struct disk *dp; dev_t pdev; dp = dev->si_disk; if (!dp) { pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); dp = pdev->si_disk; if (!dp) return (-1); dev->si_drv1 = pdev->si_drv1; dev->si_drv2 = pdev->si_drv2; /* XXX: don't set bp->b_dev->si_disk (?) */ } return (dssize(dev, &dp->d_slice)); } SYSCTL_DECL(_debug_sizeof); SYSCTL_INT(_debug_sizeof, OID_AUTO, disklabel, CTLFLAG_RD, 0, sizeof(struct disklabel), "sizeof(struct disklabel)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, diskslices, CTLFLAG_RD, 0, sizeof(struct diskslices), "sizeof(struct diskslices)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, disk, CTLFLAG_RD, 0, sizeof(struct disk), "sizeof(struct disk)"); Index: head/sys/kern/subr_prof.c =================================================================== --- head/sys/kern/subr_prof.c (revision 62572) +++ head/sys/kern/subr_prof.c (revision 62573) @@ -1,461 +1,461 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #ifdef GPROF #include #include #undef MCOUNT static MALLOC_DEFINE(M_GPROF, "gprof", "kernel profiling buffer"); static void kmstartup __P((void *)); SYSINIT(kmem, SI_SUB_KPROF, SI_ORDER_FIRST, kmstartup, NULL) struct gmonparam _gmonparam = { GMON_PROF_OFF }; #ifdef GUPROF #include void nullfunc_loop_profiled() { int i; for (i = 0; i < CALIB_SCALE; i++) nullfunc_profiled(); } #define nullfunc_loop_profiled_end nullfunc_profiled /* XXX */ void nullfunc_profiled() { } #endif /* GUPROF */ static void kmstartup(dummy) void *dummy; { char *cp; struct gmonparam *p = &_gmonparam; #ifdef GUPROF int cputime_overhead; int empty_loop_time; int i; int mcount_overhead; int mexitcount_overhead; int nullfunc_loop_overhead; int nullfunc_loop_profiled_time; uintfptr_t tmp_addr; #endif /* * Round lowpc and highpc to multiples of the density we're using * so the rest of the scaling (here and in gprof) stays in ints. */ p->lowpc = ROUNDDOWN((u_long)btext, HISTFRACTION * sizeof(HISTCOUNTER)); p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER)); p->textsize = p->highpc - p->lowpc; printf("Profiling kernel, textsize=%lu [%x..%x]\n", p->textsize, p->lowpc, p->highpc); p->kcountsize = p->textsize / HISTFRACTION; p->hashfraction = HASHFRACTION; p->fromssize = p->textsize / HASHFRACTION; p->tolimit = p->textsize * ARCDENSITY / 100; if (p->tolimit < MINARCS) p->tolimit = MINARCS; else if (p->tolimit > MAXARCS) p->tolimit = MAXARCS; p->tossize = p->tolimit * sizeof(struct tostruct); cp = (char *)malloc(p->kcountsize + p->fromssize + p->tossize, M_GPROF, M_NOWAIT); if (cp == 0) { printf("No memory for profiling.\n"); return; } bzero(cp, p->kcountsize + p->tossize + p->fromssize); p->tos = (struct tostruct *)cp; cp += p->tossize; p->kcount = (HISTCOUNTER *)cp; cp += p->kcountsize; p->froms = (u_short *)cp; #ifdef GUPROF /* Initialize pointers to overhead counters. */ p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime)); p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount)); p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount)); /* * Disable interrupts to avoid interference while we calibrate * things. */ disable_intr(); /* * Determine overheads. * XXX this needs to be repeated for each useful timer/counter. */ cputime_overhead = 0; startguprof(p); for (i = 0; i < CALIB_SCALE; i++) cputime_overhead += cputime(); empty_loop(); startguprof(p); empty_loop(); empty_loop_time = cputime(); nullfunc_loop_profiled(); /* * Start profiling. There won't be any normal function calls since * interrupts are disabled, but we will call the profiling routines * directly to determine their overheads. */ p->state = GMON_PROF_HIRES; startguprof(p); nullfunc_loop_profiled(); startguprof(p); for (i = 0; i < CALIB_SCALE; i++) #if defined(__i386__) && __GNUC__ >= 2 __asm("pushl %0; call __mcount; popl %%ecx" : : "i" (profil) : "ax", "bx", "cx", "dx", "memory"); #else #error #endif mcount_overhead = KCOUNT(p, PC_TO_I(p, profil)); startguprof(p); for (i = 0; i < CALIB_SCALE; i++) #if defined(__i386__) && __GNUC__ >= 2 __asm("call " __XSTRING(HIDENAME(mexitcount)) "; 1:" : : : "ax", "bx", "cx", "dx", "memory"); __asm("movl $1b,%0" : "=rm" (tmp_addr)); #else #error #endif mexitcount_overhead = KCOUNT(p, PC_TO_I(p, tmp_addr)); p->state = GMON_PROF_OFF; stopguprof(p); enable_intr(); nullfunc_loop_profiled_time = 0; for (tmp_addr = (uintfptr_t)nullfunc_loop_profiled; tmp_addr < (uintfptr_t)nullfunc_loop_profiled_end; tmp_addr += HISTFRACTION * sizeof(HISTCOUNTER)) nullfunc_loop_profiled_time += KCOUNT(p, PC_TO_I(p, tmp_addr)); #define CALIB_DOSCALE(count) (((count) + CALIB_SCALE / 3) / CALIB_SCALE) #define c2n(count, freq) ((int)((count) * 1000000000LL / freq)) printf("cputime %d, empty_loop %d, nullfunc_loop_profiled %d, mcount %d, mexitcount %d\n", CALIB_DOSCALE(c2n(cputime_overhead, p->profrate)), CALIB_DOSCALE(c2n(empty_loop_time, p->profrate)), CALIB_DOSCALE(c2n(nullfunc_loop_profiled_time, p->profrate)), CALIB_DOSCALE(c2n(mcount_overhead, p->profrate)), CALIB_DOSCALE(c2n(mexitcount_overhead, p->profrate))); cputime_overhead -= empty_loop_time; mcount_overhead -= empty_loop_time; mexitcount_overhead -= empty_loop_time; /*- * Profiling overheads are determined by the times between the * following events: * MC1: mcount() is called * MC2: cputime() (called from mcount()) latches the timer * MC3: mcount() completes * ME1: mexitcount() is called * ME2: cputime() (called from mexitcount()) latches the timer * ME3: mexitcount() completes. * The times between the events vary slightly depending on instruction * combination and cache misses, etc. Attempt to determine the * minimum times. These can be subtracted from the profiling times * without much risk of reducing the profiling times below what they * would be when profiling is not configured. Abbreviate: * ab = minimum time between MC1 and MC3 * a = minumum time between MC1 and MC2 * b = minimum time between MC2 and MC3 * cd = minimum time between ME1 and ME3 * c = minimum time between ME1 and ME2 * d = minimum time between ME2 and ME3. * These satisfy the relations: * ab <= mcount_overhead (just measured) * a + b <= ab * cd <= mexitcount_overhead (just measured) * c + d <= cd * a + d <= nullfunc_loop_profiled_time (just measured) * a >= 0, b >= 0, c >= 0, d >= 0. * Assume that ab and cd are equal to the minimums. */ p->cputime_overhead = CALIB_DOSCALE(cputime_overhead); p->mcount_overhead = CALIB_DOSCALE(mcount_overhead - cputime_overhead); p->mexitcount_overhead = CALIB_DOSCALE(mexitcount_overhead - cputime_overhead); nullfunc_loop_overhead = nullfunc_loop_profiled_time - empty_loop_time; p->mexitcount_post_overhead = CALIB_DOSCALE((mcount_overhead - nullfunc_loop_overhead) / 4); p->mexitcount_pre_overhead = p->mexitcount_overhead + p->cputime_overhead - p->mexitcount_post_overhead; p->mcount_pre_overhead = CALIB_DOSCALE(nullfunc_loop_overhead) - p->mexitcount_post_overhead; p->mcount_post_overhead = p->mcount_overhead + p->cputime_overhead - p->mcount_pre_overhead; printf( "Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d nsec\n", c2n(p->cputime_overhead, p->profrate), c2n(p->mcount_overhead, p->profrate), c2n(p->mcount_pre_overhead, p->profrate), c2n(p->mcount_post_overhead, p->profrate), c2n(p->cputime_overhead, p->profrate), c2n(p->mexitcount_overhead, p->profrate), c2n(p->mexitcount_pre_overhead, p->profrate), c2n(p->mexitcount_post_overhead, p->profrate)); printf( "Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d cycles\n", p->cputime_overhead, p->mcount_overhead, p->mcount_pre_overhead, p->mcount_post_overhead, p->cputime_overhead, p->mexitcount_overhead, p->mexitcount_pre_overhead, p->mexitcount_post_overhead); #endif /* GUPROF */ } /* * Return kernel profiling information. */ static int -sysctl_kern_prof (SYSCTL_HANDLER_ARGS) +sysctl_kern_prof(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; struct gmonparam *gp = &_gmonparam; int error; int state; /* all sysctl names at this level are terminal */ if (namelen != 1) return (ENOTDIR); /* overloaded */ switch (name[0]) { case GPROF_STATE: state = gp->state; error = sysctl_handle_int(oidp, &state, 0, req); if (error) return (error); if (!req->newptr) return (0); if (state == GMON_PROF_OFF) { gp->state = state; stopprofclock(&proc0); stopguprof(gp); } else if (state == GMON_PROF_ON) { gp->state = GMON_PROF_OFF; stopguprof(gp); gp->profrate = profhz; startprofclock(&proc0); gp->state = state; #ifdef GUPROF } else if (state == GMON_PROF_HIRES) { gp->state = GMON_PROF_OFF; stopprofclock(&proc0); startguprof(gp); gp->state = state; #endif } else if (state != gp->state) return (EINVAL); return (0); case GPROF_COUNT: return (sysctl_handle_opaque(oidp, gp->kcount, gp->kcountsize, req)); case GPROF_FROMS: return (sysctl_handle_opaque(oidp, gp->froms, gp->fromssize, req)); case GPROF_TOS: return (sysctl_handle_opaque(oidp, gp->tos, gp->tossize, req)); case GPROF_GMONPARAM: return (sysctl_handle_opaque(oidp, gp, sizeof *gp, req)); default: return (EOPNOTSUPP); } /* NOTREACHED */ } SYSCTL_NODE(_kern, KERN_PROF, prof, CTLFLAG_RW, sysctl_kern_prof, ""); #endif /* GPROF */ /* * Profiling system call. * * The scale factor is a fixed point number with 16 bits of fraction, so that * 1.0 is represented as 0x10000. A scale factor of 0 turns off profiling. */ #ifndef _SYS_SYSPROTO_H_ struct profil_args { caddr_t samples; size_t size; size_t offset; u_int scale; }; #endif /* ARGSUSED */ int profil(p, uap) struct proc *p; register struct profil_args *uap; { register struct uprof *upp; int s; if (uap->scale > (1 << 16)) return (EINVAL); if (uap->scale == 0) { stopprofclock(p); return (0); } upp = &p->p_stats->p_prof; /* Block profile interrupts while changing state. */ s = splstatclock(); upp->pr_off = uap->offset; upp->pr_scale = uap->scale; upp->pr_base = uap->samples; upp->pr_size = uap->size; startprofclock(p); splx(s); return (0); } /* * Scale is a fixed-point number with the binary point 16 bits * into the value, and is <= 1.0. pc is at most 32 bits, so the * intermediate result is at most 48 bits. */ #define PC_TO_INDEX(pc, prof) \ ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) /* * Collect user-level profiling statistics; called on a profiling tick, * when a process is running in user-mode. This routine may be called * from an interrupt context. We try to update the user profiling buffers * cheaply with fuswintr() and suswintr(). If that fails, we revert to * an AST that will vector us to trap() with a context in which copyin * and copyout will work. Trap will then call addupc_task(). * * Note that we may (rarely) not get around to the AST soon enough, and * lose profile ticks when the next tick overwrites this one, but in this * case the system is overloaded and the profile is probably already * inaccurate. */ void addupc_intr(p, pc, ticks) register struct proc *p; register u_long pc; u_int ticks; { register struct uprof *prof; register caddr_t addr; register u_int i; register int v; if (ticks == 0) return; prof = &p->p_stats->p_prof; if (pc < prof->pr_off || (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) return; /* out of range; ignore */ addr = prof->pr_base + i; if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) { prof->pr_addr = pc; prof->pr_ticks = ticks; need_proftick(p); } } /* * Much like before, but we can afford to take faults here. If the * update fails, we simply turn off profiling. */ void addupc_task(p, pc, ticks) register struct proc *p; register u_long pc; u_int ticks; { register struct uprof *prof; register caddr_t addr; register u_int i; u_short v; /* Testing P_PROFIL may be unnecessary, but is certainly safe. */ if ((p->p_flag & P_PROFIL) == 0 || ticks == 0) return; prof = &p->p_stats->p_prof; if (pc < prof->pr_off || (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) return; addr = prof->pr_base + i; if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) { v += ticks; if (copyout((caddr_t)&v, addr, sizeof(v)) == 0) return; } stopprofclock(p); } Index: head/sys/kern/tty.c =================================================================== --- head/sys/kern/tty.c (revision 62572) +++ head/sys/kern/tty.c (revision 62573) @@ -1,2485 +1,2485 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tty.c 8.8 (Berkeley) 1/21/94 * $FreeBSD$ */ /*- * TODO: * o Fix races for sending the start char in ttyflush(). * o Handle inter-byte timeout for "MIN > 0, TIME > 0" in ttyselect(). * With luck, there will be MIN chars before select() returns(). * o Handle CLOCAL consistently for ptys. Perhaps disallow setting it. * o Don't allow input in TS_ZOMBIE case. It would be visible through * FIONREAD. * o Do the new sio locking stuff here and use it to avoid special * case for EXTPROC? * o Lock PENDIN too? * o Move EXTPROC and/or PENDIN to t_state? * o Wrap most of ttioctl in spltty/splx. * o Implement TIOCNOTTY or remove it from . * o Send STOP if IXOFF is toggled off while TS_TBLOCK is set. * o Don't allow certain termios flags to affect disciplines other * than TTYDISC. Cancel their effects before switch disciplines * and ignore them if they are set while we are in another * discipline. * o Now that historical speed conversions are handled here, don't * do them in drivers. * o Check for TS_CARR_ON being set while everything is closed and not * waiting for carrier. TS_CARR_ON isn't cleared if nothing is open, * so it would live until the next open even if carrier drops. * o Restore TS_WOPEN since it is useful in pstat. It must be cleared * only when _all_ openers leave open(). */ #include "snp.h" #include "opt_compat.h" #include "opt_uconsole.h" #include #include #include #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #include #endif #include #define TTYDEFCHARS #include #undef TTYDEFCHARS #include #include #include #include #include #include #include #include #include #include #if NSNP > 0 #include #endif #include #include #include #include #include MALLOC_DEFINE(M_TTYS, "ttys", "tty data structures"); static int proc_compare __P((struct proc *p1, struct proc *p2)); static int ttnread __P((struct tty *tp)); static void ttyecho __P((int c, struct tty *tp)); static int ttyoutput __P((int c, register struct tty *tp)); static void ttypend __P((struct tty *tp)); static void ttyretype __P((struct tty *tp)); static void ttyrub __P((int c, struct tty *tp)); static void ttyrubo __P((struct tty *tp, int cnt)); static void ttyunblock __P((struct tty *tp)); static int ttywflush __P((struct tty *tp)); /* * Table with character classes and parity. The 8th bit indicates parity, * the 7th bit indicates the character is an alphameric or underscore (for * ALTWERASE), and the low 6 bits indicate delay type. If the low 6 bits * are 0 then the character needs no special processing on output; classes * other than 0 might be translated or (not currently) require delays. */ #define E 0x00 /* Even parity. */ #define O 0x80 /* Odd parity. */ #define PARITY(c) (char_type[c] & O) #define ALPHA 0x40 /* Alpha or underscore. */ #define ISALPHA(c) (char_type[(c) & TTY_CHARMASK] & ALPHA) #define CCLASSMASK 0x3f #define CCLASS(c) (char_type[c] & CCLASSMASK) #define BS BACKSPACE #define CC CONTROL #define CR RETURN #define NA ORDINARY | ALPHA #define NL NEWLINE #define NO ORDINARY #define TB TAB #define VT VTAB static u_char const char_type[] = { E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* nul - bel */ O|BS, E|TB, E|NL, O|CC, E|VT, O|CR, O|CC, E|CC, /* bs - si */ O|CC, E|CC, E|CC, O|CC, E|CC, O|CC, O|CC, E|CC, /* dle - etb */ E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* can - us */ O|NO, E|NO, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* sp - ' */ E|NO, O|NO, O|NO, E|NO, O|NO, E|NO, E|NO, O|NO, /* ( - / */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* 0 - 7 */ O|NA, E|NA, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* 8 - ? */ O|NO, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* @ - G */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* H - O */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* P - W */ O|NA, E|NA, E|NA, O|NO, E|NO, O|NO, O|NO, O|NA, /* X - _ */ E|NO, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* ` - g */ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* h - o */ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* p - w */ E|NA, O|NA, O|NA, E|NO, O|NO, E|NO, E|NO, O|CC, /* x - del */ /* * Meta chars; should be settable per character set; * for now, treat them all as normal characters. */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, }; #undef BS #undef CC #undef CR #undef NA #undef NL #undef NO #undef TB #undef VT /* Macros to clear/set/test flags. */ #define SET(t, f) (t) |= (f) #define CLR(t, f) (t) &= ~(f) #define ISSET(t, f) ((t) & (f)) #undef MAX_INPUT /* XXX wrong in */ #define MAX_INPUT TTYHOG /* XXX limit is usually larger for !ICANON */ /* * list of struct tty where pstat(8) can pick it up with sysctl */ static SLIST_HEAD(, tty) tty_list; static int drainwait = 5*60; SYSCTL_INT(_kern, OID_AUTO, drainwait, CTLFLAG_RW, &drainwait, 0, "Output drain timeout in seconds"); /* * Initial open of tty, or (re)entry to standard tty line discipline. */ int ttyopen(device, tp) dev_t device; register struct tty *tp; { int s; s = spltty(); tp->t_dev = device; if (!ISSET(tp->t_state, TS_ISOPEN)) { SET(tp->t_state, TS_ISOPEN); if (ISSET(tp->t_cflag, CLOCAL)) SET(tp->t_state, TS_CONNECTED); bzero(&tp->t_winsize, sizeof(tp->t_winsize)); } /* XXX don't hang forever on output */ if (tp->t_timeout < 0) tp->t_timeout = drainwait*hz; ttsetwater(tp); splx(s); return (0); } /* * Handle close() on a tty line: flush and set to initial state, * bumping generation number so that pending read/write calls * can detect recycling of the tty. * XXX our caller should have done `spltty(); l_close(); ttyclose();' * and l_close() should have flushed, but we repeat the spltty() and * the flush in case there are buggy callers. */ int ttyclose(tp) register struct tty *tp; { int s; funsetown(tp->t_sigio); s = spltty(); if (constty == tp) constty = NULL; ttyflush(tp, FREAD | FWRITE); clist_free_cblocks(&tp->t_canq); clist_free_cblocks(&tp->t_outq); clist_free_cblocks(&tp->t_rawq); #if NSNP > 0 if (ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpdown((struct snoop *)tp->t_sc); #endif tp->t_gen++; tp->t_line = TTYDISC; tp->t_pgrp = NULL; tp->t_session = NULL; tp->t_state = 0; splx(s); return (0); } #define FLUSHQ(q) { \ if ((q)->c_cc) \ ndflush(q, (q)->c_cc); \ } /* Is 'c' a line delimiter ("break" character)? */ #define TTBREAKC(c, lflag) \ ((c) == '\n' || (((c) == cc[VEOF] || \ (c) == cc[VEOL] || ((c) == cc[VEOL2] && lflag & IEXTEN)) && \ (c) != _POSIX_VDISABLE)) /* * Process input of a single character received on a tty. */ int ttyinput(c, tp) register int c; register struct tty *tp; { register tcflag_t iflag, lflag; register cc_t *cc; int i, err; /* * If input is pending take it first. */ lflag = tp->t_lflag; if (ISSET(lflag, PENDIN)) ttypend(tp); /* * Gather stats. */ if (ISSET(lflag, ICANON)) { ++tk_cancc; ++tp->t_cancc; } else { ++tk_rawcc; ++tp->t_rawcc; } ++tk_nin; /* * Block further input iff: * current input > threshold AND input is available to user program * AND input flow control is enabled and not yet invoked. * The 3 is slop for PARMRK. */ iflag = tp->t_iflag; if (tp->t_rawq.c_cc + tp->t_canq.c_cc > tp->t_ihiwat - 3 && (!ISSET(lflag, ICANON) || tp->t_canq.c_cc != 0) && (ISSET(tp->t_cflag, CRTS_IFLOW) || ISSET(iflag, IXOFF)) && !ISSET(tp->t_state, TS_TBLOCK)) ttyblock(tp); /* Handle exceptional conditions (break, parity, framing). */ cc = tp->t_cc; err = (ISSET(c, TTY_ERRORMASK)); if (err) { CLR(c, TTY_ERRORMASK); if (ISSET(err, TTY_BI)) { if (ISSET(iflag, IGNBRK)) return (0); if (ISSET(iflag, BRKINT)) { ttyflush(tp, FREAD | FWRITE); pgsignal(tp->t_pgrp, SIGINT, 1); goto endcase; } if (ISSET(iflag, PARMRK)) goto parmrk; } else if ((ISSET(err, TTY_PE) && ISSET(iflag, INPCK)) || ISSET(err, TTY_FE)) { if (ISSET(iflag, IGNPAR)) return (0); else if (ISSET(iflag, PARMRK)) { parmrk: if (tp->t_rawq.c_cc + tp->t_canq.c_cc > MAX_INPUT - 3) goto input_overflow; (void)putc(0377 | TTY_QUOTE, &tp->t_rawq); (void)putc(0 | TTY_QUOTE, &tp->t_rawq); (void)putc(c | TTY_QUOTE, &tp->t_rawq); goto endcase; } else c = 0; } } if (!ISSET(tp->t_state, TS_TYPEN) && ISSET(iflag, ISTRIP)) CLR(c, 0x80); if (!ISSET(lflag, EXTPROC)) { /* * Check for literal nexting very first */ if (ISSET(tp->t_state, TS_LNCH)) { SET(c, TTY_QUOTE); CLR(tp->t_state, TS_LNCH); } /* * Scan for special characters. This code * is really just a big case statement with * non-constant cases. The bottom of the * case statement is labeled ``endcase'', so goto * it after a case match, or similar. */ /* * Control chars which aren't controlled * by ICANON, ISIG, or IXON. */ if (ISSET(lflag, IEXTEN)) { if (CCEQ(cc[VLNEXT], c)) { if (ISSET(lflag, ECHO)) { if (ISSET(lflag, ECHOE)) { (void)ttyoutput('^', tp); (void)ttyoutput('\b', tp); } else ttyecho(c, tp); } SET(tp->t_state, TS_LNCH); goto endcase; } if (CCEQ(cc[VDISCARD], c)) { if (ISSET(lflag, FLUSHO)) CLR(tp->t_lflag, FLUSHO); else { ttyflush(tp, FWRITE); ttyecho(c, tp); if (tp->t_rawq.c_cc + tp->t_canq.c_cc) ttyretype(tp); SET(tp->t_lflag, FLUSHO); } goto startoutput; } } /* * Signals. */ if (ISSET(lflag, ISIG)) { if (CCEQ(cc[VINTR], c) || CCEQ(cc[VQUIT], c)) { if (!ISSET(lflag, NOFLSH)) ttyflush(tp, FREAD | FWRITE); ttyecho(c, tp); pgsignal(tp->t_pgrp, CCEQ(cc[VINTR], c) ? SIGINT : SIGQUIT, 1); goto endcase; } if (CCEQ(cc[VSUSP], c)) { if (!ISSET(lflag, NOFLSH)) ttyflush(tp, FREAD); ttyecho(c, tp); pgsignal(tp->t_pgrp, SIGTSTP, 1); goto endcase; } } /* * Handle start/stop characters. */ if (ISSET(iflag, IXON)) { if (CCEQ(cc[VSTOP], c)) { if (!ISSET(tp->t_state, TS_TTSTOP)) { SET(tp->t_state, TS_TTSTOP); (*tp->t_stop)(tp, 0); return (0); } if (!CCEQ(cc[VSTART], c)) return (0); /* * if VSTART == VSTOP then toggle */ goto endcase; } if (CCEQ(cc[VSTART], c)) goto restartoutput; } /* * IGNCR, ICRNL, & INLCR */ if (c == '\r') { if (ISSET(iflag, IGNCR)) return (0); else if (ISSET(iflag, ICRNL)) c = '\n'; } else if (c == '\n' && ISSET(iflag, INLCR)) c = '\r'; } if (!ISSET(tp->t_lflag, EXTPROC) && ISSET(lflag, ICANON)) { /* * From here on down canonical mode character * processing takes place. */ /* * erase (^H / ^?) */ if (CCEQ(cc[VERASE], c)) { if (tp->t_rawq.c_cc) ttyrub(unputc(&tp->t_rawq), tp); goto endcase; } /* * kill (^U) */ if (CCEQ(cc[VKILL], c)) { if (ISSET(lflag, ECHOKE) && tp->t_rawq.c_cc == tp->t_rocount && !ISSET(lflag, ECHOPRT)) while (tp->t_rawq.c_cc) ttyrub(unputc(&tp->t_rawq), tp); else { ttyecho(c, tp); if (ISSET(lflag, ECHOK) || ISSET(lflag, ECHOKE)) ttyecho('\n', tp); FLUSHQ(&tp->t_rawq); tp->t_rocount = 0; } CLR(tp->t_state, TS_LOCAL); goto endcase; } /* * word erase (^W) */ if (CCEQ(cc[VWERASE], c) && ISSET(lflag, IEXTEN)) { int ctype; /* * erase whitespace */ while ((c = unputc(&tp->t_rawq)) == ' ' || c == '\t') ttyrub(c, tp); if (c == -1) goto endcase; /* * erase last char of word and remember the * next chars type (for ALTWERASE) */ ttyrub(c, tp); c = unputc(&tp->t_rawq); if (c == -1) goto endcase; if (c == ' ' || c == '\t') { (void)putc(c, &tp->t_rawq); goto endcase; } ctype = ISALPHA(c); /* * erase rest of word */ do { ttyrub(c, tp); c = unputc(&tp->t_rawq); if (c == -1) goto endcase; } while (c != ' ' && c != '\t' && (!ISSET(lflag, ALTWERASE) || ISALPHA(c) == ctype)); (void)putc(c, &tp->t_rawq); goto endcase; } /* * reprint line (^R) */ if (CCEQ(cc[VREPRINT], c) && ISSET(lflag, IEXTEN)) { ttyretype(tp); goto endcase; } /* * ^T - kernel info and generate SIGINFO */ if (CCEQ(cc[VSTATUS], c) && ISSET(lflag, IEXTEN)) { if (ISSET(lflag, ISIG)) pgsignal(tp->t_pgrp, SIGINFO, 1); if (!ISSET(lflag, NOKERNINFO)) ttyinfo(tp); goto endcase; } } /* * Check for input buffer overflow */ if (tp->t_rawq.c_cc + tp->t_canq.c_cc >= MAX_INPUT) { input_overflow: if (ISSET(iflag, IMAXBEL)) { if (tp->t_outq.c_cc < tp->t_ohiwat) (void)ttyoutput(CTRL('g'), tp); } goto endcase; } if ( c == 0377 && ISSET(iflag, PARMRK) && !ISSET(iflag, ISTRIP) && ISSET(iflag, IGNBRK|IGNPAR) != (IGNBRK|IGNPAR)) (void)putc(0377 | TTY_QUOTE, &tp->t_rawq); /* * Put data char in q for user and * wakeup on seeing a line delimiter. */ if (putc(c, &tp->t_rawq) >= 0) { if (!ISSET(lflag, ICANON)) { ttwakeup(tp); ttyecho(c, tp); goto endcase; } if (TTBREAKC(c, lflag)) { tp->t_rocount = 0; catq(&tp->t_rawq, &tp->t_canq); ttwakeup(tp); } else if (tp->t_rocount++ == 0) tp->t_rocol = tp->t_column; if (ISSET(tp->t_state, TS_ERASE)) { /* * end of prterase \.../ */ CLR(tp->t_state, TS_ERASE); (void)ttyoutput('/', tp); } i = tp->t_column; ttyecho(c, tp); if (CCEQ(cc[VEOF], c) && ISSET(lflag, ECHO)) { /* * Place the cursor over the '^' of the ^D. */ i = imin(2, tp->t_column - i); while (i > 0) { (void)ttyoutput('\b', tp); i--; } } } endcase: /* * IXANY means allow any character to restart output. */ if (ISSET(tp->t_state, TS_TTSTOP) && !ISSET(iflag, IXANY) && cc[VSTART] != cc[VSTOP]) return (0); restartoutput: CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_TTSTOP); startoutput: return (ttstart(tp)); } /* * Output a single character on a tty, doing output processing * as needed (expanding tabs, newline processing, etc.). * Returns < 0 if succeeds, otherwise returns char to resend. * Must be recursive. */ static int ttyoutput(c, tp) register int c; register struct tty *tp; { register tcflag_t oflag; register int col, s; oflag = tp->t_oflag; if (!ISSET(oflag, OPOST)) { if (ISSET(tp->t_lflag, FLUSHO)) return (-1); if (putc(c, &tp->t_outq)) return (c); tk_nout++; tp->t_outcc++; return (-1); } /* * Do tab expansion if OXTABS is set. Special case if we external * processing, we don't do the tab expansion because we'll probably * get it wrong. If tab expansion needs to be done, let it happen * externally. */ CLR(c, ~TTY_CHARMASK); if (c == '\t' && ISSET(oflag, OXTABS) && !ISSET(tp->t_lflag, EXTPROC)) { c = 8 - (tp->t_column & 7); if (!ISSET(tp->t_lflag, FLUSHO)) { s = spltty(); /* Don't interrupt tabs. */ c -= b_to_q(" ", c, &tp->t_outq); tk_nout += c; tp->t_outcc += c; splx(s); } tp->t_column += c; return (c ? -1 : '\t'); } if (c == CEOT && ISSET(oflag, ONOEOT)) return (-1); /* * Newline translation: if ONLCR is set, * translate newline into "\r\n". */ if (c == '\n' && ISSET(tp->t_oflag, ONLCR)) { tk_nout++; tp->t_outcc++; if (putc('\r', &tp->t_outq)) return (c); } tk_nout++; tp->t_outcc++; if (!ISSET(tp->t_lflag, FLUSHO) && putc(c, &tp->t_outq)) return (c); col = tp->t_column; switch (CCLASS(c)) { case BACKSPACE: if (col > 0) --col; break; case CONTROL: break; case NEWLINE: case RETURN: col = 0; break; case ORDINARY: ++col; break; case TAB: col = (col + 8) & ~7; break; } tp->t_column = col; return (-1); } /* * Ioctls for all tty devices. Called after line-discipline specific ioctl * has been called to do discipline-specific functions and/or reject any * of these ioctl commands. */ /* ARGSUSED */ int ttioctl(tp, cmd, data, flag) register struct tty *tp; u_long cmd; int flag; void *data; { register struct proc *p; int s, error; p = curproc; /* XXX */ /* If the ioctl involves modification, hang if in the background. */ switch (cmd) { case TIOCCBRK: case TIOCCONS: case TIOCDRAIN: case TIOCEXCL: case TIOCFLUSH: #ifdef TIOCHPCL case TIOCHPCL: #endif case TIOCNXCL: case TIOCSBRK: case TIOCSCTTY: case TIOCSDRAINWAIT: case TIOCSETA: case TIOCSETAF: case TIOCSETAW: case TIOCSETD: case TIOCSPGRP: case TIOCSTART: case TIOCSTAT: case TIOCSTI: case TIOCSTOP: case TIOCSWINSZ: #if defined(COMPAT_43) || defined(COMPAT_SUNOS) case TIOCLBIC: case TIOCLBIS: case TIOCLSET: case TIOCSETC: case OTIOCSETD: case TIOCSETN: case TIOCSETP: case TIOCSLTC: #endif while (isbackground(p, tp) && !(p->p_flag & P_PPWAIT) && !SIGISMEMBER(p->p_sigignore, SIGTTOU) && !SIGISMEMBER(p->p_sigmask, SIGTTOU)) { if (p->p_pgrp->pg_jobc == 0) return (EIO); pgsignal(p->p_pgrp, SIGTTOU, 1); error = ttysleep(tp, &lbolt, TTOPRI | PCATCH, "ttybg1", 0); if (error) return (error); } break; } switch (cmd) { /* Process the ioctl. */ case FIOASYNC: /* set/clear async i/o */ s = spltty(); if (*(int *)data) SET(tp->t_state, TS_ASYNC); else CLR(tp->t_state, TS_ASYNC); splx(s); break; case FIONBIO: /* set/clear non-blocking i/o */ break; /* XXX: delete. */ case FIONREAD: /* get # bytes to read */ s = spltty(); *(int *)data = ttnread(tp); splx(s); break; case FIOSETOWN: /* * Policy -- Don't allow FIOSETOWN on someone else's * controlling tty */ if (tp->t_session != NULL && !isctty(p, tp)) return (ENOTTY); error = fsetown(*(int *)data, &tp->t_sigio); if (error) return (error); break; case FIOGETOWN: if (tp->t_session != NULL && !isctty(p, tp)) return (ENOTTY); *(int *)data = fgetown(tp->t_sigio); break; case TIOCEXCL: /* set exclusive use of tty */ s = spltty(); SET(tp->t_state, TS_XCLUDE); splx(s); break; case TIOCFLUSH: { /* flush buffers */ register int flags = *(int *)data; if (flags == 0) flags = FREAD | FWRITE; else flags &= FREAD | FWRITE; ttyflush(tp, flags); break; } case TIOCCONS: /* become virtual console */ if (*(int *)data) { if (constty && constty != tp && ISSET(constty->t_state, TS_CONNECTED)) return (EBUSY); #ifndef UCONSOLE if ((error = suser(p)) != 0) return (error); #endif constty = tp; } else if (tp == constty) constty = NULL; break; case TIOCDRAIN: /* wait till output drained */ error = ttywait(tp); if (error) return (error); break; case TIOCGETA: { /* get termios struct */ struct termios *t = (struct termios *)data; bcopy(&tp->t_termios, t, sizeof(struct termios)); break; } case TIOCGETD: /* get line discipline */ *(int *)data = tp->t_line; break; case TIOCGWINSZ: /* get window size */ *(struct winsize *)data = tp->t_winsize; break; case TIOCGPGRP: /* get pgrp of tty */ if (!isctty(p, tp)) return (ENOTTY); *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; break; #ifdef TIOCHPCL case TIOCHPCL: /* hang up on last close */ s = spltty(); SET(tp->t_cflag, HUPCL); splx(s); break; #endif case TIOCNXCL: /* reset exclusive use of tty */ s = spltty(); CLR(tp->t_state, TS_XCLUDE); splx(s); break; case TIOCOUTQ: /* output queue size */ *(int *)data = tp->t_outq.c_cc; break; case TIOCSETA: /* set termios struct */ case TIOCSETAW: /* drain output, set */ case TIOCSETAF: { /* drn out, fls in, set */ register struct termios *t = (struct termios *)data; if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; if (t->c_ispeed == 0) t->c_ispeed = tp->t_ospeed; if (t->c_ispeed == 0) return (EINVAL); s = spltty(); if (cmd == TIOCSETAW || cmd == TIOCSETAF) { error = ttywait(tp); if (error) { splx(s); return (error); } if (cmd == TIOCSETAF) ttyflush(tp, FREAD); } if (!ISSET(t->c_cflag, CIGNORE)) { /* * Set device hardware. */ if (tp->t_param && (error = (*tp->t_param)(tp, t))) { splx(s); return (error); } if (ISSET(t->c_cflag, CLOCAL) && !ISSET(tp->t_cflag, CLOCAL)) { /* * XXX disconnections would be too hard to * get rid of without this kludge. The only * way to get rid of controlling terminals * is to exit from the session leader. */ CLR(tp->t_state, TS_ZOMBIE); wakeup(TSA_CARR_ON(tp)); ttwakeup(tp); ttwwakeup(tp); } if ((ISSET(tp->t_state, TS_CARR_ON) || ISSET(t->c_cflag, CLOCAL)) && !ISSET(tp->t_state, TS_ZOMBIE)) SET(tp->t_state, TS_CONNECTED); else CLR(tp->t_state, TS_CONNECTED); tp->t_cflag = t->c_cflag; tp->t_ispeed = t->c_ispeed; if (t->c_ospeed != 0) tp->t_ospeed = t->c_ospeed; ttsetwater(tp); } if (ISSET(t->c_lflag, ICANON) != ISSET(tp->t_lflag, ICANON) && cmd != TIOCSETAF) { if (ISSET(t->c_lflag, ICANON)) SET(tp->t_lflag, PENDIN); else { /* * XXX we really shouldn't allow toggling * ICANON while we're in a non-termios line * discipline. Now we have to worry about * panicing for a null queue. */ if (tp->t_canq.c_cbreserved > 0 && tp->t_rawq.c_cbreserved > 0) { catq(&tp->t_rawq, &tp->t_canq); /* * XXX the queue limits may be * different, so the old queue * swapping method no longer works. */ catq(&tp->t_canq, &tp->t_rawq); } CLR(tp->t_lflag, PENDIN); } ttwakeup(tp); } tp->t_iflag = t->c_iflag; tp->t_oflag = t->c_oflag; /* * Make the EXTPROC bit read only. */ if (ISSET(tp->t_lflag, EXTPROC)) SET(t->c_lflag, EXTPROC); else CLR(t->c_lflag, EXTPROC); tp->t_lflag = t->c_lflag | ISSET(tp->t_lflag, PENDIN); if (t->c_cc[VMIN] != tp->t_cc[VMIN] || t->c_cc[VTIME] != tp->t_cc[VTIME]) ttwakeup(tp); bcopy(t->c_cc, tp->t_cc, sizeof(t->c_cc)); splx(s); break; } case TIOCSETD: { /* set line discipline */ register int t = *(int *)data; dev_t device = tp->t_dev; if ((u_int)t >= nlinesw) return (ENXIO); if (t != tp->t_line) { s = spltty(); (*linesw[tp->t_line].l_close)(tp, flag); error = (*linesw[t].l_open)(device, tp); if (error) { (void)(*linesw[tp->t_line].l_open)(device, tp); splx(s); return (error); } tp->t_line = t; splx(s); } break; } case TIOCSTART: /* start output, like ^Q */ s = spltty(); if (ISSET(tp->t_state, TS_TTSTOP) || ISSET(tp->t_lflag, FLUSHO)) { CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_TTSTOP); ttstart(tp); } splx(s); break; case TIOCSTI: /* simulate terminal input */ if ((flag & FREAD) == 0 && suser(p)) return (EPERM); if (!isctty(p, tp) && suser(p)) return (EACCES); s = spltty(); (*linesw[tp->t_line].l_rint)(*(u_char *)data, tp); splx(s); break; case TIOCSTOP: /* stop output, like ^S */ s = spltty(); if (!ISSET(tp->t_state, TS_TTSTOP)) { SET(tp->t_state, TS_TTSTOP); (*tp->t_stop)(tp, 0); } splx(s); break; case TIOCSCTTY: /* become controlling tty */ /* Session ctty vnode pointer set in vnode layer. */ if (!SESS_LEADER(p) || ((p->p_session->s_ttyvp || tp->t_session) && (tp->t_session != p->p_session))) return (EPERM); tp->t_session = p->p_session; tp->t_pgrp = p->p_pgrp; p->p_session->s_ttyp = tp; p->p_flag |= P_CONTROLT; break; case TIOCSPGRP: { /* set pgrp of tty */ register struct pgrp *pgrp = pgfind(*(int *)data); if (!isctty(p, tp)) return (ENOTTY); else if (pgrp == NULL || pgrp->pg_session != p->p_session) return (EPERM); tp->t_pgrp = pgrp; break; } case TIOCSTAT: /* simulate control-T */ s = spltty(); ttyinfo(tp); splx(s); break; case TIOCSWINSZ: /* set window size */ if (bcmp((caddr_t)&tp->t_winsize, data, sizeof (struct winsize))) { tp->t_winsize = *(struct winsize *)data; pgsignal(tp->t_pgrp, SIGWINCH, 1); } break; case TIOCSDRAINWAIT: error = suser(p); if (error) return (error); tp->t_timeout = *(int *)data * hz; wakeup(TSA_OCOMPLETE(tp)); wakeup(TSA_OLOWAT(tp)); break; case TIOCGDRAINWAIT: *(int *)data = tp->t_timeout / hz; break; default: #if defined(COMPAT_43) || defined(COMPAT_SUNOS) return (ttcompat(tp, cmd, data, flag)); #else return (ENOIOCTL); #endif } return (0); } int ttypoll(dev, events, p) dev_t dev; int events; struct proc *p; { int s; int revents = 0; struct tty *tp; tp = dev->si_tty; if (tp == NULL) /* XXX used to return ENXIO, but that means true! */ return ((events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)) | POLLHUP); s = spltty(); if (events & (POLLIN | POLLRDNORM)) { if (ttnread(tp) > 0 || ISSET(tp->t_state, TS_ZOMBIE)) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &tp->t_rsel); } if (events & (POLLOUT | POLLWRNORM)) { if ((tp->t_outq.c_cc <= tp->t_olowat && ISSET(tp->t_state, TS_CONNECTED)) || ISSET(tp->t_state, TS_ZOMBIE)) revents |= events & (POLLOUT | POLLWRNORM); else selrecord(p, &tp->t_wsel); } splx(s); return (revents); } /* * Must be called at spltty(). */ static int ttnread(tp) struct tty *tp; { int nread; if (ISSET(tp->t_lflag, PENDIN)) ttypend(tp); nread = tp->t_canq.c_cc; if (!ISSET(tp->t_lflag, ICANON)) { nread += tp->t_rawq.c_cc; if (nread < tp->t_cc[VMIN] && tp->t_cc[VTIME] == 0) nread = 0; } return (nread); } /* * Wait for output to drain. */ int ttywait(tp) register struct tty *tp; { int error, s; error = 0; s = spltty(); while ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) && ISSET(tp->t_state, TS_CONNECTED) && tp->t_oproc) { (*tp->t_oproc)(tp); if ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) && ISSET(tp->t_state, TS_CONNECTED)) { SET(tp->t_state, TS_SO_OCOMPLETE); error = ttysleep(tp, TSA_OCOMPLETE(tp), TTOPRI | PCATCH, "ttywai", tp->t_timeout); if (error) { if (error == EWOULDBLOCK) error = EIO; break; } } else break; } if (!error && (tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY))) error = EIO; splx(s); return (error); } /* * Flush if successfully wait. */ static int ttywflush(tp) struct tty *tp; { int error; if ((error = ttywait(tp)) == 0) ttyflush(tp, FREAD); return (error); } /* * Flush tty read and/or write queues, notifying anyone waiting. */ void ttyflush(tp, rw) register struct tty *tp; int rw; { register int s; s = spltty(); #if 0 again: #endif if (rw & FWRITE) { FLUSHQ(&tp->t_outq); CLR(tp->t_state, TS_TTSTOP); } (*tp->t_stop)(tp, rw); if (rw & FREAD) { FLUSHQ(&tp->t_canq); FLUSHQ(&tp->t_rawq); CLR(tp->t_lflag, PENDIN); tp->t_rocount = 0; tp->t_rocol = 0; CLR(tp->t_state, TS_LOCAL); ttwakeup(tp); if (ISSET(tp->t_state, TS_TBLOCK)) { if (rw & FWRITE) FLUSHQ(&tp->t_outq); ttyunblock(tp); /* * Don't let leave any state that might clobber the * next line discipline (although we should do more * to send the START char). Not clearing the state * may have caused the "putc to a clist with no * reserved cblocks" panic/printf. */ CLR(tp->t_state, TS_TBLOCK); #if 0 /* forget it, sleeping isn't always safe and we don't know when it is */ if (ISSET(tp->t_iflag, IXOFF)) { /* * XXX wait a bit in the hope that the stop * character (if any) will go out. Waiting * isn't good since it allows races. This * will be fixed when the stop character is * put in a special queue. Don't bother with * the checks in ttywait() since the timeout * will save us. */ SET(tp->t_state, TS_SO_OCOMPLETE); ttysleep(tp, TSA_OCOMPLETE(tp), TTOPRI, "ttyfls", hz / 10); /* * Don't try sending the stop character again. */ CLR(tp->t_state, TS_TBLOCK); goto again; } #endif } } if (rw & FWRITE) { FLUSHQ(&tp->t_outq); ttwwakeup(tp); } splx(s); } /* * Copy in the default termios characters. */ void termioschars(t) struct termios *t; { bcopy(ttydefchars, t->c_cc, sizeof t->c_cc); } /* * Old interface. */ void ttychars(tp) struct tty *tp; { termioschars(&tp->t_termios); } /* * Handle input high water. Send stop character for the IXOFF case. Turn * on our input flow control bit and propagate the changes to the driver. * XXX the stop character should be put in a special high priority queue. */ void ttyblock(tp) struct tty *tp; { SET(tp->t_state, TS_TBLOCK); if (ISSET(tp->t_iflag, IXOFF) && tp->t_cc[VSTOP] != _POSIX_VDISABLE && putc(tp->t_cc[VSTOP], &tp->t_outq) != 0) CLR(tp->t_state, TS_TBLOCK); /* try again later */ ttstart(tp); } /* * Handle input low water. Send start character for the IXOFF case. Turn * off our input flow control bit and propagate the changes to the driver. * XXX the start character should be put in a special high priority queue. */ static void ttyunblock(tp) struct tty *tp; { CLR(tp->t_state, TS_TBLOCK); if (ISSET(tp->t_iflag, IXOFF) && tp->t_cc[VSTART] != _POSIX_VDISABLE && putc(tp->t_cc[VSTART], &tp->t_outq) != 0) SET(tp->t_state, TS_TBLOCK); /* try again later */ ttstart(tp); } #ifdef notyet /* Not used by any current (i386) drivers. */ /* * Restart after an inter-char delay. */ void ttrstrt(tp_arg) void *tp_arg; { struct tty *tp; int s; KASSERT(tp_arg != NULL, ("ttrstrt")); tp = tp_arg; s = spltty(); CLR(tp->t_state, TS_TIMEOUT); ttstart(tp); splx(s); } #endif int ttstart(tp) struct tty *tp; { if (tp->t_oproc != NULL) /* XXX: Kludge for pty. */ (*tp->t_oproc)(tp); return (0); } /* * "close" a line discipline */ int ttylclose(tp, flag) struct tty *tp; int flag; { if (flag & FNONBLOCK || ttywflush(tp)) ttyflush(tp, FREAD | FWRITE); return (0); } /* * Handle modem control transition on a tty. * Flag indicates new state of carrier. * Returns 0 if the line should be turned off, otherwise 1. */ int ttymodem(tp, flag) register struct tty *tp; int flag; { if (ISSET(tp->t_state, TS_CARR_ON) && ISSET(tp->t_cflag, MDMBUF)) { /* * MDMBUF: do flow control according to carrier flag * XXX TS_CAR_OFLOW doesn't do anything yet. TS_TTSTOP * works if IXON and IXANY are clear. */ if (flag) { CLR(tp->t_state, TS_CAR_OFLOW); CLR(tp->t_state, TS_TTSTOP); ttstart(tp); } else if (!ISSET(tp->t_state, TS_CAR_OFLOW)) { SET(tp->t_state, TS_CAR_OFLOW); SET(tp->t_state, TS_TTSTOP); (*tp->t_stop)(tp, 0); } } else if (flag == 0) { /* * Lost carrier. */ CLR(tp->t_state, TS_CARR_ON); if (ISSET(tp->t_state, TS_ISOPEN) && !ISSET(tp->t_cflag, CLOCAL)) { SET(tp->t_state, TS_ZOMBIE); CLR(tp->t_state, TS_CONNECTED); if (tp->t_session && tp->t_session->s_leader) psignal(tp->t_session->s_leader, SIGHUP); ttyflush(tp, FREAD | FWRITE); return (0); } } else { /* * Carrier now on. */ SET(tp->t_state, TS_CARR_ON); if (!ISSET(tp->t_state, TS_ZOMBIE)) SET(tp->t_state, TS_CONNECTED); wakeup(TSA_CARR_ON(tp)); ttwakeup(tp); ttwwakeup(tp); } return (1); } /* * Reinput pending characters after state switch * call at spltty(). */ static void ttypend(tp) register struct tty *tp; { struct clist tq; register int c; CLR(tp->t_lflag, PENDIN); SET(tp->t_state, TS_TYPEN); /* * XXX this assumes too much about clist internals. It may even * fail if the cblock slush pool is empty. We can't allocate more * cblocks here because we are called from an interrupt handler * and clist_alloc_cblocks() can wait. */ tq = tp->t_rawq; bzero(&tp->t_rawq, sizeof tp->t_rawq); tp->t_rawq.c_cbmax = tq.c_cbmax; tp->t_rawq.c_cbreserved = tq.c_cbreserved; while ((c = getc(&tq)) >= 0) ttyinput(c, tp); CLR(tp->t_state, TS_TYPEN); } /* * Process a read call on a tty device. */ int ttread(tp, uio, flag) register struct tty *tp; struct uio *uio; int flag; { register struct clist *qp; register int c; register tcflag_t lflag; register cc_t *cc = tp->t_cc; register struct proc *p = curproc; int s, first, error = 0; int has_stime = 0, last_cc = 0; long slp = 0; /* XXX this should be renamed `timo'. */ struct timeval stime; loop: s = spltty(); lflag = tp->t_lflag; /* * take pending input first */ if (ISSET(lflag, PENDIN)) { ttypend(tp); splx(s); /* reduce latency */ s = spltty(); lflag = tp->t_lflag; /* XXX ttypend() clobbers it */ } /* * Hang process if it's in the background. */ if (isbackground(p, tp)) { splx(s); if (SIGISMEMBER(p->p_sigignore, SIGTTIN) || SIGISMEMBER(p->p_sigmask, SIGTTIN) || (p->p_flag & P_PPWAIT) || p->p_pgrp->pg_jobc == 0) return (EIO); pgsignal(p->p_pgrp, SIGTTIN, 1); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg2", 0); if (error) return (error); goto loop; } if (ISSET(tp->t_state, TS_ZOMBIE)) { splx(s); return (0); /* EOF */ } /* * If canonical, use the canonical queue, * else use the raw queue. * * (should get rid of clists...) */ qp = ISSET(lflag, ICANON) ? &tp->t_canq : &tp->t_rawq; if (flag & IO_NDELAY) { if (qp->c_cc > 0) goto read; if (!ISSET(lflag, ICANON) && cc[VMIN] == 0) { splx(s); return (0); } splx(s); return (EWOULDBLOCK); } if (!ISSET(lflag, ICANON)) { int m = cc[VMIN]; long t = cc[VTIME]; struct timeval timecopy; /* * Check each of the four combinations. * (m > 0 && t == 0) is the normal read case. * It should be fairly efficient, so we check that and its * companion case (m == 0 && t == 0) first. * For the other two cases, we compute the target sleep time * into slp. */ if (t == 0) { if (qp->c_cc < m) goto sleep; if (qp->c_cc > 0) goto read; /* m, t and qp->c_cc are all 0. 0 is enough input. */ splx(s); return (0); } t *= 100000; /* time in us */ #define diff(t1, t2) (((t1).tv_sec - (t2).tv_sec) * 1000000 + \ ((t1).tv_usec - (t2).tv_usec)) if (m > 0) { if (qp->c_cc <= 0) goto sleep; if (qp->c_cc >= m) goto read; getmicrotime(&timecopy); if (!has_stime) { /* first character, start timer */ has_stime = 1; stime = timecopy; slp = t; } else if (qp->c_cc > last_cc) { /* got a character, restart timer */ stime = timecopy; slp = t; } else { /* nothing, check expiration */ slp = t - diff(timecopy, stime); if (slp <= 0) goto read; } last_cc = qp->c_cc; } else { /* m == 0 */ if (qp->c_cc > 0) goto read; getmicrotime(&timecopy); if (!has_stime) { has_stime = 1; stime = timecopy; slp = t; } else { slp = t - diff(timecopy, stime); if (slp <= 0) { /* Timed out, but 0 is enough input. */ splx(s); return (0); } } } #undef diff /* * Rounding down may make us wake up just short * of the target, so we round up. * The formula is ceiling(slp * hz/1000000). * 32-bit arithmetic is enough for hz < 169. * XXX see tvtohz() for how to avoid overflow if hz * is large (divide by `tick' and/or arrange to * use tvtohz() if hz is large). */ slp = (long) (((u_long)slp * hz) + 999999) / 1000000; goto sleep; } if (qp->c_cc <= 0) { sleep: /* * There is no input, or not enough input and we can block. */ error = ttysleep(tp, TSA_HUP_OR_INPUT(tp), TTIPRI | PCATCH, ISSET(tp->t_state, TS_CONNECTED) ? "ttyin" : "ttyhup", (int)slp); splx(s); if (error == EWOULDBLOCK) error = 0; else if (error) return (error); /* * XXX what happens if another process eats some input * while we are asleep (not just here)? It would be * safest to detect changes and reset our state variables * (has_stime and last_cc). */ slp = 0; goto loop; } read: splx(s); /* * Input present, check for input mapping and processing. */ first = 1; if (ISSET(lflag, ICANON | ISIG)) goto slowcase; for (;;) { char ibuf[IBUFSIZ]; int icc; icc = imin(uio->uio_resid, IBUFSIZ); icc = q_to_b(qp, ibuf, icc); if (icc <= 0) { if (first) goto loop; break; } error = uiomove(ibuf, icc, uio); /* * XXX if there was an error then we should ungetc() the * unmoved chars and reduce icc here. */ #if NSNP > 0 if (ISSET(tp->t_lflag, ECHO) && ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpin((struct snoop *)tp->t_sc, ibuf, icc); #endif if (error) break; if (uio->uio_resid == 0) break; first = 0; } goto out; slowcase: for (;;) { c = getc(qp); if (c < 0) { if (first) goto loop; break; } /* * delayed suspend (^Y) */ if (CCEQ(cc[VDSUSP], c) && ISSET(lflag, IEXTEN | ISIG) == (IEXTEN | ISIG)) { pgsignal(tp->t_pgrp, SIGTSTP, 1); if (first) { error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg3", 0); if (error) break; goto loop; } break; } /* * Interpret EOF only in canonical mode. */ if (CCEQ(cc[VEOF], c) && ISSET(lflag, ICANON)) break; /* * Give user character. */ error = ureadc(c, uio); if (error) /* XXX should ungetc(c, qp). */ break; #if NSNP > 0 /* * Only snoop directly on input in echo mode. Non-echoed * input will be snooped later iff the application echoes it. */ if (ISSET(tp->t_lflag, ECHO) && ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpinc((struct snoop *)tp->t_sc, (char)c); #endif if (uio->uio_resid == 0) break; /* * In canonical mode check for a "break character" * marking the end of a "line of input". */ if (ISSET(lflag, ICANON) && TTBREAKC(c, lflag)) break; first = 0; } out: /* * Look to unblock input now that (presumably) * the input queue has gone down. */ s = spltty(); if (ISSET(tp->t_state, TS_TBLOCK) && tp->t_rawq.c_cc + tp->t_canq.c_cc <= tp->t_ilowat) ttyunblock(tp); splx(s); return (error); } /* * Check the output queue on tp for space for a kernel message (from uprintf * or tprintf). Allow some space over the normal hiwater mark so we don't * lose messages due to normal flow control, but don't let the tty run amok. * Sleeps here are not interruptible, but we return prematurely if new signals * arrive. */ int ttycheckoutq(tp, wait) register struct tty *tp; int wait; { int hiwat, s; sigset_t oldmask; hiwat = tp->t_ohiwat; SIGEMPTYSET(oldmask); s = spltty(); if (wait) oldmask = curproc->p_siglist; if (tp->t_outq.c_cc > hiwat + OBUFSIZ + 100) while (tp->t_outq.c_cc > hiwat) { ttstart(tp); if (tp->t_outq.c_cc <= hiwat) break; if (!(wait && SIGSETEQ(curproc->p_siglist, oldmask))) { splx(s); return (0); } SET(tp->t_state, TS_SO_OLOWAT); tsleep(TSA_OLOWAT(tp), PZERO - 1, "ttoutq", hz); } splx(s); return (1); } /* * Process a write call on a tty device. */ int ttwrite(tp, uio, flag) register struct tty *tp; register struct uio *uio; int flag; { register char *cp = NULL; register int cc, ce; register struct proc *p; int i, hiwat, cnt, error, s; char obuf[OBUFSIZ]; hiwat = tp->t_ohiwat; cnt = uio->uio_resid; error = 0; cc = 0; loop: s = spltty(); if (ISSET(tp->t_state, TS_ZOMBIE)) { splx(s); if (uio->uio_resid == cnt) error = EIO; goto out; } if (!ISSET(tp->t_state, TS_CONNECTED)) { if (flag & IO_NDELAY) { splx(s); error = EWOULDBLOCK; goto out; } error = ttysleep(tp, TSA_CARR_ON(tp), TTIPRI | PCATCH, "ttydcd", 0); splx(s); if (error) goto out; goto loop; } splx(s); /* * Hang the process if it's in the background. */ p = curproc; if (isbackground(p, tp) && ISSET(tp->t_lflag, TOSTOP) && !(p->p_flag & P_PPWAIT) && !SIGISMEMBER(p->p_sigignore, SIGTTOU) && !SIGISMEMBER(p->p_sigmask, SIGTTOU)) { if (p->p_pgrp->pg_jobc == 0) { error = EIO; goto out; } pgsignal(p->p_pgrp, SIGTTOU, 1); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg4", 0); if (error) goto out; goto loop; } /* * Process the user's data in at most OBUFSIZ chunks. Perform any * output translation. Keep track of high water mark, sleep on * overflow awaiting device aid in acquiring new space. */ while (uio->uio_resid > 0 || cc > 0) { if (ISSET(tp->t_lflag, FLUSHO)) { uio->uio_resid = 0; return (0); } if (tp->t_outq.c_cc > hiwat) goto ovhiwat; /* * Grab a hunk of data from the user, unless we have some * leftover from last time. */ if (cc == 0) { cc = imin(uio->uio_resid, OBUFSIZ); cp = obuf; error = uiomove(cp, cc, uio); if (error) { cc = 0; break; } #if NSNP > 0 if (ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpin((struct snoop *)tp->t_sc, cp, cc); #endif } /* * If nothing fancy need be done, grab those characters we * can handle without any of ttyoutput's processing and * just transfer them to the output q. For those chars * which require special processing (as indicated by the * bits in char_type), call ttyoutput. After processing * a hunk of data, look for FLUSHO so ^O's will take effect * immediately. */ while (cc > 0) { if (!ISSET(tp->t_oflag, OPOST)) ce = cc; else { ce = cc - scanc((u_int)cc, (u_char *)cp, char_type, CCLASSMASK); /* * If ce is zero, then we're processing * a special character through ttyoutput. */ if (ce == 0) { tp->t_rocount = 0; if (ttyoutput(*cp, tp) >= 0) { /* No Clists, wait a bit. */ ttstart(tp); if (flag & IO_NDELAY) { error = EWOULDBLOCK; goto out; } error = ttysleep(tp, &lbolt, TTOPRI|PCATCH, "ttybf1", 0); if (error) goto out; goto loop; } cp++; cc--; if (ISSET(tp->t_lflag, FLUSHO) || tp->t_outq.c_cc > hiwat) goto ovhiwat; continue; } } /* * A bunch of normal characters have been found. * Transfer them en masse to the output queue and * continue processing at the top of the loop. * If there are any further characters in this * <= OBUFSIZ chunk, the first should be a character * requiring special handling by ttyoutput. */ tp->t_rocount = 0; i = b_to_q(cp, ce, &tp->t_outq); ce -= i; tp->t_column += ce; cp += ce, cc -= ce, tk_nout += ce; tp->t_outcc += ce; if (i > 0) { /* No Clists, wait a bit. */ ttstart(tp); if (flag & IO_NDELAY) { error = EWOULDBLOCK; goto out; } error = ttysleep(tp, &lbolt, TTOPRI | PCATCH, "ttybf2", 0); if (error) goto out; goto loop; } if (ISSET(tp->t_lflag, FLUSHO) || tp->t_outq.c_cc > hiwat) break; } ttstart(tp); } out: /* * If cc is nonzero, we leave the uio structure inconsistent, as the * offset and iov pointers have moved forward, but it doesn't matter * (the call will either return short or restart with a new uio). */ uio->uio_resid += cc; return (error); ovhiwat: ttstart(tp); s = spltty(); /* * This can only occur if FLUSHO is set in t_lflag, * or if ttstart/oproc is synchronous (or very fast). */ if (tp->t_outq.c_cc <= hiwat) { splx(s); goto loop; } if (flag & IO_NDELAY) { splx(s); uio->uio_resid += cc; return (uio->uio_resid == cnt ? EWOULDBLOCK : 0); } SET(tp->t_state, TS_SO_OLOWAT); error = ttysleep(tp, TSA_OLOWAT(tp), TTOPRI | PCATCH, "ttywri", tp->t_timeout); splx(s); if (error == EWOULDBLOCK) error = EIO; if (error) goto out; goto loop; } /* * Rubout one character from the rawq of tp * as cleanly as possible. */ static void ttyrub(c, tp) register int c; register struct tty *tp; { register char *cp; register int savecol; int tabc, s; if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC)) return; CLR(tp->t_lflag, FLUSHO); if (ISSET(tp->t_lflag, ECHOE)) { if (tp->t_rocount == 0) { /* * Screwed by ttwrite; retype */ ttyretype(tp); return; } if (c == ('\t' | TTY_QUOTE) || c == ('\n' | TTY_QUOTE)) ttyrubo(tp, 2); else { CLR(c, ~TTY_CHARMASK); switch (CCLASS(c)) { case ORDINARY: ttyrubo(tp, 1); break; case BACKSPACE: case CONTROL: case NEWLINE: case RETURN: case VTAB: if (ISSET(tp->t_lflag, ECHOCTL)) ttyrubo(tp, 2); break; case TAB: if (tp->t_rocount < tp->t_rawq.c_cc) { ttyretype(tp); return; } s = spltty(); savecol = tp->t_column; SET(tp->t_state, TS_CNTTB); SET(tp->t_lflag, FLUSHO); tp->t_column = tp->t_rocol; cp = tp->t_rawq.c_cf; if (cp) tabc = *cp; /* XXX FIX NEXTC */ for (; cp; cp = nextc(&tp->t_rawq, cp, &tabc)) ttyecho(tabc, tp); CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_CNTTB); splx(s); /* savecol will now be length of the tab. */ savecol -= tp->t_column; tp->t_column += savecol; if (savecol > 8) savecol = 8; /* overflow screw */ while (--savecol >= 0) (void)ttyoutput('\b', tp); break; default: /* XXX */ #define PANICSTR "ttyrub: would panic c = %d, val = %d\n" (void)printf(PANICSTR, c, CCLASS(c)); #ifdef notdef panic(PANICSTR, c, CCLASS(c)); #endif } } } else if (ISSET(tp->t_lflag, ECHOPRT)) { if (!ISSET(tp->t_state, TS_ERASE)) { SET(tp->t_state, TS_ERASE); (void)ttyoutput('\\', tp); } ttyecho(c, tp); } else ttyecho(tp->t_cc[VERASE], tp); --tp->t_rocount; } /* * Back over cnt characters, erasing them. */ static void ttyrubo(tp, cnt) register struct tty *tp; int cnt; { while (cnt-- > 0) { (void)ttyoutput('\b', tp); (void)ttyoutput(' ', tp); (void)ttyoutput('\b', tp); } } /* * ttyretype -- * Reprint the rawq line. Note, it is assumed that c_cc has already * been checked. */ static void ttyretype(tp) register struct tty *tp; { register char *cp; int s, c; /* Echo the reprint character. */ if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE) ttyecho(tp->t_cc[VREPRINT], tp); (void)ttyoutput('\n', tp); /* * XXX * FIX: NEXTC IS BROKEN - DOESN'T CHECK QUOTE * BIT OF FIRST CHAR. */ s = spltty(); for (cp = tp->t_canq.c_cf, c = (cp != NULL ? *cp : 0); cp != NULL; cp = nextc(&tp->t_canq, cp, &c)) ttyecho(c, tp); for (cp = tp->t_rawq.c_cf, c = (cp != NULL ? *cp : 0); cp != NULL; cp = nextc(&tp->t_rawq, cp, &c)) ttyecho(c, tp); CLR(tp->t_state, TS_ERASE); splx(s); tp->t_rocount = tp->t_rawq.c_cc; tp->t_rocol = 0; } /* * Echo a typed character to the terminal. */ static void ttyecho(c, tp) register int c; register struct tty *tp; { if (!ISSET(tp->t_state, TS_CNTTB)) CLR(tp->t_lflag, FLUSHO); if ((!ISSET(tp->t_lflag, ECHO) && (c != '\n' || !ISSET(tp->t_lflag, ECHONL))) || ISSET(tp->t_lflag, EXTPROC)) return; if (ISSET(tp->t_lflag, ECHOCTL) && ((ISSET(c, TTY_CHARMASK) <= 037 && c != '\t' && c != '\n') || ISSET(c, TTY_CHARMASK) == 0177)) { (void)ttyoutput('^', tp); CLR(c, ~TTY_CHARMASK); if (c == 0177) c = '?'; else c += 'A' - 1; } (void)ttyoutput(c, tp); } /* * Wake up any readers on a tty. */ void ttwakeup(tp) register struct tty *tp; { if (tp->t_rsel.si_pid != 0) selwakeup(&tp->t_rsel); if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL) pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL)); wakeup(TSA_HUP_OR_INPUT(tp)); } /* * Wake up any writers on a tty. */ void ttwwakeup(tp) register struct tty *tp; { if (tp->t_wsel.si_pid != 0 && tp->t_outq.c_cc <= tp->t_olowat) selwakeup(&tp->t_wsel); if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL) pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL)); if (ISSET(tp->t_state, TS_BUSY | TS_SO_OCOMPLETE) == TS_SO_OCOMPLETE && tp->t_outq.c_cc == 0) { CLR(tp->t_state, TS_SO_OCOMPLETE); wakeup(TSA_OCOMPLETE(tp)); } if (ISSET(tp->t_state, TS_SO_OLOWAT) && tp->t_outq.c_cc <= tp->t_olowat) { CLR(tp->t_state, TS_SO_OLOWAT); wakeup(TSA_OLOWAT(tp)); } } /* * Look up a code for a specified speed in a conversion table; * used by drivers to map software speed values to hardware parameters. */ int ttspeedtab(speed, table) int speed; register struct speedtab *table; { for ( ; table->sp_speed != -1; table++) if (table->sp_speed == speed) return (table->sp_code); return (-1); } /* * Set input and output watermarks and buffer sizes. For input, the * high watermark is about one second's worth of input above empty, the * low watermark is slightly below high water, and the buffer size is a * driver-dependent amount above high water. For output, the watermarks * are near the ends of the buffer, with about 1 second's worth of input * between them. All this only applies to the standard line discipline. */ void ttsetwater(tp) struct tty *tp; { register int cps, ttmaxhiwat, x; /* Input. */ clist_alloc_cblocks(&tp->t_canq, TTYHOG, 512); switch (tp->t_ispeedwat) { case (speed_t)-1: cps = tp->t_ispeed / 10; break; case 0: /* * This case is for old drivers that don't know about * t_ispeedwat. Arrange for them to get the old buffer * sizes and watermarks. */ cps = TTYHOG - 2 * 256; tp->t_ififosize = 2 * 256; break; default: cps = tp->t_ispeedwat / 10; break; } tp->t_ihiwat = cps; tp->t_ilowat = 7 * cps / 8; x = cps + tp->t_ififosize; clist_alloc_cblocks(&tp->t_rawq, x, x); /* Output. */ switch (tp->t_ospeedwat) { case (speed_t)-1: cps = tp->t_ospeed / 10; ttmaxhiwat = 2 * TTMAXHIWAT; break; case 0: cps = tp->t_ospeed / 10; ttmaxhiwat = TTMAXHIWAT; break; default: cps = tp->t_ospeedwat / 10; ttmaxhiwat = 8 * TTMAXHIWAT; break; } #define CLAMP(x, h, l) ((x) > h ? h : ((x) < l) ? l : (x)) tp->t_olowat = x = CLAMP(cps / 2, TTMAXLOWAT, TTMINLOWAT); x += cps; x = CLAMP(x, ttmaxhiwat, TTMINHIWAT); /* XXX clamps are too magic */ tp->t_ohiwat = roundup(x, CBSIZE); /* XXX for compat */ x = imax(tp->t_ohiwat, TTMAXHIWAT); /* XXX for compat/safety */ x += OBUFSIZ + 100; clist_alloc_cblocks(&tp->t_outq, x, x); #undef CLAMP } /* * Report on state of foreground process group. */ void ttyinfo(tp) register struct tty *tp; { register struct proc *p, *pick; struct timeval utime, stime; int tmp; if (ttycheckoutq(tp,0) == 0) return; /* Print load average. */ tmp = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT; ttyprintf(tp, "load: %d.%02d ", tmp / 100, tmp % 100); if (tp->t_session == NULL) ttyprintf(tp, "not a controlling terminal\n"); else if (tp->t_pgrp == NULL) ttyprintf(tp, "no foreground process group\n"); else if ((p = LIST_FIRST(&tp->t_pgrp->pg_members)) == 0) ttyprintf(tp, "empty foreground process group\n"); else { /* Pick interesting process. */ for (pick = NULL; p != 0; p = LIST_NEXT(p, p_pglist)) if (proc_compare(pick, p)) pick = p; ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid, pick->p_stat == SRUN ? "running" : pick->p_wmesg ? pick->p_wmesg : "iowait"); if (pick->p_flag & P_INMEM) { calcru(pick, &utime, &stime, NULL); /* Print user time. */ ttyprintf(tp, "%ld.%02ldu ", utime.tv_sec, utime.tv_usec / 10000); /* Print system time. */ ttyprintf(tp, "%ld.%02lds ", stime.tv_sec, stime.tv_usec / 10000); } else ttyprintf(tp, "?.??u ?.??s "); /* Print percentage cpu, resident set size. */ tmp = (pick->p_pctcpu * 10000 + FSCALE / 2) >> FSHIFT; ttyprintf(tp, "%d%% %ldk\n", tmp / 100, pick->p_stat == SIDL || pick->p_stat == SZOMB ? 0 : (long)pgtok(vmspace_resident_count(pick->p_vmspace))); } tp->t_rocount = 0; /* so pending input will be retyped if BS */ } /* * Returns 1 if p2 is "better" than p1 * * The algorithm for picking the "interesting" process is thus: * * 1) Only foreground processes are eligible - implied. * 2) Runnable processes are favored over anything else. The runner * with the highest cpu utilization is picked (p_estcpu). Ties are * broken by picking the highest pid. * 3) The sleeper with the shortest sleep time is next. With ties, * we pick out just "short-term" sleepers (P_SINTR == 0). * 4) Further ties are broken by picking the highest pid. */ #define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL)) #define TESTAB(a, b) ((a)<<1 | (b)) #define ONLYA 2 #define ONLYB 1 #define BOTH 3 static int proc_compare(p1, p2) register struct proc *p1, *p2; { if (p1 == NULL) return (1); /* * see if at least one of them is runnable */ switch (TESTAB(ISRUN(p1), ISRUN(p2))) { case ONLYA: return (0); case ONLYB: return (1); case BOTH: /* * tie - favor one with highest recent cpu utilization */ if (p2->p_estcpu > p1->p_estcpu) return (1); if (p1->p_estcpu > p2->p_estcpu) return (0); return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * weed out zombies */ switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) { case ONLYA: return (1); case ONLYB: return (0); case BOTH: return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * pick the one with the smallest sleep time */ if (p2->p_slptime > p1->p_slptime) return (0); if (p1->p_slptime > p2->p_slptime) return (1); /* * favor one sleeping in a non-interruptible sleep */ if (p1->p_flag & P_SINTR && (p2->p_flag & P_SINTR) == 0) return (1); if (p2->p_flag & P_SINTR && (p1->p_flag & P_SINTR) == 0) return (0); return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * Output char to tty; console putchar style. */ int tputchar(c, tp) int c; struct tty *tp; { register int s; s = spltty(); if (!ISSET(tp->t_state, TS_CONNECTED)) { splx(s); return (-1); } if (c == '\n') (void)ttyoutput('\r', tp); (void)ttyoutput(c, tp); ttstart(tp); splx(s); return (0); } /* * Sleep on chan, returning ERESTART if tty changed while we napped and * returning any errors (e.g. EINTR/EWOULDBLOCK) reported by tsleep. If * the tty is revoked, restarting a pending call will redo validation done * at the start of the call. */ int ttysleep(tp, chan, pri, wmesg, timo) struct tty *tp; void *chan; int pri, timo; char *wmesg; { int error; int gen; gen = tp->t_gen; error = tsleep(chan, pri, wmesg, timo); if (error) return (error); return (tp->t_gen == gen ? 0 : ERESTART); } /* * Allocate a tty struct. Clists in the struct will be allocated by * ttyopen(). */ struct tty * ttymalloc(tp) struct tty *tp; { if (tp) return(tp); tp = malloc(sizeof *tp, M_TTYS, M_WAITOK); bzero(tp, sizeof *tp); ttyregister(tp); return (tp); } #if 0 /* XXX not yet usable: session leader holds a ref (see kern_exit.c). */ /* * Free a tty struct. Clists in the struct should have been freed by * ttyclose(). */ void ttyfree(tp) struct tty *tp; { free(tp, M_TTYS); } #endif /* 0 */ void ttyregister(tp) struct tty *tp; { tp->t_timeout = -1; SLIST_INSERT_HEAD(&tty_list, tp, t_list); } static int -sysctl_kern_ttys (SYSCTL_HANDLER_ARGS) +sysctl_kern_ttys(SYSCTL_HANDLER_ARGS) { int error; struct tty *tp, t; SLIST_FOREACH(tp, &tty_list, t_list) { t = *tp; if (t.t_dev) t.t_dev = (dev_t)dev2udev(t.t_dev); error = SYSCTL_OUT(req, (caddr_t)&t, sizeof(t)); if (error) return (error); } return (0); } SYSCTL_PROC(_kern, OID_AUTO, ttys, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_kern_ttys, "S,tty", "All struct ttys"); void nottystop(tp, rw) struct tty *tp; int rw; { return; } int ttyread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { struct tty *tp; tp = dev->si_tty; if (tp == NULL) return (ENODEV); return ((*linesw[tp->t_line].l_read)(tp, uio, flag)); } int ttywrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { struct tty *tp; tp = dev->si_tty; if (tp == NULL) return (ENODEV); return ((*linesw[tp->t_line].l_write)(tp, uio, flag)); } Index: head/sys/kern/tty_cons.c =================================================================== --- head/sys/kern/tty_cons.c (revision 62572) +++ head/sys/kern/tty_cons.c (revision 62573) @@ -1,456 +1,456 @@ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)cons.c 7.2 (Berkeley) 5/9/91 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include static d_open_t cnopen; static d_close_t cnclose; static d_read_t cnread; static d_write_t cnwrite; static d_ioctl_t cnioctl; static d_poll_t cnpoll; #define CDEV_MAJOR 0 static struct cdevsw cn_cdevsw = { /* open */ cnopen, /* close */ cnclose, /* read */ cnread, /* write */ cnwrite, /* ioctl */ cnioctl, /* poll */ cnpoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ "console", /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ D_TTY, /* bmaj */ -1 }; static dev_t cn_dev_t; /* seems to be never really used */ static udev_t cn_udev_t; SYSCTL_OPAQUE(_machdep, CPU_CONSDEV, consdev, CTLFLAG_RD, &cn_udev_t, sizeof cn_udev_t, "T,dev_t", ""); static int cn_mute; int cons_unavail = 0; /* XXX: * physical console not available for * input (i.e., it is in graphics mode) */ static u_char cn_is_open; /* nonzero if logical console is open */ static int openmode, openflag; /* how /dev/console was openned */ static dev_t cn_devfsdev; /* represents the device private info */ static u_char cn_phys_is_open; /* nonzero if physical device is open */ static d_close_t *cn_phys_close; /* physical device close function */ static d_open_t *cn_phys_open; /* physical device open function */ struct consdev *cn_tab; /* physical console device info */ CONS_DRIVER(cons, NULL, NULL, NULL, NULL, NULL, NULL, NULL); void cninit() { struct consdev *best_cp, *cp, **list; /* * Find the first console with the highest priority. */ best_cp = NULL; list = (struct consdev **)cons_set.ls_items; while ((cp = *list++) != NULL) { if (cp->cn_probe == NULL) continue; (*cp->cn_probe)(cp); if (cp->cn_pri > CN_DEAD && (best_cp == NULL || cp->cn_pri > best_cp->cn_pri)) best_cp = cp; } /* * Check if we should mute the console (for security reasons perhaps) * It can be changes dynamically using sysctl kern.consmute * once we are up and going. * */ cn_mute = ((boothowto & (RB_MUTE |RB_SINGLE |RB_VERBOSE |RB_ASKNAME |RB_CONFIG)) == RB_MUTE); /* * If no console, give up. */ if (best_cp == NULL) { if (cn_tab != NULL && cn_tab->cn_term != NULL) (*cn_tab->cn_term)(cn_tab); cn_tab = best_cp; return; } /* * Initialize console, then attach to it. This ordering allows * debugging using the previous console, if any. */ (*best_cp->cn_init)(best_cp); if (cn_tab != NULL && cn_tab != best_cp) { /* Turn off the previous console. */ if (cn_tab->cn_term != NULL) (*cn_tab->cn_term)(cn_tab); } cn_tab = best_cp; } void cninit_finish() { struct cdevsw *cdp; if ((cn_tab == NULL) || cn_mute) return; /* * Hook the open and close functions. */ cdp = devsw(cn_tab->cn_dev); if (cdp != NULL) { cn_phys_close = cdp->d_close; cdp->d_close = cnclose; cn_phys_open = cdp->d_open; cdp->d_open = cnopen; } cn_dev_t = cn_tab->cn_dev; cn_udev_t = dev2udev(cn_dev_t); } static void cnuninit(void) { struct cdevsw *cdp; if (cn_tab == NULL) return; /* * Unhook the open and close functions. */ cdp = devsw(cn_tab->cn_dev); if (cdp != NULL) { cdp->d_close = cn_phys_close; cdp->d_open = cn_phys_open; } cn_phys_close = NULL; cn_phys_open = NULL; cn_dev_t = NODEV; cn_udev_t = NOUDEV; } /* * User has changed the state of the console muting. * This may require us to open or close the device in question. */ static int -sysctl_kern_consmute (SYSCTL_HANDLER_ARGS) +sysctl_kern_consmute(SYSCTL_HANDLER_ARGS) { int error; int ocn_mute; ocn_mute = cn_mute; error = sysctl_handle_int(oidp, &cn_mute, 0, req); if((error == 0) && (cn_tab != NULL) && (req->newptr != NULL)) { if(ocn_mute && !cn_mute) { /* * going from muted to unmuted.. open the physical dev * if the console has been openned */ cninit_finish(); if(cn_is_open) /* XXX curproc is not what we want really */ error = cnopen(cn_dev_t, openflag, openmode, curproc); /* if it failed, back it out */ if ( error != 0) cnuninit(); } else if (!ocn_mute && cn_mute) { /* * going from unmuted to muted.. close the physical dev * if it's only open via /dev/console */ if(cn_is_open) error = cnclose(cn_dev_t, openflag, openmode, curproc); if ( error == 0) cnuninit(); } if (error != 0) { /* * back out the change if there was an error */ cn_mute = ocn_mute; } } return (error); } SYSCTL_PROC(_kern, OID_AUTO, consmute, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof cn_mute, sysctl_kern_consmute, "I", ""); static int cnopen(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { dev_t cndev, physdev; int retval = 0; if (cn_tab == NULL || cn_phys_open == NULL) return (0); cndev = cn_tab->cn_dev; physdev = (major(dev) == major(cndev) ? dev : cndev); /* * If mute is active, then non console opens don't get here * so we don't need to check for that. They * bypass this and go straight to the device. */ if(!cn_mute) retval = (*cn_phys_open)(physdev, flag, mode, p); if (retval == 0) { /* * check if we openned it via /dev/console or * via the physical entry (e.g. /dev/sio0). */ if (dev == cndev) cn_phys_is_open = 1; else if (physdev == cndev) { openmode = mode; openflag = flag; cn_is_open = 1; } dev->si_tty = physdev->si_tty; } return (retval); } static int cnclose(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { dev_t cndev; struct tty *cn_tp; if (cn_tab == NULL || cn_phys_open == NULL) return (0); cndev = cn_tab->cn_dev; cn_tp = cndev->si_tty; /* * act appropriatly depending on whether it's /dev/console * or the pysical device (e.g. /dev/sio) that's being closed. * in either case, don't actually close the device unless * both are closed. */ if (dev == cndev) { /* the physical device is about to be closed */ cn_phys_is_open = 0; if (cn_is_open) { if (cn_tp) { /* perform a ttyhalfclose() */ /* reset session and proc group */ cn_tp->t_pgrp = NULL; cn_tp->t_session = NULL; } return (0); } } else if (major(dev) != major(cndev)) { /* the logical console is about to be closed */ cn_is_open = 0; if (cn_phys_is_open) return (0); dev = cndev; } if(cn_phys_close) return ((*cn_phys_close)(dev, flag, mode, p)); return (0); } static int cnread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { if (cn_tab == NULL || cn_phys_open == NULL) return (0); dev = cn_tab->cn_dev; return ((*devsw(dev)->d_read)(dev, uio, flag)); } static int cnwrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { if (cn_tab == NULL || cn_phys_open == NULL) { uio->uio_resid = 0; /* dump the data */ return (0); } if (constty) dev = constty->t_dev; else dev = cn_tab->cn_dev; return ((*devsw(dev)->d_write)(dev, uio, flag)); } static int cnioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { int error; if (cn_tab == NULL || cn_phys_open == NULL) return (0); /* * Superuser can always use this to wrest control of console * output from the "virtual" console. */ if (cmd == TIOCCONS && constty) { error = suser(p); if (error) return (error); constty = NULL; return (0); } dev = cn_tab->cn_dev; return ((*devsw(dev)->d_ioctl)(dev, cmd, data, flag, p)); } static int cnpoll(dev, events, p) dev_t dev; int events; struct proc *p; { if ((cn_tab == NULL) || cn_mute) return (1); dev = cn_tab->cn_dev; return ((*devsw(dev)->d_poll)(dev, events, p)); } int cngetc() { int c; if ((cn_tab == NULL) || cn_mute) return (-1); c = (*cn_tab->cn_getc)(cn_tab->cn_dev); if (c == '\r') c = '\n'; /* console input is always ICRNL */ return (c); } int cncheckc() { if ((cn_tab == NULL) || cn_mute) return (-1); return ((*cn_tab->cn_checkc)(cn_tab->cn_dev)); } void cnputc(c) register int c; { if ((cn_tab == NULL) || cn_mute) return; if (c) { if (c == '\n') (*cn_tab->cn_putc)(cn_tab->cn_dev, '\r'); (*cn_tab->cn_putc)(cn_tab->cn_dev, c); } } void cndbctl(on) int on; { static int refcount; if (cn_tab == NULL) return; if (!on) refcount--; if (refcount == 0 && cn_tab->cn_dbctl != NULL) (*cn_tab->cn_dbctl)(cn_tab->cn_dev, on); if (on) refcount++; } static void cn_drvinit(void *unused) { cn_devfsdev = make_dev(&cn_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "console"); } SYSINIT(cndev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,cn_drvinit,NULL) Index: head/sys/kern/uipc_usrreq.c =================================================================== --- head/sys/kern/uipc_usrreq.c (revision 62572) +++ head/sys/kern/uipc_usrreq.c (revision 62573) @@ -1,1282 +1,1282 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 * $FreeBSD$ */ #include #include #include #include #include #include /* XXX must be before */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct vm_zone *unp_zone; static unp_gen_t unp_gencnt; static u_int unp_count; static struct unp_head unp_shead, unp_dhead; /* * Unix communications domain. * * TODO: * SEQPACKET, RDM * rethink name space problems * need a proper out-of-band * lock pushdown */ static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; static ino_t unp_ino; /* prototype for fake inode numbers */ static int unp_attach __P((struct socket *)); static void unp_detach __P((struct unpcb *)); static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *)); static int unp_connect __P((struct socket *,struct sockaddr *, struct proc *)); static void unp_disconnect __P((struct unpcb *)); static void unp_shutdown __P((struct unpcb *)); static void unp_drop __P((struct unpcb *, int)); static void unp_gc __P((void)); static void unp_scan __P((struct mbuf *, void (*)(struct file *))); static void unp_mark __P((struct file *)); static void unp_discard __P((struct file *)); static int unp_internalize __P((struct mbuf *, struct proc *)); static int uipc_abort(struct socket *so) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; unp_drop(unp, ECONNABORTED); return 0; } static int uipc_accept(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; /* * Pass back name of connected socket, * if it was bound and we are still connected * (our peer may have closed already!). */ if (unp->unp_conn && unp->unp_conn->unp_addr) { *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 1); } else { *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); } return 0; } static int uipc_attach(struct socket *so, int proto, struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp != 0) return EISCONN; return unp_attach(so); } static int uipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; return unp_bind(unp, nam, p); } static int uipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; return unp_connect(so, nam, curproc); } static int uipc_connect2(struct socket *so1, struct socket *so2) { struct unpcb *unp = sotounpcb(so1); if (unp == 0) return EINVAL; return unp_connect2(so1, so2); } /* control is EOPNOTSUPP */ static int uipc_detach(struct socket *so) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; unp_detach(unp); return 0; } static int uipc_disconnect(struct socket *so) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; unp_disconnect(unp); return 0; } static int uipc_listen(struct socket *so, struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp == 0 || unp->unp_vnode == 0) return EINVAL; return 0; } static int uipc_peeraddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; if (unp->unp_conn && unp->unp_conn->unp_addr) *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 1); return 0; } static int uipc_rcvd(struct socket *so, int flags) { struct unpcb *unp = sotounpcb(so); struct socket *so2; if (unp == 0) return EINVAL; switch (so->so_type) { case SOCK_DGRAM: panic("uipc_rcvd DGRAM?"); /*NOTREACHED*/ case SOCK_STREAM: if (unp->unp_conn == 0) break; so2 = unp->unp_conn->unp_socket; /* * Adjust backpressure on sender * and wakeup any waiting to write. */ so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; unp->unp_mbcnt = so->so_rcv.sb_mbcnt; so2->so_snd.sb_hiwat += unp->unp_cc - so->so_rcv.sb_cc; (void)chgsbsize(so2->so_cred->cr_uid, (rlim_t)unp->unp_cc - so->so_rcv.sb_cc, RLIM_INFINITY); unp->unp_cc = so->so_rcv.sb_cc; sowwakeup(so2); break; default: panic("uipc_rcvd unknown socktype"); } return 0; } /* pru_rcvoob is EOPNOTSUPP */ static int uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p) { int error = 0; struct unpcb *unp = sotounpcb(so); struct socket *so2; if (unp == 0) { error = EINVAL; goto release; } if (flags & PRUS_OOB) { error = EOPNOTSUPP; goto release; } if (control && (error = unp_internalize(control, p))) goto release; switch (so->so_type) { case SOCK_DGRAM: { struct sockaddr *from; if (nam) { if (unp->unp_conn) { error = EISCONN; break; } error = unp_connect(so, nam, p); if (error) break; } else { if (unp->unp_conn == 0) { error = ENOTCONN; break; } } so2 = unp->unp_conn->unp_socket; if (unp->unp_addr) from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; if (sbappendaddr(&so2->so_rcv, from, m, control)) { sorwakeup(so2); m = 0; control = 0; } else error = ENOBUFS; if (nam) unp_disconnect(unp); break; } case SOCK_STREAM: /* Connect if not connected yet. */ /* * Note: A better implementation would complain * if not equal to the peer's address. */ if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam) { error = unp_connect(so, nam, p); if (error) break; /* XXX */ } else { error = ENOTCONN; break; } } if (so->so_state & SS_CANTSENDMORE) { error = EPIPE; break; } if (unp->unp_conn == 0) panic("uipc_send connected but no connection?"); so2 = unp->unp_conn->unp_socket; /* * Send to paired receive port, and then reduce * send buffer hiwater marks to maintain backpressure. * Wake up readers. */ if (control) { if (sbappendcontrol(&so2->so_rcv, m, control)) control = 0; } else sbappend(&so2->so_rcv, m); so->so_snd.sb_mbmax -= so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; so->so_snd.sb_hiwat -= so2->so_rcv.sb_cc - unp->unp_conn->unp_cc; (void)chgsbsize(so->so_cred->cr_uid, (rlim_t)unp->unp_conn->unp_cc - so2->so_rcv.sb_cc, RLIM_INFINITY); unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; sorwakeup(so2); m = 0; break; default: panic("uipc_send unknown socktype"); } /* * SEND_EOF is equivalent to a SEND followed by * a SHUTDOWN. */ if (flags & PRUS_EOF) { socantsendmore(so); unp_shutdown(unp); } if (control && error != 0) unp_dispose(control); release: if (control) m_freem(control); if (m) m_freem(m); return error; } static int uipc_sense(struct socket *so, struct stat *sb) { struct unpcb *unp = sotounpcb(so); struct socket *so2; if (unp == 0) return EINVAL; sb->st_blksize = so->so_snd.sb_hiwat; if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { so2 = unp->unp_conn->unp_socket; sb->st_blksize += so2->so_rcv.sb_cc; } sb->st_dev = NOUDEV; if (unp->unp_ino == 0) unp->unp_ino = unp_ino++; sb->st_ino = unp->unp_ino; return (0); } static int uipc_shutdown(struct socket *so) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; socantsendmore(so); unp_shutdown(unp); return 0; } static int uipc_sockaddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; if (unp->unp_addr) *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1); return 0; } struct pr_usrreqs uipc_usrreqs = { uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, sosend, soreceive, sopoll }; /* * Both send and receive buffers are allocated PIPSIZ bytes of buffering * for stream sockets, although the total for sender and receiver is * actually only PIPSIZ. * Datagram sockets really use the sendspace as the maximum datagram size, * and don't really want to reserve the sendspace. Their recvspace should * be large enough for at least one max-size datagram plus address. */ #ifndef PIPSIZ #define PIPSIZ 8192 #endif static u_long unpst_sendspace = PIPSIZ; static u_long unpst_recvspace = PIPSIZ; static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ static u_long unpdg_recvspace = 4*1024; static int unp_rights; /* file descriptors in flight */ SYSCTL_DECL(_net_local_stream); SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, &unpst_sendspace, 0, ""); SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, &unpst_recvspace, 0, ""); SYSCTL_DECL(_net_local_dgram); SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, &unpdg_sendspace, 0, ""); SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, &unpdg_recvspace, 0, ""); SYSCTL_DECL(_net_local); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); static int unp_attach(so) struct socket *so; { register struct unpcb *unp; int error; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { switch (so->so_type) { case SOCK_STREAM: error = soreserve(so, unpst_sendspace, unpst_recvspace); break; case SOCK_DGRAM: error = soreserve(so, unpdg_sendspace, unpdg_recvspace); break; default: panic("unp_attach"); } if (error) return (error); } unp = zalloc(unp_zone); if (unp == NULL) return (ENOBUFS); bzero(unp, sizeof *unp); unp->unp_gencnt = ++unp_gencnt; unp_count++; LIST_INIT(&unp->unp_refs); unp->unp_socket = so; unp->unp_rvnode = curproc->p_fd->fd_rdir; LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead, unp, unp_link); so->so_pcb = (caddr_t)unp; return (0); } static void unp_detach(unp) register struct unpcb *unp; { LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; if (unp->unp_vnode) { unp->unp_vnode->v_socket = 0; vrele(unp->unp_vnode); unp->unp_vnode = 0; } if (unp->unp_conn) unp_disconnect(unp); while (!LIST_EMPTY(&unp->unp_refs)) unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); soisdisconnected(unp->unp_socket); unp->unp_socket->so_pcb = 0; if (unp_rights) { /* * Normally the receive buffer is flushed later, * in sofree, but if our receive buffer holds references * to descriptors that are now garbage, we will dispose * of those descriptor references after the garbage collector * gets them (resulting in a "panic: closef: count < 0"). */ sorflush(unp->unp_socket); unp_gc(); } if (unp->unp_addr) FREE(unp->unp_addr, M_SONAME); zfree(unp_zone, unp); } static int unp_bind(unp, nam, p) struct unpcb *unp; struct sockaddr *nam; struct proc *p; { struct sockaddr_un *soun = (struct sockaddr_un *)nam; register struct vnode *vp; struct vattr vattr; int error, namelen; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; if (unp->unp_vnode != NULL) return (EINVAL); #define offsetof(s, e) ((char *)&((s *)0)->e - (char *)((s *)0)) namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); if (namelen <= 0) return EINVAL; strncpy(buf, soun->sun_path, namelen); buf[namelen] = 0; /* null-terminate the string */ NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EADDRINUSE); } VATTR_NULL(&vattr); vattr.va_type = VSOCK; vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (error) return (error); vp = nd.ni_vp; vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); VOP_UNLOCK(vp, 0, p); return (0); } static int unp_connect(so, nam, p) struct socket *so; struct sockaddr *nam; struct proc *p; { register struct sockaddr_un *soun = (struct sockaddr_un *)nam; register struct vnode *vp; register struct socket *so2, *so3; struct unpcb *unp2, *unp3; int error, len; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return EINVAL; strncpy(buf, soun->sun_path, len); buf[len] = 0; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VSOCK) { error = ENOTSOCK; goto bad; } error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p); if (error) goto bad; so2 = vp->v_socket; if (so2 == 0) { error = ECONNREFUSED; goto bad; } if (so->so_type != so2->so_type) { error = EPROTOTYPE; goto bad; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if ((so2->so_options & SO_ACCEPTCONN) == 0 || (so3 = sonewconn3(so2, 0, p)) == 0) { error = ECONNREFUSED; goto bad; } unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) unp3->unp_addr = (struct sockaddr_un *) dup_sockaddr((struct sockaddr *) unp2->unp_addr, 1); so2 = so3; } error = unp_connect2(so, so2); bad: vput(vp); return (error); } int unp_connect2(so, so2) register struct socket *so; register struct socket *so2; { register struct unpcb *unp = sotounpcb(so); register struct unpcb *unp2; if (so2->so_type != so->so_type) return (EPROTOTYPE); unp2 = sotounpcb(so2); unp->unp_conn = unp2; switch (so->so_type) { case SOCK_DGRAM: LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); soisconnected(so); break; case SOCK_STREAM: unp2->unp_conn = unp; soisconnected(so); soisconnected(so2); break; default: panic("unp_connect2"); } return (0); } static void unp_disconnect(unp) struct unpcb *unp; { register struct unpcb *unp2 = unp->unp_conn; if (unp2 == 0) return; unp->unp_conn = 0; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: LIST_REMOVE(unp, unp_reflink); unp->unp_socket->so_state &= ~SS_ISCONNECTED; break; case SOCK_STREAM: soisdisconnected(unp->unp_socket); unp2->unp_conn = 0; soisdisconnected(unp2->unp_socket); break; } } #ifdef notdef void unp_abort(unp) struct unpcb *unp; { unp_detach(unp); } #endif static int prison_unpcb(struct proc *p, struct unpcb *unp) { if (!p->p_prison) return (0); if (p->p_fd->fd_rdir == unp->unp_rvnode) return (0); return (1); } static int -unp_pcblist (SYSCTL_HANDLER_ARGS) +unp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct unpcb *unp, **unp_list; unp_gen_t gencnt; struct xunpgen xug; struct unp_head *head; head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = unp_count; req->oldidx = 2 * (sizeof xug) + (n + n/8) * sizeof(struct xunpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ gencnt = unp_gencnt; n = unp_count; xug.xug_len = sizeof xug; xug.xug_count = n; xug.xug_gen = gencnt; xug.xug_sogen = so_gencnt; error = SYSCTL_OUT(req, &xug, sizeof xug); if (error) return error; unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); if (unp_list == 0) return ENOMEM; for (unp = LIST_FIRST(head), i = 0; unp && i < n; unp = LIST_NEXT(unp, unp_link)) { if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->p, unp)) unp_list[i++] = unp; } n = i; /* in case we lost some during malloc */ error = 0; for (i = 0; i < n; i++) { unp = unp_list[i]; if (unp->unp_gencnt <= gencnt) { struct xunpcb xu; xu.xu_len = sizeof xu; xu.xu_unpp = unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. */ if (unp->unp_addr) bcopy(unp->unp_addr, &xu.xu_addr, unp->unp_addr->sun_len); if (unp->unp_conn && unp->unp_conn->unp_addr) bcopy(unp->unp_conn->unp_addr, &xu.xu_caddr, unp->unp_conn->unp_addr->sun_len); bcopy(unp, &xu.xu_unp, sizeof *unp); sotoxsocket(unp->unp_socket, &xu.xu_socket); error = SYSCTL_OUT(req, &xu, sizeof xu); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ xug.xug_gen = unp_gencnt; xug.xug_sogen = so_gencnt; xug.xug_count = unp_count; error = SYSCTL_OUT(req, &xug, sizeof xug); } free(unp_list, M_TEMP); return error; } SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", "List of active local datagram sockets"); SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", "List of active local stream sockets"); static void unp_shutdown(unp) struct unpcb *unp; { struct socket *so; if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && (so = unp->unp_conn->unp_socket)) socantrcvmore(so); } static void unp_drop(unp, errno) struct unpcb *unp; int errno; { struct socket *so = unp->unp_socket; so->so_error = errno; unp_disconnect(unp); if (so->so_head) { LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; unp_count--; so->so_pcb = (caddr_t) 0; if (unp->unp_addr) FREE(unp->unp_addr, M_SONAME); zfree(unp_zone, unp); sofree(so); } } #ifdef notdef void unp_drain() { } #endif int unp_externalize(rights) struct mbuf *rights; { struct proc *p = curproc; /* XXX */ register int i; register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); register int *fdp; register struct file **rp; register struct file *fp; int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm)) / sizeof (struct file *); int f; /* * if the new FD's will not fit, then we free them all */ if (!fdavail(p, newfds)) { rp = (struct file **)CMSG_DATA(cm); for (i = 0; i < newfds; i++) { fp = *rp; /* * zero the pointer before calling unp_discard, * since it may end up in unp_gc().. */ *rp++ = 0; unp_discard(fp); } return (EMSGSIZE); } /* * now change each pointer to an fd in the global table to * an integer that is the index to the local fd table entry * that we set up to point to the global one we are transferring. * If sizeof (struct file *) is bigger than or equal to sizeof int, * then do it in forward order. In that case, an integer will * always come in the same place or before its corresponding * struct file pointer. * If sizeof (struct file *) is smaller than sizeof int, then * do it in reverse order. */ if (sizeof (struct file *) >= sizeof (int)) { fdp = (int *)(cm + 1); rp = (struct file **)CMSG_DATA(cm); for (i = 0; i < newfds; i++) { if (fdalloc(p, 0, &f)) panic("unp_externalize"); fp = *rp++; p->p_fd->fd_ofiles[f] = fp; fp->f_msgcount--; unp_rights--; *fdp++ = f; } } else { fdp = (int *)(cm + 1) + newfds - 1; rp = (struct file **)CMSG_DATA(cm) + newfds - 1; for (i = 0; i < newfds; i++) { if (fdalloc(p, 0, &f)) panic("unp_externalize"); fp = *rp--; p->p_fd->fd_ofiles[f] = fp; fp->f_msgcount--; unp_rights--; *fdp-- = f; } } /* * Adjust length, in case sizeof(struct file *) and sizeof(int) * differs. */ cm->cmsg_len = CMSG_LEN(newfds * sizeof(int)); rights->m_len = cm->cmsg_len; return (0); } void unp_init(void) { unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0); if (unp_zone == 0) panic("unp_init"); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); } #ifndef MIN #define MIN(a,b) (((a)<(b))?(a):(b)) #endif static int unp_internalize(control, p) struct mbuf *control; struct proc *p; { struct filedesc *fdescp = p->p_fd; register struct cmsghdr *cm = mtod(control, struct cmsghdr *); register struct file **rp; register struct file *fp; register int i, fd, *fdp; register struct cmsgcred *cmcred; int oldfds; u_int newlen; if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len) return (EINVAL); /* * Fill in credential information. */ if (cm->cmsg_type == SCM_CREDS) { cmcred = (struct cmsgcred *)(cm + 1); cmcred->cmcred_pid = p->p_pid; cmcred->cmcred_uid = p->p_cred->p_ruid; cmcred->cmcred_gid = p->p_cred->p_rgid; cmcred->cmcred_euid = p->p_ucred->cr_uid; cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, CMGROUP_MAX); for (i = 0; i < cmcred->cmcred_ngroups; i++) cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; return(0); } oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); /* * check that all the FDs passed in refer to legal OPEN files * If not, reject the entire operation. */ fdp = (int *)(cm + 1); for (i = 0; i < oldfds; i++) { fd = *fdp++; if ((unsigned)fd >= fdescp->fd_nfiles || fdescp->fd_ofiles[fd] == NULL) return (EBADF); } /* * Now replace the integer FDs with pointers to * the associated global file table entry.. * Allocate a bigger buffer as necessary. But if an cluster is not * enough, return E2BIG. */ newlen = CMSG_LEN(oldfds * sizeof(struct file *)); if (newlen > MCLBYTES) return (E2BIG); if (newlen - control->m_len > M_TRAILINGSPACE(control)) { if (control->m_flags & M_EXT) return (E2BIG); MCLGET(control, M_WAIT); if ((control->m_flags & M_EXT) == 0) return (ENOBUFS); /* copy the data to the cluster */ memcpy(mtod(control, char *), cm, cm->cmsg_len); cm = mtod(control, struct cmsghdr *); } /* * Adjust length, in case sizeof(struct file *) and sizeof(int) * differs. */ control->m_len = cm->cmsg_len = newlen; /* * Transform the file descriptors into struct file pointers. * If sizeof (struct file *) is bigger than or equal to sizeof int, * then do it in reverse order so that the int won't get until * we're done. * If sizeof (struct file *) is smaller than sizeof int, then * do it in forward order. */ if (sizeof (struct file *) >= sizeof (int)) { fdp = (int *)(cm + 1) + oldfds - 1; rp = (struct file **)CMSG_DATA(cm) + oldfds - 1; for (i = 0; i < oldfds; i++) { fp = fdescp->fd_ofiles[*fdp--]; *rp-- = fp; fp->f_count++; fp->f_msgcount++; unp_rights++; } } else { fdp = (int *)(cm + 1); rp = (struct file **)CMSG_DATA(cm); for (i = 0; i < oldfds; i++) { fp = fdescp->fd_ofiles[*fdp++]; *rp++ = fp; fp->f_count++; fp->f_msgcount++; unp_rights++; } } return (0); } static int unp_defer, unp_gcing; static void unp_gc() { register struct file *fp, *nextfp; register struct socket *so; struct file **extra_ref, **fpp; int nunref, i; if (unp_gcing) return; unp_gcing = 1; unp_defer = 0; /* * before going through all this, set all FDs to * be NOT defered and NOT externally accessible */ LIST_FOREACH(fp, &filehead, f_list) fp->f_flag &= ~(FMARK|FDEFER); do { LIST_FOREACH(fp, &filehead, f_list) { /* * If the file is not open, skip it */ if (fp->f_count == 0) continue; /* * If we already marked it as 'defer' in a * previous pass, then try process it this time * and un-mark it */ if (fp->f_flag & FDEFER) { fp->f_flag &= ~FDEFER; unp_defer--; } else { /* * if it's not defered, then check if it's * already marked.. if so skip it */ if (fp->f_flag & FMARK) continue; /* * If all references are from messages * in transit, then skip it. it's not * externally accessible. */ if (fp->f_count == fp->f_msgcount) continue; /* * If it got this far then it must be * externally accessible. */ fp->f_flag |= FMARK; } /* * either it was defered, or it is externally * accessible and not already marked so. * Now check if it is possibly one of OUR sockets. */ if (fp->f_type != DTYPE_SOCKET || (so = (struct socket *)fp->f_data) == 0) continue; if (so->so_proto->pr_domain != &localdomain || (so->so_proto->pr_flags&PR_RIGHTS) == 0) continue; #ifdef notdef if (so->so_rcv.sb_flags & SB_LOCK) { /* * This is problematical; it's not clear * we need to wait for the sockbuf to be * unlocked (on a uniprocessor, at least), * and it's also not clear what to do * if sbwait returns an error due to receipt * of a signal. If sbwait does return * an error, we'll go into an infinite * loop. Delete all of this for now. */ (void) sbwait(&so->so_rcv); goto restart; } #endif /* * So, Ok, it's one of our sockets and it IS externally * accessible (or was defered). Now we look * to see if we hold any file descriptors in its * message buffers. Follow those links and mark them * as accessible too. */ unp_scan(so->so_rcv.sb_mb, unp_mark); } } while (unp_defer); /* * We grab an extra reference to each of the file table entries * that are not otherwise accessible and then free the rights * that are stored in messages on them. * * The bug in the orginal code is a little tricky, so I'll describe * what's wrong with it here. * * It is incorrect to simply unp_discard each entry for f_msgcount * times -- consider the case of sockets A and B that contain * references to each other. On a last close of some other socket, * we trigger a gc since the number of outstanding rights (unp_rights) * is non-zero. If during the sweep phase the gc code un_discards, * we end up doing a (full) closef on the descriptor. A closef on A * results in the following chain. Closef calls soo_close, which * calls soclose. Soclose calls first (through the switch * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply * returns because the previous instance had set unp_gcing, and * we return all the way back to soclose, which marks the socket * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush * to free up the rights that are queued in messages on the socket A, * i.e., the reference on B. The sorflush calls via the dom_dispose * switch unp_dispose, which unp_scans with unp_discard. This second * instance of unp_discard just calls closef on B. * * Well, a similar chain occurs on B, resulting in a sorflush on B, * which results in another closef on A. Unfortunately, A is already * being closed, and the descriptor has already been marked with * SS_NOFDREF, and soclose panics at this point. * * Here, we first take an extra reference to each inaccessible * descriptor. Then, we call sorflush ourself, since we know * it is a Unix domain socket anyhow. After we destroy all the * rights carried in messages, we do a last closef to get rid * of our extra reference. This is the last close, and the * unp_detach etc will shut down the socket. * * 91/09/19, bsy@cs.cmu.edu */ extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; fp = nextfp) { nextfp = LIST_NEXT(fp, f_list); /* * If it's not open, skip it */ if (fp->f_count == 0) continue; /* * If all refs are from msgs, and it's not marked accessible * then it must be referenced from some unreachable cycle * of (shut-down) FDs, so include it in our * list of FDs to remove */ if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { *fpp++ = fp; nunref++; fp->f_count++; } } /* * for each FD on our hit list, do the following two things */ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { struct file *tfp = *fpp; if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) sorflush((struct socket *)(tfp->f_data)); } for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) closef(*fpp, (struct proc *) NULL); free((caddr_t)extra_ref, M_FILE); unp_gcing = 0; } void unp_dispose(m) struct mbuf *m; { if (m) unp_scan(m, unp_discard); } static void unp_scan(m0, op) register struct mbuf *m0; void (*op) __P((struct file *)); { register struct mbuf *m; register struct file **rp; register struct cmsghdr *cm; register int i; int qfds; while (m0) { for (m = m0; m; m = m->m_next) if (m->m_type == MT_CONTROL && m->m_len >= sizeof(*cm)) { cm = mtod(m, struct cmsghdr *); if (cm->cmsg_level != SOL_SOCKET || cm->cmsg_type != SCM_RIGHTS) continue; qfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm)) / sizeof (struct file *); rp = (struct file **)CMSG_DATA(cm); for (i = 0; i < qfds; i++) (*op)(*rp++); break; /* XXX, but saves time */ } m0 = m0->m_act; } } static void unp_mark(fp) struct file *fp; { if (fp->f_flag & FMARK) return; unp_defer++; fp->f_flag |= (FMARK|FDEFER); } static void unp_discard(fp) struct file *fp; { fp->f_msgcount--; unp_rights--; (void) closef(fp, (struct proc *)NULL); } Index: head/sys/kern/vfs_export.c =================================================================== --- head/sys/kern/vfs_export.c (revision 62572) +++ head/sys/kern/vfs_export.c (revision 62573) @@ -1,2931 +1,2931 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 * $FreeBSD$ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include "opt_ffs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); static void insmntque __P((struct vnode *vp, struct mount *mp)); static void vclean __P((struct vnode *vp, int flags, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ static u_long wantfreevnodes = 25; SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); static int reassignbufloops; SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, ""); static int reassignbufsortgood; SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, ""); static int reassignbufsortbad; SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, ""); static int reassignbufmethod = 1; SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, ""); #ifdef ENABLE_VFS_IOOPT int vfs_ioopt = 0; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); #endif struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */ struct simplelock mountlist_slock; struct simplelock mntvnode_slock; int nfs_mount_type = -1; #ifndef NULL_SIMPLELOCKS static struct simplelock mntid_slock; static struct simplelock vnode_free_list_slock; static struct simplelock spechash_slock; #endif struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; int prtactive = 0; /* 1 => print out reclaim of active vnodes */ /* * The workitem queue. */ #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ time_t syncdelay = 30; /* max time to delay syncing data */ time_t filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); time_t dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); time_t metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); static int syncer_delayno = 0; static long syncer_mask; LIST_HEAD(synclist, vnode); static struct synclist *syncer_workitem_pending; int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "Maximum number of vnodes"); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + cnt.v_page_count / 4; simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); simple_lock_init(&vnode_free_list_slock); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; } /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. */ int vfs_busy(mp, flags, interlkp, p) struct mount *mp; int flags; struct simplelock *interlkp; struct proc *p; { int lkflags; if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { simple_unlock(interlkp); } /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { simple_lock(interlkp); } return (ENOENT); } lkflags = LK_SHARED | LK_NOPAUSE; if (interlkp) lkflags |= LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(mp, p) struct mount *mp; struct proc *p; { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. */ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct proc *p = curproc; /* XXX */ struct vfsconf *vfsp; struct mount *mp; if (fstypename == NULL) return (ENODEV); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) return (ENODEV); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); mp->mnt_vfc = vfsp; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_flag = MNT_RDONLY; mp->mnt_vnodecovered = NULLVP; vfsp->vfc_refcount++; mp->mnt_iosize_max = DFLTPHYS; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_stat.f_mntonname[0] = '/'; mp->mnt_stat.f_mntonname[1] = 0; (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); *mpp = mp; return (0); } /* * Find an appropriate filesystem to use for the root. If a filesystem * has not been preselected, walk through the list of known filesystems * trying those that have mountroot routines, and try them until one * works or we have tried them all. */ #ifdef notdef /* XXX JH */ int lite2_vfs_mountroot() { struct vfsconf *vfsp; extern int (*lite2_mountroot) __P((void)); int error; if (lite2_mountroot != NULL) return ((*lite2_mountroot)()); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { if (vfsp->vfc_mountroot == NULL) continue; if ((error = (*vfsp->vfc_mountroot)()) == 0) return (0); printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } return (ENODEV); } #endif /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; simple_lock(&mountlist_slock); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); return (mp); } } simple_unlock(&mountlist_slock); return ((struct mount *) 0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(mp) struct mount *mp; { static u_int16_t mntid_base; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 16; for (;;) { tfsid.val[0] = makeudev(255, mtype | mntid_base++); if (vfs_getvfs(&tfsid) == NULL) break; } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; simple_unlock(&mntid_slock); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_SEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, ""); /* * Get a current timestamp. */ void vfs_timestamp(tsp) struct timespec *tsp; { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { int s, count; struct proc *p = curproc; /* XXX */ struct vnode *vp = NULL; vm_object_t object; /* * We take the least recently used vnode from the freelist * if we can get it and it has no cached pages, and no * namecache entries are relative to it. * Otherwise we allocate a new vnode */ s = splbio(); simple_lock(&vnode_free_list_slock); if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { /* * XXX: this is only here to be backwards compatible */ vp = NULL; } else for (count = 0; count < freevnodes; count++) { vp = TAILQ_FIRST(&vnode_free_list); if (vp == NULL || vp->v_usecount) panic("getnewvnode: free vnode isn't"); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* * Don't recycle if active in the namecache or * if it still has cached pages or we cannot get * its interlock. */ object = vp->v_object; if (LIST_FIRST(&vp->v_cache_src) != NULL || (object && (object->resident_page_count || object->ref_count)) || !simple_lock_try(&vp->v_interlock)) { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); vp = NULL; continue; } break; } if (vp) { vp->v_flag |= VDOOMED; freevnodes--; simple_unlock(&vnode_free_list_slock); cache_purge(vp); vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, p); } else { simple_unlock(&vp->v_interlock); } #ifdef INVARIANTS { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); numvnodes++; } TAILQ_INIT(&vp->v_cleanblkhd); TAILQ_INIT(&vp->v_dirtyblkhd); vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; splx(s); vfs_object_create(vp, p, p->p_ucred); return (0); } /* * Move a vnode from one mount queue to another. */ static void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { simple_unlock(&mntvnode_slock); return; } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); simple_unlock(&mntvnode_slock); } /* * Update outstanding I/O count and do wakeup if requested. */ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; if (flags & V_SAVE) { s = splbio(); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); if (error) { splx(s); return (error); } } if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { splx(s); if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) return (error); s = splbio(); if (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd)) panic("vinvalbuf: dirty bufs"); } splx(s); } s = splbio(); for (;;) { blist = TAILQ_FIRST(&vp->v_cleanblkhd); if (!blist) blist = TAILQ_FIRST(&vp->v_dirtyblkhd); if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, "vinvalbuf", slpflag, slptimeo); if (error == ENOLCK) break; splx(s); return (error); } /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. Note that vfs_bio_awrite expects * buffers to reside on a queue, while VOP_BWRITE and * brelse do not. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { if (bp->b_vp == vp) { if (bp->b_flags & B_CLUSTEROK) { BUF_UNLOCK(bp); vfs_bio_awrite(bp); } else { bremfree(bp); bp->b_flags |= B_ASYNC; BUF_WRITE(bp); } } else { bremfree(bp); (void) BUF_WRITE(bp); } break; } bremfree(bp); bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } simple_unlock(&vp->v_interlock); if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) panic("vinvalbuf: flush failed"); return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(vp, cred, p, length, blksize) register struct vnode *vp; struct ucred *cred; struct proc *p; off_t length; int blksize; { register struct buf *bp; struct buf *nbp; int s, anyfreed; int trunclbn; /* * Round up to the *next* lbn. */ trunclbn = (length + blksize - 1) / blksize; s = splbio(); restart: anyfreed = 1; for (;anyfreed;) { anyfreed = 0; for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bp->b_lblkno >= trunclbn) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; } if (nbp && (((nbp->b_xflags & BX_VNCLEAN) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { goto restart; } } } for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bp->b_lblkno >= trunclbn) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; } if (nbp && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { goto restart; } } } } if (length > 0) { restartsync: for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); if (bp->b_vp == vp) { bp->b_flags |= B_ASYNC; } else { bp->b_flags &= ~B_ASYNC; } BUF_WRITE(bp); } goto restartsync; } } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); } splx(s); vnode_pager_setsize(vp, length); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); vhold(vp); bp->b_vp = vp; bp->b_dev = vn_todev(vp); /* * Insert onto list for new vnode. */ s = splbio(); bp->b_xflags |= BX_VNCLEAN; bp->b_xflags &= ~BX_VNDIRTY; TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; struct buflists *listheadp; int s; KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; s = splbio(); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) { if (bp->b_xflags & BX_VNDIRTY) listheadp = &vp->v_dirtyblkhd; else listheadp = &vp->v_cleanblkhd; TAILQ_REMOVE(listheadp, bp, b_vnbufs); bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vp->v_flag &= ~VONWORKLST; LIST_REMOVE(vp, v_synclist); } splx(s); bp->b_vp = (struct vnode *) 0; vdrop(vp); } /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct vnode *vp, int delay) { int s, slot; s = splbio(); if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); vp->v_flag |= VONWORKLST; splx(s); } struct proc *updateproc; static void sched_sync __P((void)); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) /* * System filesystem synchronizer daemon. */ void sched_sync(void) { struct synclist *slp; struct vnode *vp; long starttime; int s; struct proc *p = updateproc; EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p, SHUTDOWN_PRI_LAST); for (;;) { kproc_suspend_loop(p); starttime = time_second; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. */ s = splbio(); slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; splx(s); while ((vp = LIST_FIRST(slp)) != NULL) { if (VOP_ISLOCKED(vp, NULL) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); VOP_UNLOCK(vp, 0, p); } s = splbio(); if (LIST_FIRST(slp) == vp) { /* * Note: v_tag VT_VFS vps can remain on the * worklist too with no dirty blocks, but * since sync_fsync() moves it to a different * slot we are safe. */ if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && !vn_isdisk(vp, NULL)) panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(vp, syncdelay); } splx(s); } /* * Do soft update processing. */ #ifdef SOFTUPDATES softdep_process_worklist(NULL); #endif /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (time_second == starttime) tsleep(&lbolt, PPAUSE, "syncer", 0); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer() { int s; s = splhigh(); if (updateproc->p_wchan == &lbolt) setrunnable(updateproc); splx(s); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; return (1); } return(0); } /* * Associate a p-buffer with a vnode. * * Also sets B_PAGING flag to indicate that vnode is not fully associated * with the buffer. i.e. the bp has not been linked into the vnode or * ref-counted. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); bp->b_vp = vp; bp->b_flags |= B_PAGING; bp->b_dev = vn_todev(vp); } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); /* XXX REMOVE ME */ if (bp->b_vnbufs.tqe_next != NULL) { panic( "relpbuf(): b_vp was probably reassignbuf()d %p %x", bp, (int)bp->b_flags ); } bp->b_vp = (struct vnode *) 0; bp->b_flags &= ~B_PAGING; } void pbreassignbuf(bp, newvp) struct buf *bp; struct vnode *newvp; { if ((bp->b_flags & B_PAGING) == 0) { panic( "pbreassignbuf() on non phys bp %p", bp ); } bp->b_vp = newvp; } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { struct buflists *listheadp; int delay; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } ++reassignbufcalls; /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) { if (bp->b_xflags & BX_VNDIRTY) listheadp = &bp->b_vp->v_dirtyblkhd; else listheadp = &bp->b_vp->v_cleanblkhd; TAILQ_REMOVE(listheadp, bp, b_vnbufs); bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); if (bp->b_vp != newvp) { vdrop(bp->b_vp); bp->b_vp = NULL; /* for clarification */ } } /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; listheadp = &newvp->v_dirtyblkhd; if ((newvp->v_flag & VONWORKLST) == 0) { switch (newvp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: case VBLK: if (newvp->v_specmountpoint != NULL) { delay = metadelay; break; } /* fall through */ default: delay = filedelay; } vn_syncer_add_to_worklist(newvp, delay); } bp->b_xflags |= BX_VNDIRTY; tbp = TAILQ_FIRST(listheadp); if (tbp == NULL || bp->b_lblkno == 0 || (bp->b_lblkno > 0 && tbp->b_lblkno < 0) || (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) { TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); ++reassignbufsortgood; } else if (bp->b_lblkno < 0) { TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); ++reassignbufsortgood; } else if (reassignbufmethod == 1) { /* * New sorting algorithm, only handle sequential case, * otherwise append to end (but before metadata) */ if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL && (tbp->b_xflags & BX_VNDIRTY)) { /* * Found the best place to insert the buffer */ TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); ++reassignbufsortgood; } else { /* * Missed, append to end, but before meta-data. * We know that the head buffer in the list is * not meta-data due to prior conditionals. * * Indirect effects: NFS second stage write * tends to wind up here, giving maximum * distance between the unstable write and the * commit rpc. */ tbp = TAILQ_LAST(listheadp, buflists); while (tbp && tbp->b_lblkno < 0) tbp = TAILQ_PREV(tbp, buflists, b_vnbufs); TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); ++reassignbufsortbad; } } else { /* * Old sorting algorithm, scan queue and insert */ struct buf *ttbp; while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && (ttbp->b_lblkno < bp->b_lblkno)) { ++reassignbufloops; tbp = ttbp; } TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); } } else { bp->b_xflags |= BX_VNCLEAN; TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); if ((newvp->v_flag & VONWORKLST) && TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { newvp->v_flag &= ~VONWORKLST; LIST_REMOVE(newvp, v_synclist); } } if (bp->b_vp != newvp) { bp->b_vp = newvp; vhold(bp->b_vp); } splx(s); } /* * Create a vnode for a block device. * Used for mounting the root file system. * XXX: This now changed to a VCHR due to the block/char merging. */ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) { *vpp = NULLVP; return (ENXIO); } error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); if (error) { *vpp = NULLVP; return (error); } vp = nvp; vp->v_type = VCHR; addalias(vp, dev); *vpp = vp; return (0); } /* * Add vnode to the alias list hung off the dev_t. * * The reason for this gunk is that multiple vnodes can reference * the same physical device, so checking vp->v_usecount to see * how many users there are is inadequate; the v_usecount for * the vnodes need to be accumulated. vcount() does that. */ void addaliasu(nvp, nvp_rdev) struct vnode *nvp; udev_t nvp_rdev; { if (nvp->v_type != VBLK && nvp->v_type != VCHR) panic("addaliasu on non-special vnode"); addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0)); } void addalias(nvp, dev) struct vnode *nvp; dev_t dev; { if (nvp->v_type != VBLK && nvp->v_type != VCHR) panic("addalias on non-special vnode"); nvp->v_rdev = dev; simple_lock(&spechash_slock); SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext); simple_unlock(&spechash_slock); } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, flags, p) register struct vnode *vp; int flags; struct proc *p; { int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { simple_lock(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } vp->v_usecount++; if (VSHOULDBUSY(vp)) vbusy(vp); if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* * must expand vrele here because we do not want * to call VOP_INACTIVE if the reference count * drops back to zero since it was never really * active. We must remove it from the free list * before sleeping so that multiple processes do * not try to recycle it. */ simple_lock(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } return (error); } simple_unlock(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { simple_lock(&vp->v_interlock); vp->v_usecount++; simple_unlock(&vp->v_interlock); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vrele: null vp")); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; simple_unlock(&vp->v_interlock); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { VOP_INACTIVE(vp, p); } } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } } void vput(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vput: null vp")); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } else { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } } /* * Somebody doesn't want the vnode recycled. */ void vhold(vp) register struct vnode *vp; { int s; s = splbio(); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); splx(s); } /* * One less who cares about this vnode. */ void vdrop(vp) register struct vnode *vp; { int s; s = splbio(); if (vp->v_holdcnt <= 0) panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); splx(s); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { struct proc *p = curproc; /* XXX */ struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); loop: for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = LIST_NEXT(vp, v_mntvnodes); /* * Skip over a selected vnode. */ if (vp == skipvp) continue; simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { simple_unlock(&vp->v_interlock); continue; } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { simple_unlock(&vp->v_interlock); continue; } /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { simple_unlock(&mntvnode_slock); vgonel(vp, p); simple_lock(&mntvnode_slock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, p); } else { vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif simple_unlock(&vp->v_interlock); busy++; } simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ static void vclean(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int active; vm_object_t obj; /* * Check to see if the vnode is in use. If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK * ensures that the VOP_INACTIVE routine is done with its work. * For active vnodes, it ensures that no other activity can * occur while the underlying object is being cleaned out. */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ if (flags & DOCLOSE) { if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0) vinvalbuf(vp, 0, NOCRED, p, 0, 0); } if ((obj = vp->v_object) != NULL) { if (obj->ref_count == 0) { /* * vclean() may be called twice. The first time * removes the primary reference to the object, * the second time goes one further and is a * special-case to terminate the object. */ vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); } } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) { /* * Inline copy of vrele() since VOP_INACTIVE * has already been called. */ simple_lock(&vp->v_interlock); if (--vp->v_usecount <= 0) { #ifdef DIAGNOSTIC if (vp->v_usecount < 0 || vp->v_writecount != 0) { vprint("vclean: bad ref count", vp); panic("vclean: ref cnt"); } #endif vfree(vp); } simple_unlock(&vp->v_interlock); } cache_purge(vp); if (vp->v_vnlock) { FREE(vp->v_vnlock, M_VNODE); vp->v_vnlock = NULL; } if (VSHOULDFREE(vp)) vfree(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vn_pollgone(vp); vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ int vop_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp, *vq; dev_t dev; KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); vp = ap->a_vp; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } dev = vp->v_rdev; for (;;) { simple_lock(&spechash_slock); vq = SLIST_FIRST(&dev->si_hlist); simple_unlock(&spechash_slock); if (!vq) break; vgone(vq); } return (0); } /* * Recycle an unused vnode to the front of the free list. * Release the passed interlock if the vnode will be recycled. */ int vrecycle(vp, inter_lkp, p) struct vnode *vp; struct simplelock *inter_lkp; struct proc *p; { simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { simple_unlock(inter_lkp); } vgonel(vp, p); return (1); } simple_unlock(&vp->v_interlock); return (0); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { struct proc *p = curproc; /* XXX */ simple_lock(&vp->v_interlock); vgonel(vp, p); } /* * vgone, with the vp interlock held. */ void vgonel(vp, p) struct vnode *vp; struct proc *p; { int s; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE, p); simple_lock(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) { simple_lock(&spechash_slock); SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext); freedev(vp->v_rdev); simple_unlock(&spechash_slock); vp->v_rdev = NULL; } /* * If it is on the freelist and not already at the head, * move it to the head of the list. The test of the * VDOOMED flag and the reference count of zero is because * it will be removed from the free list by getnewvnode, * but will not have its reference count incremented until * after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to * close the previous instance of the underlying object. */ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VFREE) TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); splx(s); } vp->v_type = VBAD; simple_unlock(&vp->v_interlock); } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { struct vnode *vp; simple_lock(&spechash_slock); SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) { if (type == vp->v_type) { *vpp = vp; simple_unlock(&spechash_slock); return (1); } } simple_unlock(&spechash_slock); return (0); } /* * Calculate the total number of references to a special device. */ int vcount(vp) struct vnode *vp; { struct vnode *vq; int count; count = 0; simple_lock(&spechash_slock); SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext) count += vq->v_usecount; simple_unlock(&spechash_slock); return (count); } /* * Same as above, but using the dev_t as argument */ int count_dev(dev) dev_t dev; { struct vnode *vp; vp = SLIST_FIRST(&dev->si_hlist); if (vp == NULL) return (0); return(vcount(vp)); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; struct vnode *vp; { char buf[96]; if (label != NULL) printf("%s: %p: ", label, (void *)vp); else printf("%p: ", (void *)vp); printf("type %s, usecount %d, writecount %d, refcount %d,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VDOOMED) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); if (vp->v_flag & VOBJBUF) strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB #include /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *vp; printf("Locked vnodes\n"); simple_lock(&mountlist_slock); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { if (VOP_ISLOCKED(vp, NULL)) vprint((char *)0, vp); } simple_lock(&mountlist_slock); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); } #endif /* * Top level filesystem related information gathering. */ static int sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS)); static int -vfs_sysctl (SYSCTL_HANDLER_ARGS) +vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif #ifdef notyet /* all sysctl names at this level are at least name and field */ if (namelen < 2) return (ENOTDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) return (EOPNOTSUPP); return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, p)); } #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); } return (EOPNOTSUPP); } SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int -sysctl_ovfs_conf (SYSCTL_HANDLER_ARGS) +sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* 1 || COMPAT_PRELITE2 */ #if 0 #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int -sysctl_vnode (SYSCTL_HANDLER_ARGS) +sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *nvp, *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); simple_lock(&mountlist_slock); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } again: simple_lock(&mntvnode_slock); for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); goto again; } nvp = LIST_NEXT(vp, v_mntvnodes); simple_unlock(&mntvnode_slock); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); simple_lock(&mountlist_slock); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); return (0); } #endif /* * XXX * Exporting the vnode list on large systems causes them to crash. * Exporting the vnode list on medium systems causes sysctl to coredump. */ #if 0 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); #endif /* * Check to see if a filesystem is mounted on a block device. */ int vfs_mountedon(vp) struct vnode *vp; { if (vp->v_specmountpoint != NULL) return (EBUSY); return (0); } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall() { struct mount *mp; struct proc *p; int error; if (curproc != NULL) p = curproc; else p = initproc; /* XXX XXX should this be proc0? */ /* * Since this only runs when rebooting, it is not interlocked. */ while(!TAILQ_EMPTY(&mountlist)) { mp = TAILQ_LAST(&mountlist, mntlist); error = dounmount(mp, MNT_FORCE, p); if (error) { TAILQ_REMOVE(&mountlist, mp, mnt_list); printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } else { /* The unmount has removed mp from the mountlist */ } } } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ static int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. */ static void vfs_free_addrlist(nep) struct netexport *nep; { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); mp->mnt_flag &= ~MNT_EXPUBLIC; } vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) return (error); mp->mnt_flag |= MNT_EXPUBLIC; } if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. */ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { FREE(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, &rvp))) return (error); if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { FREE(nfs_pub.np_index, M_TEMP); return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct sockaddr *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; struct vm_object *obj; int anyio, tries; tries = 5; loop: anyio = 0; for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) { nvp = LIST_NEXT(vp, v_mntvnodes); if (vp->v_mount != mp) { goto loop; } if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ continue; if (flags != MNT_WAIT) { obj = vp->v_object; if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) continue; if (VOP_ISLOCKED(vp, NULL)) continue; } simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); anyio = 1; } vput(vp); } } else { simple_unlock(&vp->v_interlock); } } if (anyio && (--tries > 0)) goto loop; } /* * Create the VM object needed for VMIO and mmap support. This * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. * * vp must be locked when vfs_object_create is called. */ int vfs_object_create(vp, p, cred) struct vnode *vp; struct proc *p; struct ucred *cred; { struct vattr vat; vm_object_t object; int error = 0; if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE) return 0; retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG || vp->v_type == VDIR) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; object = vnode_pager_alloc(vp, vat.va_size, 0, 0); } else if (devsw(vp->v_rdev) != NULL) { /* * This simply allocates the biggest object possible * for a disk vnode. This should be fixed, but doesn't * cause any problems (yet). */ object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); } else { goto retn; } /* * Dereference the reference we just created. This assumes * that the object is associated with the vp. */ object->ref_count--; vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } } KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); vp->v_flag |= VOBJBUF; retn: return error; } void vfree(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free")); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); } void vbusy(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free")); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~(VFREE|VAGE); splx(s); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(vp, p, events) struct vnode *vp; struct proc *p; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * which presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; simple_unlock(&vp->v_pollinfo.vpi_lock); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(p, &vp->v_pollinfo.vpi_selinfo); simple_unlock(&vp->v_pollinfo.vpi_lock); return 0; } /* * Note the occurrence of an event. If the VN_POLLEVENT macro is used, * it is possible for us to miss an event due to race conditions, but * that condition is expected to be rare, so for the moment it is the * preferred interface. */ void vn_pollevent(vp, events) struct vnode *vp; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't * call selwakeup() twice if two events are * posted before the polling process(es) is * awakened. This also ensures that we take at * most one selwakeup() if the polling process * is no longer interested. However, it does * mean that only one event can be noticed at * a time. (Perhaps we should only clear those * event bits which we note?) XXX */ vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Wake up anyone polling on vp because it is being revoked. * This depends on dead_poll() returning POLLHUP for correct * behavior. */ void vn_pollgone(vp) struct vnode *vp; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) static int sync_fsync __P((struct vop_fsync_args *)); static int sync_inactive __P((struct vop_inactive_args *)); static int sync_reclaim __P((struct vop_reclaim_args *)); #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) static int sync_print __P((struct vop_print_args *)); #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) static vop_t **sync_vnodeop_p; static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_eopnotsupp }, { &vop_close_desc, (vop_t *) sync_close }, /* close */ { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ { &vop_print_desc, (vop_t *) sync_print }, /* print */ { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ { NULL, NULL } }; static struct vnodeopv_desc sync_vnodeop_opv_desc = { &sync_vnodeop_p, sync_vnodeop_entries }; VNODEOP_SET(sync_vnodeop_opv_desc); /* * Create a new filesystem syncer vnode for the specified mount point. */ int vfs_allocate_syncvnode(mp) struct mount *mp; { struct vnode *vp; static long start, incr, next; int error; /* Allocate a new vnode */ if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { mp->mnt_syncer = NULL; return (error); } vp->v_type = VNON; /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); mp->mnt_syncer = vp; return (0); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; struct proc *p = ap->a_p; int asyncflag; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ vn_syncer_add_to_worklist(syncvp, syncdelay); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ simple_lock(&mountlist_slock); if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { simple_unlock(&mountlist_slock); return (0); } asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; vfs_unbusy(mp, p); return (0); } /* * The syncer vnode is no referenced. */ static int sync_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected at splbio(). */ static int sync_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; int s; s = splbio(); vp->v_mount->mnt_syncer = NULL; if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); vp->v_flag &= ~VONWORKLST; } splx(s); return (0); } /* * Print out a syncer vnode. */ static int sync_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; printf("syncer vnode"); if (vp->v_vnlock != NULL) lockmgr_printinfo(vp->v_vnlock); printf("\n"); return (0); } /* * extract the dev_t from a VBLK or VCHR */ dev_t vn_todev(vp) struct vnode *vp; { if (vp->v_type != VBLK && vp->v_type != VCHR) return (NODEV); return (vp->v_rdev); } /* * Check if vnode represents a disk device */ int vn_isdisk(vp, errp) struct vnode *vp; int *errp; { if (vp->v_type != VBLK && vp->v_type != VCHR) { if (errp != NULL) *errp = ENOTBLK; return (0); } if (vp->v_rdev == NULL) { if (errp != NULL) *errp = ENXIO; return (0); } if (!devsw(vp->v_rdev)) { if (errp != NULL) *errp = ENXIO; return (0); } if (!(devsw(vp->v_rdev)->d_flags & D_DISK)) { if (errp != NULL) *errp = ENOTBLK; return (0); } if (errp != NULL) *errp = 0; return (1); } void NDFREE(ndp, flags) struct nameidata *ndp; const uint flags; { if (!(flags & NDF_NO_FREE_PNBUF) && (ndp->ni_cnd.cn_flags & HASBUF)) { zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } if (!(flags & NDF_NO_DVP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKPARENT) && ndp->ni_dvp != ndp->ni_vp) VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc); if (!(flags & NDF_NO_DVP_RELE) && (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; } if (!(flags & NDF_NO_VP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc); if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { vrele(ndp->ni_vp); ndp->ni_vp = NULL; } if (!(flags & NDF_NO_STARTDIR_RELE) && (ndp->ni_cnd.cn_flags & SAVESTART)) { vrele(ndp->ni_startdir); ndp->ni_startdir = NULL; } } Index: head/sys/kern/vfs_subr.c =================================================================== --- head/sys/kern/vfs_subr.c (revision 62572) +++ head/sys/kern/vfs_subr.c (revision 62573) @@ -1,2931 +1,2931 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 * $FreeBSD$ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include "opt_ffs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); static void insmntque __P((struct vnode *vp, struct mount *mp)); static void vclean __P((struct vnode *vp, int flags, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ static u_long wantfreevnodes = 25; SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); static int reassignbufloops; SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, ""); static int reassignbufsortgood; SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, ""); static int reassignbufsortbad; SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, ""); static int reassignbufmethod = 1; SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, ""); #ifdef ENABLE_VFS_IOOPT int vfs_ioopt = 0; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); #endif struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */ struct simplelock mountlist_slock; struct simplelock mntvnode_slock; int nfs_mount_type = -1; #ifndef NULL_SIMPLELOCKS static struct simplelock mntid_slock; static struct simplelock vnode_free_list_slock; static struct simplelock spechash_slock; #endif struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; int prtactive = 0; /* 1 => print out reclaim of active vnodes */ /* * The workitem queue. */ #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ time_t syncdelay = 30; /* max time to delay syncing data */ time_t filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); time_t dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); time_t metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); static int syncer_delayno = 0; static long syncer_mask; LIST_HEAD(synclist, vnode); static struct synclist *syncer_workitem_pending; int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "Maximum number of vnodes"); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + cnt.v_page_count / 4; simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); simple_lock_init(&vnode_free_list_slock); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; } /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. */ int vfs_busy(mp, flags, interlkp, p) struct mount *mp; int flags; struct simplelock *interlkp; struct proc *p; { int lkflags; if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { simple_unlock(interlkp); } /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { simple_lock(interlkp); } return (ENOENT); } lkflags = LK_SHARED | LK_NOPAUSE; if (interlkp) lkflags |= LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(mp, p) struct mount *mp; struct proc *p; { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. */ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct proc *p = curproc; /* XXX */ struct vfsconf *vfsp; struct mount *mp; if (fstypename == NULL) return (ENODEV); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) return (ENODEV); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); mp->mnt_vfc = vfsp; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_flag = MNT_RDONLY; mp->mnt_vnodecovered = NULLVP; vfsp->vfc_refcount++; mp->mnt_iosize_max = DFLTPHYS; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_stat.f_mntonname[0] = '/'; mp->mnt_stat.f_mntonname[1] = 0; (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); *mpp = mp; return (0); } /* * Find an appropriate filesystem to use for the root. If a filesystem * has not been preselected, walk through the list of known filesystems * trying those that have mountroot routines, and try them until one * works or we have tried them all. */ #ifdef notdef /* XXX JH */ int lite2_vfs_mountroot() { struct vfsconf *vfsp; extern int (*lite2_mountroot) __P((void)); int error; if (lite2_mountroot != NULL) return ((*lite2_mountroot)()); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { if (vfsp->vfc_mountroot == NULL) continue; if ((error = (*vfsp->vfc_mountroot)()) == 0) return (0); printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } return (ENODEV); } #endif /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; simple_lock(&mountlist_slock); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); return (mp); } } simple_unlock(&mountlist_slock); return ((struct mount *) 0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(mp) struct mount *mp; { static u_int16_t mntid_base; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 16; for (;;) { tfsid.val[0] = makeudev(255, mtype | mntid_base++); if (vfs_getvfs(&tfsid) == NULL) break; } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; simple_unlock(&mntid_slock); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_SEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, ""); /* * Get a current timestamp. */ void vfs_timestamp(tsp) struct timespec *tsp; { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { int s, count; struct proc *p = curproc; /* XXX */ struct vnode *vp = NULL; vm_object_t object; /* * We take the least recently used vnode from the freelist * if we can get it and it has no cached pages, and no * namecache entries are relative to it. * Otherwise we allocate a new vnode */ s = splbio(); simple_lock(&vnode_free_list_slock); if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { /* * XXX: this is only here to be backwards compatible */ vp = NULL; } else for (count = 0; count < freevnodes; count++) { vp = TAILQ_FIRST(&vnode_free_list); if (vp == NULL || vp->v_usecount) panic("getnewvnode: free vnode isn't"); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* * Don't recycle if active in the namecache or * if it still has cached pages or we cannot get * its interlock. */ object = vp->v_object; if (LIST_FIRST(&vp->v_cache_src) != NULL || (object && (object->resident_page_count || object->ref_count)) || !simple_lock_try(&vp->v_interlock)) { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); vp = NULL; continue; } break; } if (vp) { vp->v_flag |= VDOOMED; freevnodes--; simple_unlock(&vnode_free_list_slock); cache_purge(vp); vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, p); } else { simple_unlock(&vp->v_interlock); } #ifdef INVARIANTS { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); numvnodes++; } TAILQ_INIT(&vp->v_cleanblkhd); TAILQ_INIT(&vp->v_dirtyblkhd); vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; splx(s); vfs_object_create(vp, p, p->p_ucred); return (0); } /* * Move a vnode from one mount queue to another. */ static void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { simple_unlock(&mntvnode_slock); return; } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); simple_unlock(&mntvnode_slock); } /* * Update outstanding I/O count and do wakeup if requested. */ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; if (flags & V_SAVE) { s = splbio(); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); if (error) { splx(s); return (error); } } if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { splx(s); if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) return (error); s = splbio(); if (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd)) panic("vinvalbuf: dirty bufs"); } splx(s); } s = splbio(); for (;;) { blist = TAILQ_FIRST(&vp->v_cleanblkhd); if (!blist) blist = TAILQ_FIRST(&vp->v_dirtyblkhd); if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, "vinvalbuf", slpflag, slptimeo); if (error == ENOLCK) break; splx(s); return (error); } /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. Note that vfs_bio_awrite expects * buffers to reside on a queue, while VOP_BWRITE and * brelse do not. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { if (bp->b_vp == vp) { if (bp->b_flags & B_CLUSTEROK) { BUF_UNLOCK(bp); vfs_bio_awrite(bp); } else { bremfree(bp); bp->b_flags |= B_ASYNC; BUF_WRITE(bp); } } else { bremfree(bp); (void) BUF_WRITE(bp); } break; } bremfree(bp); bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } simple_unlock(&vp->v_interlock); if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) panic("vinvalbuf: flush failed"); return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(vp, cred, p, length, blksize) register struct vnode *vp; struct ucred *cred; struct proc *p; off_t length; int blksize; { register struct buf *bp; struct buf *nbp; int s, anyfreed; int trunclbn; /* * Round up to the *next* lbn. */ trunclbn = (length + blksize - 1) / blksize; s = splbio(); restart: anyfreed = 1; for (;anyfreed;) { anyfreed = 0; for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bp->b_lblkno >= trunclbn) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; } if (nbp && (((nbp->b_xflags & BX_VNCLEAN) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { goto restart; } } } for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bp->b_lblkno >= trunclbn) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; } if (nbp && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { goto restart; } } } } if (length > 0) { restartsync: for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); if (bp->b_vp == vp) { bp->b_flags |= B_ASYNC; } else { bp->b_flags &= ~B_ASYNC; } BUF_WRITE(bp); } goto restartsync; } } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); } splx(s); vnode_pager_setsize(vp, length); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); vhold(vp); bp->b_vp = vp; bp->b_dev = vn_todev(vp); /* * Insert onto list for new vnode. */ s = splbio(); bp->b_xflags |= BX_VNCLEAN; bp->b_xflags &= ~BX_VNDIRTY; TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; struct buflists *listheadp; int s; KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; s = splbio(); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) { if (bp->b_xflags & BX_VNDIRTY) listheadp = &vp->v_dirtyblkhd; else listheadp = &vp->v_cleanblkhd; TAILQ_REMOVE(listheadp, bp, b_vnbufs); bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vp->v_flag &= ~VONWORKLST; LIST_REMOVE(vp, v_synclist); } splx(s); bp->b_vp = (struct vnode *) 0; vdrop(vp); } /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct vnode *vp, int delay) { int s, slot; s = splbio(); if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); vp->v_flag |= VONWORKLST; splx(s); } struct proc *updateproc; static void sched_sync __P((void)); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) /* * System filesystem synchronizer daemon. */ void sched_sync(void) { struct synclist *slp; struct vnode *vp; long starttime; int s; struct proc *p = updateproc; EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p, SHUTDOWN_PRI_LAST); for (;;) { kproc_suspend_loop(p); starttime = time_second; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. */ s = splbio(); slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; splx(s); while ((vp = LIST_FIRST(slp)) != NULL) { if (VOP_ISLOCKED(vp, NULL) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); VOP_UNLOCK(vp, 0, p); } s = splbio(); if (LIST_FIRST(slp) == vp) { /* * Note: v_tag VT_VFS vps can remain on the * worklist too with no dirty blocks, but * since sync_fsync() moves it to a different * slot we are safe. */ if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && !vn_isdisk(vp, NULL)) panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(vp, syncdelay); } splx(s); } /* * Do soft update processing. */ #ifdef SOFTUPDATES softdep_process_worklist(NULL); #endif /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (time_second == starttime) tsleep(&lbolt, PPAUSE, "syncer", 0); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer() { int s; s = splhigh(); if (updateproc->p_wchan == &lbolt) setrunnable(updateproc); splx(s); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; return (1); } return(0); } /* * Associate a p-buffer with a vnode. * * Also sets B_PAGING flag to indicate that vnode is not fully associated * with the buffer. i.e. the bp has not been linked into the vnode or * ref-counted. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); bp->b_vp = vp; bp->b_flags |= B_PAGING; bp->b_dev = vn_todev(vp); } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); /* XXX REMOVE ME */ if (bp->b_vnbufs.tqe_next != NULL) { panic( "relpbuf(): b_vp was probably reassignbuf()d %p %x", bp, (int)bp->b_flags ); } bp->b_vp = (struct vnode *) 0; bp->b_flags &= ~B_PAGING; } void pbreassignbuf(bp, newvp) struct buf *bp; struct vnode *newvp; { if ((bp->b_flags & B_PAGING) == 0) { panic( "pbreassignbuf() on non phys bp %p", bp ); } bp->b_vp = newvp; } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { struct buflists *listheadp; int delay; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } ++reassignbufcalls; /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) { if (bp->b_xflags & BX_VNDIRTY) listheadp = &bp->b_vp->v_dirtyblkhd; else listheadp = &bp->b_vp->v_cleanblkhd; TAILQ_REMOVE(listheadp, bp, b_vnbufs); bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); if (bp->b_vp != newvp) { vdrop(bp->b_vp); bp->b_vp = NULL; /* for clarification */ } } /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; listheadp = &newvp->v_dirtyblkhd; if ((newvp->v_flag & VONWORKLST) == 0) { switch (newvp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: case VBLK: if (newvp->v_specmountpoint != NULL) { delay = metadelay; break; } /* fall through */ default: delay = filedelay; } vn_syncer_add_to_worklist(newvp, delay); } bp->b_xflags |= BX_VNDIRTY; tbp = TAILQ_FIRST(listheadp); if (tbp == NULL || bp->b_lblkno == 0 || (bp->b_lblkno > 0 && tbp->b_lblkno < 0) || (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) { TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); ++reassignbufsortgood; } else if (bp->b_lblkno < 0) { TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); ++reassignbufsortgood; } else if (reassignbufmethod == 1) { /* * New sorting algorithm, only handle sequential case, * otherwise append to end (but before metadata) */ if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL && (tbp->b_xflags & BX_VNDIRTY)) { /* * Found the best place to insert the buffer */ TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); ++reassignbufsortgood; } else { /* * Missed, append to end, but before meta-data. * We know that the head buffer in the list is * not meta-data due to prior conditionals. * * Indirect effects: NFS second stage write * tends to wind up here, giving maximum * distance between the unstable write and the * commit rpc. */ tbp = TAILQ_LAST(listheadp, buflists); while (tbp && tbp->b_lblkno < 0) tbp = TAILQ_PREV(tbp, buflists, b_vnbufs); TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); ++reassignbufsortbad; } } else { /* * Old sorting algorithm, scan queue and insert */ struct buf *ttbp; while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && (ttbp->b_lblkno < bp->b_lblkno)) { ++reassignbufloops; tbp = ttbp; } TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); } } else { bp->b_xflags |= BX_VNCLEAN; TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); if ((newvp->v_flag & VONWORKLST) && TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { newvp->v_flag &= ~VONWORKLST; LIST_REMOVE(newvp, v_synclist); } } if (bp->b_vp != newvp) { bp->b_vp = newvp; vhold(bp->b_vp); } splx(s); } /* * Create a vnode for a block device. * Used for mounting the root file system. * XXX: This now changed to a VCHR due to the block/char merging. */ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) { *vpp = NULLVP; return (ENXIO); } error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); if (error) { *vpp = NULLVP; return (error); } vp = nvp; vp->v_type = VCHR; addalias(vp, dev); *vpp = vp; return (0); } /* * Add vnode to the alias list hung off the dev_t. * * The reason for this gunk is that multiple vnodes can reference * the same physical device, so checking vp->v_usecount to see * how many users there are is inadequate; the v_usecount for * the vnodes need to be accumulated. vcount() does that. */ void addaliasu(nvp, nvp_rdev) struct vnode *nvp; udev_t nvp_rdev; { if (nvp->v_type != VBLK && nvp->v_type != VCHR) panic("addaliasu on non-special vnode"); addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0)); } void addalias(nvp, dev) struct vnode *nvp; dev_t dev; { if (nvp->v_type != VBLK && nvp->v_type != VCHR) panic("addalias on non-special vnode"); nvp->v_rdev = dev; simple_lock(&spechash_slock); SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext); simple_unlock(&spechash_slock); } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, flags, p) register struct vnode *vp; int flags; struct proc *p; { int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { simple_lock(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } vp->v_usecount++; if (VSHOULDBUSY(vp)) vbusy(vp); if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* * must expand vrele here because we do not want * to call VOP_INACTIVE if the reference count * drops back to zero since it was never really * active. We must remove it from the free list * before sleeping so that multiple processes do * not try to recycle it. */ simple_lock(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } return (error); } simple_unlock(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { simple_lock(&vp->v_interlock); vp->v_usecount++; simple_unlock(&vp->v_interlock); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vrele: null vp")); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; simple_unlock(&vp->v_interlock); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { VOP_INACTIVE(vp, p); } } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } } void vput(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vput: null vp")); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } else { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } } /* * Somebody doesn't want the vnode recycled. */ void vhold(vp) register struct vnode *vp; { int s; s = splbio(); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); splx(s); } /* * One less who cares about this vnode. */ void vdrop(vp) register struct vnode *vp; { int s; s = splbio(); if (vp->v_holdcnt <= 0) panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); splx(s); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { struct proc *p = curproc; /* XXX */ struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); loop: for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = LIST_NEXT(vp, v_mntvnodes); /* * Skip over a selected vnode. */ if (vp == skipvp) continue; simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { simple_unlock(&vp->v_interlock); continue; } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { simple_unlock(&vp->v_interlock); continue; } /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { simple_unlock(&mntvnode_slock); vgonel(vp, p); simple_lock(&mntvnode_slock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, p); } else { vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif simple_unlock(&vp->v_interlock); busy++; } simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ static void vclean(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int active; vm_object_t obj; /* * Check to see if the vnode is in use. If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK * ensures that the VOP_INACTIVE routine is done with its work. * For active vnodes, it ensures that no other activity can * occur while the underlying object is being cleaned out. */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ if (flags & DOCLOSE) { if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0) vinvalbuf(vp, 0, NOCRED, p, 0, 0); } if ((obj = vp->v_object) != NULL) { if (obj->ref_count == 0) { /* * vclean() may be called twice. The first time * removes the primary reference to the object, * the second time goes one further and is a * special-case to terminate the object. */ vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); } } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) { /* * Inline copy of vrele() since VOP_INACTIVE * has already been called. */ simple_lock(&vp->v_interlock); if (--vp->v_usecount <= 0) { #ifdef DIAGNOSTIC if (vp->v_usecount < 0 || vp->v_writecount != 0) { vprint("vclean: bad ref count", vp); panic("vclean: ref cnt"); } #endif vfree(vp); } simple_unlock(&vp->v_interlock); } cache_purge(vp); if (vp->v_vnlock) { FREE(vp->v_vnlock, M_VNODE); vp->v_vnlock = NULL; } if (VSHOULDFREE(vp)) vfree(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vn_pollgone(vp); vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ int vop_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp, *vq; dev_t dev; KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); vp = ap->a_vp; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } dev = vp->v_rdev; for (;;) { simple_lock(&spechash_slock); vq = SLIST_FIRST(&dev->si_hlist); simple_unlock(&spechash_slock); if (!vq) break; vgone(vq); } return (0); } /* * Recycle an unused vnode to the front of the free list. * Release the passed interlock if the vnode will be recycled. */ int vrecycle(vp, inter_lkp, p) struct vnode *vp; struct simplelock *inter_lkp; struct proc *p; { simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { simple_unlock(inter_lkp); } vgonel(vp, p); return (1); } simple_unlock(&vp->v_interlock); return (0); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { struct proc *p = curproc; /* XXX */ simple_lock(&vp->v_interlock); vgonel(vp, p); } /* * vgone, with the vp interlock held. */ void vgonel(vp, p) struct vnode *vp; struct proc *p; { int s; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE, p); simple_lock(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) { simple_lock(&spechash_slock); SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext); freedev(vp->v_rdev); simple_unlock(&spechash_slock); vp->v_rdev = NULL; } /* * If it is on the freelist and not already at the head, * move it to the head of the list. The test of the * VDOOMED flag and the reference count of zero is because * it will be removed from the free list by getnewvnode, * but will not have its reference count incremented until * after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to * close the previous instance of the underlying object. */ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VFREE) TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); splx(s); } vp->v_type = VBAD; simple_unlock(&vp->v_interlock); } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { struct vnode *vp; simple_lock(&spechash_slock); SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) { if (type == vp->v_type) { *vpp = vp; simple_unlock(&spechash_slock); return (1); } } simple_unlock(&spechash_slock); return (0); } /* * Calculate the total number of references to a special device. */ int vcount(vp) struct vnode *vp; { struct vnode *vq; int count; count = 0; simple_lock(&spechash_slock); SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext) count += vq->v_usecount; simple_unlock(&spechash_slock); return (count); } /* * Same as above, but using the dev_t as argument */ int count_dev(dev) dev_t dev; { struct vnode *vp; vp = SLIST_FIRST(&dev->si_hlist); if (vp == NULL) return (0); return(vcount(vp)); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; struct vnode *vp; { char buf[96]; if (label != NULL) printf("%s: %p: ", label, (void *)vp); else printf("%p: ", (void *)vp); printf("type %s, usecount %d, writecount %d, refcount %d,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VDOOMED) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); if (vp->v_flag & VOBJBUF) strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB #include /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *vp; printf("Locked vnodes\n"); simple_lock(&mountlist_slock); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { if (VOP_ISLOCKED(vp, NULL)) vprint((char *)0, vp); } simple_lock(&mountlist_slock); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); } #endif /* * Top level filesystem related information gathering. */ static int sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS)); static int -vfs_sysctl (SYSCTL_HANDLER_ARGS) +vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif #ifdef notyet /* all sysctl names at this level are at least name and field */ if (namelen < 2) return (ENOTDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) return (EOPNOTSUPP); return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, p)); } #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); } return (EOPNOTSUPP); } SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int -sysctl_ovfs_conf (SYSCTL_HANDLER_ARGS) +sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* 1 || COMPAT_PRELITE2 */ #if 0 #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int -sysctl_vnode (SYSCTL_HANDLER_ARGS) +sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *nvp, *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); simple_lock(&mountlist_slock); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } again: simple_lock(&mntvnode_slock); for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); goto again; } nvp = LIST_NEXT(vp, v_mntvnodes); simple_unlock(&mntvnode_slock); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); simple_lock(&mountlist_slock); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); return (0); } #endif /* * XXX * Exporting the vnode list on large systems causes them to crash. * Exporting the vnode list on medium systems causes sysctl to coredump. */ #if 0 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); #endif /* * Check to see if a filesystem is mounted on a block device. */ int vfs_mountedon(vp) struct vnode *vp; { if (vp->v_specmountpoint != NULL) return (EBUSY); return (0); } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall() { struct mount *mp; struct proc *p; int error; if (curproc != NULL) p = curproc; else p = initproc; /* XXX XXX should this be proc0? */ /* * Since this only runs when rebooting, it is not interlocked. */ while(!TAILQ_EMPTY(&mountlist)) { mp = TAILQ_LAST(&mountlist, mntlist); error = dounmount(mp, MNT_FORCE, p); if (error) { TAILQ_REMOVE(&mountlist, mp, mnt_list); printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } else { /* The unmount has removed mp from the mountlist */ } } } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ static int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. */ static void vfs_free_addrlist(nep) struct netexport *nep; { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); mp->mnt_flag &= ~MNT_EXPUBLIC; } vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) return (error); mp->mnt_flag |= MNT_EXPUBLIC; } if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. */ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { FREE(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, &rvp))) return (error); if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { FREE(nfs_pub.np_index, M_TEMP); return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct sockaddr *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; struct vm_object *obj; int anyio, tries; tries = 5; loop: anyio = 0; for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) { nvp = LIST_NEXT(vp, v_mntvnodes); if (vp->v_mount != mp) { goto loop; } if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ continue; if (flags != MNT_WAIT) { obj = vp->v_object; if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) continue; if (VOP_ISLOCKED(vp, NULL)) continue; } simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); anyio = 1; } vput(vp); } } else { simple_unlock(&vp->v_interlock); } } if (anyio && (--tries > 0)) goto loop; } /* * Create the VM object needed for VMIO and mmap support. This * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. * * vp must be locked when vfs_object_create is called. */ int vfs_object_create(vp, p, cred) struct vnode *vp; struct proc *p; struct ucred *cred; { struct vattr vat; vm_object_t object; int error = 0; if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE) return 0; retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG || vp->v_type == VDIR) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; object = vnode_pager_alloc(vp, vat.va_size, 0, 0); } else if (devsw(vp->v_rdev) != NULL) { /* * This simply allocates the biggest object possible * for a disk vnode. This should be fixed, but doesn't * cause any problems (yet). */ object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); } else { goto retn; } /* * Dereference the reference we just created. This assumes * that the object is associated with the vp. */ object->ref_count--; vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } } KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); vp->v_flag |= VOBJBUF; retn: return error; } void vfree(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free")); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); } void vbusy(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free")); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~(VFREE|VAGE); splx(s); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(vp, p, events) struct vnode *vp; struct proc *p; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * which presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; simple_unlock(&vp->v_pollinfo.vpi_lock); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(p, &vp->v_pollinfo.vpi_selinfo); simple_unlock(&vp->v_pollinfo.vpi_lock); return 0; } /* * Note the occurrence of an event. If the VN_POLLEVENT macro is used, * it is possible for us to miss an event due to race conditions, but * that condition is expected to be rare, so for the moment it is the * preferred interface. */ void vn_pollevent(vp, events) struct vnode *vp; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't * call selwakeup() twice if two events are * posted before the polling process(es) is * awakened. This also ensures that we take at * most one selwakeup() if the polling process * is no longer interested. However, it does * mean that only one event can be noticed at * a time. (Perhaps we should only clear those * event bits which we note?) XXX */ vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Wake up anyone polling on vp because it is being revoked. * This depends on dead_poll() returning POLLHUP for correct * behavior. */ void vn_pollgone(vp) struct vnode *vp; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) static int sync_fsync __P((struct vop_fsync_args *)); static int sync_inactive __P((struct vop_inactive_args *)); static int sync_reclaim __P((struct vop_reclaim_args *)); #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) static int sync_print __P((struct vop_print_args *)); #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) static vop_t **sync_vnodeop_p; static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_eopnotsupp }, { &vop_close_desc, (vop_t *) sync_close }, /* close */ { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ { &vop_print_desc, (vop_t *) sync_print }, /* print */ { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ { NULL, NULL } }; static struct vnodeopv_desc sync_vnodeop_opv_desc = { &sync_vnodeop_p, sync_vnodeop_entries }; VNODEOP_SET(sync_vnodeop_opv_desc); /* * Create a new filesystem syncer vnode for the specified mount point. */ int vfs_allocate_syncvnode(mp) struct mount *mp; { struct vnode *vp; static long start, incr, next; int error; /* Allocate a new vnode */ if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { mp->mnt_syncer = NULL; return (error); } vp->v_type = VNON; /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); mp->mnt_syncer = vp; return (0); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; struct proc *p = ap->a_p; int asyncflag; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ vn_syncer_add_to_worklist(syncvp, syncdelay); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ simple_lock(&mountlist_slock); if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { simple_unlock(&mountlist_slock); return (0); } asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; vfs_unbusy(mp, p); return (0); } /* * The syncer vnode is no referenced. */ static int sync_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected at splbio(). */ static int sync_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; int s; s = splbio(); vp->v_mount->mnt_syncer = NULL; if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); vp->v_flag &= ~VONWORKLST; } splx(s); return (0); } /* * Print out a syncer vnode. */ static int sync_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; printf("syncer vnode"); if (vp->v_vnlock != NULL) lockmgr_printinfo(vp->v_vnlock); printf("\n"); return (0); } /* * extract the dev_t from a VBLK or VCHR */ dev_t vn_todev(vp) struct vnode *vp; { if (vp->v_type != VBLK && vp->v_type != VCHR) return (NODEV); return (vp->v_rdev); } /* * Check if vnode represents a disk device */ int vn_isdisk(vp, errp) struct vnode *vp; int *errp; { if (vp->v_type != VBLK && vp->v_type != VCHR) { if (errp != NULL) *errp = ENOTBLK; return (0); } if (vp->v_rdev == NULL) { if (errp != NULL) *errp = ENXIO; return (0); } if (!devsw(vp->v_rdev)) { if (errp != NULL) *errp = ENXIO; return (0); } if (!(devsw(vp->v_rdev)->d_flags & D_DISK)) { if (errp != NULL) *errp = ENOTBLK; return (0); } if (errp != NULL) *errp = 0; return (1); } void NDFREE(ndp, flags) struct nameidata *ndp; const uint flags; { if (!(flags & NDF_NO_FREE_PNBUF) && (ndp->ni_cnd.cn_flags & HASBUF)) { zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } if (!(flags & NDF_NO_DVP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKPARENT) && ndp->ni_dvp != ndp->ni_vp) VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc); if (!(flags & NDF_NO_DVP_RELE) && (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; } if (!(flags & NDF_NO_VP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc); if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { vrele(ndp->ni_vp); ndp->ni_vp = NULL; } if (!(flags & NDF_NO_STARTDIR_RELE) && (ndp->ni_cnd.cn_flags & SAVESTART)) { vrele(ndp->ni_startdir); ndp->ni_startdir = NULL; } } Index: head/sys/net/bridge.c =================================================================== --- head/sys/net/bridge.c (revision 62572) +++ head/sys/net/bridge.c (revision 62573) @@ -1,829 +1,829 @@ /* * Copyright (c) 1998-2000 Luigi Rizzo * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This code implements bridging in FreeBSD. It only acts on ethernet * type of interfaces (others are still usable for routing). * A bridging table holds the source MAC address/dest. interface for each * known node. The table is indexed using an hash of the source address. * * Input packets are tapped near the beginning of ether_input(), and * analysed by calling bridge_in(). Depending on the result, the packet * can be forwarded to one or more output interfaces using bdg_forward(), * and/or sent to the upper layer (e.g. in case of multicast). * * Output packets are intercepted near the end of ether_output(), * the correct destination is selected calling bdg_dst_lookup(), * and then forwarding is done using bdg_forward(). * Bridging is controlled by the sysctl variable net.link.ether.bridge * * The arp code is also modified to let a machine answer to requests * irrespective of the port the request came from. * * In case of loops in the bridging topology, the bridge detects this * event and temporarily mutes output bridging on one of the ports. * Periodically, interfaces are unmuted by bdg_timeout(). * Muting is only implemented as a safety measure, and also as * a mechanism to support a user-space implementation of the spanning * tree algorithm. In the final release, unmuting will only occur * because of explicit action of the user-level daemon. * * To build a bridging kernel, use the following option * option BRIDGE * and then at runtime set the sysctl variable to enable bridging. * * Only one interface is supposed to have addresses set (but * there are no problems in practice if you set addresses for more * than one interface). * Bridging will act before routing, but nothing prevents a machine * from doing both (modulo bugs in the implementation...). * * THINGS TO REMEMBER * - bridging requires some (small) modifications to the interface * driver. Not all of them have been changed, see the "ed" and "de" * drivers as examples on how to operate. * - bridging is incompatible with multicast routing on the same * machine. There is not an easy fix to this. * - loop detection is still not very robust. * - the interface of bdg_forward() could be improved. */ #include #include #include #include #include /* for net/if.h */ #include #include #include #include #include /* for struct arpcom */ #include #include #include #include /* for struct arpcom */ #include "opt_ipfw.h" #include "opt_ipdn.h" #if defined(IPFIREWALL) #include #include #if defined(DUMMYNET) #include #endif #endif #include /* * For debugging, you can use the following macros. * remember, rdtsc() only works on Pentium-class machines quad_t ticks; DDB(ticks = rdtsc();) ... interesting code ... DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;) * */ #define DDB(x) x #define DEB(x) static void bdginit(void *); static void flush_table(void); static void bdg_promisc_on(void); static void parse_bdg_cfg(void); static int bdg_ipfw = 0 ; int do_bridge = 0; bdg_hash_table *bdg_table = NULL ; /* * System initialization */ SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL) /* * We need additional info for the bridge. The bdg_ifp2sc[] array * provides a pointer to this struct using the if_index. * bdg_softc has a backpointer to the struct ifnet, the bridge * flags, and a cluster (bridging occurs only between port of the * same cluster). */ struct bdg_softc { struct ifnet *ifp ; /* ((struct arpcom *)ifp)->ac_enaddr is the eth. addr */ int flags ; #define IFF_BDG_PROMISC 0x0001 /* set promisc mode on this if. */ #define IFF_MUTE 0x0002 /* mute this if for bridging. */ #define IFF_USED 0x0004 /* use this if for bridging. */ short cluster_id ; /* in network format */ u_long magic; } ; static struct bdg_stats bdg_stats ; static struct bdg_softc *ifp2sc = NULL ; /* XXX make it static of size BDG_MAX_PORTS */ #define USED(ifp) (ifp2sc[ifp->if_index].flags & IFF_USED) #define MUTED(ifp) (ifp2sc[ifp->if_index].flags & IFF_MUTE) #define MUTE(ifp) ifp2sc[ifp->if_index].flags |= IFF_MUTE #define UNMUTE(ifp) ifp2sc[ifp->if_index].flags &= ~IFF_MUTE #define CLUSTER(ifp) (ifp2sc[ifp->if_index].cluster_id) #define IFP_CHK(ifp, x) \ if (ifp2sc[ifp->if_index].magic != 0xDEADBEEF) { x ; } #define SAMECLUSTER(ifp,src) \ (src == NULL || CLUSTER(ifp) == CLUSTER(src) ) /* * turn off promisc mode, optionally clear the IFF_USED flag */ static void bdg_promisc_off(int clear_used) { struct ifnet *ifp ; for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next ) { if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { int s, ret ; s = splimp(); ret = ifpromisc(ifp, 0); splx(s); ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ; if (clear_used) ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ; printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", ifp->if_name, ifp->if_unit, ifp->if_flags, ifp2sc[ifp->if_index].flags); } } } /* * set promisc mode on the interfaces we use. */ static void bdg_promisc_on() { struct ifnet *ifp ; int s ; for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next ) { if ( !USED(ifp) ) continue ; if ( 0 == ( ifp->if_flags & IFF_UP) ) { s = splimp(); if_up(ifp); splx(s); } if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { int ret ; s = splimp(); ret = ifpromisc(ifp, 1); splx(s); ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ; printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", ifp->if_name, ifp->if_unit, ifp->if_flags, ifp2sc[ifp->if_index].flags); } if (MUTED(ifp)) { printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); UNMUTE(ifp) ; } } } static int -sysctl_bdg (SYSCTL_HANDLER_ARGS) +sysctl_bdg(SYSCTL_HANDLER_ARGS) { int error, oldval = do_bridge ; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n", oidp->oid_name, oidp->oid_arg2, oldval, do_bridge); ) if (bdg_table == NULL) do_bridge = 0 ; if (oldval != do_bridge) { flush_table(); if (do_bridge == 0) bdg_promisc_off( 0 ); /* leave IFF_USED set */ else bdg_promisc_on(); } return error ; } static char bridge_cfg[256] = { "" } ; /* * parse the config string, set IFF_USED, name and cluster_id * for all interfaces found. */ static void parse_bdg_cfg() { char *p, *beg ; int i, l, cluster; struct bdg_softc *b; for (p= bridge_cfg; *p ; p++) { /* interface names begin with [a-z] and continue up to ':' */ if (*p < 'a' || *p > 'z') continue ; for ( beg = p ; *p && *p != ':' ; p++ ) ; if (*p == 0) /* end of string, ':' not found */ return ; l = p - beg ; /* length of name string */ p++ ; DDB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);) for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++) cluster = cluster*10 + (*p -'0'); /* * now search in bridge strings */ for (i=0, b = ifp2sc ; i < if_index ; i++, b++) { char buf[32]; struct ifnet *ifp = b->ifp ; if (ifp == NULL) continue; sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit); if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */ b->cluster_id = htons(cluster) ; b->flags |= IFF_USED ; sprintf(bdg_stats.s[ifp->if_index].name+l, ":%d", cluster); DDB(printf("--++ found %s\n", bdg_stats.s[ifp->if_index].name);) break ; } } } } static int -sysctl_bdg_cfg (SYSCTL_HANDLER_ARGS) +sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS) { int error = 0 ; char oldval[256] ; strcpy(oldval, bridge_cfg) ; error = sysctl_handle_string(oidp, bridge_cfg, oidp->oid_arg2, req); DEB( printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n", oidp->oid_name, oidp->oid_arg2, error, oldval, bridge_cfg); ) if (strcmp(oldval, bridge_cfg)) { bdg_promisc_off( 1 ); /* reset previously-used interfaces */ flush_table(); parse_bdg_cfg(); /* and set new ones... */ if (do_bridge) bdg_promisc_on(); /* re-enable interfaces */ } return error ; } SYSCTL_DECL(_net_link_ether); SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW, &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A", "Bridge configuration"); SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW, &do_bridge, 0, &sysctl_bdg, "I", "Bridging"); SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW, &bdg_ipfw,0,"Pass bridged pkts through firewall"); int bdg_ipfw_drops; SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop, CTLFLAG_RW, &bdg_ipfw_drops,0,""); int bdg_ipfw_colls; SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions, CTLFLAG_RW, &bdg_ipfw_colls,0,""); #if 1 /* diagnostic vars */ int bdg_in_count = 0 , bdg_in_ticks = 0 , bdg_fw_count = 0, bdg_fw_ticks = 0 ; SYSCTL_INT(_net_link_ether, OID_AUTO, bdginc, CTLFLAG_RW, &bdg_in_count,0,""); SYSCTL_INT(_net_link_ether, OID_AUTO, bdgint, CTLFLAG_RW, &bdg_in_ticks,0,""); SYSCTL_INT(_net_link_ether, OID_AUTO, bdgfwc, CTLFLAG_RW, &bdg_fw_count,0,""); SYSCTL_INT(_net_link_ether, OID_AUTO, bdgfwt, CTLFLAG_RW, &bdg_fw_ticks,0,""); #endif SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats, CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics"); static int bdg_loops ; /* * completely flush the bridge table. */ static void flush_table() { int s,i; if (bdg_table == NULL) return ; s = splimp(); for (i=0; i< HASH_SIZE; i++) bdg_table[i].name= NULL; /* clear table */ splx(s); } /* * called periodically to flush entries etc. */ static void bdg_timeout(void *dummy) { static int slowtimer = 0 ; if (do_bridge) { static int age_index = 0 ; /* index of table position to age */ int l = age_index + HASH_SIZE/4 ; /* * age entries in the forwarding table. */ if (l > HASH_SIZE) l = HASH_SIZE ; for (; age_index < l ; age_index++) if (bdg_table[age_index].used) bdg_table[age_index].used = 0 ; else if (bdg_table[age_index].name) { /* printf("xx flushing stale entry %d\n", age_index); */ bdg_table[age_index].name = NULL ; } if (age_index >= HASH_SIZE) age_index = 0 ; if (--slowtimer <= 0 ) { slowtimer = 5 ; bdg_promisc_on() ; /* we just need unmute, really */ bdg_loops = 0 ; } } timeout(bdg_timeout, (void *)0, 2*hz ); } /* * local MAC addresses are held in a small array. This makes comparisons * much faster. */ unsigned char bdg_addresses[6*BDG_MAX_PORTS]; int bdg_ports ; /* * initialization of bridge code. This needs to be done after all * interfaces have been configured. */ static void bdginit(void *dummy) { int i ; struct ifnet *ifp; struct arpcom *ac ; u_char *eth_addr ; struct bdg_softc *bp; if (bdg_table == NULL) bdg_table = (struct hash_table *) malloc(HASH_SIZE * sizeof(struct hash_table), M_IFADDR, M_WAITOK); flush_table(); ifp2sc = malloc(BDG_MAX_PORTS * sizeof(struct bdg_softc), M_IFADDR, M_WAITOK ); bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) ); bzero(&bdg_stats, sizeof(bdg_stats) ); bdg_ports = 0 ; eth_addr = bdg_addresses ; printf("BRIDGE 990810, have %d interfaces\n", if_index); for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ; i++, ifp = ifp->if_link.tqe_next) if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ bp = &ifp2sc[ifp->if_index] ; ac = (struct arpcom *)ifp; sprintf(bridge_cfg + strlen(bridge_cfg), "%s%d:1,", ifp->if_name, ifp->if_unit); printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", ifp->if_index, bdg_stats.s[ifp->if_index].name, (int)ifp->if_type, (int) ifp->if_physical, (int)ifp->if_addrlen, ac->ac_enaddr, "." ); bcopy(ac->ac_enaddr, eth_addr, 6); eth_addr += 6 ; bp->ifp = ifp ; bp->flags = IFF_USED ; bp->cluster_id = htons(1) ; bp->magic = 0xDEADBEEF ; sprintf(bdg_stats.s[ifp->if_index].name, "%s%d:%d", ifp->if_name, ifp->if_unit, ntohs(bp->cluster_id)); bdg_ports ++ ; } bdg_timeout(0); do_bridge=0; } /* * bridge_in() is invoked to perform bridging decision on input packets. * * On Input: * eh Ethernet header of the incoming packet. * * On Return: destination of packet, one of * BDG_BCAST broadcast * BDG_MCAST multicast * BDG_LOCAL is only for a local address (do not forward) * BDG_DROP drop the packet * ifp ifp of the destination interface. * * Forwarding is not done directly to give a chance to some drivers * to fetch more of the packet, or simply drop it completely. */ struct ifnet * bridge_in(struct ifnet *ifp, struct ether_header *eh) { int index; struct ifnet *dst , *old ; int dropit = MUTED(ifp) ; /* * hash the source address */ index= HASH_FN(eh->ether_shost); bdg_table[index].used = 1 ; old = bdg_table[index].name ; if ( old ) { /* the entry is valid. */ IFP_CHK(old, printf("bridge_in-- reading table\n") ); if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) { bdg_ipfw_colls++ ; bdg_table[index].name = NULL ; } else if (old != ifp) { /* * found a loop. Either a machine has moved, or there * is a misconfiguration/reconfiguration of the network. * First, do not forward this packet! * Record the relocation anyways; then, if loops persist, * suspect a reconfiguration and disable forwarding * from the old interface. */ bdg_table[index].name = ifp ; /* relocate address */ printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n", bdg_loops, eh->ether_shost, ".", ifp->if_name, ifp->if_unit, old->if_name, old->if_unit, MUTED(old) ? "muted":"active"); dropit = 1 ; if ( !MUTED(old) ) { if (++bdg_loops > 10) MUTE(old) ; } } } /* * now write the source address into the table */ if (bdg_table[index].name == NULL) { DEB(printf("new addr %6D at %d for %s%d\n", eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);) bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6); bdg_table[index].name = ifp ; } dst = bridge_dst_lookup(eh); /* Return values: * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp. * For muted interfaces, the first 3 are changed in BDG_LOCAL, * and others to BDG_DROP. Also, for incoming packets, ifp is changed * to BDG_DROP in case ifp == src . These mods are not necessary * for outgoing packets from ether_output(). */ BDG_STAT(ifp, BDG_IN); switch ((int)dst) { case (int)BDG_BCAST: case (int)BDG_MCAST: case (int)BDG_LOCAL: case (int)BDG_UNKNOWN: case (int)BDG_DROP: BDG_STAT(ifp, dst); break ; default : if (dst == ifp || dropit ) BDG_STAT(ifp, BDG_DROP); else BDG_STAT(ifp, BDG_FORWARD); break ; } if ( dropit ) { if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL) return BDG_LOCAL ; else return BDG_DROP ; } else { return (dst == ifp ? BDG_DROP : dst ) ; } } /* * Forward to dst, excluding src port and muted interfaces. * The packet is freed if possible (i.e. surely not of interest for * the upper layer), otherwise a copy is left for use by the caller * (pointer in *m0). * * It would be more efficient to make bdg_forward() always consume * the packet, leaving to the caller the task to check if it needs a copy * and get one in case. As it is now, bdg_forward() can sometimes make * a copy whereas it is not necessary. * * INPUT: * *m0 -- ptr to pkt (not null at call time) * *dst -- destination (special value or ifnet *) * (*m0)->m_pkthdr.rcvif -- NULL only for output pkts. * OUTPUT: * *m0 -- pointer to the packet (NULL if still existent) */ int bdg_forward(struct mbuf **m0, struct ether_header *const eh, struct ifnet *dst) { struct ifnet *src = (*m0)->m_pkthdr.rcvif; /* could be NULL in output */ struct ifnet *ifp, *last = NULL ; int error=0, s ; int canfree = 0 ; /* can free the buf at the end if set */ int once = 0; /* loop only once */ struct mbuf *m ; if (dst == BDG_DROP) { /* this should not happen */ printf("xx bdg_forward for BDG_DROP\n"); m_freem(*m0) ; *m0 = NULL ; return 0; } if (dst == BDG_LOCAL) { /* this should not happen as well */ printf("xx ouch, bdg_forward for local pkt\n"); return 0; } if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) { ifp = ifnet.tqh_first ; /* scan all ports */ once = 0 ; if (dst != BDG_UNKNOWN) /* need a copy for the local stack */ canfree = 0 ; } else { ifp = dst ; once = 1 ; } if ( (u_int)(ifp) <= (u_int)BDG_FORWARD ) panic("bdg_forward: bad dst"); #ifdef IPFIREWALL /* * do filtering in a very similar way to what is done * in ip_output. Only for IP packets, and only pass/fail/dummynet * is supported. The tricky thing is to make sure that enough of * the packet (basically, Eth+IP+TCP/UDP headers) is contiguous * so that calls to m_pullup in ip_fw_chk will not kill the * ethernet header. */ if (ip_fw_chk_ptr) { struct ip_fw_chain *rule = NULL ; int off; struct ip *ip ; m = *m0 ; #ifdef DUMMYNET if (m->m_type == MT_DUMMYNET) { /* * the packet was already tagged, so part of the * processing was already done, and we need to go down. */ rule = (struct ip_fw_chain *)(m->m_data) ; (*m0) = m = m->m_next ; src = m->m_pkthdr.rcvif; /* could be NULL in output */ canfree = 1 ; /* for sure, a copy is not needed later. */ goto forward; /* HACK! I should obey the fw_one_pass */ } #endif if (bdg_ipfw == 0) /* this test must be here. */ goto forward ; if (src == NULL) goto forward ; /* do not apply to packets from ether_output */ if (ntohs(eh->ether_type) != ETHERTYPE_IP) goto forward ; /* not an IP packet, ipfw is not appropriate */ /* * In this section, canfree=1 means m is the same as *m0. * canfree==0 means m is a copy. We need to make a copy here * (to be destroyed on exit from the firewall section) because * the firewall itself might destroy the packet. * (This is not very smart... i should really change ipfw to * leave the pkt alive!) */ if (canfree == 0 ) { /* * Need to make a copy (and for good measure, make sure that * the header is contiguous). The original is still in *m0 */ int needed = min(MHLEN, max_protohdr) ; needed = min(needed, (*m0)->m_len ) ; m = m_copypacket( (*m0), M_DONTWAIT); if (m == NULL) { printf("-- bdg: m_copypacket failed.\n") ; return ENOBUFS ; } if (m->m_len < needed && (m = m_pullup(m, needed)) == NULL) { printf("-- bdg: pullup failed.\n") ; return ENOBUFS ; } } /* * before calling the firewall, swap fields the same as IP does. * here we assume the pkt is an IP one and the header is contiguous */ ip = mtod(m, struct ip *); NTOHS(ip->ip_len); NTOHS(ip->ip_id); NTOHS(ip->ip_off); /* * The third parameter to the firewall code is the dst. interface. * Since we apply checks only on input pkts we use NULL. */ off = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL, &m, &rule, NULL); if (m == NULL) { /* pkt discarded by firewall */ /* * At this point, if canfree==1, m and *m0 were the same * thing, so just clear ptr. Otherwise, leave it alone, the * upper layer might still make use of it somewhere. */ if (canfree) *m0 = NULL ; return 0 ; } /* * If we get here, the firewall has passed the pkt, but the * mbuf pointer might have changed. Restore the fields NTOHS()'d. * Then, if canfree==1, also restore *m0. */ HTONS(ip->ip_len); HTONS(ip->ip_id); HTONS(ip->ip_off); if (canfree) /* m was a reference to *m0, so update *m0 */ *m0 = m ; if (off == 0) { /* a PASS rule. */ if (canfree == 0) /* destroy the copy */ m_freem(m); goto forward ; } #ifdef DUMMYNET if (off & 0x10000) { /* * pass the pkt to dummynet. Need to include m, dst, rule. * Dummynet consumes the packet in all cases. */ dummynet_io((off & 0xffff), DN_TO_BDG_FWD, m, dst, NULL, 0, rule, 0); if (canfree) /* dummynet has consumed the original one */ *m0 = NULL ; return 0 ; } #endif /* * XXX add divert/forward actions... */ /* if none of the above matches, we have to drop the pkt */ bdg_ipfw_drops++ ; m_freem(m); if (canfree == 0) /* m was a copy */ m_freem(*m0); *m0 = NULL ; return 0 ; } forward: #endif /* IPFIREWALL */ /* * Now *m0 is the only pkt left. If canfree != 0 the pkt might be * used by the upper layers which could scramble header fields. * (basically ntoh*() etc.). To avoid problems, make sure that * all fields that might be changed by the local network stack are not * in a cluster by calling m_pullup on *m0. We lose some efficiency * but better than having packets corrupt! */ if (canfree == 0 ) { int needed = min(MHLEN, max_protohdr) ; needed = min(needed, (*m0)->m_len ) ; if ((*m0)->m_len < needed && (*m0 = m_pullup(*m0, needed)) == NULL) { printf("-- bdg: pullup failed.\n") ; return ENOBUFS ; } } for (;;) { if (last) { /* need to forward packet */ if (canfree && once ) { /* no need to copy */ m = *m0 ; *m0 = NULL ; /* original is gone */ } else /* on a P5-90, m_copypacket takes 540 ticks */ m = m_copypacket(*m0, M_DONTWAIT); if (m == NULL) { printf("bdg_forward: sorry, m_copy failed!\n"); return ENOBUFS ; /* the original is still there... */ } /* * Last part of ether_output: add header, queue pkt and start * output if interface not yet active. */ M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); if (m == NULL) return ENOBUFS; bcopy(eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); s = splimp(); if (IF_QFULL(&last->if_snd)) { IF_DROP(&last->if_snd); #if 0 MUTE(last); /* should I also mute ? */ #endif splx(s); m_freem(m); /* consume the pkt anyways */ error = ENOBUFS ; } else { last->if_obytes += m->m_pkthdr.len ; if (m->m_flags & M_MCAST) last->if_omcasts++; IF_ENQUEUE(&last->if_snd, m); if ((last->if_flags & IFF_OACTIVE) == 0) (*last->if_start)(last); splx(s); } BDG_STAT(last, BDG_OUT); last = NULL ; if (once) break ; } if (ifp == NULL) break ; if (ifp != src && /* do not send to self */ USED(ifp) && /* if used for bridging */ ! IF_QFULL(&ifp->if_snd) && (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) && SAMECLUSTER(ifp, src) && !MUTED(ifp) ) last = ifp ; ifp = ifp->if_link.tqe_next ; if (ifp == NULL) once = 1 ; } return error ; } Index: head/sys/net/if_mib.c =================================================================== --- head/sys/net/if_mib.c (revision 62572) +++ head/sys/net/if_mib.c (revision 62573) @@ -1,149 +1,149 @@ /* * Copyright 1996 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include /* * A sysctl(3) MIB for generic interface information. This information * is exported in the net.link.generic branch, which has the following * structure: * * net.link.generic .system - system-wide control variables * and statistics (node) * .ifdata..general * - what's in `struct ifdata' * plus some other info * .ifdata..linkspecific * - a link-type-specific data * structure (as might be used * by an SNMP agent * * Perhaps someday we will make addresses accessible via this interface * as well (then there will be four such...). The reason that the * index comes before the last element in the name is because it * seems more orthogonal that way, particularly with the possibility * of other per-interface data living down here as well (e.g., integrated * services stuff). */ SYSCTL_DECL(_net_link_generic); SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0, "Variables global to all interfaces"); SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD, &if_index, 0, "Number of configured interfaces"); static int -sysctl_ifdata (SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */ +sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */ { int *name = (int *)arg1; int error, ifnlen; u_int namelen = arg2; struct ifnet *ifp; char workbuf[64]; struct ifmibdata ifmd; if (namelen != 2) return EINVAL; if (name[0] <= 0 || name[0] > if_index) return ENOENT; ifp = ifnet_addrs[name[0] - 1]->ifa_ifp; switch(name[1]) { default: return ENOENT; case IFDATA_GENERAL: ifnlen = snprintf(workbuf, sizeof(workbuf), "%s%d", ifp->if_name, ifp->if_unit); if(ifnlen + 1 > sizeof ifmd.ifmd_name) { return ENAMETOOLONG; } else { strcpy(ifmd.ifmd_name, workbuf); } #define COPY(fld) ifmd.ifmd_##fld = ifp->if_##fld COPY(pcount); COPY(flags); COPY(data); #undef COPY ifmd.ifmd_snd_len = ifp->if_snd.ifq_len; ifmd.ifmd_snd_maxlen = ifp->if_snd.ifq_maxlen; ifmd.ifmd_snd_drops = ifp->if_snd.ifq_drops; error = SYSCTL_OUT(req, &ifmd, sizeof ifmd); if (error || !req->newptr) return error; error = SYSCTL_IN(req, &ifmd, sizeof ifmd); if (error) return error; #define DONTCOPY(fld) ifmd.ifmd_data.ifi_##fld = ifp->if_data.ifi_##fld DONTCOPY(type); DONTCOPY(physical); DONTCOPY(addrlen); DONTCOPY(hdrlen); DONTCOPY(mtu); DONTCOPY(metric); DONTCOPY(baudrate); #undef DONTCOPY #define COPY(fld) ifp->if_##fld = ifmd.ifmd_##fld COPY(data); ifp->if_snd.ifq_maxlen = ifmd.ifmd_snd_maxlen; ifp->if_snd.ifq_drops = ifmd.ifmd_snd_drops; #undef COPY break; case IFDATA_LINKSPECIFIC: error = SYSCTL_OUT(req, ifp->if_linkmib, ifp->if_linkmiblen); if (error || !req->newptr) return error; error = SYSCTL_IN(req, ifp->if_linkmib, ifp->if_linkmiblen); if (error) return error; } return 0; } SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RW, sysctl_ifdata, "Interface table"); Index: head/sys/net/rtsock.c =================================================================== --- head/sys/net/rtsock.c (revision 62572) +++ head/sys/net/rtsock.c (revision 62573) @@ -1,1003 +1,1003 @@ /* * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)rtsock.c 8.5 (Berkeley) 11/2/94 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); static struct sockaddr route_dst = { 2, PF_ROUTE, }; static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; static struct sockproto route_proto = { PF_ROUTE, }; struct walkarg { int w_tmemsize; int w_op, w_arg; caddr_t w_tmem; struct sysctl_req *w_req; }; static struct mbuf * rt_msg1 __P((int, struct rt_addrinfo *)); static int rt_msg2 __P((int, struct rt_addrinfo *, caddr_t, struct walkarg *)); static int rt_xaddrs __P((caddr_t, caddr_t, struct rt_addrinfo *)); static int sysctl_dumpentry __P((struct radix_node *rn, void *vw)); static int sysctl_iflist __P((int af, struct walkarg *w)); static int route_output __P((struct mbuf *, struct socket *)); static void rt_setmetrics __P((u_long, struct rt_metrics *, struct rt_metrics *)); /* Sleazy use of local variables throughout file, warning!!!! */ #define dst info.rti_info[RTAX_DST] #define gate info.rti_info[RTAX_GATEWAY] #define netmask info.rti_info[RTAX_NETMASK] #define genmask info.rti_info[RTAX_GENMASK] #define ifpaddr info.rti_info[RTAX_IFP] #define ifaaddr info.rti_info[RTAX_IFA] #define brdaddr info.rti_info[RTAX_BRD] /* * It really doesn't make any sense at all for this code to share much * with raw_usrreq.c, since its functionality is so restricted. XXX */ static int rts_abort(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_abort(so); splx(s); return error; } /* pru_accept is EOPNOTSUPP */ static int rts_attach(struct socket *so, int proto, struct proc *p) { struct rawcb *rp; int s, error; if (sotorawcb(so) != 0) return EISCONN; /* XXX panic? */ MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK); /* XXX */ if (rp == 0) return ENOBUFS; bzero(rp, sizeof *rp); /* * The splnet() is necessary to block protocols from sending * error notifications (like RTM_REDIRECT or RTM_LOSING) while * this PCB is extant but incompletely initialized. * Probably we should try to do more of this work beforehand and * eliminate the spl. */ s = splnet(); so->so_pcb = (caddr_t)rp; error = raw_usrreqs.pru_attach(so, proto, p); rp = sotorawcb(so); if (error) { splx(s); free(rp, M_PCB); return error; } switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count++; break; case AF_INET6: route_cb.ip6_count++; break; case AF_IPX: route_cb.ipx_count++; break; case AF_NS: route_cb.ns_count++; break; } rp->rcb_faddr = &route_src; route_cb.any_count++; soisconnected(so); so->so_options |= SO_USELOOPBACK; splx(s); return 0; } static int rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { int s, error; s = splnet(); error = raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */ splx(s); return error; } static int rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { int s, error; s = splnet(); error = raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */ splx(s); return error; } /* pru_connect2 is EOPNOTSUPP */ /* pru_control is EOPNOTSUPP */ static int rts_detach(struct socket *so) { struct rawcb *rp = sotorawcb(so); int s, error; s = splnet(); if (rp != 0) { switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count--; break; case AF_INET6: route_cb.ip6_count--; break; case AF_IPX: route_cb.ipx_count--; break; case AF_NS: route_cb.ns_count--; break; } route_cb.any_count--; } error = raw_usrreqs.pru_detach(so); splx(s); return error; } static int rts_disconnect(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_disconnect(so); splx(s); return error; } /* pru_listen is EOPNOTSUPP */ static int rts_peeraddr(struct socket *so, struct sockaddr **nam) { int s, error; s = splnet(); error = raw_usrreqs.pru_peeraddr(so, nam); splx(s); return error; } /* pru_rcvd is EOPNOTSUPP */ /* pru_rcvoob is EOPNOTSUPP */ static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p) { int s, error; s = splnet(); error = raw_usrreqs.pru_send(so, flags, m, nam, control, p); splx(s); return error; } /* pru_sense is null */ static int rts_shutdown(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_shutdown(so); splx(s); return error; } static int rts_sockaddr(struct socket *so, struct sockaddr **nam) { int s, error; s = splnet(); error = raw_usrreqs.pru_sockaddr(so, nam); splx(s); return error; } static struct pr_usrreqs route_usrreqs = { rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect, pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect, pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, rts_send, pru_sense_null, rts_shutdown, rts_sockaddr, sosend, soreceive, sopoll }; /*ARGSUSED*/ static int route_output(m, so) register struct mbuf *m; struct socket *so; { register struct rt_msghdr *rtm = 0; register struct rtentry *rt = 0; struct rtentry *saved_nrt = 0; struct radix_node_head *rnh; struct rt_addrinfo info; int len, error = 0; struct ifnet *ifp = 0; struct ifaddr *ifa = 0; #define senderr(e) { error = e; goto flush;} if (m == 0 || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == 0)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { dst = 0; senderr(EINVAL); } R_Malloc(rtm, struct rt_msghdr *, len); if (rtm == 0) { dst = 0; senderr(ENOBUFS); } m_copydata(m, 0, len, (caddr_t)rtm); if (rtm->rtm_version != RTM_VERSION) { dst = 0; senderr(EPROTONOSUPPORT); } rtm->rtm_pid = curproc->p_pid; info.rti_addrs = rtm->rtm_addrs; if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { dst = 0; senderr(EINVAL); } if (dst == 0 || (dst->sa_family >= AF_MAX) || (gate != 0 && (gate->sa_family >= AF_MAX))) senderr(EINVAL); if (genmask) { struct radix_node *t; t = rn_addmask((caddr_t)genmask, 0, 1); if (t && Bcmp((caddr_t *)genmask + 1, (caddr_t *)t->rn_key + 1, *(u_char *)t->rn_key - 1) == 0) genmask = (struct sockaddr *)(t->rn_key); else senderr(ENOBUFS); } switch (rtm->rtm_type) { case RTM_ADD: if (gate == 0) senderr(EINVAL); error = rtrequest(RTM_ADD, dst, gate, netmask, rtm->rtm_flags, &saved_nrt); if (error == 0 && saved_nrt) { rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &saved_nrt->rt_rmx); saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); saved_nrt->rt_rmx.rmx_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); saved_nrt->rt_refcnt--; saved_nrt->rt_genmask = genmask; } break; case RTM_DELETE: error = rtrequest(RTM_DELETE, dst, gate, netmask, rtm->rtm_flags, &saved_nrt); if (error == 0) { if ((rt = saved_nrt)) rt->rt_refcnt++; goto report; } break; case RTM_GET: case RTM_CHANGE: case RTM_LOCK: if ((rnh = rt_tables[dst->sa_family]) == 0) { senderr(EAFNOSUPPORT); } else if ((rt = (struct rtentry *) rnh->rnh_lookup(dst, netmask, rnh)) != NULL) rt->rt_refcnt++; else senderr(ESRCH); switch(rtm->rtm_type) { case RTM_GET: report: dst = rt_key(rt); gate = rt->rt_gateway; netmask = rt_mask(rt); genmask = rt->rt_genmask; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr; ifaaddr = rt->rt_ifa->ifa_addr; rtm->rtm_index = ifp->if_index; } else { ifpaddr = 0; ifaaddr = 0; } } len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0, (struct walkarg *)0); if (len > rtm->rtm_msglen) { struct rt_msghdr *new_rtm; R_Malloc(new_rtm, struct rt_msghdr *, len); if (new_rtm == 0) senderr(ENOBUFS); Bcopy(rtm, new_rtm, rtm->rtm_msglen); Free(rtm); rtm = new_rtm; } (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, (struct walkarg *)0); rtm->rtm_flags = rt->rt_flags; rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_addrs = info.rti_addrs; break; case RTM_CHANGE: if (gate && (error = rt_setgate(rt, rt_key(rt), gate))) senderr(error); /* * If they tried to change things but didn't specify * the required gateway, then just use the old one. * This can happen if the user tries to change the * flags on the default route without changing the * default gateway. Changing flags still doesn't work. */ if ((rt->rt_flags & RTF_GATEWAY) && !gate) gate = rt->rt_gateway; /* new gateway could require new ifaddr, ifp; flags may also be different; ifp may be specified by ll sockaddr when protocol address is ambiguous */ if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) && (ifp = ifa->ifa_ifp) && (ifaaddr || gate)) ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate, ifp); else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) || (gate && (ifa = ifa_ifwithroute(rt->rt_flags, rt_key(rt), gate)))) ifp = ifa->ifa_ifp; if (ifa) { register struct ifaddr *oifa = rt->rt_ifa; if (oifa != ifa) { if (oifa && oifa->ifa_rtrequest) oifa->ifa_rtrequest(RTM_DELETE, rt, gate); IFAFREE(rt->rt_ifa); rt->rt_ifa = ifa; ifa->ifa_refcnt++; rt->rt_ifp = ifp; } } rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate); if (genmask) rt->rt_genmask = genmask; /* * Fall into */ case RTM_LOCK: rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); rt->rt_rmx.rmx_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); break; } break; default: senderr(EOPNOTSUPP); } flush: if (rtm) { if (error) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; } if (rt) rtfree(rt); { register struct rawcb *rp = 0; /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { if (route_cb.any_count <= 1) { if (rtm) Free(rtm); m_freem(m); return (error); } /* There is another listener, so construct message */ rp = sotorawcb(so); } if (rtm) { m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); Free(rtm); } if (rp) rp->rcb_proto.sp_family = 0; /* Avoid us */ if (dst) route_proto.sp_protocol = dst->sa_family; raw_input(m, &route_proto, &route_src, &route_dst); if (rp) rp->rcb_proto.sp_family = PF_ROUTE; } return (error); } static void rt_setmetrics(which, in, out) u_long which; register struct rt_metrics *in, *out; { #define metric(f, e) if (which & (f)) out->e = in->e; metric(RTV_RPIPE, rmx_recvpipe); metric(RTV_SPIPE, rmx_sendpipe); metric(RTV_SSTHRESH, rmx_ssthresh); metric(RTV_RTT, rmx_rtt); metric(RTV_RTTVAR, rmx_rttvar); metric(RTV_HOPCOUNT, rmx_hopcount); metric(RTV_MTU, rmx_mtu); metric(RTV_EXPIRE, rmx_expire); #undef metric } #define ROUNDUP(a) \ ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. * This data is derived straight from userland. */ static int rt_xaddrs(cp, cplim, rtinfo) register caddr_t cp, cplim; register struct rt_addrinfo *rtinfo; { register struct sockaddr *sa; register int i; bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if ( (cp + sa->sa_len) > cplim ) { return (EINVAL); } /* * there are no more.. quit now * If there are more bits, they are in error. * I've seen this. route(1) can evidently generate these. * This causes kernel to core dump. * for compatibility, If we see this, point to a safe address. */ if (sa->sa_len == 0) { rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } /* accept it */ rtinfo->rti_info[i] = sa; ADVANCE(cp, sa); } return (0); } static struct mbuf * rt_msg1(type, rtinfo) int type; register struct rt_addrinfo *rtinfo; { register struct rt_msghdr *rtm; register struct mbuf *m; register int i; register struct sockaddr *sa; int len, dlen; m = m_gethdr(M_DONTWAIT, MT_DATA); if (m == 0) return (m); switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; default: len = sizeof(struct rt_msghdr); } if (len > MHLEN) panic("rt_msg1"); m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = 0; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = ROUNDUP(sa->sa_len); m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } if (m->m_pkthdr.len != len) { m_freem(m); return (NULL); } rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } static int rt_msg2(type, rtinfo, cp, w) int type; register struct rt_addrinfo *rtinfo; caddr_t cp; struct walkarg *w; { register int i; int len, dlen, second_time = 0; caddr_t cp0; rtinfo->rti_addrs = 0; again: switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; default: len = sizeof(struct rt_msghdr); } cp0 = cp; if (cp0) cp += len; for (i = 0; i < RTAX_MAX; i++) { register struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == 0) continue; rtinfo->rti_addrs |= (1 << i); dlen = ROUNDUP(sa->sa_len); if (cp) { bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; } len += dlen; } if (cp == 0 && w != NULL && !second_time) { register struct walkarg *rw = w; if (rw->w_req) { if (rw->w_tmemsize < len) { if (rw->w_tmem) free(rw->w_tmem, M_RTABLE); rw->w_tmem = (caddr_t) malloc(len, M_RTABLE, M_NOWAIT); if (rw->w_tmem) rw->w_tmemsize = len; } if (rw->w_tmem) { cp = rw->w_tmem; second_time = 1; goto again; } } } if (cp) { register struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } return (len); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occured, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg(type, rtinfo, flags, error) int type, flags, error; register struct rt_addrinfo *rtinfo; { register struct rt_msghdr *rtm; register struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (route_cb.any_count == 0) return; m = rt_msg1(type, rtinfo); if (m == 0) return; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; route_proto.sp_protocol = sa ? sa->sa_family : 0; raw_input(m, &route_proto, &route_src, &route_dst); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. */ void rt_ifmsg(ifp) register struct ifnet *ifp; { register struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rt_msg1(RTM_IFINFO, &info); if (m == 0) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = (u_short)ifp->if_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = 0; route_proto.sp_protocol = 0; raw_input(m, &route_proto, &route_src, &route_dst); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. * if we ever reverse the logic and replace messages TO the routing * socket indicate a request to configure interfaces, then it will * be unnecessary as the routing socket will automatically generate * copies of it. */ void rt_newaddrmsg(cmd, ifa, error, rt) int cmd, error; register struct ifaddr *ifa; register struct rtentry *rt; { struct rt_addrinfo info; struct sockaddr *sa = 0; int pass; struct mbuf *m = 0; struct ifnet *ifp = ifa->ifa_ifp; if (route_cb.any_count == 0) return; for (pass = 1; pass < 3; pass++) { bzero((caddr_t)&info, sizeof(info)); if ((cmd == RTM_ADD && pass == 1) || (cmd == RTM_DELETE && pass == 2)) { register struct ifa_msghdr *ifam; int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; ifaaddr = sa = ifa->ifa_addr; ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr; netmask = ifa->ifa_netmask; brdaddr = ifa->ifa_dstaddr; if ((m = rt_msg1(ncmd, &info)) == NULL) continue; ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; } if ((cmd == RTM_ADD && pass == 2) || (cmd == RTM_DELETE && pass == 1)) { register struct rt_msghdr *rtm; if (rt == 0) continue; netmask = rt_mask(rt); dst = sa = rt_key(rt); gate = rt->rt_gateway; if ((m = rt_msg1(cmd, &info)) == NULL) continue; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_index = ifp->if_index; rtm->rtm_flags |= rt->rt_flags; rtm->rtm_errno = error; rtm->rtm_addrs = info.rti_addrs; } route_proto.sp_protocol = sa ? sa->sa_family : 0; raw_input(m, &route_proto, &route_src, &route_dst); } } /* * This is the analogue to the rt_newaddrmsg which performs the same * function but for multicast group memberhips. This is easier since * there is no route state to worry about. */ void rt_newmaddrmsg(cmd, ifma) int cmd; struct ifmultiaddr *ifma; { struct rt_addrinfo info; struct mbuf *m = 0; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); ifaaddr = ifma->ifma_addr; ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr; /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). */ gate = ifma->ifma_lladdr; if ((m = rt_msg1(cmd, &info)) == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); ifmam->ifmam_index = ifp->if_index; ifmam->ifmam_addrs = info.rti_addrs; route_proto.sp_protocol = ifma->ifma_addr->sa_family; raw_input(m, &route_proto, &route_src, &route_dst); } /* * This is used in dumping the kernel table via sysctl(). */ int sysctl_dumpentry(rn, vw) struct radix_node *rn; void *vw; { register struct walkarg *w = vw; register struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; bzero((caddr_t)&info, sizeof(info)); dst = rt_key(rt); gate = rt->rt_gateway; netmask = rt_mask(rt); genmask = rt->rt_genmask; size = rt_msg2(RTM_GET, &info, 0, w); if (w->w_req && w->w_tmem) { register struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; rtm->rtm_flags = rt->rt_flags; rtm->rtm_use = rt->rt_use; rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); return (error); } return (error); } int sysctl_iflist(af, w) int af; register struct walkarg *w; { register struct ifnet *ifp; register struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; bzero((caddr_t)&info, sizeof(info)); for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = ifp->if_addrhead.tqh_first; ifpaddr = ifa->ifa_addr; len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w); ifpaddr = 0; if (w->w_req && w->w_tmem) { register struct if_msghdr *ifm; ifm = (struct if_msghdr *)w->w_tmem; ifm->ifm_index = ifp->if_index; ifm->ifm_flags = (u_short)ifp->if_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len); if (error) return (error); } while ((ifa = ifa->ifa_link.tqe_next) != 0) { if (af && af != ifa->ifa_addr->sa_family) continue; if (curproc->p_prison && prison_if(curproc, ifa->ifa_addr)) continue; ifaaddr = ifa->ifa_addr; netmask = ifa->ifa_netmask; brdaddr = ifa->ifa_dstaddr; len = rt_msg2(RTM_NEWADDR, &info, 0, w); if (w->w_req && w->w_tmem) { register struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) return (error); } } ifaaddr = netmask = brdaddr = 0; } return (0); } static int -sysctl_rtsock (SYSCTL_HANDLER_ARGS) +sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; register struct radix_node_head *rnh; int i, s, error = EINVAL; u_char af; struct walkarg w; name ++; namelen--; if (req->newptr) return (EPERM); if (namelen != 3) return (EINVAL); af = name[0]; Bzero(&w, sizeof(w)); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; s = splnet(); switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: for (i = 1; i <= AF_MAX; i++) if ((rnh = rt_tables[i]) && (af == 0 || af == i) && (error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w))) break; break; case NET_RT_IFLIST: error = sysctl_iflist(af, &w); } splx(s); if (w.w_tmem) free(w.w_tmem, M_RTABLE); return (error); } SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. */ extern struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR, 0, route_output, raw_ctlinput, 0, 0, raw_init, 0, 0, 0, &route_usrreqs } }; static struct domain routedomain = { PF_ROUTE, "route", 0, 0, 0, routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] }; DOMAIN_SET(route); Index: head/sys/netinet/in_pcb.c =================================================================== --- head/sys/netinet/in_pcb.c (revision 62572) +++ head/sys/netinet/in_pcb.c (revision 62573) @@ -1,1027 +1,1027 @@ /* * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif /* INET6 */ #include "faith.h" #ifdef IPSEC #include #include #include #endif /* IPSEC */ struct in_addr zeroin_addr; static void in_rtchange __P((struct inpcb *, int)); /* * These configure the range of local port addresses assigned to * "unspecified" outgoing connections/packets/whatever. */ int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } static int -sysctl_net_ipport_check (SYSCTL_HANDLER_ARGS) +sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) { RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); } return error; } #undef RANGECHK SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); /* * in_pcb.c: manage the Protocol Control Blocks. * * NOTE: It is assumed that most of these functions will be called at * splnet(). XXX - There are, unfortunately, a few exceptions to this * rule that should be fixed. */ /* * Allocate a PCB and associate it with the socket. */ int in_pcballoc(so, pcbinfo, p) struct socket *so; struct inpcbinfo *pcbinfo; struct proc *p; { register struct inpcb *inp; inp = zalloci(pcbinfo->ipi_zone); if (inp == NULL) return (ENOBUFS); bzero((caddr_t)inp, sizeof(*inp)); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; #if defined(INET6) if (ip6_mapped_addr_on) inp->inp_flags &= ~IN6P_BINDV6ONLY; else inp->inp_flags |= IN6P_BINDV6ONLY; #endif LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; return (0); } int in_pcbbind(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { register struct socket *so = inp->inp_socket; unsigned short *lastport; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error, prison = 0; if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. */ if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); #endif if (prison_ip(p, 0, &sin->sin_addr.s_addr)) return(EINVAL); lport = sin->sin_port; if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... */ if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) < IPPORT_RESERVED && p && suser_xxx(0, p, PRISON_ROOT)) return (EACCES); if (p && p->p_prison) prison = 1; if (so->so_cred->cr_uid != 0 && !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { t = in_pcblookup_local(inp->inp_pcbinfo, sin->sin_addr, lport, prison ? 0 : INPLOOKUP_WILDCARD); if (t && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) { #if defined(INET6) if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 || ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, prison ? 0 : wild); if (t && (reuseport & t->inp_socket->so_options) == 0) { #if defined(INET6) if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 || ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } inp->inp_laddr = sin->sin_addr; } if (lport == 0) { ushort first, last; int count; if (prison_ip(p, 0, &inp->inp_laddr.s_addr )) return (EINVAL); inp->inp_flags |= INP_ANONPORT; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { if (p && (error = suser_xxx(0, p, PRISON_ROOT))) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->inp_laddr.s_addr = INADDR_ANY; return (EAGAIN); } --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->inp_laddr.s_addr = INADDR_ANY; return (EAGAIN); } ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, lport, wild)); } } inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } return (0); } /* * Transform old in_pcbconnect() into an inner subroutine for new * in_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ int in_pcbladdr(inp, nam, plocal_sin) register struct inpcb *inp; struct sockaddr *nam; struct sockaddr_in **plocal_sin; { struct in_ifaddr *ia; register struct sockaddr_in *sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. * If the supplied address is INADDR_BROADCAST, * and the primary interface supports broadcast, * choose the broadcast address for that interface. */ #define satosin(sa) ((struct sockaddr_in *)(sa)) #define sintosa(sin) ((struct sockaddr *)(sin)) #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) if (sin->sin_addr.s_addr == INADDR_ANY) sin->sin_addr = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr; else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && (in_ifaddrhead.tqh_first->ia_ifp->if_flags & IFF_BROADCAST)) sin->sin_addr = satosin(&in_ifaddrhead.tqh_first->ia_broadaddr)->sin_addr; } if (inp->inp_laddr.s_addr == INADDR_ANY) { register struct route *ro; ia = (struct in_ifaddr *)0; /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ ro = &inp->inp_route; if (ro->ro_rt && (satosin(&ro->ro_dst)->sin_addr.s_addr != sin->sin_addr.s_addr || inp->inp_socket->so_options & SO_DONTROUTE)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ (ro->ro_rt == (struct rtentry *)0 || ro->ro_rt->rt_ifp == (struct ifnet *)0)) { /* No route yet, so try to acquire one */ ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sin->sin_addr; rtalloc(ro); } /* * If we found a route, use the address * corresponding to the outgoing interface * unless it is the loopback (in case a route * to our address on another net goes to loopback). */ if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) ia = ifatoia(ro->ro_rt->rt_ifa); if (ia == 0) { u_short fport = sin->sin_port; sin->sin_port = 0; ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); if (ia == 0) ia = ifatoia(ifa_ifwithnet(sintosa(sin))); sin->sin_port = fport; if (ia == 0) ia = in_ifaddrhead.tqh_first; if (ia == 0) return (EADDRNOTAVAIL); } /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, use the * address of that interface as our source address. */ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; imo = inp->inp_moptions; if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) if (ia->ia_ifp == ifp) break; if (ia == 0) return (EADDRNOTAVAIL); } } /* * Don't do pcblookup call here; return interface in plocal_sin * and exit to caller, that will do the lookup. */ *plocal_sin = &ia->ia_addr; } return(0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in_pcbconnect(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { struct sockaddr_in *ifaddr; register struct sockaddr_in *sin = (struct sockaddr_in *)nam; int error; /* * Call inner routine, to assign local interface address. */ if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0) return(error); if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (inp->inp_laddr.s_addr == INADDR_ANY) { if (inp->inp_lport == 0) { error = in_pcbbind(inp, (struct sockaddr *)0, p); if (error) return (error); } inp->inp_laddr = ifaddr->sin_addr; } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; in_pcbrehash(inp); return (0); } void in_pcbdisconnect(inp) struct inpcb *inp; { inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; in_pcbrehash(inp); if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } void in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; #ifdef IPSEC if (inp->inp_sp != NULL) ipsec4_delete_pcbpolicy(inp); #endif /*IPSEC*/ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); so->so_pcb = 0; sofree(so); if (inp->inp_options) (void)m_free(inp->inp_options); if (inp->inp_route.ro_rt) rtfree(inp->inp_route.ro_rt); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; zfreei(ipi->ipi_zone, inp); } /* * The calling convention of in_setsockaddr() and in_setpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly * without the need for a wrapper function. The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; register struct inpcb *inp; register struct sockaddr_in *sin; /* * Do the malloc first in case it blocks. */ MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); bzero(sin, sizeof *sin); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin, M_SONAME); return ECONNRESET; } sin->sin_port = inp->inp_lport; sin->sin_addr = inp->inp_laddr; splx(s); *nam = (struct sockaddr *)sin; return 0; } int in_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; struct inpcb *inp; register struct sockaddr_in *sin; /* * Do the malloc first in case it blocks. */ MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); bzero(sin, sizeof (*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin, M_SONAME); return ECONNRESET; } sin->sin_port = inp->inp_fport; sin->sin_addr = inp->inp_faddr; splx(s); *nam = (struct sockaddr *)sin; return 0; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. */ void in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify) struct inpcbhead *head; struct sockaddr *dst; u_int fport_arg, lport_arg; struct in_addr laddr; int cmd; void (*notify) __P((struct inpcb *, int)); { register struct inpcb *inp, *oinp; struct in_addr faddr; u_short fport = fport_arg, lport = lport_arg; int errno, s; if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET) return; faddr = ((struct sockaddr_in *)dst)->sin_addr; if (faddr.s_addr == INADDR_ANY) return; /* * Redirects go to all references to the destination, * and use in_rtchange to invalidate the route cache. * Dead host indications: notify all references to the destination. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; laddr.s_addr = 0; if (cmd != PRC_HOSTDEAD) notify = in_rtchange; } errno = inetctlerrmap[cmd]; s = splnet(); for (inp = head->lh_first; inp != NULL;) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) { inp = LIST_NEXT(inp, inp_list); continue; } #endif if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) || (fport && inp->inp_fport != fport)) { inp = inp->inp_list.le_next; continue; } oinp = inp; inp = inp->inp_list.le_next; if (notify) (*notify)(oinp, errno); } splx(s); } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in_losing(inp) struct inpcb *inp; { register struct rtentry *rt; struct rt_addrinfo info; if ((rt = inp->inp_route.ro_rt)) { inp->inp_route.ro_rt = 0; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&inp->inp_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) (void) rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); else /* * A new route can be allocated * the next time output is attempted. */ rtfree(rt); } } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ static void in_rtchange(inp, errno) register struct inpcb *inp; int errno; { if (inp->inp_route.ro_rt) { rtfree(inp->inp_route.ro_rt); inp->inp_route.ro_rt = 0; /* * A new route can be allocated the next time * output is attempted. */ } } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in_addr laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ for (inp = phd->phd_pcblist.lh_first; inp != NULL; inp = inp->inp_portlist.le_next) { wildcard = 0; #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; else if (inp->inp_laddr.s_addr != laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } /* * Lookup PCB in hash list. */ struct inpcb * in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in_addr faddr, laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; #if defined(INET6) struct inpcb *local_wild_mapped = NULL; #endif /* defined(INET6) */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_lport == lport) { #if defined(NFAITH) && NFAITH > 0 if (ifp && ifp->if_type == IFT_FAITH && (inp->inp_flags & INP_FAITH) == 0) continue; #endif if (inp->inp_laddr.s_addr == laddr.s_addr) return (inp); else if (inp->inp_laddr.s_addr == INADDR_ANY) { #if defined(INET6) if (INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) local_wild_mapped = inp; else #endif /* defined(INET6) */ local_wild = inp; } } } #if defined(INET6) if (local_wild == NULL) return (local_wild_mapped); #endif /* defined(INET6) */ return (local_wild); } /* * Not found. */ return (NULL); } /* * Insert PCB onto various hash lists. */ int in_pcbinshash(inp) struct inpcb *inp; { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; u_int32_t hashkey_faddr; #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; /* * Go through port list and look for a head for this lport. */ for (phd = pcbporthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { if (phd->phd_port == inp->inp_lport) break; } /* * If none exists, malloc one and tack it on. */ if (phd == NULL) { MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); return (0); } /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. */ void in_pcbrehash(inp) struct inpcb *inp; { struct inpcbhead *head; u_int32_t hashkey_faddr; #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); } /* * Remove PCB from various lists. */ void in_pcbremlists(inp) struct inpcb *inp; { inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; if (inp->inp_lport) { struct inpcbport *phd = inp->inp_phd; LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (phd->phd_pcblist.lh_first == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } } LIST_REMOVE(inp, inp_list); inp->inp_pcbinfo->ipi_count--; } int prison_xinpcb(struct proc *p, struct inpcb *inp) { if (!p->p_prison) return (0); if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip) return (0); return (1); } Index: head/sys/netinet/raw_ip.c =================================================================== --- head/sys/netinet/raw_ip.c (revision 62572) +++ head/sys/netinet/raw_ip.c (revision 62573) @@ -1,662 +1,662 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 * $FreeBSD$ */ #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #define _IP_VHL #include #include #include #include #include #include #include #include #ifdef IPSEC #include #endif /*IPSEC*/ #include "opt_ipdn.h" #ifdef DUMMYNET #include #endif struct inpcbhead ripcb; struct inpcbinfo ripcbinfo; /* * Nominal space allocated to a raw ip socket. */ #define RIPSNDQ 8192 #define RIPRCVQ 8192 /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ void rip_init() { LIST_INIT(&ripcb); ripcbinfo.listhead = &ripcb; /* * XXX We don't use the hash list for raw IP, but it's easier * to allocate a one entry hash list than it is to check all * over the place for hashbase == NULL. */ ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); ripcbinfo.ipi_zone = zinit("ripcb", sizeof(struct inpcb), maxsockets, ZONE_INTERRUPT, 0); } static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; /* * Setup generic address and protocol structures * for raw_input routine, then pass them along with * mbuf chain. */ void rip_input(m, off, proto) struct mbuf *m; int off, proto; { register struct ip *ip = mtod(m, struct ip *); register struct inpcb *inp; struct inpcb *last = 0; struct mbuf *opts = 0; ripsrc.sin_addr = ip->ip_src; LIST_FOREACH(inp, &ripcb, inp_list) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; if (inp->inp_laddr.s_addr && inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr && inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (last) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); if (n) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, n); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&ripsrc, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) m_freem(opts); } else sorwakeup(last->inp_socket); opts = 0; } } last = inp; } if (last) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, m); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&ripsrc, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); } else sorwakeup(last->inp_socket); } else { m_freem(m); ipstat.ips_noproto++; ipstat.ips_delivered--; } } /* * Generate IP header and pass packet to ip_output. * Tack on options user may have setup with control call. */ int rip_output(m, so, dst) struct mbuf *m; struct socket *so; u_long dst; { register struct ip *ip; register struct inpcb *inp = sotoinpcb(so); int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; /* * If the user handed us a complete IP packet, use it. * Otherwise, allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_WAIT); ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_off = 0; ip->ip_p = inp->inp_ip_p; ip->ip_len = m->m_pkthdr.len; ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; ip->ip_ttl = MAXTTL; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } ip = mtod(m, struct ip *); /* don't allow both user specified and setsockopt options, and don't allow packet length sizes that will crash */ if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2)) && inp->inp_options) || (ip->ip_len > m->m_pkthdr.len) || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) { m_freem(m); return EINVAL; } if (ip->ip_id == 0) ip->ip_id = htons(ip_id++); /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; ipstat.ips_rawout++; } #ifdef IPSEC m->m_pkthdr.rcvif = (struct ifnet *)so; /*XXX*/ #endif /*IPSEC*/ return (ip_output(m, inp->inp_options, &inp->inp_route, flags | IP_SOCKINMRCVIF, inp->inp_moptions)); } /* * Raw IP socket option processing. */ int rip_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { struct inpcb *inp = sotoinpcb(so); int error, optval; if (sopt->sopt_level != IPPROTO_IP) return (EINVAL); error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case IP_HDRINCL: optval = inp->inp_flags & INP_HDRINCL; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_FW_GET: if (ip_fw_ctl_ptr == 0) error = ENOPROTOOPT; else error = ip_fw_ctl_ptr(sopt); break; #ifdef DUMMYNET case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr == NULL) error = ENOPROTOOPT ; else error = ip_dn_ctl_ptr(sopt); break ; #endif /* DUMMYNET */ case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: error = ip_mrouter_get(so, sopt); break; default: error = ip_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case IP_HDRINCL: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; break; case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: if (ip_fw_ctl_ptr == 0) error = ENOPROTOOPT; else error = ip_fw_ctl_ptr(sopt); break; #ifdef DUMMYNET case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr == NULL) error = ENOPROTOOPT ; else error = ip_dn_ctl_ptr(sopt); break ; #endif case IP_RSVP_ON: error = ip_rsvp_init(so); break; case IP_RSVP_OFF: error = ip_rsvp_done(); break; /* XXX - should be combined */ case IP_RSVP_VIF_ON: error = ip_rsvp_vif_init(so, sopt); break; case IP_RSVP_VIF_OFF: error = ip_rsvp_vif_done(so, sopt); break; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: error = ip_mrouter_set(so, sopt); break; default: error = ip_ctloutput(so, sopt); break; } break; } return (error); } /* * This function exists solely to receive the PRC_IFDOWN messages which * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, * and calls in_ifadown() to remove all routes corresponding to that address. * It also receives the PRC_IFUP messages from if_up() and reinstalls the * interface routes. */ void rip_ctlinput(cmd, sa, vip) int cmd; struct sockaddr *sa; void *vip; { struct in_ifaddr *ia; struct ifnet *ifp; int err; int flags; switch (cmd) { case PRC_IFDOWN: for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { /* * in_ifscrub kills the interface route. */ in_ifscrub(ia->ia_ifp, ia); /* * in_ifadown gets rid of all the rest of * the routes. This is not quite the right * thing to do, but at least if we are running * a routing process they will come back. */ in_ifadown(&ia->ia_ifa); break; } } break; case PRC_IFUP: for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) return; flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; if ((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; break; } } u_long rip_sendspace = RIPSNDQ; u_long rip_recvspace = RIPRCVQ; SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace, 0, "Maximum incoming raw IP datagram size"); static int rip_attach(struct socket *so, int proto, struct proc *p) { struct inpcb *inp; int error, s; inp = sotoinpcb(so); if (inp) panic("rip_attach"); if (p && (error = suser(p)) != 0) return error; error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return error; s = splnet(); error = in_pcballoc(so, &ripcbinfo, p); splx(s); if (error) return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; #ifdef IPSEC error = ipsec_init_policy(so, &inp->inp_sp); if (error != 0) { in_pcbdetach(inp); return error; } #endif /*IPSEC*/ return 0; } static int rip_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); if (inp == 0) panic("rip_detach"); if (so == ip_mrouter) ip_mrouter_done(); ip_rsvp_force_done(so); if (so == ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); return 0; } static int rip_abort(struct socket *so) { soisdisconnected(so); return rip_detach(so); } static int rip_disconnect(struct socket *so) { if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; return rip_abort(so); } static int rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp = sotoinpcb(so); struct sockaddr_in *addr = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof(*addr)) return EINVAL; if (TAILQ_EMPTY(&ifnet) || ((addr->sin_family != AF_INET) && (addr->sin_family != AF_IMPLINK)) || (addr->sin_addr.s_addr && ifa_ifwithaddr((struct sockaddr *)addr) == 0)) return EADDRNOTAVAIL; inp->inp_laddr = addr->sin_addr; return 0; } static int rip_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp = sotoinpcb(so); struct sockaddr_in *addr = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof(*addr)) return EINVAL; if (TAILQ_EMPTY(&ifnet)) return EADDRNOTAVAIL; if ((addr->sin_family != AF_INET) && (addr->sin_family != AF_IMPLINK)) return EAFNOSUPPORT; inp->inp_faddr = addr->sin_addr; soisconnected(so); return 0; } static int rip_shutdown(struct socket *so) { socantsendmore(so); return 0; } static int rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p) { struct inpcb *inp = sotoinpcb(so); register u_long dst; if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return EISCONN; } dst = inp->inp_faddr.s_addr; } else { if (nam == NULL) { m_freem(m); return ENOTCONN; } dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; } return rip_output(m, so, dst); } static int -rip_pcblist (SYSCTL_HANDLER_ARGS) +rip_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n, s; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = ripcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ s = splnet(); gencnt = ripcbinfo.ipi_gencnt; n = ripcbinfo.ipi_count; splx(s); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; s = splnet(); for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n; inp = inp->inp_list.le_next) { if (inp->inp_gencnt <= gencnt) inp_list[i++] = inp; } splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); error = SYSCTL_OUT(req, &xi, sizeof xi); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ s = splnet(); xig.xig_gen = ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = ripcbinfo.ipi_count; splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); struct pr_usrreqs rip_usrreqs = { rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown, in_setsockaddr, sosend, soreceive, sopoll }; Index: head/sys/netinet/tcp_subr.c =================================================================== --- head/sys/netinet/tcp_subr.c (revision 62572) +++ head/sys/netinet/tcp_subr.c (revision 62573) @@ -1,1301 +1,1301 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #define _IP_VHL #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef TCPDEBUG #include #endif #include #ifdef IPSEC #include #endif /*IPSEC*/ #include int tcp_mssdflt = TCP_MSS; SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, &tcp_mssdflt , 0, "Default TCP Maximum Segment Size"); #ifdef INET6 int tcp_v6mssdflt = TCP6_MSS; SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, CTLFLAG_RW, &tcp_v6mssdflt , 0, "Default TCP Maximum Segment Size for IPv6"); #endif #if 0 static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW, &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time"); #endif static int tcp_do_rfc1323 = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); static int tcp_do_rfc1644 = 0; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions"); static int tcp_tcbhashsize = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD, &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); static int do_tcpdrain = 1; SYSCTL_INT(_debug, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable non Net3 compliant tcp_drain"); SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &tcbinfo.ipi_count, 0, "Number of active PCBs"); static void tcp_cleartaocache __P((void)); static void tcp_notify __P((struct inpcb *, int)); /* * Target size of TCP PCB hash tables. Must be a power of two. * * Note that this can be overridden by the kernel environment * variable net.inet.tcp.tcbhashsize */ #ifndef TCBHASHSIZE #define TCBHASHSIZE 512 #endif /* * This is the actual shape of what we allocate using the zone * allocator. Doing it this way allows us to protect both structures * using the same generation count, and also eliminates the overhead * of allocating tcpcbs separately. By hiding the structure here, * we avoid changing most of the rest of the code (although it needs * to be changed, eventually, for greater efficiency). */ #define ALIGNMENT 32 #define ALIGNM1 (ALIGNMENT - 1) struct inp_tp { union { struct inpcb inp; char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; } inp_tp_u; struct tcpcb tcb; struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl; struct callout inp_tp_delack; }; #undef ALIGNMENT #undef ALIGNM1 /* * Tcp initialization */ void tcp_init() { int hashsize; tcp_iss = random(); /* wrong, but better than a constant */ tcp_ccgen = 1; tcp_cleartaocache(); tcp_delacktime = TCPTV_DELACK; tcp_keepinit = TCPTV_KEEP_INIT; tcp_keepidle = TCPTV_KEEP_IDLE; tcp_keepintvl = TCPTV_KEEPINTVL; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; LIST_INIT(&tcb); tcbinfo.listhead = &tcb; TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", TCBHASHSIZE, hashsize); if (!powerof2(hashsize)) { printf("WARNING: TCB hash size not a power of 2\n"); hashsize = 512; /* safe default */ } tcp_tcbhashsize = hashsize; tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); tcbinfo.porthashbase = hashinit(hashsize, M_PCB, &tcbinfo.porthashmask); tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets, ZONE_INTERRUPT, 0); #ifdef INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) #endif /* INET6 */ if (max_protohdr < TCP_MINPROTOHDR) max_protohdr = TCP_MINPROTOHDR; if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR } /* * Create template to be used to send tcp packets on a connection. * Call after host entry created, allocates an mbuf and fills * in a skeletal tcp/ip header, minimizing the amount of work * necessary when the connection is used. */ struct tcptemp * tcp_template(tp) struct tcpcb *tp; { register struct inpcb *inp = tp->t_inpcb; register struct mbuf *m; register struct tcptemp *n; if ((n = tp->t_template) == 0) { m = m_get(M_DONTWAIT, MT_HEADER); if (m == NULL) return (0); m->m_len = sizeof (struct tcptemp); n = mtod(m, struct tcptemp *); } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { register struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)n->tt_ipgen; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = sizeof(struct tcphdr); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; n->tt_t.th_sum = 0; } else #endif { struct ip *ip = (struct ip *)n->tt_ipgen; bzero(ip, sizeof(struct ip)); /* XXX overkill? */ ip->ip_vhl = IP_VHL_BORING; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP)); } n->tt_t.th_sport = inp->inp_lport; n->tt_t.th_dport = inp->inp_fport; n->tt_t.th_seq = 0; n->tt_t.th_ack = 0; n->tt_t.th_x2 = 0; n->tt_t.th_off = 5; n->tt_t.th_flags = 0; n->tt_t.th_win = 0; n->tt_t.th_urp = 0; return (n); } /* * Send a single message to the TCP at address specified by * the given TCP/IP header. If m == 0, then we make a copy * of the tcpiphdr at ti and send directly to the addressed host. * This is used to force keep alive messages out using the TCP * template for a connection tp->t_template. If flags are given * then we send a message back to the TCP which originated the * segment ti, and discard the mbuf containing it and any other * attached mbufs. * * In any case the ack and sequence number of the transmitted * segment are as specified by the parameters. * * NOTE: If m != NULL, then ti must point to *inside* the mbuf. */ void tcp_respond(tp, ipgen, th, m, ack, seq, flags) struct tcpcb *tp; void *ipgen; register struct tcphdr *th; register struct mbuf *m; tcp_seq ack, seq; int flags; { register int tlen; int win = 0; struct route *ro = 0; struct route sro; struct ip *ip; struct tcphdr *nth; #ifdef INET6 struct route_in6 *ro6 = 0; struct route_in6 sro6; struct ip6_hdr *ip6; int isipv6; #endif /* INET6 */ int ipflags = 0; #ifdef INET6 isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6; ip6 = ipgen; #endif /* INET6 */ ip = ipgen; if (tp) { if (!(flags & TH_RST)) { win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); if (win > (long)TCP_MAXWIN << tp->rcv_scale) win = (long)TCP_MAXWIN << tp->rcv_scale; } #ifdef INET6 if (isipv6) ro6 = &tp->t_inpcb->in6p_route; else #endif /* INET6 */ ro = &tp->t_inpcb->inp_route; } else { #ifdef INET6 if (isipv6) { ro6 = &sro6; bzero(ro6, sizeof *ro6); } else #endif /* INET6 */ { ro = &sro; bzero(ro, sizeof *ro); } } if (m == 0) { m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) return; #ifdef TCP_COMPAT_42 tlen = 1; #else tlen = 0; #endif m->m_data += max_linkhdr; #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; } else { m_freem(m->m_next); m->m_next = 0; m->m_data = (caddr_t)ipgen; /* m_len is set later */ tlen = 0; #define xchg(a,b,type) { type t; t=a; a=b; b=t; } #ifdef INET6 if (isipv6) { xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long); nth = (struct tcphdr *)(ip + 1); } if (th != nth) { /* * this is usually a case when an extension header * exists between the IPv6 header and the * TCP header. */ nth->th_sport = th->th_sport; nth->th_dport = th->th_dport; } xchg(nth->th_dport, nth->th_sport, n_short); #undef xchg } #ifdef INET6 if (isipv6) { ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) + tlen)); tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); } else #endif { tlen += sizeof (struct tcpiphdr); ip->ip_len = tlen; ip->ip_ttl = ip_defttl; } m->m_len = tlen; m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = (struct ifnet *) 0; nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; nth->th_off = sizeof (struct tcphdr) >> 2; nth->th_flags = flags; if (tp) nth->th_win = htons((u_short) (win >> tp->rcv_scale)); else nth->th_win = htons((u_short)win); nth->th_urp = 0; #ifdef INET6 if (isipv6) { nth->th_sum = 0; nth->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen - sizeof(struct ip6_hdr)); ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, ro6 && ro6->ro_rt ? ro6->ro_rt->rt_ifp : NULL); } else #endif /* INET6 */ { nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); } #ifdef TCPDEBUG if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif #ifdef IPSEC if (tp != NULL) { m->m_pkthdr.rcvif = (struct ifnet *)tp->t_inpcb->inp_socket; ipflags |= #ifdef INET6 isipv6 ? IPV6_SOCKINMRCVIF : #endif IP_SOCKINMRCVIF; } #endif #ifdef INET6 if (isipv6) { (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL); if (ro6 == &sro6 && ro6->ro_rt) { RTFREE(ro6->ro_rt); ro6->ro_rt = NULL; } } else #endif /* INET6 */ { (void) ip_output(m, NULL, ro, ipflags, NULL); if (ro == &sro && ro->ro_rt) { RTFREE(ro->ro_rt); ro->ro_rt = NULL; } } } /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument * protocol control block. The `inp' parameter must have * come from the zone allocator set up in tcp_init(). */ struct tcpcb * tcp_newtcpcb(inp) struct inpcb *inp; { struct inp_tp *it; register struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ it = (struct inp_tp *)inp; tp = &it->tcb; bzero((char *) tp, sizeof(struct tcpcb)); LIST_INIT(&tp->t_segq); tp->t_maxseg = tp->t_maxopd = #ifdef INET6 isipv6 ? tcp_v6mssdflt : #endif /* INET6 */ tcp_mssdflt; /* Set up our timeouts. */ callout_init(tp->tt_rexmt = &it->inp_tp_rexmt); callout_init(tp->tt_persist = &it->inp_tp_persist); callout_init(tp->tt_keep = &it->inp_tp_keep); callout_init(tp->tt_2msl = &it->inp_tp_2msl); callout_init(tp->tt_delack = &it->inp_tp_delack); if (tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (tcp_do_rfc1644) tp->t_flags |= TF_REQ_CC; tp->t_inpcb = inp; /* XXX */ /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives * reasonable initial retransmit time. */ tp->t_srtt = TCPTV_SRTTBASE; tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; tp->t_rttmin = TCPTV_MIN; tp->t_rxtcur = TCPTV_RTOBASE; tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = ip_defttl; inp->inp_ppcb = (caddr_t)tp; return (tp); /* XXX */ } /* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, * then send a RST to peer. */ struct tcpcb * tcp_drop(tp, errno) register struct tcpcb *tp; int errno; { struct socket *so = tp->t_inpcb->inp_socket; if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_state = TCPS_CLOSED; (void) tcp_output(tp); tcpstat.tcps_drops++; } else tcpstat.tcps_conndrops++; if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; return (tcp_close(tp)); } /* * Close a TCP control block: * discard all space held by the tcp * discard internet protocol block * wake up any sleepers */ struct tcpcb * tcp_close(tp) register struct tcpcb *tp; { register struct tseg_qent *q; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ register struct rtentry *rt; int dosavessthresh; /* * Make sure that all of our timers are stopped before we * delete the PCB. */ callout_stop(tp->tt_rexmt); callout_stop(tp->tt_persist); callout_stop(tp->tt_keep); callout_stop(tp->tt_2msl); callout_stop(tp->tt_delack); /* * If we got enough samples through the srtt filter, * save the rtt and rttvar in the routing entry. * 'Enough' is arbitrarily defined as the 16 samples. * 16 samples is enough for the srtt filter to converge * to within 5% of the correct value; fewer samples and * we could save a very bogus rtt. * * Don't update the default route's characteristics and don't * update anything that the user "locked". */ if (tp->t_rttupdated >= 16) { register u_long i = 0; #ifdef INET6 if (isipv6) { struct sockaddr_in6 *sin6; if ((rt = inp->in6p_route.ro_rt) == NULL) goto no_valid_rt; sin6 = (struct sockaddr_in6 *)rt_key(rt); if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) goto no_valid_rt; } else #endif /* INET6 */ if ((rt = inp->inp_route.ro_rt) == NULL || ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr == INADDR_ANY) goto no_valid_rt; if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { i = tp->t_srtt * (RTM_RTTUNIT / (hz * TCP_RTT_SCALE)); if (rt->rt_rmx.rmx_rtt && i) /* * filter this update to half the old & half * the new values, converting scale. * See route.h and tcp_var.h for a * description of the scaling constants. */ rt->rt_rmx.rmx_rtt = (rt->rt_rmx.rmx_rtt + i) / 2; else rt->rt_rmx.rmx_rtt = i; tcpstat.tcps_cachedrtt++; } if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { i = tp->t_rttvar * (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE)); if (rt->rt_rmx.rmx_rttvar && i) rt->rt_rmx.rmx_rttvar = (rt->rt_rmx.rmx_rttvar + i) / 2; else rt->rt_rmx.rmx_rttvar = i; tcpstat.tcps_cachedrttvar++; } /* * The old comment here said: * update the pipelimit (ssthresh) if it has been updated * already or if a pipesize was specified & the threshhold * got below half the pipesize. I.e., wait for bad news * before we start updating, then update on both good * and bad news. * * But we want to save the ssthresh even if no pipesize is * specified explicitly in the route, because such * connections still have an implicit pipesize specified * by the global tcp_sendspace. In the absence of a reliable * way to calculate the pipesize, it will have to do. */ i = tp->snd_ssthresh; if (rt->rt_rmx.rmx_sendpipe != 0) dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2); else dosavessthresh = (i < so->so_snd.sb_hiwat / 2); if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && i != 0 && rt->rt_rmx.rmx_ssthresh != 0) || dosavessthresh) { /* * convert the limit from user data bytes to * packets then to packet data bytes. */ i = (i + tp->t_maxseg / 2) / tp->t_maxseg; if (i < 2) i = 2; i *= (u_long)(tp->t_maxseg + #ifdef INET6 (isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : #endif sizeof (struct tcpiphdr) #ifdef INET6 ) #endif ); if (rt->rt_rmx.rmx_ssthresh) rt->rt_rmx.rmx_ssthresh = (rt->rt_rmx.rmx_ssthresh + i) / 2; else rt->rt_rmx.rmx_ssthresh = i; tcpstat.tcps_cachedssthresh++; } } no_valid_rt: /* free the reassembly queue, if any */ while((q = LIST_FIRST(&tp->t_segq)) != NULL) { LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); FREE(q, M_TSEGQ); } if (tp->t_template) (void) m_free(dtom(tp->t_template)); inp->inp_ppcb = NULL; soisdisconnected(so); #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) in6_pcbdetach(inp); else #endif /* INET6 */ in_pcbdetach(inp); tcpstat.tcps_closed++; return ((struct tcpcb *)0); } void tcp_drain() { if (do_tcpdrain) { struct inpcb *inpb; struct tcpcb *tcpb; struct tseg_qent *te; /* * Walk the tcpbs, if existing, and flush the reassembly queue, * if there is one... * XXX: The "Net/3" implementation doesn't imply that the TCP * reassembly queue should be flushed, but in a situation * where we're really low on mbufs, this is potentially * usefull. */ for (inpb = tcbinfo.listhead->lh_first; inpb; inpb = inpb->inp_list.le_next) { if ((tcpb = intotcpcb(inpb))) { while ((te = LIST_FIRST(&tcpb->t_segq)) != NULL) { LIST_REMOVE(te, tqe_q); m_freem(te->tqe_m); FREE(te, M_TSEGQ); } } } } } /* * Notify a tcp user of an asynchronous error; * store error as soft error, but wake up user * (for now, won't do anything until can select for soft error). */ static void tcp_notify(inp, error) struct inpcb *inp; int error; { register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; register struct socket *so = inp->inp_socket; /* * Ignore some errors if we are hooked up. * If connection hasn't completed, has retransmitted several times, * and receives a second error, give up now. This is better * than waiting a long time to establish a connection that * can never complete. */ if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { return; } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) so->so_error = error; else tp->t_softerror = error; wakeup((caddr_t) &so->so_timeo); sorwakeup(so); sowwakeup(so); } static int -tcp_pcblist (SYSCTL_HANDLER_ARGS) +tcp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n, s; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = tcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xtcpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ s = splnet(); gencnt = tcbinfo.ipi_gencnt; n = tcbinfo.ipi_count; splx(s); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; s = splnet(); for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n; inp = inp->inp_list.le_next) { if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) inp_list[i++] = inp; } splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; caddr_t inp_ppcb; xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb != NULL) bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); else bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xt.xt_socket); error = SYSCTL_OUT(req, &xt, sizeof xt); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ s = splnet(); xig.xig_gen = tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = tcbinfo.ipi_count; splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); static int -tcp_getcred (SYSCTL_HANDLER_ARGS) +tcp_getcred(SYSCTL_HANDLER_ARGS) { struct sockaddr_in addrs[2]; struct inpcb *inp; int error, s; error = suser(req->p); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); s = splnet(); inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 0, NULL); if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); out: splx(s); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, tcp_getcred, "S,ucred", "Get the ucred of a TCP connection"); #ifdef INET6 static int -tcp6_getcred (SYSCTL_HANDLER_ARGS) +tcp6_getcred(SYSCTL_HANDLER_ARGS) { struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error, s, mapped = 0; error = suser(req->p); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) mapped = 1; else return (EINVAL); } s = splnet(); if (mapped == 1) inp = in_pcblookup_hash(&tcbinfo, *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], addrs[1].sin6_port, *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], addrs[0].sin6_port, 0, NULL); else inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 0, NULL); if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); out: splx(s); return (error); } SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, tcp6_getcred, "S,ucred", "Get the ucred of a TCP6 connection"); #endif void tcp_ctlinput(cmd, sa, vip) int cmd; struct sockaddr *sa; void *vip; { register struct ip *ip = vip; register struct tcphdr *th; void (*notify) __P((struct inpcb *, int)) = tcp_notify; if (cmd == PRC_QUENCH) notify = tcp_quench; else if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)) return; if (ip) { th = (struct tcphdr *)((caddr_t)ip + (IP_VHL_HL(ip->ip_vhl) << 2)); in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport, cmd, notify); } else in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify); } #ifdef INET6 void tcp6_ctlinput(cmd, sa, d) int cmd; struct sockaddr *sa; void *d; { register struct tcphdr *thp; struct tcphdr th; void (*notify) __P((struct inpcb *, int)) = tcp_notify; struct sockaddr_in6 sa6; struct ip6_hdr *ip6; struct mbuf *m; int off; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if (cmd == PRC_QUENCH) notify = tcp_quench; else if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; } else { m = NULL; ip6 = NULL; } /* * Translate addresses into internal form. * Sa check if it is AF_INET6 is done at the top of this funciton. */ sa6 = *(struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr) != 0 && m != NULL && m->m_pkthdr.rcvif != NULL) sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ struct in6_addr s; /* translate addresses into internal form */ memcpy(&s, &ip6->ip6_src, sizeof(s)); if (IN6_IS_ADDR_LINKLOCAL(&s) != 0 && m != NULL && m->m_pkthdr.rcvif != NULL) s.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); if (m->m_len < off + sizeof(*thp)) { /* * this should be rare case * because now MINCLSIZE is "(MHLEN + 1)", * so we compromise on this copy... */ m_copydata(m, off, sizeof(th), (caddr_t)&th); thp = &th; } else thp = (struct tcphdr *)(mtod(m, caddr_t) + off); in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, thp->th_dport, &s, thp->th_sport, cmd, notify); } else in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, 0, &zeroin6_addr, 0, cmd, notify); } #endif /* INET6 */ /* * When a source quench is received, close congestion window * to one segment. We will gradually open it again as we proceed. */ void tcp_quench(inp, errno) struct inpcb *inp; int errno; { struct tcpcb *tp = intotcpcb(inp); if (tp) tp->snd_cwnd = tp->t_maxseg; } /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value in the route. Also nudge TCP to send something, * since we know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. */ void tcp_mtudisc(inp, errno) struct inpcb *inp; int errno; { struct tcpcb *tp = intotcpcb(inp); struct rtentry *rt; struct rmxp_tao *taop; struct socket *so = inp->inp_socket; int offered; int mss; #ifdef INET6 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ if (tp) { #ifdef INET6 if (isipv6) rt = tcp_rtlookup6(inp); else #endif /* INET6 */ rt = tcp_rtlookup(inp); if (!rt || !rt->rt_rmx.rmx_mtu) { tp->t_maxopd = tp->t_maxseg = #ifdef INET6 isipv6 ? tcp_v6mssdflt : #endif /* INET6 */ tcp_mssdflt; return; } taop = rmx_taop(rt->rt_rmx); offered = taop->tao_mssopt; mss = rt->rt_rmx.rmx_mtu - #ifdef INET6 (isipv6 ? sizeof(struct ip6_hdr) + sizeof(struct tcphdr) : #endif /* INET6 */ sizeof(struct tcpiphdr) #ifdef INET6 ) #endif /* INET6 */ ; if (offered) mss = min(mss, offered); /* * XXX - The above conditional probably violates the TCP * spec. The problem is that, since we don't know the * other end's MSS, we are supposed to use a conservative * default. But, if we do that, then MTU discovery will * never actually take place, because the conservative * default is much less than the MTUs typically seen * on the Internet today. For the moment, we'll sweep * this under the carpet. * * The conservative default might not actually be a problem * if the only case this occurs is when sending an initial * SYN with options and data to a host we've never talked * to before. Then, they will reply with an MSS value which * will get recorded and the new parameters should get * recomputed. For Further Study. */ if (tp->t_maxopd <= mss) return; tp->t_maxopd = mss; if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP) mss -= TCPOLEN_TSTAMP_APPA; if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC) mss -= TCPOLEN_CC_APPA; #if (MCLBYTES & (MCLBYTES - 1)) == 0 if (mss > MCLBYTES) mss &= ~(MCLBYTES-1); #else if (mss > MCLBYTES) mss = mss / MCLBYTES * MCLBYTES; #endif if (so->so_snd.sb_hiwat < mss) mss = so->so_snd.sb_hiwat; tp->t_maxseg = mss; tcpstat.tcps_mturesent++; tp->t_rtttime = 0; tp->snd_nxt = tp->snd_una; tcp_output(tp); } } /* * Look-up the routing entry to the peer of this inpcb. If no route * is found and it cannot be allocated the return NULL. This routine * is called by TCP routines that access the rmx structure and by tcp_mss * to get the interface MTU. */ struct rtentry * tcp_rtlookup(inp) struct inpcb *inp; { struct route *ro; struct rtentry *rt; ro = &inp->inp_route; rt = ro->ro_rt; if (rt == NULL || !(rt->rt_flags & RTF_UP)) { /* No route yet, so try to acquire one */ if (inp->inp_faddr.s_addr != INADDR_ANY) { ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(ro->ro_dst); ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = inp->inp_faddr; rtalloc(ro); rt = ro->ro_rt; } } return rt; } #ifdef INET6 struct rtentry * tcp_rtlookup6(inp) struct inpcb *inp; { struct route_in6 *ro6; struct rtentry *rt; ro6 = &inp->in6p_route; rt = ro6->ro_rt; if (rt == NULL || !(rt->rt_flags & RTF_UP)) { /* No route yet, so try to acquire one */ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { ro6->ro_dst.sin6_family = AF_INET6; ro6->ro_dst.sin6_len = sizeof(ro6->ro_dst); ro6->ro_dst.sin6_addr = inp->in6p_faddr; rtalloc((struct route *)ro6); rt = ro6->ro_rt; } } return rt; } #endif /* INET6 */ #ifdef IPSEC /* compute ESP/AH header size for TCP, including outer IP header. */ size_t ipsec_hdrsiz_tcp(tp) struct tcpcb *tp; { struct inpcb *inp; struct mbuf *m; size_t hdrsiz; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif /* INET6 */ struct tcphdr *th; if (!tp || !tp->t_template || !(inp = tp->t_inpcb)) return 0; MGETHDR(m, M_DONTWAIT, MT_DATA); if (!m) return 0; #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, sizeof(struct ip6_hdr)); bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, sizeof(struct tcphdr)); hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); th = (struct tcphdr *)(ip + 1); m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, sizeof(struct ip)); bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, sizeof(struct tcphdr)); hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } m_free(m); return hdrsiz; } #endif /*IPSEC*/ /* * Return a pointer to the cached information about the remote host. * The cached information is stored in the protocol specific part of * the route metrics. */ struct rmxp_tao * tcp_gettaocache(inp) struct inpcb *inp; { struct rtentry *rt; #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) rt = tcp_rtlookup6(inp); else #endif /* INET6 */ rt = tcp_rtlookup(inp); /* Make sure this is a host route and is up. */ if (rt == NULL || (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) return NULL; return rmx_taop(rt->rt_rmx); } /* * Clear all the TAO cache entries, called from tcp_init. * * XXX * This routine is just an empty one, because we assume that the routing * routing tables are initialized at the same time when TCP, so there is * nothing in the cache left over. */ static void tcp_cleartaocache() { } Index: head/sys/netinet/tcp_timer.c =================================================================== --- head/sys/netinet/tcp_timer.c (revision 62572) +++ head/sys/netinet/tcp_timer.c (revision 62573) @@ -1,472 +1,472 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include /* before tcp_seq.h, for tcp_random18() */ #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif static int -sysctl_msec_to_ticks (SYSCTL_HANDLER_ARGS) +sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) { int error, s, tt; tt = *(int *)oidp->oid_arg1; s = tt * 1000 / hz; error = sysctl_handle_int(oidp, &s, 0, req); if (error || !req->newptr) return (error); tt = s * hz / 1000; if (tt < 1) return (EINVAL); *(int *)oidp->oid_arg1 = tt; return (0); } int tcp_keepinit; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); int tcp_keepidle; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); int tcp_keepintvl; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); int tcp_delacktime; SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", "Time before a delayed ACK is sent"); int tcp_msl; SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); static int always_keepalive = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); static int tcp_keepcnt = TCPTV_KEEPCNT; /* max idle probes */ int tcp_maxpersistidle; /* max idle time in persist */ int tcp_maxidle; /* * Tcp protocol timeout routine called every 500 ms. * Updates timestamps used for TCP * causes finite state machine actions if timers expire. */ void tcp_slowtimo() { int s; s = splnet(); tcp_maxidle = tcp_keepcnt * tcp_keepintvl; tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ #ifdef TCP_COMPAT_42 if ((int)tcp_iss < 0) tcp_iss = TCP_ISSINCR; /* XXX */ #endif splx(s); } /* * Cancel all timers for TCP tp. */ void tcp_canceltimers(tp) struct tcpcb *tp; { callout_stop(tp->tt_2msl); callout_stop(tp->tt_persist); callout_stop(tp->tt_keep); callout_stop(tp->tt_rexmt); } int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ /* * TCP timer processing. */ void tcp_timer_delack(xtp) void *xtp; { struct tcpcb *tp = xtp; int s; s = splnet(); if (callout_pending(tp->tt_delack)) { splx(s); return; } callout_deactivate(tp->tt_delack); tp->t_flags |= TF_ACKNOW; tcpstat.tcps_delack++; (void) tcp_output(tp); splx(s); } void tcp_timer_2msl(xtp) void *xtp; { struct tcpcb *tp = xtp; int s; #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif s = splnet(); if (callout_pending(tp->tt_2msl)) { splx(s); return; } callout_deactivate(tp->tt_2msl); /* * 2 MSL timeout in shutdown went off. If we're closed but * still waiting for peer to close and connection has been idle * too long, or if 2MSL time is up from TIME_WAIT, delete connection * control block. Otherwise, check again in a bit. */ if (tp->t_state != TCPS_TIME_WAIT && (ticks - tp->t_rcvtime) <= tcp_maxidle) callout_reset(tp->tt_2msl, tcp_keepintvl, tcp_timer_2msl, tp); else tp = tcp_close(tp); #ifdef TCPDEBUG if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif splx(s); } void tcp_timer_keep(xtp) void *xtp; { struct tcpcb *tp = xtp; int s; #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif s = splnet(); if (callout_pending(tp->tt_keep)) { splx(s); return; } callout_deactivate(tp->tt_keep); /* * Keep-alive timer went off; send something * or drop connection if idle for too long. */ tcpstat.tcps_keeptimeo++; if (tp->t_state < TCPS_ESTABLISHED) goto dropit; if ((always_keepalive || tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle) goto dropit; /* * Send a packet designed to force a response * if the peer is up and reachable: * either an ACK if the connection is still alive, * or an RST if the peer has closed the connection * due to timeout or reboot. * Using sequence number tp->snd_una-1 * causes the transmitted zero-length segment * to lie outside the receive window; * by the protocol spec, this requires the * correspondent TCP to respond. */ tcpstat.tcps_keepprobe++; #ifdef TCP_COMPAT_42 /* * The keepalive packet must have nonzero length * to get a 4.2 host to respond. */ tcp_respond(tp, tp->t_template->tt_ipgen, &tp->t_template->tt_t, (struct mbuf *)NULL, tp->rcv_nxt - 1, tp->snd_una - 1, 0); #else tcp_respond(tp, tp->t_template->tt_ipgen, &tp->t_template->tt_t, (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0); #endif callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp); } else callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp); #ifdef TCPDEBUG if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif splx(s); return; dropit: tcpstat.tcps_keepdrops++; tp = tcp_drop(tp, ETIMEDOUT); #ifdef TCPDEBUG if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif splx(s); } void tcp_timer_persist(xtp) void *xtp; { struct tcpcb *tp = xtp; int s; #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif s = splnet(); if (callout_pending(tp->tt_persist)) { splx(s); return; } callout_deactivate(tp->tt_persist); /* * Persistance timer into zero window. * Force a byte to be output, if possible. */ tcpstat.tcps_persisttimeo++; /* * Hack: if the peer is dead/unreachable, we do not * time out if the window is closed. After a full * backoff, drop the connection if the idle time * (no responses to probes) reaches the maximum * backoff that we would use if retransmitting. */ if (tp->t_rxtshift == TCP_MAXRXTSHIFT && ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle || (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { tcpstat.tcps_persistdrop++; tp = tcp_drop(tp, ETIMEDOUT); goto out; } tcp_setpersist(tp); tp->t_force = 1; (void) tcp_output(tp); tp->t_force = 0; out: #ifdef TCPDEBUG if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif splx(s); } void tcp_timer_rexmt(xtp) void *xtp; { struct tcpcb *tp = xtp; int s; int rexmt; #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif s = splnet(); if (callout_pending(tp->tt_rexmt)) { splx(s); return; } callout_deactivate(tp->tt_rexmt); /* * Retransmission timer went off. Message has not * been acked within retransmit interval. Back off * to a longer retransmit interval and retransmit one segment. */ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; tcpstat.tcps_timeoutdrop++; tp = tcp_drop(tp, tp->t_softerror ? tp->t_softerror : ETIMEDOUT); goto out; } if (tp->t_rxtshift == 1) { /* * first retransmit; record ssthresh and cwnd so they can * be recovered if this turns out to be a "bad" retransmit. * A retransmit is considered "bad" if an ACK for this * segment is received within RTT/2 interval; the assumption * here is that the ACK was already in flight. See * "On Estimating End-to-End Network Path Properties" by * Allman and Paxson for more details. */ tp->snd_cwnd_prev = tp->snd_cwnd; tp->snd_ssthresh_prev = tp->snd_ssthresh; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); } tcpstat.tcps_rexmttimeo++; rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, TCPTV_REXMTMAX); /* * If losing, let the lower level know and try for * a better route. Also, if we backed off this far, * our srtt estimate is probably bogus. Clobber it * so we'll take the next rtt measurement as our srtt; * move the current srtt into rttvar to keep the current * retransmit times until then. */ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) in6_losing(tp->t_inpcb); else #endif in_losing(tp->t_inpcb); tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); tp->t_srtt = 0; } tp->snd_nxt = tp->snd_una; /* * Note: We overload snd_recover to function also as the * snd_last variable described in RFC 2582 */ tp->snd_recover = tp->snd_max; /* * Force a segment to be sent. */ tp->t_flags |= TF_ACKNOW; /* * If timing a segment in this window, stop the timer. */ tp->t_rtttime = 0; /* * Close the congestion window down to one segment * (we'll open it by one segment for each ack we get). * Since we probably have a window's worth of unacked * data accumulated, this "slow start" keeps us from * dumping all that data as back-to-back packets (which * might overwhelm an intermediate gateway). * * There are two phases to the opening: Initially we * open by one mss on each ack. This makes the window * size increase exponentially with time. If the * window is larger than the path can handle, this * exponential growth results in dropped packet(s) * almost immediately. To get more time between * drops but still "push" the network to take advantage * of improving conditions, we switch from exponential * to linear window opening at some threshhold size. * For a threshhold, we use half the current window * size, truncated to a multiple of the mss. * * (the minimum cwnd that will give us exponential * growth is 2 mss. We don't allow the threshhold * to go below this.) */ { u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; if (win < 2) win = 2; tp->snd_cwnd = tp->t_maxseg; tp->snd_ssthresh = win * tp->t_maxseg; tp->t_dupacks = 0; } (void) tcp_output(tp); out: #ifdef TCPDEBUG if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif splx(s); } Index: head/sys/netinet/tcp_timewait.c =================================================================== --- head/sys/netinet/tcp_timewait.c (revision 62572) +++ head/sys/netinet/tcp_timewait.c (revision 62573) @@ -1,1301 +1,1301 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #define _IP_VHL #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef TCPDEBUG #include #endif #include #ifdef IPSEC #include #endif /*IPSEC*/ #include int tcp_mssdflt = TCP_MSS; SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, &tcp_mssdflt , 0, "Default TCP Maximum Segment Size"); #ifdef INET6 int tcp_v6mssdflt = TCP6_MSS; SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, CTLFLAG_RW, &tcp_v6mssdflt , 0, "Default TCP Maximum Segment Size for IPv6"); #endif #if 0 static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW, &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time"); #endif static int tcp_do_rfc1323 = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); static int tcp_do_rfc1644 = 0; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions"); static int tcp_tcbhashsize = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD, &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); static int do_tcpdrain = 1; SYSCTL_INT(_debug, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable non Net3 compliant tcp_drain"); SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &tcbinfo.ipi_count, 0, "Number of active PCBs"); static void tcp_cleartaocache __P((void)); static void tcp_notify __P((struct inpcb *, int)); /* * Target size of TCP PCB hash tables. Must be a power of two. * * Note that this can be overridden by the kernel environment * variable net.inet.tcp.tcbhashsize */ #ifndef TCBHASHSIZE #define TCBHASHSIZE 512 #endif /* * This is the actual shape of what we allocate using the zone * allocator. Doing it this way allows us to protect both structures * using the same generation count, and also eliminates the overhead * of allocating tcpcbs separately. By hiding the structure here, * we avoid changing most of the rest of the code (although it needs * to be changed, eventually, for greater efficiency). */ #define ALIGNMENT 32 #define ALIGNM1 (ALIGNMENT - 1) struct inp_tp { union { struct inpcb inp; char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; } inp_tp_u; struct tcpcb tcb; struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl; struct callout inp_tp_delack; }; #undef ALIGNMENT #undef ALIGNM1 /* * Tcp initialization */ void tcp_init() { int hashsize; tcp_iss = random(); /* wrong, but better than a constant */ tcp_ccgen = 1; tcp_cleartaocache(); tcp_delacktime = TCPTV_DELACK; tcp_keepinit = TCPTV_KEEP_INIT; tcp_keepidle = TCPTV_KEEP_IDLE; tcp_keepintvl = TCPTV_KEEPINTVL; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; LIST_INIT(&tcb); tcbinfo.listhead = &tcb; TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", TCBHASHSIZE, hashsize); if (!powerof2(hashsize)) { printf("WARNING: TCB hash size not a power of 2\n"); hashsize = 512; /* safe default */ } tcp_tcbhashsize = hashsize; tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); tcbinfo.porthashbase = hashinit(hashsize, M_PCB, &tcbinfo.porthashmask); tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets, ZONE_INTERRUPT, 0); #ifdef INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) #endif /* INET6 */ if (max_protohdr < TCP_MINPROTOHDR) max_protohdr = TCP_MINPROTOHDR; if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR } /* * Create template to be used to send tcp packets on a connection. * Call after host entry created, allocates an mbuf and fills * in a skeletal tcp/ip header, minimizing the amount of work * necessary when the connection is used. */ struct tcptemp * tcp_template(tp) struct tcpcb *tp; { register struct inpcb *inp = tp->t_inpcb; register struct mbuf *m; register struct tcptemp *n; if ((n = tp->t_template) == 0) { m = m_get(M_DONTWAIT, MT_HEADER); if (m == NULL) return (0); m->m_len = sizeof (struct tcptemp); n = mtod(m, struct tcptemp *); } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { register struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)n->tt_ipgen; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = sizeof(struct tcphdr); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; n->tt_t.th_sum = 0; } else #endif { struct ip *ip = (struct ip *)n->tt_ipgen; bzero(ip, sizeof(struct ip)); /* XXX overkill? */ ip->ip_vhl = IP_VHL_BORING; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP)); } n->tt_t.th_sport = inp->inp_lport; n->tt_t.th_dport = inp->inp_fport; n->tt_t.th_seq = 0; n->tt_t.th_ack = 0; n->tt_t.th_x2 = 0; n->tt_t.th_off = 5; n->tt_t.th_flags = 0; n->tt_t.th_win = 0; n->tt_t.th_urp = 0; return (n); } /* * Send a single message to the TCP at address specified by * the given TCP/IP header. If m == 0, then we make a copy * of the tcpiphdr at ti and send directly to the addressed host. * This is used to force keep alive messages out using the TCP * template for a connection tp->t_template. If flags are given * then we send a message back to the TCP which originated the * segment ti, and discard the mbuf containing it and any other * attached mbufs. * * In any case the ack and sequence number of the transmitted * segment are as specified by the parameters. * * NOTE: If m != NULL, then ti must point to *inside* the mbuf. */ void tcp_respond(tp, ipgen, th, m, ack, seq, flags) struct tcpcb *tp; void *ipgen; register struct tcphdr *th; register struct mbuf *m; tcp_seq ack, seq; int flags; { register int tlen; int win = 0; struct route *ro = 0; struct route sro; struct ip *ip; struct tcphdr *nth; #ifdef INET6 struct route_in6 *ro6 = 0; struct route_in6 sro6; struct ip6_hdr *ip6; int isipv6; #endif /* INET6 */ int ipflags = 0; #ifdef INET6 isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6; ip6 = ipgen; #endif /* INET6 */ ip = ipgen; if (tp) { if (!(flags & TH_RST)) { win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); if (win > (long)TCP_MAXWIN << tp->rcv_scale) win = (long)TCP_MAXWIN << tp->rcv_scale; } #ifdef INET6 if (isipv6) ro6 = &tp->t_inpcb->in6p_route; else #endif /* INET6 */ ro = &tp->t_inpcb->inp_route; } else { #ifdef INET6 if (isipv6) { ro6 = &sro6; bzero(ro6, sizeof *ro6); } else #endif /* INET6 */ { ro = &sro; bzero(ro, sizeof *ro); } } if (m == 0) { m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) return; #ifdef TCP_COMPAT_42 tlen = 1; #else tlen = 0; #endif m->m_data += max_linkhdr; #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; } else { m_freem(m->m_next); m->m_next = 0; m->m_data = (caddr_t)ipgen; /* m_len is set later */ tlen = 0; #define xchg(a,b,type) { type t; t=a; a=b; b=t; } #ifdef INET6 if (isipv6) { xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long); nth = (struct tcphdr *)(ip + 1); } if (th != nth) { /* * this is usually a case when an extension header * exists between the IPv6 header and the * TCP header. */ nth->th_sport = th->th_sport; nth->th_dport = th->th_dport; } xchg(nth->th_dport, nth->th_sport, n_short); #undef xchg } #ifdef INET6 if (isipv6) { ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) + tlen)); tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); } else #endif { tlen += sizeof (struct tcpiphdr); ip->ip_len = tlen; ip->ip_ttl = ip_defttl; } m->m_len = tlen; m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = (struct ifnet *) 0; nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; nth->th_off = sizeof (struct tcphdr) >> 2; nth->th_flags = flags; if (tp) nth->th_win = htons((u_short) (win >> tp->rcv_scale)); else nth->th_win = htons((u_short)win); nth->th_urp = 0; #ifdef INET6 if (isipv6) { nth->th_sum = 0; nth->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen - sizeof(struct ip6_hdr)); ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, ro6 && ro6->ro_rt ? ro6->ro_rt->rt_ifp : NULL); } else #endif /* INET6 */ { nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); } #ifdef TCPDEBUG if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif #ifdef IPSEC if (tp != NULL) { m->m_pkthdr.rcvif = (struct ifnet *)tp->t_inpcb->inp_socket; ipflags |= #ifdef INET6 isipv6 ? IPV6_SOCKINMRCVIF : #endif IP_SOCKINMRCVIF; } #endif #ifdef INET6 if (isipv6) { (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL); if (ro6 == &sro6 && ro6->ro_rt) { RTFREE(ro6->ro_rt); ro6->ro_rt = NULL; } } else #endif /* INET6 */ { (void) ip_output(m, NULL, ro, ipflags, NULL); if (ro == &sro && ro->ro_rt) { RTFREE(ro->ro_rt); ro->ro_rt = NULL; } } } /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument * protocol control block. The `inp' parameter must have * come from the zone allocator set up in tcp_init(). */ struct tcpcb * tcp_newtcpcb(inp) struct inpcb *inp; { struct inp_tp *it; register struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ it = (struct inp_tp *)inp; tp = &it->tcb; bzero((char *) tp, sizeof(struct tcpcb)); LIST_INIT(&tp->t_segq); tp->t_maxseg = tp->t_maxopd = #ifdef INET6 isipv6 ? tcp_v6mssdflt : #endif /* INET6 */ tcp_mssdflt; /* Set up our timeouts. */ callout_init(tp->tt_rexmt = &it->inp_tp_rexmt); callout_init(tp->tt_persist = &it->inp_tp_persist); callout_init(tp->tt_keep = &it->inp_tp_keep); callout_init(tp->tt_2msl = &it->inp_tp_2msl); callout_init(tp->tt_delack = &it->inp_tp_delack); if (tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (tcp_do_rfc1644) tp->t_flags |= TF_REQ_CC; tp->t_inpcb = inp; /* XXX */ /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives * reasonable initial retransmit time. */ tp->t_srtt = TCPTV_SRTTBASE; tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; tp->t_rttmin = TCPTV_MIN; tp->t_rxtcur = TCPTV_RTOBASE; tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = ip_defttl; inp->inp_ppcb = (caddr_t)tp; return (tp); /* XXX */ } /* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, * then send a RST to peer. */ struct tcpcb * tcp_drop(tp, errno) register struct tcpcb *tp; int errno; { struct socket *so = tp->t_inpcb->inp_socket; if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_state = TCPS_CLOSED; (void) tcp_output(tp); tcpstat.tcps_drops++; } else tcpstat.tcps_conndrops++; if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; return (tcp_close(tp)); } /* * Close a TCP control block: * discard all space held by the tcp * discard internet protocol block * wake up any sleepers */ struct tcpcb * tcp_close(tp) register struct tcpcb *tp; { register struct tseg_qent *q; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ register struct rtentry *rt; int dosavessthresh; /* * Make sure that all of our timers are stopped before we * delete the PCB. */ callout_stop(tp->tt_rexmt); callout_stop(tp->tt_persist); callout_stop(tp->tt_keep); callout_stop(tp->tt_2msl); callout_stop(tp->tt_delack); /* * If we got enough samples through the srtt filter, * save the rtt and rttvar in the routing entry. * 'Enough' is arbitrarily defined as the 16 samples. * 16 samples is enough for the srtt filter to converge * to within 5% of the correct value; fewer samples and * we could save a very bogus rtt. * * Don't update the default route's characteristics and don't * update anything that the user "locked". */ if (tp->t_rttupdated >= 16) { register u_long i = 0; #ifdef INET6 if (isipv6) { struct sockaddr_in6 *sin6; if ((rt = inp->in6p_route.ro_rt) == NULL) goto no_valid_rt; sin6 = (struct sockaddr_in6 *)rt_key(rt); if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) goto no_valid_rt; } else #endif /* INET6 */ if ((rt = inp->inp_route.ro_rt) == NULL || ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr == INADDR_ANY) goto no_valid_rt; if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { i = tp->t_srtt * (RTM_RTTUNIT / (hz * TCP_RTT_SCALE)); if (rt->rt_rmx.rmx_rtt && i) /* * filter this update to half the old & half * the new values, converting scale. * See route.h and tcp_var.h for a * description of the scaling constants. */ rt->rt_rmx.rmx_rtt = (rt->rt_rmx.rmx_rtt + i) / 2; else rt->rt_rmx.rmx_rtt = i; tcpstat.tcps_cachedrtt++; } if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { i = tp->t_rttvar * (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE)); if (rt->rt_rmx.rmx_rttvar && i) rt->rt_rmx.rmx_rttvar = (rt->rt_rmx.rmx_rttvar + i) / 2; else rt->rt_rmx.rmx_rttvar = i; tcpstat.tcps_cachedrttvar++; } /* * The old comment here said: * update the pipelimit (ssthresh) if it has been updated * already or if a pipesize was specified & the threshhold * got below half the pipesize. I.e., wait for bad news * before we start updating, then update on both good * and bad news. * * But we want to save the ssthresh even if no pipesize is * specified explicitly in the route, because such * connections still have an implicit pipesize specified * by the global tcp_sendspace. In the absence of a reliable * way to calculate the pipesize, it will have to do. */ i = tp->snd_ssthresh; if (rt->rt_rmx.rmx_sendpipe != 0) dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2); else dosavessthresh = (i < so->so_snd.sb_hiwat / 2); if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && i != 0 && rt->rt_rmx.rmx_ssthresh != 0) || dosavessthresh) { /* * convert the limit from user data bytes to * packets then to packet data bytes. */ i = (i + tp->t_maxseg / 2) / tp->t_maxseg; if (i < 2) i = 2; i *= (u_long)(tp->t_maxseg + #ifdef INET6 (isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : #endif sizeof (struct tcpiphdr) #ifdef INET6 ) #endif ); if (rt->rt_rmx.rmx_ssthresh) rt->rt_rmx.rmx_ssthresh = (rt->rt_rmx.rmx_ssthresh + i) / 2; else rt->rt_rmx.rmx_ssthresh = i; tcpstat.tcps_cachedssthresh++; } } no_valid_rt: /* free the reassembly queue, if any */ while((q = LIST_FIRST(&tp->t_segq)) != NULL) { LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); FREE(q, M_TSEGQ); } if (tp->t_template) (void) m_free(dtom(tp->t_template)); inp->inp_ppcb = NULL; soisdisconnected(so); #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) in6_pcbdetach(inp); else #endif /* INET6 */ in_pcbdetach(inp); tcpstat.tcps_closed++; return ((struct tcpcb *)0); } void tcp_drain() { if (do_tcpdrain) { struct inpcb *inpb; struct tcpcb *tcpb; struct tseg_qent *te; /* * Walk the tcpbs, if existing, and flush the reassembly queue, * if there is one... * XXX: The "Net/3" implementation doesn't imply that the TCP * reassembly queue should be flushed, but in a situation * where we're really low on mbufs, this is potentially * usefull. */ for (inpb = tcbinfo.listhead->lh_first; inpb; inpb = inpb->inp_list.le_next) { if ((tcpb = intotcpcb(inpb))) { while ((te = LIST_FIRST(&tcpb->t_segq)) != NULL) { LIST_REMOVE(te, tqe_q); m_freem(te->tqe_m); FREE(te, M_TSEGQ); } } } } } /* * Notify a tcp user of an asynchronous error; * store error as soft error, but wake up user * (for now, won't do anything until can select for soft error). */ static void tcp_notify(inp, error) struct inpcb *inp; int error; { register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; register struct socket *so = inp->inp_socket; /* * Ignore some errors if we are hooked up. * If connection hasn't completed, has retransmitted several times, * and receives a second error, give up now. This is better * than waiting a long time to establish a connection that * can never complete. */ if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { return; } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) so->so_error = error; else tp->t_softerror = error; wakeup((caddr_t) &so->so_timeo); sorwakeup(so); sowwakeup(so); } static int -tcp_pcblist (SYSCTL_HANDLER_ARGS) +tcp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n, s; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = tcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xtcpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ s = splnet(); gencnt = tcbinfo.ipi_gencnt; n = tcbinfo.ipi_count; splx(s); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; s = splnet(); for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n; inp = inp->inp_list.le_next) { if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) inp_list[i++] = inp; } splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; caddr_t inp_ppcb; xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb != NULL) bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); else bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xt.xt_socket); error = SYSCTL_OUT(req, &xt, sizeof xt); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ s = splnet(); xig.xig_gen = tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = tcbinfo.ipi_count; splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); static int -tcp_getcred (SYSCTL_HANDLER_ARGS) +tcp_getcred(SYSCTL_HANDLER_ARGS) { struct sockaddr_in addrs[2]; struct inpcb *inp; int error, s; error = suser(req->p); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); s = splnet(); inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 0, NULL); if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); out: splx(s); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, tcp_getcred, "S,ucred", "Get the ucred of a TCP connection"); #ifdef INET6 static int -tcp6_getcred (SYSCTL_HANDLER_ARGS) +tcp6_getcred(SYSCTL_HANDLER_ARGS) { struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error, s, mapped = 0; error = suser(req->p); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) mapped = 1; else return (EINVAL); } s = splnet(); if (mapped == 1) inp = in_pcblookup_hash(&tcbinfo, *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], addrs[1].sin6_port, *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], addrs[0].sin6_port, 0, NULL); else inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 0, NULL); if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); out: splx(s); return (error); } SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, tcp6_getcred, "S,ucred", "Get the ucred of a TCP6 connection"); #endif void tcp_ctlinput(cmd, sa, vip) int cmd; struct sockaddr *sa; void *vip; { register struct ip *ip = vip; register struct tcphdr *th; void (*notify) __P((struct inpcb *, int)) = tcp_notify; if (cmd == PRC_QUENCH) notify = tcp_quench; else if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)) return; if (ip) { th = (struct tcphdr *)((caddr_t)ip + (IP_VHL_HL(ip->ip_vhl) << 2)); in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport, cmd, notify); } else in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify); } #ifdef INET6 void tcp6_ctlinput(cmd, sa, d) int cmd; struct sockaddr *sa; void *d; { register struct tcphdr *thp; struct tcphdr th; void (*notify) __P((struct inpcb *, int)) = tcp_notify; struct sockaddr_in6 sa6; struct ip6_hdr *ip6; struct mbuf *m; int off; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if (cmd == PRC_QUENCH) notify = tcp_quench; else if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; } else { m = NULL; ip6 = NULL; } /* * Translate addresses into internal form. * Sa check if it is AF_INET6 is done at the top of this funciton. */ sa6 = *(struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr) != 0 && m != NULL && m->m_pkthdr.rcvif != NULL) sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ struct in6_addr s; /* translate addresses into internal form */ memcpy(&s, &ip6->ip6_src, sizeof(s)); if (IN6_IS_ADDR_LINKLOCAL(&s) != 0 && m != NULL && m->m_pkthdr.rcvif != NULL) s.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); if (m->m_len < off + sizeof(*thp)) { /* * this should be rare case * because now MINCLSIZE is "(MHLEN + 1)", * so we compromise on this copy... */ m_copydata(m, off, sizeof(th), (caddr_t)&th); thp = &th; } else thp = (struct tcphdr *)(mtod(m, caddr_t) + off); in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, thp->th_dport, &s, thp->th_sport, cmd, notify); } else in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, 0, &zeroin6_addr, 0, cmd, notify); } #endif /* INET6 */ /* * When a source quench is received, close congestion window * to one segment. We will gradually open it again as we proceed. */ void tcp_quench(inp, errno) struct inpcb *inp; int errno; { struct tcpcb *tp = intotcpcb(inp); if (tp) tp->snd_cwnd = tp->t_maxseg; } /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value in the route. Also nudge TCP to send something, * since we know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. */ void tcp_mtudisc(inp, errno) struct inpcb *inp; int errno; { struct tcpcb *tp = intotcpcb(inp); struct rtentry *rt; struct rmxp_tao *taop; struct socket *so = inp->inp_socket; int offered; int mss; #ifdef INET6 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ if (tp) { #ifdef INET6 if (isipv6) rt = tcp_rtlookup6(inp); else #endif /* INET6 */ rt = tcp_rtlookup(inp); if (!rt || !rt->rt_rmx.rmx_mtu) { tp->t_maxopd = tp->t_maxseg = #ifdef INET6 isipv6 ? tcp_v6mssdflt : #endif /* INET6 */ tcp_mssdflt; return; } taop = rmx_taop(rt->rt_rmx); offered = taop->tao_mssopt; mss = rt->rt_rmx.rmx_mtu - #ifdef INET6 (isipv6 ? sizeof(struct ip6_hdr) + sizeof(struct tcphdr) : #endif /* INET6 */ sizeof(struct tcpiphdr) #ifdef INET6 ) #endif /* INET6 */ ; if (offered) mss = min(mss, offered); /* * XXX - The above conditional probably violates the TCP * spec. The problem is that, since we don't know the * other end's MSS, we are supposed to use a conservative * default. But, if we do that, then MTU discovery will * never actually take place, because the conservative * default is much less than the MTUs typically seen * on the Internet today. For the moment, we'll sweep * this under the carpet. * * The conservative default might not actually be a problem * if the only case this occurs is when sending an initial * SYN with options and data to a host we've never talked * to before. Then, they will reply with an MSS value which * will get recorded and the new parameters should get * recomputed. For Further Study. */ if (tp->t_maxopd <= mss) return; tp->t_maxopd = mss; if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP) mss -= TCPOLEN_TSTAMP_APPA; if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC) mss -= TCPOLEN_CC_APPA; #if (MCLBYTES & (MCLBYTES - 1)) == 0 if (mss > MCLBYTES) mss &= ~(MCLBYTES-1); #else if (mss > MCLBYTES) mss = mss / MCLBYTES * MCLBYTES; #endif if (so->so_snd.sb_hiwat < mss) mss = so->so_snd.sb_hiwat; tp->t_maxseg = mss; tcpstat.tcps_mturesent++; tp->t_rtttime = 0; tp->snd_nxt = tp->snd_una; tcp_output(tp); } } /* * Look-up the routing entry to the peer of this inpcb. If no route * is found and it cannot be allocated the return NULL. This routine * is called by TCP routines that access the rmx structure and by tcp_mss * to get the interface MTU. */ struct rtentry * tcp_rtlookup(inp) struct inpcb *inp; { struct route *ro; struct rtentry *rt; ro = &inp->inp_route; rt = ro->ro_rt; if (rt == NULL || !(rt->rt_flags & RTF_UP)) { /* No route yet, so try to acquire one */ if (inp->inp_faddr.s_addr != INADDR_ANY) { ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(ro->ro_dst); ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = inp->inp_faddr; rtalloc(ro); rt = ro->ro_rt; } } return rt; } #ifdef INET6 struct rtentry * tcp_rtlookup6(inp) struct inpcb *inp; { struct route_in6 *ro6; struct rtentry *rt; ro6 = &inp->in6p_route; rt = ro6->ro_rt; if (rt == NULL || !(rt->rt_flags & RTF_UP)) { /* No route yet, so try to acquire one */ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { ro6->ro_dst.sin6_family = AF_INET6; ro6->ro_dst.sin6_len = sizeof(ro6->ro_dst); ro6->ro_dst.sin6_addr = inp->in6p_faddr; rtalloc((struct route *)ro6); rt = ro6->ro_rt; } } return rt; } #endif /* INET6 */ #ifdef IPSEC /* compute ESP/AH header size for TCP, including outer IP header. */ size_t ipsec_hdrsiz_tcp(tp) struct tcpcb *tp; { struct inpcb *inp; struct mbuf *m; size_t hdrsiz; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif /* INET6 */ struct tcphdr *th; if (!tp || !tp->t_template || !(inp = tp->t_inpcb)) return 0; MGETHDR(m, M_DONTWAIT, MT_DATA); if (!m) return 0; #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, sizeof(struct ip6_hdr)); bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, sizeof(struct tcphdr)); hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); th = (struct tcphdr *)(ip + 1); m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, sizeof(struct ip)); bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, sizeof(struct tcphdr)); hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } m_free(m); return hdrsiz; } #endif /*IPSEC*/ /* * Return a pointer to the cached information about the remote host. * The cached information is stored in the protocol specific part of * the route metrics. */ struct rmxp_tao * tcp_gettaocache(inp) struct inpcb *inp; { struct rtentry *rt; #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) rt = tcp_rtlookup6(inp); else #endif /* INET6 */ rt = tcp_rtlookup(inp); /* Make sure this is a host route and is up. */ if (rt == NULL || (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) return NULL; return rmx_taop(rt->rt_rmx); } /* * Clear all the TAO cache entries, called from tcp_init. * * XXX * This routine is just an empty one, because we assume that the routing * routing tables are initialized at the same time when TCP, so there is * nothing in the cache left over. */ static void tcp_cleartaocache() { } Index: head/sys/netinet/udp_usrreq.c =================================================================== --- head/sys/netinet/udp_usrreq.c (revision 62572) +++ head/sys/netinet/udp_usrreq.c (revision 62573) @@ -1,904 +1,904 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #ifdef INET6 #include #endif #include #include #include #include #ifdef IPSEC #include #endif /*IPSEC*/ #include /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ #ifndef COMPAT_42 static int udpcksum = 1; #else static int udpcksum = 0; /* XXX */ #endif SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udpcksum, 0, ""); int log_in_vain = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, &log_in_vain, 0, "Log all incoming UDP packets"); static int blackhole = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, &blackhole, 0, "Do not send port unreachables for refused connects"); struct inpcbhead udb; /* from udp_var.h */ #define udb6 udb /* for KAME src sync over BSD*'s */ struct inpcbinfo udbinfo; #ifndef UDBHASHSIZE #define UDBHASHSIZE 16 #endif struct udpstat udpstat; /* from udp_var.h */ SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD, &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); static struct sockaddr_in udp_in = { sizeof(udp_in), AF_INET }; #ifdef INET6 struct udp_in6 { struct sockaddr_in6 uin6_sin; u_char uin6_init_done : 1; } udp_in6 = { { sizeof(udp_in6.uin6_sin), AF_INET6 }, 0 }; struct udp_ip6 { struct ip6_hdr uip6_ip6; u_char uip6_init_done : 1; } udp_ip6; #endif /* INET6 */ static void udp_append __P((struct inpcb *last, struct ip *ip, struct mbuf *n, int off)); #ifdef INET6 static void ip_2_ip6_hdr __P((struct ip6_hdr *ip6, struct ip *ip)); #endif static int udp_detach __P((struct socket *so)); static int udp_output __P((struct inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *)); void udp_init() { LIST_INIT(&udb); udbinfo.listhead = &udb; udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask); udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.porthashmask); udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxsockets, ZONE_INTERRUPT, 0); } void udp_input(m, off, proto) register struct mbuf *m; int off, proto; { int iphlen = off; register struct ip *ip; register struct udphdr *uh; register struct inpcb *inp; struct mbuf *opts = 0; int len; struct ip save_ip; struct sockaddr *append_sa; udpstat.udps_ipackets++; /* * Strip IP options, if any; should skip this, * make available to user, and use on returned packets, * but we don't yet have a way to check the checksum * with options still present. */ if (iphlen > sizeof (struct ip)) { ip_stripoptions(m, (struct mbuf *)0); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); if (m->m_len < iphlen + sizeof(struct udphdr)) { if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) { udpstat.udps_hdrops++; return; } ip = mtod(m, struct ip *); } uh = (struct udphdr *)((caddr_t)ip + iphlen); /* * Make mbuf data length reflect UDP length. * If not enough data to reflect UDP length, drop. */ len = ntohs((u_short)uh->uh_ulen); if (ip->ip_len != len) { if (len > ip->ip_len || len < sizeof(struct udphdr)) { udpstat.udps_badlen++; goto bad; } m_adj(m, len - ip->ip_len); /* ip->ip_len = len; */ } /* * Save a copy of the IP header in case we want restore it * for sending an ICMP error message in response. */ save_ip = *ip; /* * Checksum extended UDP header and data. */ if (uh->uh_sum) { if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh->uh_sum = m->m_pkthdr.csum_data; else uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(ip->ip_len + m->m_pkthdr.csum_data + IPPROTO_UDP)); uh->uh_sum ^= 0xffff; } else { bzero(((struct ipovly *)ip)->ih_x1, 9); ((struct ipovly *)ip)->ih_len = uh->uh_ulen; uh->uh_sum = in_cksum(m, len + sizeof (struct ip)); } if (uh->uh_sum) { udpstat.udps_badsum++; m_freem(m); return; } } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { struct inpcb *last; /* * Deliver a multicast or broadcast datagram to *all* sockets * for which the local and remote addresses and ports match * those of the incoming datagram. This allows more than * one process to receive multi/broadcasts on the same port. * (This really ought to be done for unicast datagrams as * well, but that would cause problems with existing * applications that open both address-specific sockets and * a wildcard socket listening to the same port -- they would * end up receiving duplicates of every unicast datagram. * Those applications open the multiple sockets to overcome an * inadequacy of the UDP socket interface, but for backwards * compatibility we avoid the problem here rather than * fixing the interface. Maybe 4.5BSD will remedy this?) */ /* * Construct sockaddr format source address. */ udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; /* * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ last = NULL; #ifdef INET6 udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0; #endif LIST_FOREACH(inp, &udb, inp_list) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_lport != uh->uh_dport) continue; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; } if (inp->inp_faddr.s_addr != INADDR_ANY) { if (inp->inp_faddr.s_addr != ip->ip_src.s_addr || inp->inp_fport != uh->uh_sport) continue; } if (last != NULL) { struct mbuf *n; #ifdef IPSEC /* check AH/ESP integrity. */ if (ipsec4_in_reject_so(m, last->inp_socket)) ipsecstat.in_polvio++; /* do not inject data to pcb */ else #endif /*IPSEC*/ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) udp_append(last, ip, n, iphlen + sizeof(struct udphdr)); } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids searching * through all pcbs in the common case of a non-shared * port. It * assumes that an application will never * clear these options after setting them. */ if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable * for a broadcast or multicast datgram.) */ udpstat.udps_noportbcast++; goto bad; } #ifdef IPSEC /* check AH/ESP integrity. */ if (ipsec4_in_reject_so(m, last->inp_socket)) { ipsecstat.in_polvio++; goto bad; } #endif /*IPSEC*/ udp_append(last, ip, m, iphlen + sizeof(struct udphdr)); return; } /* * Locate pcb for datagram. */ inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); if (inp == NULL) { if (log_in_vain) { char buf[4*sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), ntohs(uh->uh_sport)); } udpstat.udps_noport++; if (m->m_flags & (M_BCAST | M_MCAST)) { udpstat.udps_noportbcast++; goto bad; } *ip = save_ip; if (badport_bandlim(0) < 0) goto bad; if (blackhole) goto bad; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); return; } #ifdef IPSEC if (ipsec4_in_reject_so(m, inp->inp_socket)) { ipsecstat.in_polvio++; goto bad; } #endif /*IPSEC*/ /* * Construct sockaddr format source address. * Stuff source address and datagram in user buffer. */ udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) { #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { int savedflags; ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip); savedflags = inp->inp_flags; inp->inp_flags &= ~INP_UNMAPPABLEOPTS; ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m); inp->inp_flags = savedflags; } else #endif ip_savecontrol(inp, &opts, ip, m); } iphlen += sizeof(struct udphdr); m_adj(m, iphlen); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin); append_sa = (struct sockaddr *)&udp_in6; } else #endif append_sa = (struct sockaddr *)&udp_in; if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) { udpstat.udps_fullsock++; goto bad; } sorwakeup(inp->inp_socket); return; bad: m_freem(m); if (opts) m_freem(opts); return; } #if defined(INET6) static void ip_2_ip6_hdr(ip6, ip) struct ip6_hdr *ip6; struct ip *ip; { bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = ip->ip_len; ip6->ip6_nxt = ip->ip_p; ip6->ip6_hlim = ip->ip_ttl; ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP; ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr; ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr; } #endif /* * subroutine of udp_input(), mainly for source code readability. * caller must properly init udp_ip6 and udp_in6 beforehand. */ static void udp_append(last, ip, n, off) struct inpcb *last; struct ip *ip; struct mbuf *n; int off; { struct sockaddr *append_sa; struct mbuf *opts = 0; if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) { #ifdef INET6 if (last->inp_vflag & INP_IPV6) { int savedflags; if (udp_ip6.uip6_init_done == 0) { ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip); udp_ip6.uip6_init_done = 1; } savedflags = last->inp_flags; last->inp_flags &= ~INP_UNMAPPABLEOPTS; ip6_savecontrol(last, &opts, &udp_ip6.uip6_ip6, n); last->inp_flags = savedflags; } else #endif ip_savecontrol(last, &opts, ip, n); } #ifdef INET6 if (last->inp_vflag & INP_IPV6) { if (udp_in6.uin6_init_done == 0) { in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin); udp_in6.uin6_init_done = 1; } append_sa = (struct sockaddr *)&udp_in6.uin6_sin; } else #endif append_sa = (struct sockaddr *)&udp_in; m_adj(n, off); if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); udpstat.udps_fullsock++; } else sorwakeup(last->inp_socket); } /* * Notify a udp user of an asynchronous error; * just wake up so that he can collect error status. */ void udp_notify(inp, errno) register struct inpcb *inp; int errno; { inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); sowwakeup(inp->inp_socket); } void udp_ctlinput(cmd, sa, vip) int cmd; struct sockaddr *sa; void *vip; { register struct ip *ip = vip; register struct udphdr *uh; if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)) return; if (ip) { uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); in_pcbnotify(&udb, sa, uh->uh_dport, ip->ip_src, uh->uh_sport, cmd, udp_notify); } else in_pcbnotify(&udb, sa, 0, zeroin_addr, 0, cmd, udp_notify); } static int -udp_pcblist (SYSCTL_HANDLER_ARGS) +udp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n, s; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = udbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ s = splnet(); gencnt = udbinfo.ipi_gencnt; n = udbinfo.ipi_count; splx(s); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; s = splnet(); for (inp = udbinfo.listhead->lh_first, i = 0; inp && i < n; inp = inp->inp_list.le_next) { if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) inp_list[i++] = inp; } splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); error = SYSCTL_OUT(req, &xi, sizeof xi); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ s = splnet(); xig.xig_gen = udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = udbinfo.ipi_count; splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); static int -udp_getcred (SYSCTL_HANDLER_ARGS) +udp_getcred(SYSCTL_HANDLER_ARGS) { struct sockaddr_in addrs[2]; struct inpcb *inp; int error, s; error = suser(req->p); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); s = splnet(); inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 1, NULL); if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); out: splx(s); return (error); } SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); static int udp_output(inp, m, addr, control, p) register struct inpcb *inp; struct mbuf *m; struct sockaddr *addr; struct mbuf *control; struct proc *p; { register struct udpiphdr *ui; register int len = m->m_pkthdr.len; struct in_addr laddr; struct sockaddr_in *sin; int s = 0, error = 0; if (control) m_freem(control); /* XXX */ if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { error = EMSGSIZE; goto release; } if (addr) { sin = (struct sockaddr_in *)addr; prison_remote_ip(p, 0, &sin->sin_addr.s_addr); laddr = inp->inp_laddr; if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto release; } /* * Must block input while temporarily connected. */ s = splnet(); error = in_pcbconnect(inp, addr, p); if (error) { splx(s); goto release; } } else { if (inp->inp_faddr.s_addr == INADDR_ANY) { error = ENOTCONN; goto release; } } /* * Calculate data length and get a mbuf * for UDP and IP headers. */ M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT); if (m == 0) { error = ENOBUFS; if (addr) splx(s); goto release; } /* * Fill in mbuf with extended UDP header * and addresses and length put into network format. */ ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ ui->ui_pr = IPPROTO_UDP; ui->ui_src = inp->inp_laddr; ui->ui_dst = inp->inp_faddr; ui->ui_sport = inp->inp_lport; ui->ui_dport = inp->inp_fport; ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); /* * Set up checksum and output datagram. */ if (udpcksum) { ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } else { ui->ui_sum = 0; } ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ udpstat.udps_opackets++; #ifdef IPSEC m->m_pkthdr.rcvif = (struct ifnet *)inp->inp_socket; #endif /*IPSEC*/ error = ip_output(m, inp->inp_options, &inp->inp_route, (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)) | IP_SOCKINMRCVIF, inp->inp_moptions); if (addr) { in_pcbdisconnect(inp); inp->inp_laddr = laddr; /* XXX rehash? */ splx(s); } return (error); release: m_freem(m); return (error); } u_long udp_sendspace = 9216; /* really max datagram size */ /* 40 1K datagrams */ SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); u_long udp_recvspace = 40 * (1024 + #ifdef INET6 sizeof(struct sockaddr_in6) #else sizeof(struct sockaddr_in) #endif ); SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &udp_recvspace, 0, "Maximum incoming UDP datagram size"); static int udp_abort(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; /* ??? possible? panic instead? */ soisdisconnected(so); s = splnet(); in_pcbdetach(inp); splx(s); return 0; } static int udp_attach(struct socket *so, int proto, struct proc *p) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp != 0) return EINVAL; error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return error; s = splnet(); error = in_pcballoc(so, &udbinfo, p); splx(s); if (error) return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = ip_defttl; #ifdef IPSEC error = ipsec_init_policy(so, &inp->inp_sp); if (error != 0) { in_pcbdetach(inp); return error; } #endif /*IPSEC*/ return 0; } static int udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; s = splnet(); error = in_pcbbind(inp, nam, p); splx(s); return error; } static int udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; int s, error; struct sockaddr_in *sin; inp = sotoinpcb(so); if (inp == 0) return EINVAL; if (inp->inp_faddr.s_addr != INADDR_ANY) return EISCONN; s = splnet(); sin = (struct sockaddr_in *)nam; prison_remote_ip(p, 0, &sin->sin_addr.s_addr); error = in_pcbconnect(inp, nam, p); splx(s); if (error == 0) soisconnected(so); return error; } static int udp_detach(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; s = splnet(); in_pcbdetach(inp); splx(s); return 0; } static int udp_disconnect(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; if (inp->inp_faddr.s_addr == INADDR_ANY) return ENOTCONN; s = splnet(); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ return 0; } static int udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct proc *p) { struct inpcb *inp; inp = sotoinpcb(so); if (inp == 0) { m_freem(m); return EINVAL; } return udp_output(inp, m, addr, control, p); } int udp_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); if (inp == 0) return EINVAL; socantsendmore(so); return 0; } struct pr_usrreqs udp_usrreqs = { udp_abort, pru_accept_notsupp, udp_attach, udp_bind, udp_connect, pru_connect2_notsupp, in_control, udp_detach, udp_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, udp_send, pru_sense_null, udp_shutdown, in_setsockaddr, sosend, soreceive, sopoll }; Index: head/sys/netinet6/in6_proto.c =================================================================== --- head/sys/netinet6/in6_proto.c (revision 62572) +++ head/sys/netinet6/in6_proto.c (revision 62573) @@ -1,425 +1,425 @@ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 */ #include "opt_inet.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #ifdef IPSEC_ESP #include #include #endif #endif /*IPSEC*/ #include #include "gif.h" #if NGIF > 0 #include #endif #include #define offsetof(type, member) ((size_t)(&((type *)0)->member)) /* * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. */ extern struct domain inet6domain; static struct pr_usrreqs nousrreqs; struct ip6protosw inet6sw[] = { { 0, &inet6domain, IPPROTO_IPV6, 0, 0, 0, 0, 0, 0, ip6_init, 0, frag6_slowtimo, frag6_drain, &nousrreqs, }, { SOCK_DGRAM, &inet6domain, IPPROTO_UDP, PR_ATOMIC | PR_ADDR, udp6_input, 0, udp6_ctlinput, ip6_ctloutput, 0, 0, 0, 0, 0, &udp6_usrreqs, }, { SOCK_STREAM, &inet6domain, IPPROTO_TCP, PR_CONNREQUIRED | PR_WANTRCVD, tcp6_input, 0, tcp6_ctlinput, tcp_ctloutput, 0, #ifdef INET /* don't call timeout routines twice */ tcp_init, 0, 0, tcp_drain, #else tcp_init, 0, tcp_slowtimo, tcp_drain, #endif &tcp6_usrreqs, }, { SOCK_RAW, &inet6domain, IPPROTO_RAW, PR_ATOMIC | PR_ADDR, rip6_input, rip6_output, 0, rip6_ctloutput, 0, 0, 0, 0, 0, &rip6_usrreqs }, { SOCK_RAW, &inet6domain, IPPROTO_ICMPV6, PR_ATOMIC | PR_ADDR, icmp6_input, rip6_output, 0, rip6_ctloutput, 0, icmp6_init, icmp6_fasttimo, 0, 0, &rip6_usrreqs }, { SOCK_RAW, &inet6domain, IPPROTO_DSTOPTS,PR_ATOMIC|PR_ADDR, dest6_input, 0, 0, 0, 0, 0, 0, 0, 0, &nousrreqs }, { SOCK_RAW, &inet6domain, IPPROTO_ROUTING,PR_ATOMIC|PR_ADDR, route6_input, 0, 0, 0, 0, 0, 0, 0, 0, &nousrreqs }, { SOCK_RAW, &inet6domain, IPPROTO_FRAGMENT,PR_ATOMIC|PR_ADDR, frag6_input, 0, 0, 0, 0, 0, 0, 0, 0, &nousrreqs }, #ifdef IPSEC { SOCK_RAW, &inet6domain, IPPROTO_AH, PR_ATOMIC|PR_ADDR, ah6_input, 0, 0, 0, 0, 0, 0, 0, 0, &nousrreqs, }, #ifdef IPSEC_ESP { SOCK_RAW, &inet6domain, IPPROTO_ESP, PR_ATOMIC|PR_ADDR, esp6_input, 0, 0, 0, 0, 0, 0, 0, 0, &nousrreqs, }, #endif #endif /* IPSEC */ #if NGIF > 0 { SOCK_RAW, &inet6domain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR, in6_gif_input,0, 0, 0, 0, 0, 0, 0, 0, &nousrreqs }, { SOCK_RAW, &inet6domain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, in6_gif_input,0, 0, 0, 0, 0, 0, 0, 0, &nousrreqs }, #endif /* GIF */ { SOCK_RAW, &inet6domain, IPPROTO_PIM, PR_ATOMIC|PR_ADDR, pim6_input, rip6_output, 0, rip6_ctloutput, 0, 0, 0, 0, 0, &rip6_usrreqs }, /* raw wildcard */ { SOCK_RAW, &inet6domain, 0, PR_ATOMIC | PR_ADDR, rip6_input, rip6_output, 0, rip6_ctloutput, 0, 0, 0, 0, 0, &rip6_usrreqs }, }; extern int in6_inithead __P((void **, int)); struct domain inet6domain = { AF_INET6, "internet6", 0, 0, 0, (struct protosw *)inet6sw, (struct protosw *)&inet6sw[sizeof(inet6sw)/sizeof(inet6sw[0])], 0, in6_inithead, offsetof(struct sockaddr_in6, sin6_addr) << 3, sizeof(struct sockaddr_in6) }; DOMAIN_SET(inet6); /* * Internet configuration info */ #ifndef IPV6FORWARDING #ifdef GATEWAY6 #define IPV6FORWARDING 1 /* forward IP6 packets not for us */ #else #define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */ #endif /* GATEWAY6 */ #endif /* !IPV6FORWARDING */ #ifndef IPV6_SENDREDIRECTS #define IPV6_SENDREDIRECTS 1 #endif int ip6_forwarding = IPV6FORWARDING; /* act as router? */ int ip6_sendredirects = IPV6_SENDREDIRECTS; int ip6_defhlim = IPV6_DEFHLIM; int ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS; int ip6_accept_rtadv = 0; /* "IPV6FORWARDING ? 0 : 1" is dangerous */ int ip6_maxfragpackets = 200; int ip6_log_interval = 5; int ip6_hdrnestlimit = 50; /* appropriate? */ int ip6_dad_count = 1; /* DupAddrDetectionTransmits */ u_int32_t ip6_flow_seq; int ip6_auto_flowlabel = 1; #if NGIF > 0 int ip6_gif_hlim = GIF_HLIM; #else int ip6_gif_hlim = 0; #endif int ip6_use_deprecated = 1; /* allow deprecated addr (RFC2462 5.5.4) */ int ip6_rr_prune = 5; /* router renumbering prefix * walk list every 5 sec. */ int ip6_mapped_addr_on = 1; u_int32_t ip6_id = 0UL; int ip6_keepfaith = 0; time_t ip6_log_time = (time_t)0L; /* icmp6 */ /* * BSDI4 defines these variables in in_proto.c... * XXX: what if we don't define INET? Should we define pmtu6_expire * or so? (jinmei@kame.net 19990310) */ int pmtu_expire = 60*10; int pmtu_probe = 60*2; /* raw IP6 parameters */ /* * Nominal space allocated to a raw ip socket. */ #define RIPV6SNDQ 8192 #define RIPV6RCVQ 8192 u_long rip6_sendspace = RIPV6SNDQ; u_long rip6_recvspace = RIPV6RCVQ; /* ICMPV6 parameters */ int icmp6_rediraccept = 1; /* accept and process redirects */ int icmp6_redirtimeout = 10 * 60; /* 10 minutes */ u_int icmp6errratelim = 1000; /* 1000usec = 1msec */ /* UDP on IP6 parameters */ int udp6_sendspace = 9216; /* really max datagram size */ int udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6)); /* 40 1K datagrams */ /* * sysctl related items. */ SYSCTL_NODE(_net, PF_INET6, inet6, CTLFLAG_RW, 0, "Internet6 Family"); /* net.inet6 */ SYSCTL_NODE(_net_inet6, IPPROTO_IPV6, ip6, CTLFLAG_RW, 0, "IP6"); SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, CTLFLAG_RW, 0, "ICMP6"); SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, CTLFLAG_RW, 0, "UDP6"); SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW, 0, "TCP6"); #ifdef IPSEC SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6"); #endif /* IPSEC */ /* net.inet6.ip6 */ static int -sysctl_ip6_forwarding (SYSCTL_HANDLER_ARGS) +sysctl_ip6_forwarding(SYSCTL_HANDLER_ARGS) { int error = 0; int old_ip6_forwarding; int changed; error = SYSCTL_OUT(req, arg1, sizeof(int)); if (error || !req->newptr) return (error); old_ip6_forwarding = ip6_forwarding; error = SYSCTL_IN(req, arg1, sizeof(int)); if (error != 0) return (error); changed = (ip6_forwarding ? 1 : 0) ^ (old_ip6_forwarding ? 1 : 0); if (changed == 0) return (error); if (ip6_forwarding != 0) { /* host becomes router */ int s = splnet(); struct nd_prefix *pr, *next; for (pr = LIST_FIRST(&nd_prefix); pr; pr = next) { next = LIST_NEXT(pr, ndpr_entry); if (!IN6_IS_ADDR_UNSPECIFIED(&pr->ndpr_addr)) in6_ifdel(pr->ndpr_ifp, &pr->ndpr_addr); prelist_remove(pr); } splx(s); } else { /* router becomes host */ struct socket so; /* XXX: init dummy so */ bzero(&so, sizeof(so)); while(!LIST_EMPTY(&rr_prefix)) delete_each_prefix(&so, LIST_FIRST(&rr_prefix), PR_ORIG_KERNEL); } return (error); } SYSCTL_OID(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLTYPE_INT|CTLFLAG_RW, &ip6_forwarding, 0, sysctl_ip6_forwarding, "I", ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_RW, &ip6_sendredirects, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW, &ip6_defhlim, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, CTLFLAG_RW, &ip6_maxfragpackets, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_RW, &ip6_accept_rtadv, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, CTLFLAG_RW, &ip6_keepfaith, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, CTLFLAG_RW, &ip6_log_interval, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit, CTLFLAG_RW, &ip6_hdrnestlimit, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, CTLFLAG_RW, &ip6_dad_count, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel, CTLFLAG_RW, &ip6_auto_flowlabel, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim, CTLFLAG_RW, &ip6_defmcasthlim, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW, &ip6_gif_hlim, 0, ""); SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version, CTLFLAG_RD, __KAME_VERSION, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated, CTLFLAG_RW, &ip6_use_deprecated, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune, CTLFLAG_RW, &ip6_rr_prune, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAPPED_ADDR, mapped_addr, CTLFLAG_RW, &ip6_mapped_addr_on, 0, ""); /* net.inet6.icmp6 */ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept, CTLFLAG_RW, &icmp6_rediraccept, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout, CTLFLAG_RW, &icmp6_redirtimeout, 0, ""); SYSCTL_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RD, &icmp6stat, icmp6stat, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRRATELIMIT, errratelimit, CTLFLAG_RW, &icmp6errratelim, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW, &nd6_prune, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_RW, &nd6_delay, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries, CTLFLAG_RW, &nd6_umaxtries, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries, CTLFLAG_RW, &nd6_mmaxtries, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback, CTLFLAG_RW, &nd6_useloopback, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PROXYALL, nd6_proxyall, CTLFLAG_RW, &nd6_proxyall, 0, ""); Index: head/sys/netinet6/udp6_usrreq.c =================================================================== --- head/sys/netinet6/udp6_usrreq.c (revision 62572) +++ head/sys/netinet6/udp6_usrreq.c (revision 62573) @@ -1,837 +1,837 @@ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /*IPSEC*/ #include "faith.h" /* * UDP protocol inplementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static int in6_mcmatch __P((struct inpcb *, struct in6_addr *, struct ifnet *)); static int udp6_detach __P((struct socket *so)); static int in6_mcmatch(in6p, ia6, ifp) struct inpcb *in6p; register struct in6_addr *ia6; struct ifnet *ifp; { struct ip6_moptions *im6o = in6p->in6p_moptions; struct in6_multi_mship *imm; if (im6o == NULL) return 0; for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) { if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, ia6)) return 1; } return 0; } int udp6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp; register struct ip6_hdr *ip6; register struct udphdr *uh; register struct inpcb *in6p; struct mbuf *opts = 0; int off = *offp; int plen, ulen; struct sockaddr_in6 udp_in6; #if defined(NFAITH) && 0 < NFAITH if (m->m_pkthdr.rcvif) { if (m->m_pkthdr.rcvif->if_type == IFT_FAITH) { /* XXX send icmp6 host/port unreach? */ m_freem(m); return IPPROTO_DONE; } } #endif udpstat.udps_ipackets++; IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); uh = (struct udphdr *)((caddr_t)ip6 + off); ulen = ntohs((u_short)uh->uh_ulen); if (plen != ulen) { udpstat.udps_badlen++; goto bad; } /* * Checksum extended UDP header and data. */ if (uh->uh_sum == 0) udpstat.udps_nosum++; else if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) { udpstat.udps_badsum++; goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; /* * Deliver a multicast datagram to all sockets * for which the local and remote addresses and ports match * those of the incoming datagram. This allows more than * one process to receive multicasts on the same port. * (This really ought to be done for unicast datagrams as * well, but that would cause problems with existing * applications that open both address-specific sockets and * a wildcard socket listening to the same port -- they would * end up receiving duplicates of every unicast datagram. * Those applications open the multiple sockets to overcome an * inadequacy of the UDP socket interface, but for backwards * compatibility we avoid the problem here rather than * fixing the interface. Maybe 4.5BSD will remedy this?) */ /* * In a case that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet does not match with * laddr. To cure this situation, the matching is relaxed * if the receiving interface is the same as one specified * in the socket and if the destination multicast address * matches one of the multicast groups specified in the socket. */ /* * Construct sockaddr format source address. */ init_sin6(&udp_in6, m); /* general init */ udp_in6.sin6_port = uh->uh_sport; /* * KAME note: usually we drop udphdr from mbuf here. * We need udphdr for IPsec processing so we do that later. */ /* * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ last = NULL; LIST_FOREACH(in6p, &udb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->in6p_lport != uh->uh_dport) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst) && !in6_mcmatch(in6p, &ip6->ip6_dst, m->m_pkthdr.rcvif)) continue; } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src) || in6p->in6p_fport != uh->uh_sport) continue; } if (last != NULL) { struct mbuf *n; #ifdef IPSEC /* * Check AH/ESP integrity. */ if (ipsec6_in_reject_so(m, last->inp_socket)) ipsec6stat.in_polvio++; /* do not inject data into pcb */ else #endif /*IPSEC*/ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { /* * KAME NOTE: do not * m_copy(m, offset, ...) above. * sbappendaddr() expects M_PKTHDR, * and m_copy() will copy M_PKTHDR * only if offset is 0. */ if (last->in6p_flags & IN6P_CONTROLOPTS || last->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, &opts, ip6, n); m_adj(n, off + sizeof(struct udphdr)); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&udp_in6, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); udpstat.udps_fullsock++; } else sorwakeup(last->in6p_socket); opts = 0; } } last = in6p; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids searching * through all pcbs in the common case of a non-shared * port. It assumes that an application will never * clear these options after setting them. */ if ((last->in6p_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable * for a broadcast or multicast datgram.) */ udpstat.udps_noport++; udpstat.udps_noportmcast++; goto bad; } #ifdef IPSEC /* * Check AH/ESP integrity. */ if (ipsec6_in_reject_so(m, last->inp_socket)) { ipsec6stat.in_polvio++; goto bad; } #endif /*IPSEC*/ if (last->in6p_flags & IN6P_CONTROLOPTS || last->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, &opts, ip6, m); m_adj(m, off + sizeof(struct udphdr)); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&udp_in6, m, opts) == 0) { udpstat.udps_fullsock++; goto bad; } sorwakeup(last->in6p_socket); return IPPROTO_DONE; } /* * Locate pcb for datagram. */ in6p = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); if (in6p == 0) { if (log_in_vain) { char buf[INET6_ADDRSTRLEN]; strcpy(buf, ip6_sprintf(&ip6->ip6_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", buf, ntohs(uh->uh_dport), ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport)); } udpstat.udps_noport++; if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); udpstat.udps_noportmcast++; goto bad; } icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return IPPROTO_DONE; } #ifdef IPSEC /* * Check AH/ESP integrity. */ if (ipsec6_in_reject_so(m, in6p->in6p_socket)) { ipsec6stat.in_polvio++; goto bad; } #endif /*IPSEC*/ /* * Construct sockaddr format source address. * Stuff source address and datagram in user buffer. */ init_sin6(&udp_in6, m); /* general init */ udp_in6.sin6_port = uh->uh_sport; if (in6p->in6p_flags & IN6P_CONTROLOPTS || in6p->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(in6p, &opts, ip6, m); m_adj(m, off + sizeof(struct udphdr)); if (sbappendaddr(&in6p->in6p_socket->so_rcv, (struct sockaddr *)&udp_in6, m, opts) == 0) { udpstat.udps_fullsock++; goto bad; } sorwakeup(in6p->in6p_socket); return IPPROTO_DONE; bad: if (m) m_freem(m); if (opts) m_freem(opts); return IPPROTO_DONE; } void udp6_ctlinput(cmd, sa, d) int cmd; struct sockaddr *sa; void *d; { register struct udphdr *uhp; struct udphdr uh; struct sockaddr_in6 sa6; struct ip6_hdr *ip6; struct mbuf *m; int off; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; off = 0; if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; } else { m = NULL; ip6 = NULL; } /* translate addresses into internal form */ sa6 = *(struct sockaddr_in6 *)sa; if (m != NULL && IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr)) sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ struct in6_addr s; /* translate addresses into internal form */ memcpy(&s, &ip6->ip6_src, sizeof(s)); if (IN6_IS_ADDR_LINKLOCAL(&s)) s.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); if (m->m_len < off + sizeof(uh)) { /* * this should be rare case, * so we compromise on this copy... */ m_copydata(m, off, sizeof(uh), (caddr_t)&uh); uhp = &uh; } else uhp = (struct udphdr *)(mtod(m, caddr_t) + off); (void) in6_pcbnotify(&udb, (struct sockaddr *)&sa6, uhp->uh_dport, &s, uhp->uh_sport, cmd, udp_notify); } else (void) in6_pcbnotify(&udb, (struct sockaddr *)&sa6, 0, &zeroin6_addr, 0, cmd, udp_notify); } static int -udp6_getcred (SYSCTL_HANDLER_ARGS) +udp6_getcred(SYSCTL_HANDLER_ARGS) { struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error, s; error = suser(req->p); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); s = splnet(); inp = in6_pcblookup_hash(&udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1, NULL); if (!inp || !inp->inp_socket) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); out: splx(s); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,ucred", "Get the ucred of a UDP6 connection"); int udp6_output(in6p, m, addr6, control, p) register struct inpcb *in6p; struct mbuf *m; struct sockaddr *addr6; struct mbuf *control; struct proc *p; { register int ulen = m->m_pkthdr.len; int plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr laddr6; int s = 0, error = 0; struct ip6_pktopts opt, *stickyopt = in6p->in6p_outputopts; if (control) { if ((error = ip6_setpktoptions(control, &opt, suser(p))) != 0) goto release; in6p->in6p_outputopts = &opt; } if (addr6) { laddr6 = in6p->in6p_laddr; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { error = EISCONN; goto release; } /* * Must block input while temporarily connected. */ s = splnet(); /* * XXX: the user might want to overwrite the local address * via an ancillary data. */ bzero(&in6p->in6p_laddr, sizeof(struct in6_addr)); error = in6_pcbconnect(in6p, addr6, p); if (error) { splx(s); goto release; } } else { if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { error = ENOTCONN; goto release; } } /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct udphdr), M_DONTWAIT); if (m == 0) { error = ENOBUFS; if (addr6) splx(s); goto release; } /* * Stuff checksum and output datagram. */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* ip6_plen will be filled in ip6_output. */ ip6->ip6_nxt = IPPROTO_UDP; ip6->ip6_hlim = in6_selecthlim(in6p, in6p->in6p_route.ro_rt ? in6p->in6p_route.ro_rt->rt_ifp : NULL); ip6->ip6_src = in6p->in6p_laddr; ip6->ip6_dst = in6p->in6p_faddr; udp6 = (struct udphdr *)(ip6 + 1); udp6->uh_sport = in6p->in6p_lport; udp6->uh_dport = in6p->in6p_fport; udp6->uh_ulen = htons((u_short)plen); udp6->uh_sum = 0; if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP, sizeof(struct ip6_hdr), plen)) == 0) { udp6->uh_sum = 0xffff; } udpstat.udps_opackets++; #ifdef IPSEC m->m_pkthdr.rcvif = (struct ifnet *)in6p->in6p_socket; #endif /*IPSEC*/ error = ip6_output(m, in6p->in6p_outputopts, &in6p->in6p_route, IPV6_SOCKINMRCVIF, in6p->in6p_moptions, NULL); if (addr6) { in6_pcbdisconnect(in6p); in6p->in6p_laddr = laddr6; splx(s); } goto releaseopt; release: m_freem(m); releaseopt: if (control) { in6p->in6p_outputopts = stickyopt; m_freem(control); } return(error); } static int udp6_abort(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; /* ??? possible? panic instead? */ soisdisconnected(so); s = splnet(); in6_pcbdetach(inp); splx(s); return 0; } static int udp6_attach(struct socket *so, int proto, struct proc *p) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp != 0) return EINVAL; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return error; } s = splnet(); error = in_pcballoc(so, &udbinfo, p); splx(s); if (error) return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!! * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = ip_defttl; #ifdef IPSEC error = ipsec_init_policy(so, &inp->in6p_sp); if (error != 0) { in6_pcbdetach(inp); return (error); } #endif /*IPSEC*/ return 0; } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; s = splnet(); error = in_pcbbind(inp, (struct sockaddr *)&sin, p); splx(s); return error; } } s = splnet(); error = in6_pcbbind(inp, nam, p); splx(s); return error; } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; if (inp->inp_faddr.s_addr != INADDR_ANY) return EISCONN; in6_sin6_2_sin(&sin, sin6_p); s = splnet(); error = in_pcbconnect(inp, (struct sockaddr *)&sin, p); splx(s); if (error == 0) { inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; soisconnected(so); } return error; } } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return EISCONN; s = splnet(); error = in6_pcbconnect(inp, nam, p); splx(s); if (error == 0) { inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; soisconnected(so); } return error; } static int udp6_detach(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; s = splnet(); in6_pcbdetach(inp); splx(s); return 0; } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; return ((*pru->pru_disconnect)(so)); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return ENOTCONN; s = splnet(); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ return 0; } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct proc *p) { struct inpcb *inp; inp = sotoinpcb(so); if (inp == 0) { m_freem(m); return EINVAL; } if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0) { int hasv4addr; struct sockaddr_in6 *sin6 = 0; if (addr == 0) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; int error; if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; error = ((*pru->pru_send)(so, flags, m, addr, control, p)); /* addr will just be freed in sendit(). */ return error; } } return udp6_output(inp, m, addr, control, p); } struct pr_usrreqs udp6_usrreqs = { udp6_abort, pru_accept_notsupp, udp6_attach, udp6_bind, udp6_connect, pru_connect2_notsupp, in6_control, udp6_detach, udp6_disconnect, pru_listen_notsupp, in6_mapped_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, udp6_send, pru_sense_null, udp_shutdown, in6_mapped_sockaddr, sosend, soreceive, sopoll }; Index: head/sys/netncp/ncp_conn.c =================================================================== --- head/sys/netncp/ncp_conn.c (revision 62572) +++ head/sys/netncp/ncp_conn.c (revision 62573) @@ -1,540 +1,540 @@ /* * Copyright (c) 1999, Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Boris Popov. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * $FreeBSD$ * * Connection tables */ #include #include #include #include #include #include #include #include #include #include SLIST_HEAD(ncp_handle_head,ncp_handle); int ncp_burst_enabled = 1; struct ncp_conn_head conn_list={NULL}; static int ncp_conn_cnt = 0; static int ncp_next_ref = 1; static struct lock listlock; struct ncp_handle_head lhlist={NULL}; static int ncp_next_handle = 1; static struct lock lhlock; -static int ncp_sysctl_connstat (SYSCTL_HANDLER_ARGS); +static int ncp_sysctl_connstat(SYSCTL_HANDLER_ARGS); static int ncp_conn_lock_any(struct ncp_conn *conn, struct proc *p, struct ucred *cred); extern struct linker_set sysctl_net_ncp; SYSCTL_DECL(_net_ncp); SYSCTL_NODE(_net, OID_AUTO, ncp, CTLFLAG_RW, NULL, "NetWare requester"); SYSCTL_INT (_net_ncp, OID_AUTO, burst_enabled, CTLFLAG_RD, &ncp_burst_enabled, 0, ""); SYSCTL_INT (_net_ncp, OID_AUTO, conn_cnt, CTLFLAG_RD, &ncp_conn_cnt, 0, ""); SYSCTL_PROC(_net_ncp, OID_AUTO, conn_stat, CTLFLAG_RD|CTLTYPE_OPAQUE, NULL, 0, ncp_sysctl_connstat, "S,connstat", "Connections list"); MALLOC_DEFINE(M_NCPDATA, "NCP data", "NCP private data"); int ncp_conn_init(void) { lockinit(&listlock, PSOCK, "ncpll", 0, 0); lockinit(&lhlock, PSOCK, "ncplh", 0, 0); return 0; } int ncp_conn_locklist(int flags, struct proc *p){ return lockmgr(&listlock, flags | LK_CANRECURSE, 0, p); } void ncp_conn_unlocklist(struct proc *p){ lockmgr(&listlock, LK_RELEASE, 0, p); } int ncp_conn_access(struct ncp_conn *conn, struct ucred *cred, mode_t mode) { int error; if (cred == NOCRED || ncp_suser(cred) == 0 || cred->cr_uid == conn->nc_owner->cr_uid) return 0; mode >>= 3; if (!groupmember(conn->nc_group, cred)) mode >>= 3; error = (conn->li.access_mode & mode) == mode ? 0 : EACCES; return error; } int ncp_conn_lock_any(struct ncp_conn *conn, struct proc *p, struct ucred *cred) { int error; if (conn->nc_id == 0) return EACCES; error = lockmgr(&conn->nc_lock, LK_EXCLUSIVE | LK_CANRECURSE, 0, p); if (error == ERESTART) return EINTR; error = ncp_chkintr(conn, p); if (error) { lockmgr(&conn->nc_lock, LK_RELEASE, 0, p); return error; } if (conn->nc_id == 0) { lockmgr(&conn->nc_lock, LK_RELEASE, 0, p); return EACCES; } conn->procp = p; /* who currently operates */ conn->ucred = cred; return 0; } int ncp_conn_lock(struct ncp_conn *conn, struct proc *p, struct ucred *cred, int mode) { int error; error = ncp_conn_access(conn,cred,mode); if (error) return error; return ncp_conn_lock_any(conn, p, cred); } /* * Lock conn but unlock connlist */ static int ncp_conn_lock2(struct ncp_conn *conn, struct proc *p, struct ucred *cred, int mode) { int error; error = ncp_conn_access(conn,cred,mode); if (error) { ncp_conn_unlocklist(p); return error; } conn->nc_lwant++; ncp_conn_unlocklist(p); error = ncp_conn_lock_any(conn,p,cred); conn->nc_lwant--; if (conn->nc_lwant == 0) { wakeup(&conn->nc_lwant); } return error; } void ncp_conn_unlock(struct ncp_conn *conn, struct proc *p) { /* * note, that LK_RELASE will do wakeup() instead of wakeup_one(). * this will do a little overhead */ lockmgr(&conn->nc_lock, LK_RELEASE, 0, p); } int ncp_conn_assert_locked(struct ncp_conn *conn,char *checker, struct proc *p){ if (conn->nc_lock.lk_flags & LK_HAVE_EXCL) return 0; printf("%s: connection isn't locked!\n", checker); return EIO; } /* * create, fill with defaults and return in locked state */ int ncp_conn_alloc(struct proc *p, struct ucred *cred, struct ncp_conn **conn) { int error; struct ncp_conn *ncp; MALLOC(ncp, struct ncp_conn *, sizeof(struct ncp_conn), M_NCPDATA, M_WAITOK); if (ncp == NULL) return ENOMEM; error = 0; bzero(ncp,sizeof(*ncp)); lockinit(&ncp->nc_lock, PZERO, "ncplck", 0, 0); ncp_conn_cnt++; ncp->nc_id = ncp_next_ref++; ncp->nc_owner = cred; ncp->seq = 0; ncp->connid = 0xFFFF; ncp_conn_lock_any(ncp, p, ncp->nc_owner); *conn = ncp; ncp_conn_locklist(LK_EXCLUSIVE, p); SLIST_INSERT_HEAD(&conn_list,ncp,nc_next); ncp_conn_unlocklist(p); return (error); } /* * Remove the connection, on entry it must be locked */ int ncp_conn_free(struct ncp_conn *ncp) { int error; struct ncp_conn *ncp1; if (ncp->nc_id == 0) { printf("already!!!!\n"); return EACCES; } if (ncp==NULL) { NCPFATAL("conn==NULL !\n"); return(EIO); } error = ncp_conn_assert_locked(ncp, __FUNCTION__, ncp->procp); if (error) return error; if (ncp->ref_cnt) { NCPFATAL("there are %d referenses left\n",ncp->ref_cnt); return(EBUSY); } /* * Mark conn as died and wait for other process */ ncp->nc_id = 0; ncp_conn_unlock(ncp,ncp->procp); /* * if signal is raised - how I do react ? */ lockmgr(&ncp->nc_lock, LK_DRAIN, 0, ncp->procp); while (ncp->nc_lwant) { printf("lwant = %d\n", ncp->nc_lwant); tsleep(&ncp->nc_lwant, PZERO,"ncpdr",2*hz); } ncp_conn_locklist(LK_EXCLUSIVE, ncp->procp); /* * It is possible, that other process destroy connection while we draining, * and free it. So, we must rescan list */ SLIST_FOREACH(ncp1, &conn_list, nc_next) { if (ncp1 == ncp) break; } if (ncp1 == NULL) { ncp_conn_unlocklist(ncp->procp); return 0; } SLIST_REMOVE(&conn_list, ncp, ncp_conn, nc_next); ncp_conn_cnt--; ncp_conn_unlocklist(ncp->procp); if (ncp->li.user) free(ncp->li.user, M_NCPDATA); if (ncp->li.password) free(ncp->li.password, M_NCPDATA); crfree(ncp->nc_owner); FREE(ncp, M_NCPDATA); return (0); } /* * Lookup connection by handle, return a locked conn descriptor */ int ncp_conn_getbyref(int ref,struct proc *p,struct ucred *cred, int mode, struct ncp_conn **connpp){ struct ncp_conn *ncp; int error=0; ncp_conn_locklist(LK_SHARED, p); SLIST_FOREACH(ncp, &conn_list, nc_next) if (ncp->nc_id == ref) break; if (ncp == NULL) { ncp_conn_unlocklist(p); return(EBADF); } error = ncp_conn_lock2(ncp, p, cred, mode); if (!error) *connpp = ncp; return (error); } /* * find attached, but not logged in connection to specified server */ int ncp_conn_getattached(struct ncp_conn_args *li,struct proc *p,struct ucred *cred,int mode, struct ncp_conn **connpp){ struct ncp_conn *ncp, *ncp2=NULL; int error = 0; ncp_conn_locklist(LK_SHARED, p); SLIST_FOREACH(ncp, &conn_list, nc_next) { if ((ncp->flags & NCPFL_LOGGED) != 0 || strcmp(ncp->li.server,li->server) != 0 || ncp->li.saddr.sa_len != li->saddr.sa_len || bcmp(&ncp->li.saddr,&ncp->li.saddr,li->saddr.sa_len) != 0) continue; if (ncp_suser(cred) == 0 || cred->cr_uid == ncp->nc_owner->cr_uid) break; error = ncp_conn_access(ncp,cred,mode); if (!error && ncp2 == NULL) ncp2 = ncp; } if (ncp == NULL) ncp = ncp2; if (ncp == NULL) { ncp_conn_unlocklist(p); return(EBADF); } error = ncp_conn_lock2(ncp,p,cred,mode); if (!error) *connpp=ncp; return (error); } /* * Lookup connection by server/user pair, return a locked conn descriptor. * if li is NULL or server/user pair incomplete, try to select best connection * based on owner. * Connection selected in next order: * 1. Try to search conn with ucred owner, if li is NULL also find a primary * 2. If 1. fails try to get first suitable shared connection * 3. If 2. fails then nothing can help to poor ucred owner */ int ncp_conn_getbyli(struct ncp_conn_args *li,struct proc *p,struct ucred *cred,int mode, struct ncp_conn **connpp){ struct ncp_conn *ncp, *ncp2=NULL; int error=0, partial, haveserv; partial = (li == NULL || li->server[0] == 0 || li->user == NULL); haveserv = (li && li->server[0]); ncp_conn_locklist(LK_SHARED, p); SLIST_FOREACH(ncp, &conn_list, nc_next) { if (partial) { if (cred->cr_uid == ncp->nc_owner->cr_uid) { if (haveserv) { if (strcmp(ncp->li.server,li->server) == 0) break; } else { if (ncp->flags & NCPFL_PRIMARY) break; ncp2 = ncp; } continue; } } else { if (strcmp(ncp->li.server,li->server) != 0 || ncp->li.user == NULL || strcmp(ncp->li.user,li->user) != 0) continue; if (cred->cr_uid == ncp->nc_owner->cr_uid) break; if (ncp_suser(cred) == 0) ncp2 = ncp; } error = ncp_conn_access(ncp,cred,mode); if (!error && ncp2 == NULL) ncp2 = ncp; } if (ncp == NULL) ncp = ncp2; if (ncp == NULL) { ncp_conn_unlocklist(p); return(EBADF); } error = ncp_conn_lock2(ncp,p,cred,mode); if (!error) *connpp=ncp; return (error); } /* * Set primary connection flag, since it have sence only for an owner, * only owner can modify this flag. * connection expected to be locked. */ int ncp_conn_setprimary(struct ncp_conn *conn, int on){ struct ncp_conn *ncp=NULL; if (conn->ucred->cr_uid != conn->nc_owner->cr_uid) return EACCES; ncp_conn_locklist(LK_SHARED, conn->procp); SLIST_FOREACH(ncp, &conn_list, nc_next) { if (conn->ucred->cr_uid == ncp->nc_owner->cr_uid) ncp->flags &= ~NCPFL_PRIMARY; } ncp_conn_unlocklist(conn->procp); if (on) conn->flags |= NCPFL_PRIMARY; return 0; } /* * Lease conn to given proc, returning unique handle * problem: how locks should be applied ? */ int ncp_conn_gethandle(struct ncp_conn *conn, struct proc *p, struct ncp_handle **handle){ struct ncp_handle *refp; lockmgr(&lhlock, LK_EXCLUSIVE, 0, p); SLIST_FOREACH(refp, &lhlist, nh_next) if (refp->nh_conn == conn && p == refp->nh_proc) break; if (refp) { conn->ref_cnt++; refp->nh_ref++; *handle = refp; lockmgr(&lhlock, LK_RELEASE, 0, p); return 0; } MALLOC(refp,struct ncp_handle *,sizeof(struct ncp_handle),M_NCPDATA,M_WAITOK); if (refp == NULL) return ENOMEM; bzero(refp,sizeof(*refp)); SLIST_INSERT_HEAD(&lhlist,refp,nh_next); refp->nh_ref++; refp->nh_proc = p; refp->nh_conn = conn; refp->nh_id = ncp_next_handle++; *handle = refp; conn->ref_cnt++; lockmgr(&lhlock, LK_RELEASE, 0, p); return 0; } /* * release reference, if force - ignore refcount */ int ncp_conn_puthandle(struct ncp_handle *handle, struct proc *p, int force) { struct ncp_handle *refp = handle; lockmgr(&lhlock, LK_EXCLUSIVE, 0, p); refp->nh_ref--; refp->nh_conn->ref_cnt--; if (force) { refp->nh_conn->ref_cnt -= refp->nh_ref; refp->nh_ref = 0; } if (refp->nh_ref == 0) { SLIST_REMOVE(&lhlist, refp, ncp_handle, nh_next); FREE(refp, M_NCPDATA); } lockmgr(&lhlock, LK_RELEASE, 0, p); return 0; } /* * find a connHandle */ int ncp_conn_findhandle(int connHandle, struct proc *p, struct ncp_handle **handle) { struct ncp_handle *refp; lockmgr(&lhlock, LK_SHARED, 0, p); SLIST_FOREACH(refp, &lhlist, nh_next) if (refp->nh_proc == p && refp->nh_id == connHandle) break; lockmgr(&lhlock, LK_RELEASE, 0, p); if (refp == NULL) { return EBADF; } *handle = refp; return 0; } /* * Clear handles associated with specified process */ int ncp_conn_putprochandles(struct proc *p) { struct ncp_handle *hp, *nhp; int haveone = 0; lockmgr(&lhlock, LK_EXCLUSIVE, 0, p); for (hp = lhlist.slh_first; hp; hp = nhp) { nhp = hp->nh_next.sle_next; if (hp->nh_proc != p) continue; haveone = 1; hp->nh_conn->ref_cnt -= hp->nh_ref; SLIST_REMOVE(&lhlist, hp, ncp_handle, nh_next); FREE(hp, M_NCPDATA); } lockmgr(&lhlock, LK_RELEASE, 0, p); return haveone; } /* * remove references in all possible connections, * XXX - possible problem is a locked list. */ /*void ncp_conn_list_rm_ref(pid_t pid) { struct ncp_conn *ncp; ncp_conn_locklist(LK_SHARED, NULL); SLIST_FOREACH(ncp, &conn_list, nc_next) { ncp_conn_rm_ref(ncp,pid,1); } ncp_conn_unlocklist(NULL); return; } */ int ncp_conn_getinfo(struct ncp_conn *ncp, struct ncp_conn_stat *ncs) { bzero(ncs,sizeof(*ncs)); ncs->li = ncp->li; ncs->li.user = ncs->user; if (ncp->li.user) strcpy(ncs->user, ncp->li.user); ncs->li.password = NULL; ncs->connRef = ncp->nc_id; ncs->ref_cnt = ncp->ref_cnt; ncs->connid = ncp->connid; ncs->owner = ncp->nc_owner->cr_uid; ncs->group = ncp->nc_group; ncs->flags = ncp->flags; ncs->buffer_size = ncp->buffer_size; return 0; } static int -ncp_sysctl_connstat (SYSCTL_HANDLER_ARGS) { +ncp_sysctl_connstat(SYSCTL_HANDLER_ARGS) { int error; struct ncp_conn_stat ncs; struct ncp_conn *ncp; /* struct ucred *cred = req->p->p_ucred;*/ error = 0; ncp_conn_locklist(LK_SHARED, req->p); error = SYSCTL_OUT(req, &ncp_conn_cnt, sizeof(ncp_conn_cnt)); SLIST_FOREACH(ncp, &conn_list, nc_next) { if (error) break; /* I can't do conn_lock while list is locked */ ncp->nc_lwant++; if (!error) { ncp_conn_getinfo(ncp, &ncs); } else { bzero(&ncs,sizeof(ncs)); ncs.connRef = ncp->nc_id; strcpy(ncs.li.server,"***"); } ncp->nc_lwant--; error = SYSCTL_OUT(req, &ncs, sizeof(ncs)); } ncp_conn_unlocklist(req->p); return(error); } Index: head/sys/nwfs/nwfs_node.c =================================================================== --- head/sys/nwfs/nwfs_node.c (revision 62572) +++ head/sys/nwfs/nwfs_node.c (revision 62573) @@ -1,316 +1,316 @@ /* * Copyright (c) 1999, Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Boris Popov. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NWNOHASH(fhsum) (&nwhashtbl[(fhsum.f_id) & nwnodehash]) extern vop_t **nwfs_vnodeop_p; static LIST_HEAD(nwnode_hash_head,nwnode) *nwhashtbl; static u_long nwnodehash; static int nwhashlock = 0; MALLOC_DEFINE(M_NWNODE, "NWFS node", "NWFS vnode private part"); MALLOC_DEFINE(M_NWFSHASH, "NWFS hash", "NWFS has table"); -static int nwfs_sysctl_vnprint (SYSCTL_HANDLER_ARGS); +static int nwfs_sysctl_vnprint(SYSCTL_HANDLER_ARGS); extern struct linker_set sysctl_vfs_nwfs; SYSCTL_DECL(_vfs_nwfs); SYSCTL_PROC(_vfs_nwfs, OID_AUTO, vnprint, CTLFLAG_WR|CTLTYPE_OPAQUE, NULL, 0, nwfs_sysctl_vnprint, "S,vnlist", "vnode hash"); void nwfs_hash_init(void) { nwhashtbl = hashinit(desiredvnodes, M_NWFSHASH, &nwnodehash); } void nwfs_hash_free(void) { free(nwhashtbl, M_NWFSHASH); } int -nwfs_sysctl_vnprint (SYSCTL_HANDLER_ARGS) { +nwfs_sysctl_vnprint(SYSCTL_HANDLER_ARGS) { struct nwnode *np; struct nwnode_hash_head *nhpp; struct vnode *vp; int i; if (nwfs_debuglevel == 0) return 0; printf("Name:uc:hc:fid:pfid\n"); for(i = 0; i <= nwnodehash; i++) { nhpp = &nwhashtbl[i]; for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { vp = NWTOV(np); printf("%s:%d:%d:%d:%d\n",np->n_name,vp->v_usecount,vp->v_holdcnt, np->n_fid.f_id, np->n_fid.f_parent); } } return 0; } /* * Allocate new nwfsnode/vnode from given nwnode. * Vnode referenced and not locked. */ int nwfs_allocvp(struct mount *mp, ncpfid fid, struct vnode **vpp) { struct proc *p = curproc; /* XXX */ struct nwnode *np, *np2; struct nwnode_hash_head *nhpp; struct vnode *vp; int error; retry: nhpp = NWNOHASH(fid); loop: for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { vp = NWTOV(np); if (mp != vp->v_mount || !NWCMPF(&fid, &np->n_fid)) continue; if (vget(vp, LK_EXCLUSIVE, p)) goto loop; *vpp = vp; return(0); } /* lock list, or waiting in malloc can cause problems */ if (nwhashlock) { while(nwhashlock) { nwhashlock = -1; tsleep((caddr_t) &nwhashlock, PVM, "nwfsvp", 0); } goto loop; } nwhashlock = 1; /* * Do the MALLOC before the getnewvnode since doing so afterward * might cause a bogus v_data pointer to get dereferenced * elsewhere if MALLOC should block. */ MALLOC(np, struct nwnode *, sizeof *np, M_NWNODE, M_WAITOK); error = getnewvnode(VT_NWFS, mp, nwfs_vnodeop_p, &vp); if (error) { if (nwhashlock < 0) wakeup(&nwhashlock); nwhashlock = 0; *vpp = 0; FREE(np, M_NWNODE); return (error); } *vpp = vp; bzero(np,sizeof(*np)); vp->v_data = np; np->n_vnode = vp; np->n_mount = VFSTONWFS(mp); np->n_fid = fid; for (np2 = nhpp->lh_first; np2 != 0; np2 = np->n_hash.le_next) { if (mp != NWTOV(np2)->v_mount || !NWCMPF(&fid, &np2->n_fid)) continue; vrele(vp); FREE(np, M_NWNODE); if (nwhashlock < 0) wakeup(&nwhashlock); nwhashlock = 0; goto retry; } LIST_INSERT_HEAD(nhpp, np, n_hash); if (nwhashlock < 0) wakeup(&nwhashlock); nwhashlock = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); np->n_flag |= NNEW; return (error); } int nwfs_lookupnp(struct nwmount *nmp, ncpfid fid, struct nwnode **npp) { struct nwnode *np; struct nwnode_hash_head *nhpp; nhpp = NWNOHASH(fid); for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { if (nmp != np->n_mount || !NWCMPF(&fid, &np->n_fid)) continue; *npp = np; return(0); } return ENOENT; } /* * Free nwnode, and give vnode back to system */ int nwfs_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *dvp = NULL, *vp = ap->a_vp; struct nwnode *dnp, *np = VTONW(vp); struct nwmount *nmp=VTONWFS(vp); NCPVNDEBUG("%s,%d\n", np->n_name, vp->v_usecount); if (np->n_refparent) { np->n_refparent = 0; if (nwfs_lookupnp(nmp, np->n_parent, &dnp) == 0) { dvp = dnp->n_vnode; } else { NCPVNDEBUG("%s: has no parent ?\n",np->n_name); } } LIST_REMOVE(np, n_hash); cache_purge(vp); if (nmp->n_root == np) { nmp->n_root = NULL; } vp->v_data = NULL; FREE(np, M_NWNODE); if (dvp) { vrele(dvp); } return (0); } int nwfs_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct ucred *cred = p->p_ucred; struct vnode *vp = ap->a_vp; struct nwnode *np = VTONW(vp); int error; NCPVNDEBUG("%s: %d\n", VTONW(vp)->n_name, vp->v_usecount); if (np->opened) { error = nwfs_vinvalbuf(vp, V_SAVE, cred, p, 1); error = ncp_close_file(NWFSTOCONN(VTONWFS(vp)), &np->n_fh, p, cred); np->opened = 0; } VOP_UNLOCK(vp, 0, p); return (0); } /* * routines to maintain vnode attributes cache * nwfs_attr_cacheenter: unpack np.i to va structure */ void nwfs_attr_cacheenter(struct vnode *vp, struct nw_entry_info *fi) { struct nwnode *np = VTONW(vp); struct nwmount *nmp = VTONWFS(vp); register struct vattr *va = &np->n_vattr; va->va_type = vp->v_type; /* vnode type (for create) */ if (vp->v_type == VREG) { if (va->va_size != fi->dataStreamSize) { va->va_size = fi->dataStreamSize; vnode_pager_setsize(vp, va->va_size); } va->va_mode = nmp->m.file_mode; /* files access mode and type */ } else if (vp->v_type == VDIR) { va->va_size = 16384; /* should be a better way ... */ va->va_mode = nmp->m.dir_mode; /* files access mode and type */ } else return; np->n_size = va->va_size; va->va_nlink = 1; /* number of references to file */ va->va_uid = nmp->m.uid; /* owner user id */ va->va_gid = nmp->m.gid; /* owner group id */ va->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; va->va_fileid = np->n_fid.f_id; /* file id */ if (va->va_fileid == 0) va->va_fileid = NWFS_ROOT_INO; va->va_blocksize=nmp->connh->nh_conn->buffer_size;/* blocksize preferred for i/o */ /* time of last modification */ ncp_dos2unixtime(fi->modifyDate, fi->modifyTime, 0, nmp->m.tz, &va->va_mtime); /* time of last access */ ncp_dos2unixtime(fi->lastAccessDate, 0, 0, nmp->m.tz, &va->va_atime); va->va_ctime = va->va_mtime; /* time file changed */ va->va_gen = VNOVAL; /* generation number of file */ va->va_flags = 0; /* flags defined for file */ va->va_rdev = VNOVAL; /* device the special file represents */ va->va_bytes = va->va_size; /* bytes of disk space held by file */ va->va_filerev = 0; /* file modification number */ va->va_vaflags = 0; /* operations flags */ np->n_vattr = *va; if (np->n_mtime == 0) { np->n_mtime = va->va_mtime.tv_sec; } np->n_atime = time_second; return; } int nwfs_attr_cachelookup(struct vnode *vp, struct vattr *va) { struct nwnode *np = VTONW(vp); int diff; diff = time_second - np->n_atime; if (diff > 2) { /* XXX should be configurable */ return ENOENT; } *va = np->n_vattr; return 0; } Index: head/sys/pc98/cbus/clock.c =================================================================== --- head/sys/pc98/cbus/clock.c (revision 62572) +++ head/sys/pc98/cbus/clock.c (revision 62573) @@ -1,1621 +1,1621 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Routines to handle clock hardware. */ /* * inittodr, settodr and support routines written * by Christoph Robitschko * * reintroduced and updated by Chris Stenton 8/10/94 */ /* * modified for PC98 by Kakefuda */ #include "opt_clock.h" #include "apm.h" #include #include #include #include #include #include #ifndef SMP #include #endif #include #include #include #ifdef CLK_CALIBRATION_LOOP #endif #include #include #include #include #include #include #ifdef APIC_IO #include #endif #if defined(SMP) || defined(APIC_IO) #include #endif /* SMP || APIC_IO */ #include #include #ifdef PC98 #include #include #include #else #include #include #endif #include #include #include #include "mca.h" #if NMCA > 0 #include #endif #ifdef SMP #define disable_intr() CLOCK_DISABLE_INTR() #define enable_intr() CLOCK_ENABLE_INTR() #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif #endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) #define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x)) /* * Time in timer cycles that it takes for microtime() to disable interrupts * and latch the count. microtime() currently uses "cli; outb ..." so it * normally takes less than 2 timer cycles. Add a few for cache misses. * Add a few more to allow for latency in bogus calls to microtime() with * interrupts already disabled. */ #define TIMER0_LATCH_COUNT 20 /* * Maximum frequency that we are willing to allow for timer0. Must be * low enough to guarantee that the timer interrupt handler returns * before the next timer interrupt. */ #define TIMER0_MAX_FREQ 20000 int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int disable_rtc_set; /* disable resettodr() if != 0 */ volatile u_int idelayed; int statclock_disable; u_int stat_imask = SWI_LOW_MASK; #ifndef TIMER_FREQ #ifdef PC98 #define TIMER_FREQ 2457600; #else /* IBM-PC */ #define TIMER_FREQ 1193182; #endif /* PC98 */ #endif u_int timer_freq = TIMER_FREQ; int timer0_max_count; u_int tsc_freq; int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; static u_int clk_imask = HWI_MASK | SWI_MASK; static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; /* * XXX new_function and timer_func should not handle clockframes, but * timer_func currently needs to hold hardclock to handle the * timer0_state == 0 case. We should use inthand_add()/inthand_remove() * to switch between clkintr() and a slightly different timerintr(). */ static void (*new_function) __P((struct clockframe *frame)); static u_int new_rate; #ifndef PC98 static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; #endif static u_int timer0_prescaler_count; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer0_state; #ifdef PC98 static u_char timer1_state; #endif static u_char timer2_state; static void (*timer_func) __P((struct clockframe *frame)) = hardclock; #ifdef PC98 static void rtc_serialcombit __P((int)); static void rtc_serialcom __P((int)); static int rtc_inb __P((void)); static void rtc_outb __P((int)); #endif static u_int tsc_present; static unsigned i8254_get_timecount __P((struct timecounter *tc)); static unsigned tsc_get_timecount __P((struct timecounter *tc)); static void set_timer_freq(u_int freq, int intr_freq); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "TSC" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, &tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", ""); static struct timecounter i8254_timecounter = { i8254_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "i8254" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", ""); static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { disable_intr(); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += timer0_max_count; i8254_lastcount = 0; } clkintr_pending = 0; enable_intr(); } timer_func(&frame); switch (timer0_state) { case RELEASED: setdelayed(); break; case ACQUIRED: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); setdelayed(); } break; case ACQUIRE_PENDING: disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); break; case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); setdelayed(); } break; } #if NMCA > 0 /* Reset clock interrupt by asserting bit 7 of port 0x61 */ if (MCA_system) outb(0x61, inb(0x61) | 0x80); #endif } /* * The acquire and release functions must be called at ipl >= splclock(). */ int acquire_timer0(int rate, void (*function) __P((struct clockframe *frame))) { static int old_rate; if (rate <= 0 || rate > TIMER0_MAX_FREQ) return (-1); switch (timer0_state) { case RELEASED: timer0_state = ACQUIRE_PENDING; break; case RELEASE_PENDING: if (rate != old_rate) return (-1); /* * The timer has been released recently, but is being * re-acquired before the release completed. In this * case, we simply reclaim it as if it had not been * released at all. */ timer0_state = ACQUIRED; break; default: return (-1); /* busy */ } new_function = function; old_rate = new_rate = rate; return (0); } #ifdef PC98 int acquire_timer1(int mode) { if (timer1_state != RELEASED) return (-1); timer1_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL1 | (mode & 0x3f)); return (0); } #endif int acquire_timer2(int mode) { if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); return (0); } int release_timer0() { switch (timer0_state) { case ACQUIRED: timer0_state = RELEASE_PENDING; break; case ACQUIRE_PENDING: /* Nothing happened yet, release quickly. */ timer0_state = RELEASED; break; default: return (-1); } return (0); } #ifdef PC98 int release_timer1() { if (timer1_state != ACQUIRED) return (-1); timer1_state = RELEASED; outb(TIMER_MODE, TIMER_SEL1 | TIMER_SQWAVE | TIMER_16BIT); return (0); } #endif int release_timer2() { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); return (0); } #ifndef PC98 /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static void rtcintr(struct clockframe frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) statclock(&frame); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ #endif /* for PC98 */ static int getit(void) { u_long ef; int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); write_eflags(ef); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Guard against the timer being uninitialized if we are called * early for console i/o. */ if (timer0_max_count == 0) set_timer_freq(timer_freq, hz); /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += timer0_max_count; /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void sysbeepstop(void *chan) { #ifdef PC98 /* PC98 */ outb(IO_PPI, inb(IO_PPI)|0x08); /* disable counter1 output to speaker */ release_timer1(); #else outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ release_timer2(); #endif beeping = 0; } int sysbeep(int pitch, int period) { int x = splclock(); #ifdef PC98 if (acquire_timer1(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(0x3fdb, pitch); outb(0x3fdb, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter1 output to speaker */ outb(IO_PPI, (inb(IO_PPI) & 0xf7)); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } #else if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } #endif splx(x); return (0); } #ifndef PC98 /* * RTC support routines */ int rtcin(reg) int reg; { int s; u_char val; s = splhigh(); outb(IO_RTC, reg); inb(0x84); val = inb(IO_RTC + 1); inb(0x84); splx(s); return (val); } static __inline void writertc(u_char reg, u_char val) { int s; s = splhigh(); inb(0x84); outb(IO_RTC, reg); inb(0x84); outb(IO_RTC + 1, val); inb(0x84); /* XXX work around wrong order in rtcin() */ splx(s); } static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } #endif #ifdef PC98 unsigned int delaycount; #define FIRST_GUESS 0x2000 static void findcpuspeed(void) { int i; int remainder; /* Put counter in count down mode */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_16BIT | TIMER_RATEGEN); outb(TIMER_CNTR0, 0xff); outb(TIMER_CNTR0, 0xff); for (i = FIRST_GUESS; i; i--) ; remainder = getit(); delaycount = (FIRST_GUESS * TIMER_DIV(1000)) / (0xffff - remainder); } #endif #ifdef PC98 static u_int calibrate_clocks(void) { int timeout; u_int count, prev_count, tot_count; u_short sec, start_sec; if (bootverbose) printf("Calibrating clock(s) ... "); /* Check ARTIC. */ if (!(PC98_SYSTEM_PARAMETER(0x458) & 0x80) && !(PC98_SYSTEM_PARAMETER(0x45b) & 0x04)) goto fail; timeout = 100000000; /* Read the ARTIC. */ sec = inw(0x5e); /* Wait for the ARTIC to changes. */ start_sec = sec; for (;;) { sec = inw(0x5e); if (sec != start_sec) break; if (--timeout == 0) goto fail; } prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) wrmsr(0x10, 0LL); /* XXX 0x10 is the MSR for the TSC */ start_sec = sec; for (;;) { sec = inw(0x5e); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if ((sec == start_sec + 1200) || (sec < start_sec && (u_int)sec + 0x10000 == (u_int)start_sec + 1200)) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc(); if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } #else static u_int calibrate_clocks(void) { u_int64_t old_tsc; u_int count, prev_count, tot_count; int sec, start_sec, timeout; if (bootverbose) printf("Calibrating clock(s) ... "); if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto fail; timeout = 100000000; /* Read the mc146818A seconds counter. */ for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); break; } if (--timeout == 0) goto fail; } /* Wait for the mC146818A seconds counter to change. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); if (sec != start_sec) break; } if (--timeout == 0) goto fail; } /* Start keeping track of the i8254 counter. */ prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) old_tsc = rdtsc(); else old_tsc = 0; /* shut up gcc */ /* * Wait for the mc146818A seconds counter to change. Read the i8254 * counter for each iteration since this is convenient and only * costs a few usec of inaccuracy. The timing of the final reads * of the counters almost matches the timing of the initial reads, * so the main cause of inaccuracy is the varying latency from * inside getit() or rtcin(RTC_STATUSA) to the beginning of the * rtcin(RTC_SEC) that returns a changed seconds count. The * maximum inaccuracy from this cause is < 10 usec on 486's. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) sec = rtcin(RTC_SEC); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if (sec != start_sec) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc() - old_tsc; if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } #endif /* !PC98 */ static void set_timer_freq(u_int freq, int intr_freq) { u_long ef; int new_timer0_max_count; ef = read_eflags(); disable_intr(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { timer0_max_count = new_timer0_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); write_eflags(ef); } /* * i8254_restore is called from apm_default_resume() to reload * the countdown register. * this should not be necessary but there are broken laptops that * do not restore the countdown register on resume. * when it happnes, it messes up the hardclock interval and system clock, * which leads to the infamous "calcru: negative time" problem. */ void i8254_restore(void) { u_long ef; ef = read_eflags(); disable_intr(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); write_eflags(ef); } /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. */ void startrtclock() { u_int delta, freq; #ifdef PC98 findcpuspeed(); if (pc98_machine_type & M_8M) timer_freq = 1996800L; /* 1.9968 MHz */ else timer_freq = 2457600L; /* 2.4576 MHz */ #endif /* PC98 */ if (cpu_feature & CPUID_TSC) tsc_present = 1; else tsc_present = 0; #ifndef PC98 writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); #endif set_timer_freq(timer_freq, hz); freq = calibrate_clocks(); #ifdef CLK_CALIBRATION_LOOP if (bootverbose) { printf( "Press a key on the console to abort clock calibration\n"); while (cncheckc() == -1) calibrate_clocks(); } #endif /* * Use the calibrated i8254 frequency if it seems reasonable. * Otherwise use the default, and don't use the calibrated i586 * frequency. */ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq; if (delta < timer_freq / 100) { #ifndef CLK_USE_I8254_CALIBRATION if (bootverbose) printf( "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); freq = timer_freq; #endif timer_freq = freq; } else { if (bootverbose) printf( "%d Hz differs from default of %d Hz by more than 1%%\n", freq, timer_freq); tsc_freq = 0; } set_timer_freq(timer_freq, hz); i8254_timecounter.tc_frequency = timer_freq; tc_init(&i8254_timecounter); #ifndef CLK_USE_TSC_CALIBRATION if (tsc_freq != 0) { if (bootverbose) printf( "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); tsc_freq = 0; } #endif if (tsc_present && tsc_freq == 0) { /* * Calibration of the i586 clock relative to the mc146818A * clock failed. Do a less accurate calibration relative * to the i8254 clock. */ u_int64_t old_tsc = rdtsc(); DELAY(1000000); tsc_freq = rdtsc() - old_tsc; #ifdef CLK_USE_TSC_CALIBRATION if (bootverbose) printf("TSC clock: %u Hz (Method B)\n", tsc_freq); #endif } #if !defined(SMP) /* * We can not use the TSC in SMP mode, until we figure out a * cheap (impossible), reliable and precise (yeah right!) way * to synchronize the TSCs of all the CPUs. * Curse Intel for leaving the counter out of the I/O APIC. */ #if NAPM > 0 /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 looses too of course * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ return; #endif /* NAPM > 0 */ if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } #endif /* !defined(SMP) */ } #ifdef PC98 static void rtc_serialcombit(int i) { outb(IO_RTC, ((i&0x01)<<5)|0x07); DELAY(1); outb(IO_RTC, ((i&0x01)<<5)|0x17); DELAY(1); outb(IO_RTC, ((i&0x01)<<5)|0x07); DELAY(1); } static void rtc_serialcom(int i) { rtc_serialcombit(i&0x01); rtc_serialcombit((i&0x02)>>1); rtc_serialcombit((i&0x04)>>2); rtc_serialcombit((i&0x08)>>3); outb(IO_RTC, 0x07); DELAY(1); outb(IO_RTC, 0x0f); DELAY(1); outb(IO_RTC, 0x07); DELAY(1); } static void rtc_outb(int val) { int s; int sa = 0; for (s=0;s<8;s++) { sa = ((val >> s) & 0x01) ? 0x27 : 0x07; outb(IO_RTC, sa); /* set DI & CLK 0 */ DELAY(1); outb(IO_RTC, sa | 0x10); /* CLK 1 */ DELAY(1); } outb(IO_RTC, sa & 0xef); /* CLK 0 */ } static int rtc_inb(void) { int s; int sa = 0; for (s=0;s<8;s++) { sa |= ((inb(0x33) & 0x01) << s); outb(IO_RTC, 0x17); /* CLK 1 */ DELAY(1); outb(IO_RTC, 0x07); /* CLK 0 */ DELAY(2); } return sa; } #endif /* PC-98 */ /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { unsigned long sec, days; #ifndef PC98 int yd; #endif int year, month; int y, m, s; struct timespec ts; #ifdef PC98 int second, min, hour; #endif if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } #ifdef PC98 rtc_serialcom(0x03); /* Time Read */ rtc_serialcom(0x01); /* Register shift command. */ DELAY(20); second = bcd2bin(rtc_inb() & 0xff); /* sec */ min = bcd2bin(rtc_inb() & 0xff); /* min */ hour = bcd2bin(rtc_inb() & 0xff); /* hour */ days = bcd2bin(rtc_inb() & 0xff) - 1; /* date */ month = (rtc_inb() >> 4) & 0x0f; /* month */ for (m = 1; m < month; m++) days += daysinmonth[m-1]; year = bcd2bin(rtc_inb() & 0xff) + 1900; /* year */ /* 2000 year problem */ if (year < 1995) year += 100; if (year < 1970) goto wrong_time; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); if ((month > 2) && LEAPYEAR(year)) days ++; sec = ((( days * 24 + hour) * 60 + min) * 60 + second); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ #else /* IBM-PC */ /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; yd = days; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ #endif sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; #ifdef PC98 int wd; #endif if (disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); #ifdef PC98 rtc_serialcom(0x01); /* Register shift command. */ /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); rtc_outb(bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ rtc_outb(bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ rtc_outb(bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ wd = (tm+4)%7; for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } m++; rtc_outb(bin2bcd(tm+1)); /* Write back Day */ rtc_outb((m << 4) | wd); /* Write back Month & Weekday */ rtc_outb(bin2bcd(y%100)); /* Write back Year */ rtc_serialcom(0x02); /* Time set & Counter hold command. */ rtc_serialcom(0x00); /* Register hold command. */ #else /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); #endif } /* * Start both clocks running. */ void cpu_initclocks() { #ifdef APIC_IO int apic_8254_trial; struct intrec *clkdesc; #endif /* APIC_IO */ #ifndef PC98 int diag; if (statclock_disable) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt * flag which would normally cause the RTC to generate * interrupts. */ stat_imask = HWI_MASK | SWI_MASK; rtc_statusb = RTCSB_24HR; } else { /* Setting stathz to nonzero early helps avoid races. */ stathz = RTC_NOPROFRATE; profhz = RTC_PROFRATE; } #endif /* Finish initializing 8253 timer 0. */ #ifdef APIC_IO apic_8254_intr = isa_apic_irq(0); apic_8254_trial = 0; if (apic_8254_intr >= 0 ) { if (apic_int_type(0, 0) == 3) apic_8254_trial = 1; } else { /* look for ExtInt on pin 0 */ if (apic_int_type(0, 0) == 3) { apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); #else /* APIC_IO */ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(IRQ0); #endif /* APIC_IO */ #ifndef PC98 /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ if (statclock_disable) return; diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); #endif /* APIC_IO */ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, INTR_EXCL); #ifdef APIC_IO INTREN(APIC_IRQ8); #else INTREN(IRQ8); #endif /* APIC_IO */ writertc(RTC_STATUSB, rtc_statusb); #endif /* !PC98 */ #ifdef APIC_IO if (apic_8254_trial) { printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ if (read_intr_count(apic_8254_intr) < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ INTRDIS(1 << apic_8254_intr); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); /* * Revoke current ISA IRQ 0 assignment and * configure a fallback interrupt routing from * the 8254 Timer via the 8259 PIC to the * an ExtInt interrupt line on IOAPIC #0 intpin 0. * We reuse the low level interrupt handler number. */ if (apic_irq(0, 0) < 0) { revoke_apic_irq(apic_8254_intr); assign_apic_irq(0, 0, apic_8254_intr); } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); } } if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); else printf("APIC_IO: " "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); #endif } #ifdef APIC_IO static u_long read_intr_count(int vec) { u_long *up; up = intr_countp[vec]; if (up) return *up; return 0UL; } static void setup_8254_mixed_mode() { /* * Allow 8254 timer to INTerrupt 8259: * re-initialize master 8259: * reset; prog 4 bytes, single ICU, edge triggered */ outb(IO_ICU1, 0x13); #ifdef PC98 outb(IO_ICU1 + 2, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 2, 0x00); /* ignore slave */ outb(IO_ICU1 + 2, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 2, 0xfe); /* unmask INT0 */ #else outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 1, 0x00); /* ignore slave */ outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ #endif /* program IO APIC for type 3 INT on INT0 */ if (ext_int_setup(0, 0) < 0) panic("8254 redirect via APIC pin0 impossible!"); } #endif void setstatclockrate(int newhz) { #ifndef PC98 if (newhz == RTC_PROFRATE) rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; else rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); #endif } static int -sysctl_machdep_i8254_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is is too generic. Should use it everywhere. */ freq = timer_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { if (timer0_state != RELEASED) return (EBUSY); /* too much trouble to handle */ set_timer_freq(freq, hz); i8254_timecounter.tc_frequency = freq; tc_update(&i8254_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", ""); static int -sysctl_machdep_tsc_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (tsc_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = tsc_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { tsc_freq = freq; tsc_timecounter.tc_frequency = tsc_freq; tc_update(&tsc_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", ""); static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; u_long ef; u_int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. */ (lapic_irr1 & (1 << apic_8254_intr)))) #else (inb(IO_ICU1) & 1))) #endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; } i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); write_eflags(ef); return (count); } static unsigned tsc_get_timecount(struct timecounter *tc) { return (rdtsc()); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int attimer_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids)) <= 0) device_quiet(dev); return(result); } static int attimer_attach(device_t dev) { return(0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX restart statclock? */ { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, 1, /* no softc */ }; static devclass_t attimer_devclass; DRIVER_MODULE(attimer, isa, attimer_driver, attimer_devclass, 0, 0); Index: head/sys/pc98/cbus/pcrtc.c =================================================================== --- head/sys/pc98/cbus/pcrtc.c (revision 62572) +++ head/sys/pc98/cbus/pcrtc.c (revision 62573) @@ -1,1621 +1,1621 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Routines to handle clock hardware. */ /* * inittodr, settodr and support routines written * by Christoph Robitschko * * reintroduced and updated by Chris Stenton 8/10/94 */ /* * modified for PC98 by Kakefuda */ #include "opt_clock.h" #include "apm.h" #include #include #include #include #include #include #ifndef SMP #include #endif #include #include #include #ifdef CLK_CALIBRATION_LOOP #endif #include #include #include #include #include #include #ifdef APIC_IO #include #endif #if defined(SMP) || defined(APIC_IO) #include #endif /* SMP || APIC_IO */ #include #include #ifdef PC98 #include #include #include #else #include #include #endif #include #include #include #include "mca.h" #if NMCA > 0 #include #endif #ifdef SMP #define disable_intr() CLOCK_DISABLE_INTR() #define enable_intr() CLOCK_ENABLE_INTR() #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif #endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) #define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x)) /* * Time in timer cycles that it takes for microtime() to disable interrupts * and latch the count. microtime() currently uses "cli; outb ..." so it * normally takes less than 2 timer cycles. Add a few for cache misses. * Add a few more to allow for latency in bogus calls to microtime() with * interrupts already disabled. */ #define TIMER0_LATCH_COUNT 20 /* * Maximum frequency that we are willing to allow for timer0. Must be * low enough to guarantee that the timer interrupt handler returns * before the next timer interrupt. */ #define TIMER0_MAX_FREQ 20000 int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int disable_rtc_set; /* disable resettodr() if != 0 */ volatile u_int idelayed; int statclock_disable; u_int stat_imask = SWI_LOW_MASK; #ifndef TIMER_FREQ #ifdef PC98 #define TIMER_FREQ 2457600; #else /* IBM-PC */ #define TIMER_FREQ 1193182; #endif /* PC98 */ #endif u_int timer_freq = TIMER_FREQ; int timer0_max_count; u_int tsc_freq; int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; static u_int clk_imask = HWI_MASK | SWI_MASK; static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; /* * XXX new_function and timer_func should not handle clockframes, but * timer_func currently needs to hold hardclock to handle the * timer0_state == 0 case. We should use inthand_add()/inthand_remove() * to switch between clkintr() and a slightly different timerintr(). */ static void (*new_function) __P((struct clockframe *frame)); static u_int new_rate; #ifndef PC98 static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; #endif static u_int timer0_prescaler_count; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer0_state; #ifdef PC98 static u_char timer1_state; #endif static u_char timer2_state; static void (*timer_func) __P((struct clockframe *frame)) = hardclock; #ifdef PC98 static void rtc_serialcombit __P((int)); static void rtc_serialcom __P((int)); static int rtc_inb __P((void)); static void rtc_outb __P((int)); #endif static u_int tsc_present; static unsigned i8254_get_timecount __P((struct timecounter *tc)); static unsigned tsc_get_timecount __P((struct timecounter *tc)); static void set_timer_freq(u_int freq, int intr_freq); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "TSC" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, &tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", ""); static struct timecounter i8254_timecounter = { i8254_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "i8254" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", ""); static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { disable_intr(); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += timer0_max_count; i8254_lastcount = 0; } clkintr_pending = 0; enable_intr(); } timer_func(&frame); switch (timer0_state) { case RELEASED: setdelayed(); break; case ACQUIRED: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); setdelayed(); } break; case ACQUIRE_PENDING: disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); break; case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); setdelayed(); } break; } #if NMCA > 0 /* Reset clock interrupt by asserting bit 7 of port 0x61 */ if (MCA_system) outb(0x61, inb(0x61) | 0x80); #endif } /* * The acquire and release functions must be called at ipl >= splclock(). */ int acquire_timer0(int rate, void (*function) __P((struct clockframe *frame))) { static int old_rate; if (rate <= 0 || rate > TIMER0_MAX_FREQ) return (-1); switch (timer0_state) { case RELEASED: timer0_state = ACQUIRE_PENDING; break; case RELEASE_PENDING: if (rate != old_rate) return (-1); /* * The timer has been released recently, but is being * re-acquired before the release completed. In this * case, we simply reclaim it as if it had not been * released at all. */ timer0_state = ACQUIRED; break; default: return (-1); /* busy */ } new_function = function; old_rate = new_rate = rate; return (0); } #ifdef PC98 int acquire_timer1(int mode) { if (timer1_state != RELEASED) return (-1); timer1_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL1 | (mode & 0x3f)); return (0); } #endif int acquire_timer2(int mode) { if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); return (0); } int release_timer0() { switch (timer0_state) { case ACQUIRED: timer0_state = RELEASE_PENDING; break; case ACQUIRE_PENDING: /* Nothing happened yet, release quickly. */ timer0_state = RELEASED; break; default: return (-1); } return (0); } #ifdef PC98 int release_timer1() { if (timer1_state != ACQUIRED) return (-1); timer1_state = RELEASED; outb(TIMER_MODE, TIMER_SEL1 | TIMER_SQWAVE | TIMER_16BIT); return (0); } #endif int release_timer2() { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); return (0); } #ifndef PC98 /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static void rtcintr(struct clockframe frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) statclock(&frame); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ #endif /* for PC98 */ static int getit(void) { u_long ef; int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); write_eflags(ef); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Guard against the timer being uninitialized if we are called * early for console i/o. */ if (timer0_max_count == 0) set_timer_freq(timer_freq, hz); /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += timer0_max_count; /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void sysbeepstop(void *chan) { #ifdef PC98 /* PC98 */ outb(IO_PPI, inb(IO_PPI)|0x08); /* disable counter1 output to speaker */ release_timer1(); #else outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ release_timer2(); #endif beeping = 0; } int sysbeep(int pitch, int period) { int x = splclock(); #ifdef PC98 if (acquire_timer1(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(0x3fdb, pitch); outb(0x3fdb, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter1 output to speaker */ outb(IO_PPI, (inb(IO_PPI) & 0xf7)); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } #else if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } #endif splx(x); return (0); } #ifndef PC98 /* * RTC support routines */ int rtcin(reg) int reg; { int s; u_char val; s = splhigh(); outb(IO_RTC, reg); inb(0x84); val = inb(IO_RTC + 1); inb(0x84); splx(s); return (val); } static __inline void writertc(u_char reg, u_char val) { int s; s = splhigh(); inb(0x84); outb(IO_RTC, reg); inb(0x84); outb(IO_RTC + 1, val); inb(0x84); /* XXX work around wrong order in rtcin() */ splx(s); } static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } #endif #ifdef PC98 unsigned int delaycount; #define FIRST_GUESS 0x2000 static void findcpuspeed(void) { int i; int remainder; /* Put counter in count down mode */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_16BIT | TIMER_RATEGEN); outb(TIMER_CNTR0, 0xff); outb(TIMER_CNTR0, 0xff); for (i = FIRST_GUESS; i; i--) ; remainder = getit(); delaycount = (FIRST_GUESS * TIMER_DIV(1000)) / (0xffff - remainder); } #endif #ifdef PC98 static u_int calibrate_clocks(void) { int timeout; u_int count, prev_count, tot_count; u_short sec, start_sec; if (bootverbose) printf("Calibrating clock(s) ... "); /* Check ARTIC. */ if (!(PC98_SYSTEM_PARAMETER(0x458) & 0x80) && !(PC98_SYSTEM_PARAMETER(0x45b) & 0x04)) goto fail; timeout = 100000000; /* Read the ARTIC. */ sec = inw(0x5e); /* Wait for the ARTIC to changes. */ start_sec = sec; for (;;) { sec = inw(0x5e); if (sec != start_sec) break; if (--timeout == 0) goto fail; } prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) wrmsr(0x10, 0LL); /* XXX 0x10 is the MSR for the TSC */ start_sec = sec; for (;;) { sec = inw(0x5e); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if ((sec == start_sec + 1200) || (sec < start_sec && (u_int)sec + 0x10000 == (u_int)start_sec + 1200)) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc(); if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } #else static u_int calibrate_clocks(void) { u_int64_t old_tsc; u_int count, prev_count, tot_count; int sec, start_sec, timeout; if (bootverbose) printf("Calibrating clock(s) ... "); if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto fail; timeout = 100000000; /* Read the mc146818A seconds counter. */ for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); break; } if (--timeout == 0) goto fail; } /* Wait for the mC146818A seconds counter to change. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); if (sec != start_sec) break; } if (--timeout == 0) goto fail; } /* Start keeping track of the i8254 counter. */ prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) old_tsc = rdtsc(); else old_tsc = 0; /* shut up gcc */ /* * Wait for the mc146818A seconds counter to change. Read the i8254 * counter for each iteration since this is convenient and only * costs a few usec of inaccuracy. The timing of the final reads * of the counters almost matches the timing of the initial reads, * so the main cause of inaccuracy is the varying latency from * inside getit() or rtcin(RTC_STATUSA) to the beginning of the * rtcin(RTC_SEC) that returns a changed seconds count. The * maximum inaccuracy from this cause is < 10 usec on 486's. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) sec = rtcin(RTC_SEC); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if (sec != start_sec) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc() - old_tsc; if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } #endif /* !PC98 */ static void set_timer_freq(u_int freq, int intr_freq) { u_long ef; int new_timer0_max_count; ef = read_eflags(); disable_intr(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { timer0_max_count = new_timer0_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); write_eflags(ef); } /* * i8254_restore is called from apm_default_resume() to reload * the countdown register. * this should not be necessary but there are broken laptops that * do not restore the countdown register on resume. * when it happnes, it messes up the hardclock interval and system clock, * which leads to the infamous "calcru: negative time" problem. */ void i8254_restore(void) { u_long ef; ef = read_eflags(); disable_intr(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); write_eflags(ef); } /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. */ void startrtclock() { u_int delta, freq; #ifdef PC98 findcpuspeed(); if (pc98_machine_type & M_8M) timer_freq = 1996800L; /* 1.9968 MHz */ else timer_freq = 2457600L; /* 2.4576 MHz */ #endif /* PC98 */ if (cpu_feature & CPUID_TSC) tsc_present = 1; else tsc_present = 0; #ifndef PC98 writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); #endif set_timer_freq(timer_freq, hz); freq = calibrate_clocks(); #ifdef CLK_CALIBRATION_LOOP if (bootverbose) { printf( "Press a key on the console to abort clock calibration\n"); while (cncheckc() == -1) calibrate_clocks(); } #endif /* * Use the calibrated i8254 frequency if it seems reasonable. * Otherwise use the default, and don't use the calibrated i586 * frequency. */ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq; if (delta < timer_freq / 100) { #ifndef CLK_USE_I8254_CALIBRATION if (bootverbose) printf( "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); freq = timer_freq; #endif timer_freq = freq; } else { if (bootverbose) printf( "%d Hz differs from default of %d Hz by more than 1%%\n", freq, timer_freq); tsc_freq = 0; } set_timer_freq(timer_freq, hz); i8254_timecounter.tc_frequency = timer_freq; tc_init(&i8254_timecounter); #ifndef CLK_USE_TSC_CALIBRATION if (tsc_freq != 0) { if (bootverbose) printf( "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); tsc_freq = 0; } #endif if (tsc_present && tsc_freq == 0) { /* * Calibration of the i586 clock relative to the mc146818A * clock failed. Do a less accurate calibration relative * to the i8254 clock. */ u_int64_t old_tsc = rdtsc(); DELAY(1000000); tsc_freq = rdtsc() - old_tsc; #ifdef CLK_USE_TSC_CALIBRATION if (bootverbose) printf("TSC clock: %u Hz (Method B)\n", tsc_freq); #endif } #if !defined(SMP) /* * We can not use the TSC in SMP mode, until we figure out a * cheap (impossible), reliable and precise (yeah right!) way * to synchronize the TSCs of all the CPUs. * Curse Intel for leaving the counter out of the I/O APIC. */ #if NAPM > 0 /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 looses too of course * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ return; #endif /* NAPM > 0 */ if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } #endif /* !defined(SMP) */ } #ifdef PC98 static void rtc_serialcombit(int i) { outb(IO_RTC, ((i&0x01)<<5)|0x07); DELAY(1); outb(IO_RTC, ((i&0x01)<<5)|0x17); DELAY(1); outb(IO_RTC, ((i&0x01)<<5)|0x07); DELAY(1); } static void rtc_serialcom(int i) { rtc_serialcombit(i&0x01); rtc_serialcombit((i&0x02)>>1); rtc_serialcombit((i&0x04)>>2); rtc_serialcombit((i&0x08)>>3); outb(IO_RTC, 0x07); DELAY(1); outb(IO_RTC, 0x0f); DELAY(1); outb(IO_RTC, 0x07); DELAY(1); } static void rtc_outb(int val) { int s; int sa = 0; for (s=0;s<8;s++) { sa = ((val >> s) & 0x01) ? 0x27 : 0x07; outb(IO_RTC, sa); /* set DI & CLK 0 */ DELAY(1); outb(IO_RTC, sa | 0x10); /* CLK 1 */ DELAY(1); } outb(IO_RTC, sa & 0xef); /* CLK 0 */ } static int rtc_inb(void) { int s; int sa = 0; for (s=0;s<8;s++) { sa |= ((inb(0x33) & 0x01) << s); outb(IO_RTC, 0x17); /* CLK 1 */ DELAY(1); outb(IO_RTC, 0x07); /* CLK 0 */ DELAY(2); } return sa; } #endif /* PC-98 */ /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { unsigned long sec, days; #ifndef PC98 int yd; #endif int year, month; int y, m, s; struct timespec ts; #ifdef PC98 int second, min, hour; #endif if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } #ifdef PC98 rtc_serialcom(0x03); /* Time Read */ rtc_serialcom(0x01); /* Register shift command. */ DELAY(20); second = bcd2bin(rtc_inb() & 0xff); /* sec */ min = bcd2bin(rtc_inb() & 0xff); /* min */ hour = bcd2bin(rtc_inb() & 0xff); /* hour */ days = bcd2bin(rtc_inb() & 0xff) - 1; /* date */ month = (rtc_inb() >> 4) & 0x0f; /* month */ for (m = 1; m < month; m++) days += daysinmonth[m-1]; year = bcd2bin(rtc_inb() & 0xff) + 1900; /* year */ /* 2000 year problem */ if (year < 1995) year += 100; if (year < 1970) goto wrong_time; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); if ((month > 2) && LEAPYEAR(year)) days ++; sec = ((( days * 24 + hour) * 60 + min) * 60 + second); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ #else /* IBM-PC */ /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; yd = days; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ #endif sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; #ifdef PC98 int wd; #endif if (disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); #ifdef PC98 rtc_serialcom(0x01); /* Register shift command. */ /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); rtc_outb(bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ rtc_outb(bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ rtc_outb(bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ wd = (tm+4)%7; for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } m++; rtc_outb(bin2bcd(tm+1)); /* Write back Day */ rtc_outb((m << 4) | wd); /* Write back Month & Weekday */ rtc_outb(bin2bcd(y%100)); /* Write back Year */ rtc_serialcom(0x02); /* Time set & Counter hold command. */ rtc_serialcom(0x00); /* Register hold command. */ #else /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); #endif } /* * Start both clocks running. */ void cpu_initclocks() { #ifdef APIC_IO int apic_8254_trial; struct intrec *clkdesc; #endif /* APIC_IO */ #ifndef PC98 int diag; if (statclock_disable) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt * flag which would normally cause the RTC to generate * interrupts. */ stat_imask = HWI_MASK | SWI_MASK; rtc_statusb = RTCSB_24HR; } else { /* Setting stathz to nonzero early helps avoid races. */ stathz = RTC_NOPROFRATE; profhz = RTC_PROFRATE; } #endif /* Finish initializing 8253 timer 0. */ #ifdef APIC_IO apic_8254_intr = isa_apic_irq(0); apic_8254_trial = 0; if (apic_8254_intr >= 0 ) { if (apic_int_type(0, 0) == 3) apic_8254_trial = 1; } else { /* look for ExtInt on pin 0 */ if (apic_int_type(0, 0) == 3) { apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); #else /* APIC_IO */ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(IRQ0); #endif /* APIC_IO */ #ifndef PC98 /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ if (statclock_disable) return; diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); #endif /* APIC_IO */ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, INTR_EXCL); #ifdef APIC_IO INTREN(APIC_IRQ8); #else INTREN(IRQ8); #endif /* APIC_IO */ writertc(RTC_STATUSB, rtc_statusb); #endif /* !PC98 */ #ifdef APIC_IO if (apic_8254_trial) { printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ if (read_intr_count(apic_8254_intr) < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ INTRDIS(1 << apic_8254_intr); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); /* * Revoke current ISA IRQ 0 assignment and * configure a fallback interrupt routing from * the 8254 Timer via the 8259 PIC to the * an ExtInt interrupt line on IOAPIC #0 intpin 0. * We reuse the low level interrupt handler number. */ if (apic_irq(0, 0) < 0) { revoke_apic_irq(apic_8254_intr); assign_apic_irq(0, 0, apic_8254_intr); } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); } } if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); else printf("APIC_IO: " "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); #endif } #ifdef APIC_IO static u_long read_intr_count(int vec) { u_long *up; up = intr_countp[vec]; if (up) return *up; return 0UL; } static void setup_8254_mixed_mode() { /* * Allow 8254 timer to INTerrupt 8259: * re-initialize master 8259: * reset; prog 4 bytes, single ICU, edge triggered */ outb(IO_ICU1, 0x13); #ifdef PC98 outb(IO_ICU1 + 2, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 2, 0x00); /* ignore slave */ outb(IO_ICU1 + 2, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 2, 0xfe); /* unmask INT0 */ #else outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 1, 0x00); /* ignore slave */ outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ #endif /* program IO APIC for type 3 INT on INT0 */ if (ext_int_setup(0, 0) < 0) panic("8254 redirect via APIC pin0 impossible!"); } #endif void setstatclockrate(int newhz) { #ifndef PC98 if (newhz == RTC_PROFRATE) rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; else rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); #endif } static int -sysctl_machdep_i8254_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is is too generic. Should use it everywhere. */ freq = timer_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { if (timer0_state != RELEASED) return (EBUSY); /* too much trouble to handle */ set_timer_freq(freq, hz); i8254_timecounter.tc_frequency = freq; tc_update(&i8254_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", ""); static int -sysctl_machdep_tsc_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (tsc_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = tsc_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { tsc_freq = freq; tsc_timecounter.tc_frequency = tsc_freq; tc_update(&tsc_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", ""); static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; u_long ef; u_int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. */ (lapic_irr1 & (1 << apic_8254_intr)))) #else (inb(IO_ICU1) & 1))) #endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; } i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); write_eflags(ef); return (count); } static unsigned tsc_get_timecount(struct timecounter *tc) { return (rdtsc()); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int attimer_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids)) <= 0) device_quiet(dev); return(result); } static int attimer_attach(device_t dev) { return(0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX restart statclock? */ { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, 1, /* no softc */ }; static devclass_t attimer_devclass; DRIVER_MODULE(attimer, isa, attimer_driver, attimer_devclass, 0, 0); Index: head/sys/pc98/cbus/sio.c =================================================================== --- head/sys/pc98/cbus/sio.c (revision 62572) +++ head/sys/pc98/cbus/sio.c (revision 62573) @@ -1,5185 +1,5185 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * from: @(#)com.c 7.5 (Berkeley) 5/16/91 * from: i386/isa sio.c,v 1.234 */ #include "opt_comconsole.h" #include "opt_compat.h" #include "opt_ddb.h" #include "opt_sio.h" #include "card.h" #include "pci.h" #include "sio.h" /* * Serial driver, based on 386BSD-0.1 com driver. * Mostly rewritten to use pseudo-DMA. * Works for National Semiconductor NS8250-NS16550AF UARTs. * COM driver, based on HP dca driver. * * Changes for PC-Card integration: * - Added PC-Card driver table and handlers */ /*=============================================================== * 386BSD(98),FreeBSD-1.1x(98) com driver. * ----- * modified for PC9801 by M.Ishii * Kyoto University Microcomputer Club (KMC) * Chou "TEFUTEFU" Hirotomi * Kyoto Univ. the faculty of medicine *=============================================================== * FreeBSD-2.0.1(98) sio driver. * ----- * modified for pc98 Internal i8251 and MICRO CORE MC16550II * T.Koike(hfc01340@niftyserve.or.jp) * implement kernel device configuration * aizu@orient.center.nitech.ac.jp * * Notes. * ----- * PC98 localization based on 386BSD(98) com driver. Using its PC98 local * functions. * This driver is under debugging,has bugs. * * 1) config * options COM_MULTIPORT #if using MC16550II * device sio0 at nec? port 0x30 tty irq 4 #internal * device sio1 at nec? port 0xd2 tty irq 5 flags 0x101 #mc1 * device sio2 at nec? port 0x8d2 tty flags 0x101 #mc2 * # ~~~~~iobase ~~multi port flag * # ~ master device is sio1 * 2) device * cd /dev; MAKEDEV ttyd0 ttyd1 .. * 3) /etc/rc.serial * 57600bps is too fast for sio0(internal8251) * my ex. * #set default speed 9600 * modem() * : * stty last update: 15 Sep.1995 * * How to configure... * # options COM_MULTIPORT # support for MICROCORE MC16550II * ... comment-out this line, which will conflict with B98_01. * options "B98_01" # support for AIWA B98-01 * device sio1 at nec? port 0x00d1 tty irq ? * device sio2 at nec? port 0x00d5 tty irq ? * ... you can leave these lines `irq ?', irq will be autodetected. */ /* * Modified by Y.Takahashi of Kogakuin University. */ /* * modified for 8251(FIFO) by Seigo TANIMURA */ #ifdef PC98 #define COM_IF_INTERNAL 0x00 #define COM_IF_PC9861K_1 0x01 #define COM_IF_PC9861K_2 0x02 #define COM_IF_IND_SS_1 0x03 #define COM_IF_IND_SS_2 0x04 #define COM_IF_PIO9032B_1 0x05 #define COM_IF_PIO9032B_2 0x06 #define COM_IF_B98_01_1 0x07 #define COM_IF_B98_01_2 0x08 #define COM_IF_END1 COM_IF_B98_01_2 #define COM_IF_RSA98 0x10 /* same as COM_IF_NS16550 */ #define COM_IF_NS16550 0x11 #define COM_IF_SECOND_CCU 0x12 /* same as COM_IF_NS16550 */ #define COM_IF_MC16550II 0x13 #define COM_IF_MCRS98 0x14 /* same as COM_IF_MC16550II */ #define COM_IF_RSB3000 0x15 #define COM_IF_RSB384 0x16 #define COM_IF_MODEM_CARD 0x17 /* same as COM_IF_NS16550 */ #define COM_IF_RSA98III 0x18 #define COM_IF_ESP98 0x19 #define COM_IF_END2 COM_IF_ESP98 #endif /* PC98 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PC98 #include #include #include #else #include #endif #include #if NPCI > 0 #include #include #endif #include #include #include #ifndef SMP #include #endif #include #include #ifdef COM_ESP #include #endif #include #ifdef PC98 #include #endif #ifndef __i386__ #define disable_intr() #define enable_intr() #endif #ifdef SMP #define disable_intr() COM_DISABLE_INTR() #define enable_intr() COM_ENABLE_INTR() #endif /* SMP */ #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ #define CALLOUT_MASK 0x80 #define CONTROL_MASK 0x60 #define CONTROL_INIT_STATE 0x20 #define CONTROL_LOCK_STATE 0x40 #define DEV_TO_UNIT(dev) (MINOR_TO_UNIT(minor(dev))) #define MINOR_MAGIC_MASK (CALLOUT_MASK | CONTROL_MASK) #define MINOR_TO_UNIT(mynor) ((mynor) & ~MINOR_MAGIC_MASK) #ifdef COM_MULTIPORT /* checks in flags for multiport and which is multiport "master chip" * for a given card */ #define COM_ISMULTIPORT(flags) ((flags) & 0x01) #define COM_MPMASTER(flags) (((flags) >> 8) & 0x0ff) #define COM_NOTAST4(flags) ((flags) & 0x04) #endif /* COM_MULTIPORT */ #define COM_CONSOLE(flags) ((flags) & 0x10) #define COM_FORCECONSOLE(flags) ((flags) & 0x20) #define COM_LLCONSOLE(flags) ((flags) & 0x40) #define COM_DEBUGGER(flags) ((flags) & 0x80) #define COM_LOSESOUTINTS(flags) ((flags) & 0x08) #define COM_NOFIFO(flags) ((flags) & 0x02) #define COM_ST16650A(flags) ((flags) & 0x20000) #define COM_C_NOPROBE (0x40000) #define COM_NOPROBE(flags) ((flags) & COM_C_NOPROBE) #define COM_C_IIR_TXRDYBUG (0x80000) #define COM_IIR_TXRDYBUG(flags) ((flags) & COM_C_IIR_TXRDYBUG) #define COM_FIFOSIZE(flags) (((flags) & 0xff000000) >> 24) #ifdef PC98 #define com_emr com_msr /* Extension mode register for RSB-2000/3000 */ #endif #define com_scr 7 /* scratch register for 16450-16550 (R/W) */ #define sio_getreg(com, off) \ (bus_space_read_1((com)->bst, (com)->bsh, (off))) #define sio_setreg(com, off, value) \ (bus_space_write_1((com)->bst, (com)->bsh, (off), (value))) /* * com state bits. * (CS_BUSY | CS_TTGO) and (CS_BUSY | CS_TTGO | CS_ODEVREADY) must be higher * than the other bits so that they can be tested as a group without masking * off the low bits. * * The following com and tty flags correspond closely: * CS_BUSY = TS_BUSY (maintained by comstart(), siopoll() and * comstop()) * CS_TTGO = ~TS_TTSTOP (maintained by comparam() and comstart()) * CS_CTS_OFLOW = CCTS_OFLOW (maintained by comparam()) * CS_RTS_IFLOW = CRTS_IFLOW (maintained by comparam()) * TS_FLUSH is not used. * XXX I think TIOCSETA doesn't clear TS_TTSTOP when it clears IXON. * XXX CS_*FLOW should be CF_*FLOW in com->flags (control flags not state). */ #define CS_BUSY 0x80 /* output in progress */ #define CS_TTGO 0x40 /* output not stopped by XOFF */ #define CS_ODEVREADY 0x20 /* external device h/w ready (CTS) */ #define CS_CHECKMSR 1 /* check of MSR scheduled */ #define CS_CTS_OFLOW 2 /* use CTS output flow control */ #define CS_DTR_OFF 0x10 /* DTR held off */ #define CS_ODONE 4 /* output completed */ #define CS_RTS_IFLOW 8 /* use RTS input flow control */ #define CSE_BUSYCHECK 1 /* siobusycheck() scheduled */ static char const * const error_desc[] = { #define CE_OVERRUN 0 "silo overflow", #define CE_INTERRUPT_BUF_OVERFLOW 1 "interrupt-level buffer overflow", #define CE_TTY_BUF_OVERFLOW 2 "tty-level buffer overflow", }; #define CE_NTYPES 3 #define CE_RECORD(com, errnum) (++(com)->delta_error_counts[errnum]) /* types. XXX - should be elsewhere */ typedef u_int Port_t; /* hardware port */ typedef u_char bool_t; /* boolean */ /* queue of linear buffers */ struct lbq { u_char *l_head; /* next char to process */ u_char *l_tail; /* one past the last char to process */ struct lbq *l_next; /* next in queue */ bool_t l_queued; /* nonzero if queued */ }; /* com device structure */ struct com_s { u_int flags; /* Copy isa device flags */ u_char state; /* miscellaneous flag bits */ bool_t active_out; /* nonzero if the callout device is open */ u_char cfcr_image; /* copy of value written to CFCR */ #ifdef COM_ESP bool_t esp; /* is this unit a hayes esp board? */ #endif u_char extra_state; /* more flag bits, separate for order trick */ u_char fifo_image; /* copy of value written to FIFO */ bool_t hasfifo; /* nonzero for 16550 UARTs */ bool_t st16650a; /* Is a Startech 16650A or RTS/CTS compat */ bool_t loses_outints; /* nonzero if device loses output interrupts */ u_char mcr_image; /* copy of value written to MCR */ #ifdef COM_MULTIPORT bool_t multiport; /* is this unit part of a multiport device? */ #endif /* COM_MULTIPORT */ bool_t no_irq; /* nonzero if irq is not attached */ bool_t gone; /* hardware disappeared */ bool_t poll; /* nonzero if polling is required */ bool_t poll_output; /* nonzero if polling for output is required */ int unit; /* unit number */ int dtr_wait; /* time to hold DTR down on close (* 1/hz) */ u_int tx_fifo_size; u_int wopeners; /* # processes waiting for DCD in open() */ /* * The high level of the driver never reads status registers directly * because there would be too many side effects to handle conveniently. * Instead, it reads copies of the registers stored here by the * interrupt handler. */ u_char last_modem_status; /* last MSR read by intr handler */ u_char prev_modem_status; /* last MSR handled by high level */ u_char hotchar; /* ldisc-specific char to be handled ASAP */ u_char *ibuf; /* start of input buffer */ u_char *ibufend; /* end of input buffer */ u_char *ibufold; /* old input buffer, to be freed */ u_char *ihighwater; /* threshold in input buffer */ u_char *iptr; /* next free spot in input buffer */ int ibufsize; /* size of ibuf (not include error bytes) */ int ierroff; /* offset of error bytes in ibuf */ struct lbq obufq; /* head of queue of output buffers */ struct lbq obufs[2]; /* output buffers */ bus_space_tag_t bst; bus_space_handle_t bsh; #ifdef PC98 Port_t cmd_port; Port_t sts_port; Port_t in_modem_port; Port_t intr_ctrl_port; Port_t rsabase; /* iobase address of a I/O-DATA RSA board */ int intr_enable; int pc98_prev_modem_status; int pc98_modem_delta; int modem_car_chg_timer; int pc98_prev_siocmd; int pc98_prev_siomod; int modem_checking; int pc98_if_type; bool_t pc98_8251fifo; bool_t pc98_8251fifo_enable; #endif /* PC98 */ Port_t data_port; /* i/o ports */ #ifdef COM_ESP Port_t esp_port; #endif Port_t int_id_port; Port_t modem_ctl_port; Port_t line_status_port; Port_t modem_status_port; Port_t intr_ctl_port; /* Ports of IIR register */ struct tty *tp; /* cross reference */ /* Initial state. */ struct termios it_in; /* should be in struct tty */ struct termios it_out; /* Lock state. */ struct termios lt_in; /* should be in struct tty */ struct termios lt_out; bool_t do_timestamp; bool_t do_dcd_timestamp; struct timeval timestamp; struct timeval dcd_timestamp; struct pps_state pps; u_long bytes_in; /* statistics */ u_long bytes_out; u_int delta_error_counts[CE_NTYPES]; u_long error_counts[CE_NTYPES]; struct resource *irqres; struct resource *ioportres; void *cookie; /* * Data area for output buffers. Someday we should build the output * buffer queue without copying data. */ #ifdef PC98 int obufsize; u_char *obuf1; u_char *obuf2; #else u_char obuf1[256]; u_char obuf2[256]; #endif }; #ifdef COM_ESP static int espattach __P((struct com_s *com, Port_t esp_port)); #endif static int sioattach __P((device_t dev, int rid)); static int sio_isa_attach __P((device_t dev)); static timeout_t siobusycheck; static timeout_t siodtrwakeup; static void comhardclose __P((struct com_s *com)); static void sioinput __P((struct com_s *com)); static void siointr1 __P((struct com_s *com)); static void siointr __P((void *arg)); static int commctl __P((struct com_s *com, int bits, int how)); static int comparam __P((struct tty *tp, struct termios *t)); static swihand_t siopoll; static int sioprobe __P((device_t dev, int xrid)); static int sio_isa_probe __P((device_t dev)); static void siosettimeout __P((void)); static int siosetwater __P((struct com_s *com, speed_t speed)); static void comstart __P((struct tty *tp)); static void comstop __P((struct tty *tp, int rw)); static timeout_t comwakeup; static void disc_optim __P((struct tty *tp, struct termios *t, struct com_s *com)); #if NCARD > 0 static int sio_pccard_attach __P((device_t dev)); static int sio_pccard_detach __P((device_t dev)); static int sio_pccard_probe __P((device_t dev)); #endif /* NCARD > 0 */ #if NPCI > 0 static int sio_pci_attach __P((device_t dev)); static void sio_pci_kludge_unit __P((device_t dev)); static int sio_pci_probe __P((device_t dev)); #endif /* NPCI > 0 */ static char driver_name[] = "sio"; /* table and macro for fast conversion from a unit number to its com struct */ static devclass_t sio_devclass; #define com_addr(unit) ((struct com_s *) \ devclass_get_softc(sio_devclass, unit)) static device_method_t sio_isa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_isa_probe), DEVMETHOD(device_attach, sio_isa_attach), { 0, 0 } }; static driver_t sio_isa_driver = { driver_name, sio_isa_methods, sizeof(struct com_s), }; #if NCARD > 0 static device_method_t sio_pccard_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_pccard_probe), DEVMETHOD(device_attach, sio_pccard_attach), DEVMETHOD(device_detach, sio_pccard_detach), { 0, 0 } }; static driver_t sio_pccard_driver = { driver_name, sio_pccard_methods, sizeof(struct com_s), }; #endif /* NCARD > 0 */ #if NPCI > 0 static device_method_t sio_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_pci_probe), DEVMETHOD(device_attach, sio_pci_attach), { 0, 0 } }; static driver_t sio_pci_driver = { driver_name, sio_pci_methods, sizeof(struct com_s), }; #endif /* NPCI > 0 */ static d_open_t sioopen; static d_close_t sioclose; static d_read_t sioread; static d_write_t siowrite; static d_ioctl_t sioioctl; #define CDEV_MAJOR 28 static struct cdevsw sio_cdevsw = { /* open */ sioopen, /* close */ sioclose, /* read */ sioread, /* write */ siowrite, /* ioctl */ sioioctl, /* poll */ ttypoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ driver_name, /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ D_TTY, /* bmaj */ -1 }; int comconsole = -1; static volatile speed_t comdefaultrate = CONSPEED; #ifdef __alpha__ static volatile speed_t gdbdefaultrate = CONSPEED; #endif static u_int com_events; /* input chars + weighted output completions */ static Port_t siocniobase; static int siocnunit; static Port_t siogdbiobase; static int siogdbunit = -1; static bool_t sio_registered; static int sio_timeout; static int sio_timeouts_until_log; static struct callout_handle sio_timeout_handle = CALLOUT_HANDLE_INITIALIZER(&sio_timeout_handle); static int sio_numunits; #ifdef PC98 struct siodev { short if_type; short irq; Port_t cmd, sts, ctrl, mod; }; static int sysclock; #define COM_INT_DISABLE {int previpri; previpri=spltty(); #define COM_INT_ENABLE splx(previpri);} #define IEN_TxFLAG IEN_Tx #define COM_CARRIER_DETECT_EMULATE 0 #define PC98_CHECK_MODEM_INTERVAL (hz/10) #define DCD_OFF_TOLERANCE 2 #define DCD_ON_RECOGNITION 2 #define GET_IFTYPE(flags) ((flags >> 24) & 0x1f) #define SET_IFTYPE(type) (type << 24) #define IS_8251(if_type) (!(if_type & 0x10)) #define COM1_EXT_CLOCK 0x40000 static void commint __P((dev_t dev)); static void com_tiocm_set __P((struct com_s *com, int msr)); static void com_tiocm_bis __P((struct com_s *com, int msr)); static void com_tiocm_bic __P((struct com_s *com, int msr)); static int com_tiocm_get __P((struct com_s *com)); static int com_tiocm_get_delta __P((struct com_s *com)); static void pc98_msrint_start __P((dev_t dev)); static void com_cflag_and_speed_set __P((struct com_s *com, int cflag, int speed)); static int pc98_ttspeedtab __P((struct com_s *com, int speed)); static int pc98_get_modem_status __P((struct com_s *com)); static timeout_t pc98_check_msr; static void pc98_set_baud_rate __P((struct com_s *com, int count)); static void pc98_i8251_reset __P((struct com_s *com, int mode, int command)); static void pc98_disable_i8251_interrupt __P((struct com_s *com, int mod)); static void pc98_enable_i8251_interrupt __P((struct com_s *com, int mod)); static int pc98_check_i8251_interrupt __P((struct com_s *com)); static int pc98_i8251_get_cmd __P((struct com_s *com)); static int pc98_i8251_get_mod __P((struct com_s *com)); static void pc98_i8251_set_cmd __P((struct com_s *com, int x)); static void pc98_i8251_or_cmd __P((struct com_s *com, int x)); static void pc98_i8251_clear_cmd __P((struct com_s *com, int x)); static void pc98_i8251_clear_or_cmd __P((struct com_s *com, int clr, int x)); static int pc98_check_if_type __P((device_t dev, struct siodev *iod)); static int pc98_check_8251vfast __P((void)); static int pc98_check_8251fifo __P((void)); static void pc98_check_sysclock __P((void)); static void pc98_set_ioport __P((struct com_s *com)); #define com_int_Tx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Tx|IEN_TxEMP) #define com_int_Tx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_TxFLAG) #define com_int_Rx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Rx) #define com_int_Rx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_Rx) #define com_int_TxRx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Tx|IEN_TxEMP|IEN_Rx) #define com_int_TxRx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_TxFLAG|IEN_Rx) #define com_send_break_on(com) \ pc98_i8251_or_cmd(com,CMD8251_SBRK) #define com_send_break_off(com) \ pc98_i8251_clear_cmd(com,CMD8251_SBRK) static struct speedtab pc98speedtab[] = { /* internal RS232C interface */ { 0, 0, }, { 50, 50, }, { 75, 75, }, { 150, 150, }, { 200, 200, }, { 300, 300, }, { 600, 600, }, { 1200, 1200, }, { 2400, 2400, }, { 4800, 4800, }, { 9600, 9600, }, { 19200, 19200, }, { 38400, 38400, }, { 51200, 51200, }, { 76800, 76800, }, { 20800, 20800, }, { 31200, 31200, }, { 41600, 41600, }, { 62400, 62400, }, { -1, -1 } }; static struct speedtab pc98fast_speedtab[] = { { 9600, 0x80 | COMBRD(9600), }, { 19200, 0x80 | COMBRD(19200), }, { 38400, 0x80 | COMBRD(38400), }, { 57600, 0x80 | COMBRD(57600), }, { 115200, 0x80 | COMBRD(115200), }, { -1, -1 } }; static struct speedtab comspeedtab_pio9032b[] = { { 300, 6, }, { 600, 5, }, { 1200, 4, }, { 2400, 3, }, { 4800, 2, }, { 9600, 1, }, { 19200, 0, }, { 38400, 7, }, { -1, -1 } }; static struct speedtab comspeedtab_b98_01[] = { { 75, 11, }, { 150, 10, }, { 300, 9, }, { 600, 8, }, { 1200, 7, }, { 2400, 6, }, { 4800, 5, }, { 9600, 4, }, { 19200, 3, }, { 38400, 2, }, { 76800, 1, }, { 153600, 0, }, { -1, -1 } }; static struct speedtab comspeedtab_mc16550[] = { { 300, 1536, }, { 600, 768, }, { 1200, 384, }, { 2400, 192, }, { 4800, 96, }, { 9600, 48, }, { 19200, 24, }, { 38400, 12, }, { 57600, 8, }, { 115200, 4, }, { 153600, 3, }, { 230400, 2, }, { 460800, 1, }, { -1, -1 } }; static struct speedtab comspeedtab_rsb384[] = { { 300, 3840, }, { 600, 1920, }, { 1200, 960, }, { 2400, 480, }, { 4800, 240, }, { 9600, 120, }, { 19200, 60, }, { 38400, 30, }, { 57600, 20, }, { 115200, 10, }, { 128000, 9, }, { 144000, 8, }, { 192000, 6, }, { 230400, 5, }, { 288000, 4, }, { 384000, 3, }, { 576000, 2, }, { 1152000, 1, }, { -1, -1 } }; static struct speedtab comspeedtab_rsa[] = { { 0, 0 }, { 50, COMBRD_RSA(50) }, { 75, COMBRD_RSA(75) }, { 110, COMBRD_RSA(110) }, { 134, COMBRD_RSA(134) }, { 150, COMBRD_RSA(150) }, { 200, COMBRD_RSA(200) }, { 300, COMBRD_RSA(300) }, { 600, COMBRD_RSA(600) }, { 1200, COMBRD_RSA(1200) }, { 1800, COMBRD_RSA(1800) }, { 2400, COMBRD_RSA(2400) }, { 4800, COMBRD_RSA(4800) }, { 9600, COMBRD_RSA(9600) }, { 19200, COMBRD_RSA(19200) }, { 38400, COMBRD_RSA(38400) }, { 57600, COMBRD_RSA(57600) }, { 115200, COMBRD_RSA(115200) }, { 230400, COMBRD_RSA(230400) }, { 460800, COMBRD_RSA(460800) }, { 921600, COMBRD_RSA(921600) }, { -1, -1 } }; #endif /* PC98 */ static struct speedtab comspeedtab[] = { { 0, 0 }, { 50, COMBRD(50) }, { 75, COMBRD(75) }, { 110, COMBRD(110) }, { 134, COMBRD(134) }, { 150, COMBRD(150) }, { 200, COMBRD(200) }, { 300, COMBRD(300) }, { 600, COMBRD(600) }, { 1200, COMBRD(1200) }, { 1800, COMBRD(1800) }, { 2400, COMBRD(2400) }, { 4800, COMBRD(4800) }, { 9600, COMBRD(9600) }, { 19200, COMBRD(19200) }, { 38400, COMBRD(38400) }, { 57600, COMBRD(57600) }, { 115200, COMBRD(115200) }, { -1, -1 } }; #ifdef PC98 struct { char *name; short port_table[7]; short irr_mask; struct speedtab *speedtab; short check_irq; } if_8251_type[] = { /* COM_IF_INTERNAL */ { " (internal)", {0x30, 0x32, 0x32, 0x33, 0x35, -1, -1}, -1, pc98speedtab, 1 }, /* COM_IF_PC9861K_1 */ { " (PC9861K)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, -1, -1}, 3, NULL, 1 }, /* COM_IF_PC9861K_2 */ { " (PC9861K)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, -1, -1}, 3, NULL, 1 }, /* COM_IF_IND_SS_1 */ { " (IND-SS)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xb3, -1}, 3, comspeedtab_mc16550, 1 }, /* COM_IF_IND_SS_2 */ { " (IND-SS)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xbb, -1}, 3, comspeedtab_mc16550, 1 }, /* COM_IF_PIO9032B_1 */ { " (PIO9032B)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xb8, -1}, 7, comspeedtab_pio9032b, 1 }, /* COM_IF_PIO9032B_2 */ { " (PIO9032B)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xba, -1}, 7, comspeedtab_pio9032b, 1 }, /* COM_IF_B98_01_1 */ { " (B98-01)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xd1, 0xd3}, 7, comspeedtab_b98_01, 0 }, /* COM_IF_B98_01_2 */ { " (B98-01)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xd5, 0xd7}, 7, comspeedtab_b98_01, 0 }, }; #define PC98SIO_data_port(type) (if_8251_type[type].port_table[0]) #define PC98SIO_cmd_port(type) (if_8251_type[type].port_table[1]) #define PC98SIO_sts_port(type) (if_8251_type[type].port_table[2]) #define PC98SIO_in_modem_port(type) (if_8251_type[type].port_table[3]) #define PC98SIO_intr_ctrl_port(type) (if_8251_type[type].port_table[4]) #define PC98SIO_baud_rate_port(type) (if_8251_type[type].port_table[5]) #define PC98SIO_func_port(type) (if_8251_type[type].port_table[6]) #define I8251F_data 0x130 #define I8251F_lsr 0x132 #define I8251F_msr 0x134 #define I8251F_iir 0x136 #define I8251F_fcr 0x138 #define I8251F_div 0x13a static bus_addr_t port_table_0[] = {0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007}; static bus_addr_t port_table_1[] = {0x000, 0x002, 0x004, 0x006, 0x008, 0x00a, 0x00c, 0x00e}; static bus_addr_t port_table_8[] = {0x000, 0x100, 0x200, 0x300, 0x400, 0x500, 0x600, 0x700}; static bus_addr_t port_table_rsa[] = { 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007 }; struct { char *name; short irr_read; short irr_write; bus_addr_t *iat; bus_size_t iatsz; struct speedtab *speedtab; } if_16550a_type[] = { /* COM_IF_RSA98 */ {" (RSA-98)", -1, -1, port_table_0, IO_COMSIZE, comspeedtab}, /* COM_IF_NS16550 */ {"", -1, -1, port_table_0, IO_COMSIZE, comspeedtab}, /* COM_IF_SECOND_CCU */ {"", -1, -1, port_table_0, IO_COMSIZE, comspeedtab}, /* COM_IF_MC16550II */ {" (MC16550II)", -1, 0x1000, port_table_8, IO_COMSIZE, comspeedtab_mc16550}, /* COM_IF_MCRS98 */ {" (MC-RS98)", -1, 0x1000, port_table_8, IO_COMSIZE, comspeedtab_mc16550}, /* COM_IF_RSB3000 */ {" (RSB-3000)", 0xbf, -1, port_table_1, IO_COMSIZE, comspeedtab_rsb384}, /* COM_IF_RSB384 */ {" (RSB-384)", 0xbf, -1, port_table_1, IO_COMSIZE, comspeedtab_rsb384}, /* COM_IF_MODEM_CARD */ {"", -1, -1, port_table_0, IO_COMSIZE, comspeedtab}, /* COM_IF_RSA98III */ {" (RSA-98III)", -1, -1, port_table_rsa, 16, comspeedtab_rsa}, /* COM_IF_ESP98 */ {" (ESP98)", -1, -1, port_table_1, IO_COMSIZE, comspeedtab_mc16550}, }; #endif /* PC98 */ #ifdef COM_ESP #ifdef PC98 /* XXX configure this properly. */ static Port_t likely_com_ports[] = { 0, 0xb0, 0xb1, 0 }; static Port_t likely_esp_ports[] = { 0xc0d0, 0 }; #define ESP98_CMD1 (ESP_CMD1 * 0x100) #define ESP98_CMD2 (ESP_CMD2 * 0x100) #define ESP98_STATUS1 (ESP_STATUS1 * 0x100) #define ESP98_STATUS2 (ESP_STATUS2 * 0x100) #else /* PC98 */ /* XXX configure this properly. */ static Port_t likely_com_ports[] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, }; static Port_t likely_esp_ports[] = { 0x140, 0x180, 0x280, 0 }; #endif /* PC98 */ #endif /* * handle sysctl read/write requests for console speed * * In addition to setting comdefaultrate for I/O through /dev/console, * also set the initial and lock values for the /dev/ttyXX device * if there is one associated with the console. Finally, if the /dev/tty * device has already been open, change the speed on the open running port * itself. */ static int -sysctl_machdep_comdefaultrate (SYSCTL_HANDLER_ARGS) +sysctl_machdep_comdefaultrate(SYSCTL_HANDLER_ARGS) { int error, s; speed_t newspeed; struct com_s *com; struct tty *tp; newspeed = comdefaultrate; error = sysctl_handle_opaque(oidp, &newspeed, sizeof newspeed, req); if (error || !req->newptr) return (error); comdefaultrate = newspeed; if (comconsole < 0) /* serial console not selected? */ return (0); com = com_addr(comconsole); if (com == NULL) return (ENXIO); /* * set the initial and lock rates for /dev/ttydXX and /dev/cuaXX * (note, the lock rates really are boolean -- if non-zero, disallow * speed changes) */ com->it_in.c_ispeed = com->it_in.c_ospeed = com->lt_in.c_ispeed = com->lt_in.c_ospeed = com->it_out.c_ispeed = com->it_out.c_ospeed = com->lt_out.c_ispeed = com->lt_out.c_ospeed = comdefaultrate; /* * if we're open, change the running rate too */ tp = com->tp; if (tp && (tp->t_state & TS_ISOPEN)) { tp->t_termios.c_ispeed = tp->t_termios.c_ospeed = comdefaultrate; s = spltty(); error = comparam(tp, &tp->t_termios); splx(s); } return error; } SYSCTL_PROC(_machdep, OID_AUTO, conspeed, CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_machdep_comdefaultrate, "I", ""); #define SET_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) | (bit)) #define CLR_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) & ~(bit)) #if NCARD > 0 static int sio_pccard_probe(dev) device_t dev; { /* Do not probe IRQ - pccard doesn't turn on the interrupt line */ /* until bus_setup_intr */ SET_FLAG(dev, COM_C_NOPROBE); return (sioprobe(dev, 0)); } static int sio_pccard_attach(dev) device_t dev; { return (sioattach(dev, 0)); } /* * sio_detach - unload the driver and clear the table. * XXX TODO: * This is usually called when the card is ejected, but * can be caused by a modunload of a controller driver. * The idea is to reset the driver's view of the device * and ensure that any driver entry points such as * read and write do not hang. */ static int sio_pccard_detach(dev) device_t dev; { struct com_s *com; com = (struct com_s *) device_get_softc(dev); if (com == NULL) { device_printf(dev, "NULL com in siounload\n"); return (0); } com->gone = 1; if (com->irqres) { bus_teardown_intr(dev, com->irqres, com->cookie); bus_release_resource(dev, SYS_RES_IRQ, 0, com->irqres); } if (com->ioportres) bus_release_resource(dev, SYS_RES_IOPORT, 0, com->ioportres); if (com->tp && (com->tp->t_state & TS_ISOPEN)) { device_printf(dev, "still open, forcing close\n"); com->tp->t_gen++; ttyclose(com->tp); ttwakeup(com->tp); ttwwakeup(com->tp); } else { if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); } device_printf(dev, "unloaded\n"); return (0); } #endif /* NCARD > 0 */ #if NPCI > 0 struct pci_ids { u_int32_t type; const char *desc; int rid; }; static struct pci_ids pci_ids[] = { { 0x100812b9, "3COM PCI FaxModem", 0x10 }, { 0x048011c1, "ActionTec 56k FAX PCI Modem", 0x14 }, { 0x00000000, NULL, 0 } }; static int sio_pci_attach(dev) device_t dev; { u_int32_t type; struct pci_ids *id; type = pci_get_devid(dev); id = pci_ids; while (id->type && id->type != type) id++; if (id->desc == NULL) return (ENXIO); sio_pci_kludge_unit(dev); return (sioattach(dev, id->rid)); } /* * Don't cut and paste this to other drivers. It is a horrible kludge * which will fail to work and also be unnecessary in future versions. */ static void sio_pci_kludge_unit(dev) device_t dev; { devclass_t dc; int err; int start; int unit; unit = 0; start = 0; while (resource_int_value("sio", unit, "port", &start) == 0 && start > 0) unit++; if (device_get_unit(dev) < unit) { dc = device_get_devclass(dev); while (devclass_get_device(dc, unit)) unit++; device_printf(dev, "moving to sio%d\n", unit); err = device_set_unit(dev, unit); /* EVIL DO NOT COPY */ if (err) device_printf(dev, "error moving device %d\n", err); } } static int sio_pci_probe(dev) device_t dev; { u_int32_t type; struct pci_ids *id; type = pci_get_devid(dev); id = pci_ids; while (id->type && id->type != type) id++; if (id->desc == NULL) return (ENXIO); device_set_desc(dev, id->desc); return (sioprobe(dev, id->rid)); } #endif /* NPCI > 0 */ static struct isa_pnp_id sio_ids[] = { {0x0005d041, "Standard PC COM port"}, /* PNP0500 */ {0x0105d041, "16550A-compatible COM port"}, /* PNP0501 */ {0x0205d041, "Multiport serial device (non-intelligent 16550)"}, /* PNP0502 */ {0x1005d041, "Generic IRDA-compatible device"}, /* PNP0510 */ {0x1105d041, "Generic IRDA-compatible device"}, /* PNP0511 */ /* Devices that do not have a compatid */ {0x7602a904, NULL}, /* AEI0276 - 56K v.90 Fax Modem (LKT) */ {0x00007905, NULL}, /* AKY0000 - 56K Plug&Play Modem */ {0x01405407, NULL}, /* AZT4001 - AZT3000 PnP SOUND DEVICE, MODEM */ {0x56039008, NULL}, /* BDP0356 - Best Data 56x2 */ {0x36339008, NULL}, /* BDP3336 - Best Data Prods. 336F */ {0x0014490a, NULL}, /* BRI1400 - Boca 33.6 PnP */ {0x0015490a, NULL}, /* BRI1500 - Internal Fax Data */ {0x0034490a, NULL}, /* BRI3400 - Internal ACF Modem */ {0x00b4490a, NULL}, /* BRIB400 - Boca 56k PnP */ {0x0030320d, NULL}, /* CIR3000 - Cirrus Logic V43 */ {0x0100440e, NULL}, /* CRD0001 - Cardinal MVP288IV ? */ {0x1200c31e, NULL}, /* GVC0012 - VF1128HV-R9 (win modem?) */ {0x0303c31e, NULL}, /* GVC0303 - MaxTech 33.6 PnP D/F/V */ {0x0505c31e, NULL}, /* GVC0505 - GVC 56k Faxmodem */ {0x0050c31e, NULL}, /* GVC5000 - some GVC modem */ {0x3800f91e, NULL}, /* GWY0038 - Telepath with v.90 */ {0x9062f91e, NULL}, /* GWY6290 - Telepath with x2 Technology */ {0x0000f435, NULL}, /* MOT0000 - Motorola ModemSURFR 33.6 Intern */ {0x5015f435, NULL}, /* MOT1550 - Motorola ModemSURFR 56K Modem */ {0xf015f435, NULL}, /* MOT15F0 - Motorola VoiceSURFR 56K Modem */ {0x6045f435, NULL}, /* MOT4560 - Motorola ? */ {0x61e7a338, NULL}, /* NECE761 - 33.6Modem */ {0x39804f3f, NULL}, /* OZO8039 - Zoom 56k flex */ {0x3024a341, NULL}, /* PMC2430 - Pace 56 Voice Internal Modem */ {0x1000eb49, NULL}, /* ROK0010 - Rockwell ? */ {0x5002734a, NULL}, /* RSS0250 - 5614Jx3(G) Internal Modem */ {0xc100ad4d, NULL}, /* SMM00C1 - Leopard 56k PnP */ {0x9012b04e, NULL}, /* SUP1290 - Supra ? */ {0x1013b04e, NULL}, /* SUP1310 - SupraExpress 336i PnP */ {0x8013b04e, NULL}, /* SUP1380 - SupraExpress 288i PnP Voice */ {0x8113b04e, NULL}, /* SUP1381 - SupraExpress 336i PnP Voice */ {0x5016b04e, NULL}, /* SUP1650 - Supra 336i Sp Intl */ {0x7420b04e, NULL}, /* SUP2070 - Supra ? */ {0x8020b04e, NULL}, /* SUP2080 - Supra ? */ {0x8420b04e, NULL}, /* SUP2084 - SupraExpress 56i PnP */ {0x7121b04e, NULL}, /* SUP2171 - SupraExpress 56i Sp? */ {0x8024b04e, NULL}, /* SUP2480 - Supra ? */ {0x01007256, NULL}, /* USR0001 - U.S. Robotics Inc., Sportster W */ {0x02007256, NULL}, /* USR0002 - U.S. Robotics Inc. Sportster 33. */ {0x04007256, NULL}, /* USR0004 - USR Sportster 14.4k */ {0x06007256, NULL}, /* USR0006 - USR Sportster 33.6k */ {0x11007256, NULL}, /* USR0011 - USR ? */ {0x01017256, NULL}, /* USR0101 - USR ? */ {0x30207256, NULL}, /* USR2030 - U.S.Robotics Inc. Sportster 560 */ {0x50207256, NULL}, /* USR2050 - U.S.Robotics Inc. Sportster 33. */ {0x70207256, NULL}, /* USR2070 - U.S.Robotics Inc. Sportster 560 */ {0x30307256, NULL}, /* USR3030 - U.S. Robotics 56K FAX INT */ {0x31307256, NULL}, /* USR3031 - U.S. Robotics 56K FAX INT */ {0x50307256, NULL}, /* USR3050 - U.S. Robotics 56K FAX INT */ {0x70307256, NULL}, /* USR3070 - U.S. Robotics 56K Voice INT */ {0x90307256, NULL}, /* USR3090 - USR ? */ {0x90917256, NULL}, /* USR9190 - USR 56k Voice INT */ {0x0300695c, NULL}, /* WCI0003 - Fax/Voice/Modem/Speakphone/Asvd */ {0x61f7896a, NULL}, /* ZTIF761 - Zoom ComStar 33.6 */ #ifdef PC98 {0x0100e4a5, "RSA-98III"}, #endif {0} }; static int sio_isa_probe(dev) device_t dev; { #ifdef PC98 int logical_id; #endif /* Check isapnp ids */ if (ISA_PNP_PROBE(device_get_parent(dev), dev, sio_ids) == ENXIO) return (ENXIO); #ifdef PC98 logical_id = isa_get_logicalid(dev); if (logical_id == 0x0100e4a5) /* RSA-98III */ device_set_flags(dev, SET_IFTYPE(COM_IF_RSA98III)); #endif return (sioprobe(dev, 0)); } static int sioprobe(dev, xrid) device_t dev; int xrid; { #if 0 static bool_t already_init; device_t xdev; #endif struct com_s *com; bool_t failures[10]; int fn; device_t idev; Port_t iobase; intrmask_t irqmap[4]; intrmask_t irqs; u_char mcr_image; int result; u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; #ifdef PC98 int tmp; struct siodev iod; #endif #ifdef PC98 iod.if_type = GET_IFTYPE(flags); if ((iod.if_type < 0 || iod.if_type > COM_IF_END1) && (iod.if_type < 0x10 || iod.if_type > COM_IF_END2)) return ENXIO; #endif rid = xrid; #ifdef PC98 if (IS_8251(iod.if_type)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, 1, RF_ACTIVE); } else if (iod.if_type == COM_IF_RSA98III || isa_get_vendorid(dev)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, if_16550a_type[iod.if_type & 0x0f].iatsz, RF_ACTIVE); } else { port = isa_alloc_resourcev(dev, SYS_RES_IOPORT, &rid, if_16550a_type[iod.if_type & 0x0f].iat, if_16550a_type[iod.if_type & 0x0f].iatsz, RF_ACTIVE); } #else port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); #endif if (!port) return (ENXIO); #ifdef PC98 if (!IS_8251(iod.if_type)) { if (isa_load_resourcev(port, if_16550a_type[iod.if_type & 0x0f].iat, if_16550a_type[iod.if_type & 0x0f].iatsz) != 0) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } } #endif com = device_get_softc(dev); com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); #if 0 /* * XXX this is broken - when we are first called, there are no * previously configured IO ports. We could hard code * 0x3f8, 0x2f8, 0x3e8, 0x2e8 etc but that's probably worse. * This code has been doing nothing since the conversion since * "count" is zero the first time around. */ if (!already_init) { /* * Turn off MCR_IENABLE for all likely serial ports. An unused * port with its MCR_IENABLE gate open will inhibit interrupts * from any used port that shares the interrupt vector. * XXX the gate enable is elsewhere for some multiports. */ device_t *devs; int count, i, xioport; #ifdef PC98 int xiftype; #endif devclass_get_devices(sio_devclass, &devs, &count); #ifdef PC98 for (i = 0; i < count; i++) { xdev = devs[i]; xioport = bus_get_resource_start(xdev, SYS_RES_IOPORT, 0); xiftype = GET_IFTYPE(device_get_flags(xdev)); if (device_is_enabled(xdev) && xioport > 0) { if (IS_8251(xiftype)) outb((xioport & 0xff00) | PC98SIO_cmd_port(xiftype & 0x0f), 0xf2); else outb(xioport + if_16550a_type[xiftype & 0x0f].iat[com_mcr], 0); } } #else for (i = 0; i < count; i++) { xdev = devs[i]; if (device_is_enabled(xdev) && bus_get_resource(xdev, SYS_RES_IOPORT, 0, &xioport, NULL) == 0) outb(xioport + com_mcr, 0); } #endif free(devs, M_TEMP); already_init = TRUE; } #endif if (COM_LLCONSOLE(flags)) { printf("sio%d: reserved for low-level i/o\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } #ifdef PC98 DELAY(10); /* * If the port is i8251 UART (internal, B98_01) */ if (pc98_check_if_type(dev, &iod) == -1) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } if (iod.irq > 0) bus_set_resource(dev, SYS_RES_IRQ, 0, iod.irq, 1); if (IS_8251(iod.if_type)) { outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, CMD8251_RESET); DELAY(1000); /* for a while...*/ outb(iod.cmd, 0xf2); /* MODE (dummy) */ DELAY(10); outb(iod.cmd, 0x01); /* CMD (dummy) */ DELAY(1000); /* for a while...*/ if (( inb(iod.sts) & STS8251_TxEMP ) == 0 ) { result = (ENXIO); } if (if_8251_type[iod.if_type & 0x0f].check_irq) { COM_INT_DISABLE tmp = ( inb( iod.ctrl ) & ~(IEN_Rx|IEN_TxEMP|IEN_Tx)); outb( iod.ctrl, tmp|IEN_TxEMP ); DELAY(10); result = isa_irq_pending() ? 0 : ENXIO; outb( iod.ctrl, tmp ); COM_INT_ENABLE } else { /* * B98_01 doesn't activate TxEMP interrupt line * when being reset, so we can't check irq pending. */ result = 0; } if (epson_machine_id==0x20) { /* XXX */ result = 0; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return result; } #endif /* PC98 */ /* * If the device is on a multiport card and has an AST/4 * compatible interrupt control register, initialize this * register and prepare to leave MCR_IENABLE clear in the mcr. * Otherwise, prepare to set MCR_IENABLE in the mcr. * Point idev to the device struct giving the correct id_irq. * This is the struct for the master device if there is one. */ idev = dev; mcr_image = MCR_IENABLE; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { #ifndef PC98 Port_t xiobase; u_long io; #endif idev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); if (idev == NULL) { printf("sio%d: master device %d not configured\n", device_get_unit(dev), COM_MPMASTER(flags)); idev = dev; } #ifndef PC98 if (!COM_NOTAST4(flags)) { if (bus_get_resource(idev, SYS_RES_IOPORT, 0, &io, NULL) == 0) { xiobase = io; if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) == 0) outb(xiobase + com_scr, 0x80); else outb(xiobase + com_scr, 0); } mcr_image = 0; } #endif } #endif /* COM_MULTIPORT */ if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) mcr_image = 0; bzero(failures, sizeof failures); iobase = rman_get_start(port); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) { mcr_image = 0; outb(iobase + rsa_msr, 0x04); outb(iobase + rsa_frr, 0x00); if ((inb(iobase + rsa_srr) & 0x36) != 0x36) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } outb(iobase + rsa_ier, 0x00); outb(iobase + rsa_frr, 0x00); outb(iobase + rsa_tivsr, 0x00); outb(iobase + rsa_tcr, 0x00); } tmp = if_16550a_type[iod.if_type & 0x0f].irr_write; if (tmp != -1) { /* MC16550II */ int irqout; switch (isa_get_irq(idev)) { case 3: irqout = 4; break; case 5: irqout = 5; break; case 6: irqout = 6; break; case 12: irqout = 7; break; default: printf("sio%d: irq configuration error\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } outb((iobase & 0x00ff) | tmp, irqout); } #endif /* * We don't want to get actual interrupts, just masked ones. * Interrupts from this line should already be masked in the ICU, * but mask them in the processor as well in case there are some * (misconfigured) shared interrupts. */ disable_intr(); /* EXTRA DELAY? */ /* * Initialize the speed and the word size and wait long enough to * drain the maximum of 16 bytes of junk in device output queues. * The speed is undefined after a master reset and must be set * before relying on anything related to output. There may be * junk after a (very fast) soft reboot and (apparently) after * master reset. * XXX what about the UART bug avoided by waiting in comparam()? * We don't want to to wait long enough to drain at 2 bps. */ if (iobase == siocniobase) DELAY((16 + 1) * 1000000 / (comdefaultrate / 10)); else { #ifdef PC98 tmp = ttspeedtab(SIO_TEST_SPEED, if_16550a_type[iod.if_type & 0x0f].speedtab); sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); sio_setreg(com, com_dlbl, tmp & 0xff); sio_setreg(com, com_dlbh, (tmp >> 8) & 0xff); sio_setreg(com, com_cfcr, CFCR_8BITS); #else sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); sio_setreg(com, com_dlbl, COMBRD(SIO_TEST_SPEED) & 0xff); sio_setreg(com, com_dlbh, (u_int) COMBRD(SIO_TEST_SPEED) >> 8); sio_setreg(com, com_cfcr, CFCR_8BITS); #endif DELAY((16 + 1) * 1000000 / (SIO_TEST_SPEED / 10)); } /* * Enable the interrupt gate and disable device interupts. This * should leave the device driving the interrupt line low and * guarantee an edge trigger if an interrupt can be generated. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ irqmap[0] = isa_irq_pending(); /* * Attempt to set loopback mode so that we can send a null byte * without annoying any external device. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image | MCR_LOOPBACK); /* * Attempt to generate an output interrupt. On 8250's, setting * IER_ETXRDY generates an interrupt independent of the current * setting and independent of whether the THR is empty. On 16450's, * setting IER_ETXRDY generates an interrupt independent of the * current setting. On 16550A's, setting IER_ETXRDY only * generates an interrupt when IER_ETXRDY is not already set. */ sio_setreg(com, com_ier, IER_ETXRDY); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) outb(iobase + rsa_ier, 0x04); #endif /* * On some 16x50 incompatibles, setting IER_ETXRDY doesn't generate * an interrupt. They'd better generate one for actually doing * output. Loopback may be broken on the same incompatibles but * it's unlikely to do more than allow the null byte out. */ sio_setreg(com, com_data, 0); DELAY((1 + 2) * 1000000 / (SIO_TEST_SPEED / 10)); /* * Turn off loopback mode so that the interrupt gate works again * (MCR_IENABLE was hidden). This should leave the device driving * an interrupt line high. It doesn't matter if the interrupt * line oscillates while we are not looking at it, since interrupts * are disabled. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); /* * Some pcmcia cards have the "TXRDY bug", so we check everyone * for IIR_TXRDY implementation ( Palido 321s, DC-1S... ) */ if (COM_NOPROBE(flags)) { /* Reading IIR register twice */ for (fn = 0; fn < 2; fn ++) { DELAY(10000); failures[6] = sio_getreg(com, com_iir); } /* Check IIR_TXRDY clear ? */ result = 0; if (failures[6] & IIR_TXRDY) { /* Nop, Double check with clearing IER */ sio_setreg(com, com_ier, 0); if (sio_getreg(com, com_iir) & IIR_NOPEND) { /* Ok. we're familia this gang */ SET_FLAG(dev, COM_C_IIR_TXRDYBUG); } else { /* Unknown, Just omit this chip.. XXX */ result = ENXIO; } } else { /* OK. this is well-known guys */ CLR_FLAG(dev, COM_C_IIR_TXRDYBUG); } sio_setreg(com, com_cfcr, CFCR_8BITS); enable_intr(); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 0 : result); } /* * Check that * o the CFCR, IER and MCR in UART hold the values written to them * (the values happen to be all distinct - this is good for * avoiding false positive tests from bus echoes). * o an output interrupt is generated and its vector is correct. * o the interrupt goes away when the IIR in the UART is read. */ /* EXTRA DELAY? */ failures[0] = sio_getreg(com, com_cfcr) - CFCR_8BITS; failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ irqmap[1] = isa_irq_pending(); failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) inb(iobase + rsa_srr); #endif DELAY(1000); /* XXX */ irqmap[2] = isa_irq_pending(); failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) inb(iobase + rsa_srr); #endif /* * Turn off all device interrupts and check that they go off properly. * Leave MCR_IENABLE alone. For ports without a master port, it gates * the OUT2 output of the UART to * the ICU input. Closing the gate would give a floating ICU input * (unless there is another device driving it) and spurious interrupts. * (On the system that this was first tested on, the input floats high * and gives a (masked) interrupt as soon as the gate is closed.) */ sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); /* dummy to avoid bus echo */ failures[7] = sio_getreg(com, com_ier); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) outb(iobase + rsa_ier, 0x00); #endif DELAY(1000); /* XXX */ irqmap[3] = isa_irq_pending(); failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) { inb(iobase + rsa_srr); outb(iobase + rsa_frr, 0x00); } #endif enable_intr(); irqs = irqmap[1] & ~irqmap[0]; if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && ((1 << xirq) & irqs) == 0) printf( "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", device_get_unit(dev), xirq, irqs); if (bootverbose) printf("sio%d: irq maps: %#x %#x %#x %#x\n", device_get_unit(dev), irqmap[0], irqmap[1], irqmap[2], irqmap[3]); result = 0; for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) { sio_setreg(com, com_mcr, 0); result = ENXIO; if (bootverbose) { printf("sio%d: probe failed test(s):", device_get_unit(dev)); for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) printf(" %d", fn); printf("\n"); } break; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 0 : result); } #ifdef COM_ESP static int espattach(com, esp_port) struct com_s *com; Port_t esp_port; { u_char dips; u_char val; /* * Check the ESP-specific I/O port to see if we're an ESP * card. If not, return failure immediately. */ if ((inb(esp_port) & 0xf3) == 0) { printf(" port 0x%x is not an ESP board?\n", esp_port); return (0); } /* * We've got something that claims to be a Hayes ESP card. * Let's hope so. */ /* Get the dip-switch configuration */ #ifdef PC98 outb(esp_port + ESP98_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP98_STATUS1); #else outb(esp_port + ESP_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP_STATUS1); #endif /* * Bits 0,1 of dips say which COM port we are. */ #ifdef PC98 if ((rman_get_start(com->ioportres) & 0xff) == likely_com_ports[dips & 0x03]) #else if (rman_get_start(com->ioportres) == likely_com_ports[dips & 0x03]) #endif printf(" : ESP"); else { printf(" esp_port has com %d\n", dips & 0x03); return (0); } /* * Check for ESP version 2.0 or later: bits 4,5,6 = 010. */ #ifdef PC98 outb(esp_port + ESP98_CMD1, ESP_GETTEST); val = inb(esp_port + ESP98_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP98_STATUS2); #else outb(esp_port + ESP_CMD1, ESP_GETTEST); val = inb(esp_port + ESP_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP_STATUS2); #endif if ((val & 0x70) < 0x20) { printf("-old (%o)", val & 0x70); return (0); } /* * Check for ability to emulate 16550: bit 7 == 1 */ if ((dips & 0x80) == 0) { printf(" slave"); return (0); } /* * Okay, we seem to be a Hayes ESP card. Whee. */ com->esp = TRUE; com->esp_port = esp_port; return (1); } #endif /* COM_ESP */ static int sio_isa_attach(dev) device_t dev; { return (sioattach(dev, 0)); } static int sioattach(dev, xrid) device_t dev; int xrid; { struct com_s *com; #ifdef COM_ESP Port_t *espp; #endif Port_t iobase; int unit; u_int flags; int rid; struct resource *port; int ret; #ifdef PC98 u_char *obuf; u_long obufsize; int if_type = GET_IFTYPE(device_get_flags(dev)); #endif rid = xrid; #ifdef PC98 if (IS_8251(if_type)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, 1, RF_ACTIVE); } else if (if_type == COM_IF_RSA98III || isa_get_vendorid(dev)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, if_16550a_type[if_type & 0x0f].iatsz, RF_ACTIVE); } else { port = isa_alloc_resourcev(dev, SYS_RES_IOPORT, &rid, if_16550a_type[if_type & 0x0f].iat, if_16550a_type[if_type & 0x0f].iatsz, RF_ACTIVE); } #else port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); #endif if (!port) return (ENXIO); #ifdef PC98 if (!IS_8251(if_type)) { if (isa_load_resourcev(port, if_16550a_type[if_type & 0x0f].iat, if_16550a_type[if_type & 0x0f].iatsz) != 0) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } } #endif iobase = rman_get_start(port); unit = device_get_unit(dev); com = device_get_softc(dev); flags = device_get_flags(dev); if (unit >= sio_numunits) sio_numunits = unit + 1; #ifdef PC98 obufsize = 256; if (if_type == COM_IF_RSA98III) obufsize = 2048; if ((obuf = malloc(obufsize * 2, M_DEVBUF, M_NOWAIT)) == NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } bzero(obuf, obufsize * 2); #endif /* * sioprobe() has initialized the device registers as follows: * o cfcr = CFCR_8BITS. * It is most important that CFCR_DLAB is off, so that the * data port is not hidden when we enable interrupts. * o ier = 0. * Interrupts are only enabled when the line is open. * o mcr = MCR_IENABLE, or 0 if the port has AST/4 compatible * interrupt control register or the config specifies no irq. * Keeping MCR_DTR and MCR_RTS off might stop the external * device from sending before we are ready. */ bzero(com, sizeof *com); com->unit = unit; com->ioportres = port; com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); com->cfcr_image = CFCR_8BITS; com->dtr_wait = 3 * hz; com->loses_outints = COM_LOSESOUTINTS(flags) != 0; com->no_irq = bus_get_resource(dev, SYS_RES_IRQ, 0, NULL, NULL) != 0; com->tx_fifo_size = 1; #ifdef PC98 com->obufsize = obufsize; com->obuf1 = obuf; com->obuf2 = obuf + obufsize; #endif com->obufs[0].l_head = com->obuf1; com->obufs[1].l_head = com->obuf2; #ifdef PC98 com->pc98_if_type = if_type; if (IS_8251(if_type)) { pc98_set_ioport(com); if (if_type == COM_IF_INTERNAL && pc98_check_8251fifo()) { com->pc98_8251fifo = 1; com->pc98_8251fifo_enable = 0; } } else { bus_addr_t *iat = if_16550a_type[if_type & 0x0f].iat; com->data_port = iobase + iat[com_data]; com->int_id_port = iobase + iat[com_iir]; com->modem_ctl_port = iobase + iat[com_mcr]; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + iat[com_lsr]; com->modem_status_port = iobase + iat[com_msr]; com->intr_ctl_port = iobase + iat[com_ier]; } #else /* not PC98 */ com->data_port = iobase + com_data; com->int_id_port = iobase + com_iir; com->modem_ctl_port = iobase + com_mcr; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + com_lsr; com->modem_status_port = iobase + com_msr; com->intr_ctl_port = iobase + com_ier; #endif /* * We don't use all the flags from since they * are only relevant for logins. It's important to have echo off * initially so that the line doesn't start blathering before the * echo flag can be turned off. */ com->it_in.c_iflag = 0; com->it_in.c_oflag = 0; com->it_in.c_cflag = TTYDEF_CFLAG; com->it_in.c_lflag = 0; if (unit == comconsole) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) DELAY(100000); #endif com->it_in.c_iflag = TTYDEF_IFLAG; com->it_in.c_oflag = TTYDEF_OFLAG; com->it_in.c_cflag = TTYDEF_CFLAG | CLOCAL; com->it_in.c_lflag = TTYDEF_LFLAG; com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL; com->lt_out.c_ispeed = com->lt_out.c_ospeed = com->lt_in.c_ispeed = com->lt_in.c_ospeed = com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate; } else com->it_in.c_ispeed = com->it_in.c_ospeed = TTYDEF_SPEED; if (siosetwater(com, com->it_in.c_ispeed) != 0) { enable_intr(); /* * Leave i/o resources allocated if this is a `cn'-level * console, so that other devices can't snarf them. */ if (iobase != siocniobase) bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } enable_intr(); termioschars(&com->it_in); com->it_out = com->it_in; /* attempt to determine UART type */ printf("sio%d: type", unit); #ifndef PC98 #ifdef COM_MULTIPORT if (!COM_ISMULTIPORT(flags) && !COM_IIR_TXRDYBUG(flags)) #else if (!COM_IIR_TXRDYBUG(flags)) #endif { u_char scr; u_char scr1; u_char scr2; scr = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0xa5); scr1 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0x5a); scr2 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, scr); if (scr1 != 0xa5 || scr2 != 0x5a) { printf(" 8250"); goto determined_type; } } #endif /* !PC98 */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo && !COM_NOFIFO(flags)) com->tx_fifo_size = 16; com_int_TxRx_disable( com ); com_cflag_and_speed_set( com, com->it_in.c_cflag, comdefaultrate ); com_tiocm_bic( com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE ); com_send_break_off( com ); if (com->pc98_if_type == COM_IF_INTERNAL) { printf(" (internal%s%s)", com->pc98_8251fifo ? " fifo" : "", PC98SIO_baud_rate_port(com->pc98_if_type) != -1 ? " v-fast" : ""); } else { printf(" 8251%s", if_8251_type[com->pc98_if_type & 0x0f].name); } } else { #endif /* PC98 */ sio_setreg(com, com_fifo, FIFO_ENABLE | FIFO_RX_HIGH); DELAY(100); com->st16650a = 0; switch (inb(com->int_id_port) & IIR_FIFO_MASK) { case FIFO_RX_LOW: printf(" 16450"); break; case FIFO_RX_MEDL: printf(" 16450?"); break; case FIFO_RX_MEDH: printf(" 16550?"); break; case FIFO_RX_HIGH: if (COM_NOFIFO(flags)) { printf(" 16550A fifo disabled"); } else { com->hasfifo = TRUE; #ifdef PC98 com->tx_fifo_size = 0; /* XXX flag conflicts. */ printf(" 16550A"); #else if (COM_ST16650A(flags)) { com->st16650a = 1; com->tx_fifo_size = 32; printf(" ST16650A"); } else { com->tx_fifo_size = COM_FIFOSIZE(flags); printf(" 16550A"); } #endif } #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { com->tx_fifo_size = 2048; com->rsabase = iobase; outb(com->rsabase + rsa_ier, 0x00); outb(com->rsabase + rsa_frr, 0x00); } #endif #ifdef COM_ESP #ifdef PC98 if (com->pc98_if_type == COM_IF_ESP98) #endif for (espp = likely_esp_ports; *espp != 0; espp++) if (espattach(com, *espp)) { com->tx_fifo_size = 1024; break; } #endif if (!com->st16650a) { if (!com->tx_fifo_size) com->tx_fifo_size = 16; else printf(" lookalike with %d bytes FIFO", com->tx_fifo_size); } break; } #ifdef PC98 if (com->pc98_if_type == COM_IF_RSB3000) { /* Set RSB-2000/3000 Extended Buffer mode. */ u_char lcr; lcr = sio_getreg(com, com_cfcr); sio_setreg(com, com_cfcr, lcr | CFCR_DLAB); sio_setreg(com, com_emr, EMR_EXBUFF | EMR_EFMODE); sio_setreg(com, com_cfcr, lcr); } #endif #ifdef COM_ESP if (com->esp) { /* * Set 16550 compatibility mode. * We don't use the ESP_MODE_SCALE bit to increase the * fifo trigger levels because we can't handle large * bursts of input. * XXX flow control should be set in comparam(), not here. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETMODE); outb(com->esp_port + ESP98_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); #else outb(com->esp_port + ESP_CMD1, ESP_SETMODE); outb(com->esp_port + ESP_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); #endif /* Set RTS/CTS flow control. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP98_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP98_CMD2, ESP_FLOW_CTS); #else outb(com->esp_port + ESP_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP_CMD2, ESP_FLOW_CTS); #endif /* Set flow-control levels. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP98_CMD2, HIBYTE(768)); outb(com->esp_port + ESP98_CMD2, LOBYTE(768)); outb(com->esp_port + ESP98_CMD2, HIBYTE(512)); outb(com->esp_port + ESP98_CMD2, LOBYTE(512)); #else outb(com->esp_port + ESP_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP_CMD2, HIBYTE(768)); outb(com->esp_port + ESP_CMD2, LOBYTE(768)); outb(com->esp_port + ESP_CMD2, HIBYTE(512)); outb(com->esp_port + ESP_CMD2, LOBYTE(512)); #endif #ifdef PC98 /* Set UART clock prescaler. */ outb(com->esp_port + ESP98_CMD1, ESP_SETCLOCK); outb(com->esp_port + ESP98_CMD2, 2); /* 4 times */ #endif } #endif /* COM_ESP */ sio_setreg(com, com_fifo, 0); #ifdef PC98 printf("%s", if_16550a_type[com->pc98_if_type & 0x0f].name); #else determined_type: ; #endif #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { device_t masterdev; com->multiport = TRUE; printf(" (multiport"); if (unit == COM_MPMASTER(flags)) printf(" master"); printf(")"); masterdev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); com->no_irq = (masterdev == NULL || bus_get_resource(masterdev, SYS_RES_IRQ, 0, NULL, NULL) != 0); } #endif /* COM_MULTIPORT */ #ifdef PC98 } #endif if (unit == comconsole) printf(", console"); if (COM_IIR_TXRDYBUG(flags)) printf(" with a bogus IIR_TXRDY register"); printf("\n"); if (!sio_registered) { register_swi(SWI_TTY, siopoll); sio_registered = TRUE; } make_dev(&sio_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, "ttyd%r", unit); make_dev(&sio_cdevsw, unit | CONTROL_INIT_STATE, UID_ROOT, GID_WHEEL, 0600, "ttyid%r", unit); make_dev(&sio_cdevsw, unit | CONTROL_LOCK_STATE, UID_ROOT, GID_WHEEL, 0600, "ttyld%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK, UID_UUCP, GID_DIALER, 0660, "cuaa%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK | CONTROL_INIT_STATE, UID_UUCP, GID_DIALER, 0660, "cuaia%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK | CONTROL_LOCK_STATE, UID_UUCP, GID_DIALER, 0660, "cuala%r", unit); com->flags = flags; com->pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR; pps_init(&com->pps); rid = 0; com->irqres = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0ul, ~0ul, 1, RF_ACTIVE); if (com->irqres) { ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres, INTR_TYPE_TTY | INTR_TYPE_FAST, siointr, com, &com->cookie); if (ret) { ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres, INTR_TYPE_TTY, siointr, com, &com->cookie); if (ret == 0) device_printf(dev, "unable to activate interrupt in fast mode - using normal mode"); } if (ret) device_printf(dev, "could not activate interrupt\n"); } return (0); } static int sioopen(dev, flag, mode, p) dev_t dev; int flag; int mode; struct proc *p; { struct com_s *com; int error; int mynor; int s; struct tty *tp; int unit; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); if (com == NULL) return (ENXIO); if (com->gone) return (ENXIO); if (mynor & CONTROL_MASK) return (0); tp = dev->si_tty = com->tp = ttymalloc(com->tp); s = spltty(); /* * We jump to this label after all non-interrupted sleeps to pick * up any changes of the device state. */ open_top: while (com->state & CS_DTR_OFF) { error = tsleep(&com->dtr_wait, TTIPRI | PCATCH, "siodtr", 0); if (com_addr(unit) == NULL) return (ENXIO); if (error != 0 || com->gone) goto out; } if (tp->t_state & TS_ISOPEN) { /* * The device is open, so everything has been initialized. * Handle conflicts. */ if (mynor & CALLOUT_MASK) { if (!com->active_out) { error = EBUSY; goto out; } } else { if (com->active_out) { if (flag & O_NONBLOCK) { error = EBUSY; goto out; } error = tsleep(&com->active_out, TTIPRI | PCATCH, "siobi", 0); if (com_addr(unit) == NULL) return (ENXIO); if (error != 0 || com->gone) goto out; goto open_top; } } if (tp->t_state & TS_XCLUDE && suser(p)) { error = EBUSY; goto out; } } else { /* * The device isn't open, so there are no conflicts. * Initialize it. Initialization is done twice in many * cases: to preempt sleeping callin opens if we are * callout, and to complete a callin open after DCD rises. */ tp->t_oproc = comstart; tp->t_param = comparam; tp->t_stop = comstop; tp->t_dev = dev; tp->t_termios = mynor & CALLOUT_MASK ? com->it_out : com->it_in; #ifdef PC98 if (!IS_8251(com->pc98_if_type)) #endif (void)commctl(com, TIOCM_DTR | TIOCM_RTS, DMSET); com->poll = com->no_irq; com->poll_output = com->loses_outints; ++com->wopeners; error = comparam(tp, &tp->t_termios); --com->wopeners; if (error != 0) goto out; #ifdef PC98 if (IS_8251(com->pc98_if_type)) { com_tiocm_bis(com, TIOCM_DTR|TIOCM_RTS); pc98_msrint_start(dev); if (com->pc98_8251fifo) { com->pc98_8251fifo_enable = 1; outb(I8251F_fcr, CTRL8251F_ENABLE | CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); } } #endif /* * XXX we should goto open_top if comparam() slept. */ if (com->hasfifo) { /* * (Re)enable and drain fifos. * * Certain SMC chips cause problems if the fifos * are enabled while input is ready. Turn off the * fifo if necessary to clear the input. We test * the input ready bit after enabling the fifos * since we've already enabled them in comparam() * and to handle races between enabling and fresh * input. */ while (TRUE) { sio_setreg(com, com_fifo, FIFO_RCV_RST | FIFO_XMT_RST | com->fifo_image); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) outb(com->rsabase + rsa_frr , 0x00); #endif /* * XXX the delays are for superstitious * historical reasons. It must be less than * the character time at the maximum * supported speed (87 usec at 115200 bps * 8N1). Otherwise we might loop endlessly * if data is streaming in. We used to use * delays of 100. That usually worked * because DELAY(100) used to usually delay * for about 85 usec instead of 100. */ DELAY(50); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III ? !(inb(com->rsabase + rsa_srr) & 0x08) : !(inb(com->line_status_port) & LSR_RXRDY)) break; #else if (!(inb(com->line_status_port) & LSR_RXRDY)) break; #endif sio_setreg(com, com_fifo, 0); DELAY(50); (void) inb(com->data_port); } } disable_intr(); #ifdef PC98 if (IS_8251(com->pc98_if_type)) { com_tiocm_bis(com, TIOCM_LE); com->pc98_prev_modem_status = pc98_get_modem_status(com); com_int_Rx_enable(com); } else { #endif (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status = inb(com->modem_status_port); if (COM_IIR_TXRDYBUG(com->flags)) { outb(com->intr_ctl_port, IER_ERXRDY | IER_ERLS | IER_EMSC); } else { outb(com->intr_ctl_port, IER_ERXRDY | IER_ETXRDY | IER_ERLS | IER_EMSC); } #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { outb(com->rsabase + rsa_ier, 0x1d); outb(com->intr_ctl_port, IER_ERLS | IER_EMSC); } #endif #ifdef PC98 } #endif enable_intr(); /* * Handle initial DCD. Callout devices get a fake initial * DCD (trapdoor DCD). If we are callout, then any sleeping * callin opens get woken up and resume sleeping on "siobi" * instead of "siodcd". */ /* * XXX `mynor & CALLOUT_MASK' should be * `tp->t_cflag & (SOFT_CARRIER | TRAPDOOR_CARRIER) where * TRAPDOOR_CARRIER is the default initial state for callout * devices and SOFT_CARRIER is like CLOCAL except it hides * the true carrier. */ #ifdef PC98 if ((IS_8251(com->pc98_if_type) && (pc98_get_modem_status(com) & TIOCM_CAR)) || (!IS_8251(com->pc98_if_type) && (com->prev_modem_status & MSR_DCD)) || mynor & CALLOUT_MASK) (*linesw[tp->t_line].l_modem)(tp, 1); #else if (com->prev_modem_status & MSR_DCD || mynor & CALLOUT_MASK) (*linesw[tp->t_line].l_modem)(tp, 1); #endif } /* * Wait for DCD if necessary. */ if (!(tp->t_state & TS_CARR_ON) && !(mynor & CALLOUT_MASK) && !(tp->t_cflag & CLOCAL) && !(flag & O_NONBLOCK)) { ++com->wopeners; error = tsleep(TSA_CARR_ON(tp), TTIPRI | PCATCH, "siodcd", 0); if (com_addr(unit) == NULL) return (ENXIO); --com->wopeners; if (error != 0 || com->gone) goto out; goto open_top; } error = (*linesw[tp->t_line].l_open)(dev, tp); disc_optim(tp, &tp->t_termios, com); if (tp->t_state & TS_ISOPEN && mynor & CALLOUT_MASK) com->active_out = TRUE; siosettimeout(); out: splx(s); if (!(tp->t_state & TS_ISOPEN) && com->wopeners == 0) comhardclose(com); return (error); } static int sioclose(dev, flag, mode, p) dev_t dev; int flag; int mode; struct proc *p; { struct com_s *com; int mynor; int s; struct tty *tp; mynor = minor(dev); if (mynor & CONTROL_MASK) return (0); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL) return (ENODEV); tp = com->tp; s = spltty(); (*linesw[tp->t_line].l_close)(tp, flag); #ifdef PC98 com->modem_checking = 0; #endif disc_optim(tp, &tp->t_termios, com); comstop(tp, FREAD | FWRITE); comhardclose(com); ttyclose(tp); siosettimeout(); splx(s); if (com->gone) { printf("sio%d: gone\n", com->unit); s = spltty(); if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); bzero(tp, sizeof *tp); splx(s); } return (0); } static void comhardclose(com) struct com_s *com; { int s; struct tty *tp; int unit; unit = com->unit; s = spltty(); com->poll = FALSE; com->poll_output = FALSE; com->do_timestamp = FALSE; com->do_dcd_timestamp = FALSE; com->pps.ppsparam.mode = 0; #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_send_break_off(com); else #endif sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); { #ifdef PC98 int tmp; if (IS_8251(com->pc98_if_type)) com_int_TxRx_disable(com); else sio_setreg(com, com_ier, 0); if (com->pc98_if_type == COM_IF_RSA98III) outb(com->rsabase + rsa_ier, 0x00); #else sio_setreg(com, com_ier, 0); #endif tp = com->tp; #ifdef PC98 if (IS_8251(com->pc98_if_type)) tmp = pc98_get_modem_status(com) & TIOCM_CAR; else tmp = com->prev_modem_status & MSR_DCD; #endif if (tp->t_cflag & HUPCL /* * XXX we will miss any carrier drop between here and the * next open. Perhaps we should watch DCD even when the * port is closed; it is not sufficient to check it at * the next open because it might go up and down while * we're not watching. */ || (!com->active_out #ifdef PC98 && !(tmp) #else && !(com->prev_modem_status & MSR_DCD) #endif && !(com->it_in.c_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN)) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_tiocm_bic(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); else #endif (void)commctl(com, TIOCM_DTR, DMBIC); if (com->dtr_wait != 0 && !(com->state & CS_DTR_OFF)) { timeout(siodtrwakeup, com, com->dtr_wait); com->state |= CS_DTR_OFF; } } #ifdef PC98 else { if (IS_8251(com->pc98_if_type)) com_tiocm_bic(com, TIOCM_LE); } #endif } #ifdef PC98 if (com->pc98_8251fifo) { if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); com->pc98_8251fifo_enable = 0; } #endif if (com->hasfifo) { /* * Disable fifos so that they are off after controlled * reboots. Some BIOSes fail to detect 16550s when the * fifos are enabled. */ sio_setreg(com, com_fifo, 0); } com->active_out = FALSE; wakeup(&com->active_out); wakeup(TSA_CARR_ON(tp)); /* restart any wopeners */ splx(s); } static int sioread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int mynor; struct com_s *com; mynor = minor(dev); if (mynor & CONTROL_MASK) return (ENODEV); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL || com->gone) return (ENODEV); return ((*linesw[com->tp->t_line].l_read)(com->tp, uio, flag)); } static int siowrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int mynor; struct com_s *com; int unit; mynor = minor(dev); if (mynor & CONTROL_MASK) return (ENODEV); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); if (com == NULL || com->gone) return (ENODEV); /* * (XXX) We disallow virtual consoles if the physical console is * a serial port. This is in case there is a display attached that * is not the console. In that situation we don't need/want the X * server taking over the console. */ if (constty != NULL && unit == comconsole) constty = NULL; return ((*linesw[com->tp->t_line].l_write)(com->tp, uio, flag)); } static void siobusycheck(chan) void *chan; { struct com_s *com; int s; com = (struct com_s *)chan; /* * Clear TS_BUSY if low-level output is complete. * spl locking is sufficient because siointr1() does not set CS_BUSY. * If siointr1() clears CS_BUSY after we look at it, then we'll get * called again. Reading the line status port outside of siointr1() * is safe because CS_BUSY is clear so there are no output interrupts * to lose. */ s = spltty(); if (com->state & CS_BUSY) com->extra_state &= ~CSE_BUSYCHECK; /* False alarm. */ #ifdef PC98 else if ((IS_8251(com->pc98_if_type) && ((com->pc98_8251fifo_enable && (inb(I8251F_lsr) & (STS8251F_TxRDY | STS8251F_TxEMP)) == (STS8251F_TxRDY | STS8251F_TxEMP)) || (!com->pc98_8251fifo_enable && (inb(com->sts_port) & (STS8251_TxRDY | STS8251_TxEMP)) == (STS8251_TxRDY | STS8251_TxEMP)))) || ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY))) { #else else if ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY)) { #endif com->tp->t_state &= ~TS_BUSY; ttwwakeup(com->tp); com->extra_state &= ~CSE_BUSYCHECK; } else timeout(siobusycheck, com, hz / 100); splx(s); } static void siodtrwakeup(chan) void *chan; { struct com_s *com; com = (struct com_s *)chan; com->state &= ~CS_DTR_OFF; wakeup(&com->dtr_wait); } static void sioinput(com) struct com_s *com; { u_char *buf; int incc; u_char line_status; int recv_data; struct tty *tp; buf = com->ibuf; tp = com->tp; if (!(tp->t_state & TS_ISOPEN) || !(tp->t_cflag & CREAD)) { com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; return; } if (tp->t_state & TS_CAN_BYPASS_L_RINT) { /* * Avoid the grotesquely inefficient lineswitch routine * (ttyinput) in "raw" mode. It usually takes about 450 * instructions (that's without canonical processing or echo!). * slinput is reasonably fast (usually 40 instructions plus * call overhead). */ do { enable_intr(); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat && (com->state & CS_RTS_IFLOW || tp->t_iflag & IXOFF) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); com->delta_error_counts[CE_TTY_BUF_OVERFLOW] += b_to_q((char *)buf, incc, &tp->t_rawq); buf += incc; tk_nin += incc; tk_rawcc += incc; tp->t_rawcc += incc; ttwakeup(tp); if (tp->t_state & TS_TTSTOP && (tp->t_iflag & IXANY || tp->t_cc[VSTART] == tp->t_cc[VSTOP])) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; comstart(tp); } disable_intr(); } while (buf < com->iptr); } else { do { enable_intr(); line_status = buf[com->ierroff]; recv_data = *buf++; if (line_status & (LSR_BI | LSR_FE | LSR_OE | LSR_PE)) { if (line_status & LSR_BI) recv_data |= TTY_BI; if (line_status & LSR_FE) recv_data |= TTY_FE; if (line_status & LSR_OE) recv_data |= TTY_OE; if (line_status & LSR_PE) recv_data |= TTY_PE; } (*linesw[tp->t_line].l_rint)(recv_data, tp); disable_intr(); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; /* * There is now room for another low-level buffer full of input, * so enable RTS if it is now disabled and there is room in the * high-level buffer. */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if ((com->state & CS_RTS_IFLOW) && !(com_tiocm_get(com) & TIOCM_RTS) && !(tp->t_state & TS_TBLOCK)) com_tiocm_bis(com, TIOCM_RTS); } else { if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } #else if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #endif } void siointr(arg) void *arg; { #ifndef COM_MULTIPORT COM_LOCK(); siointr1((struct com_s *) arg); COM_UNLOCK(); #else /* COM_MULTIPORT */ bool_t possibly_more_intrs; int unit; struct com_s *com; #ifdef PC98 u_char rsa_buf_status; #endif /* * Loop until there is no activity on any port. This is necessary * to get an interrupt edge more than to avoid another interrupt. * If the IRQ signal is just an OR of the IRQ signals from several * devices, then the edge from one may be lost because another is * on. */ COM_LOCK(); do { possibly_more_intrs = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); /* * XXX COM_LOCK(); * would it work here, or be counter-productive? */ #ifdef PC98 if (com != NULL && !com->gone && IS_8251(com->pc98_if_type)) { siointr1(com); } else if (com != NULL && !com->gone && com->pc98_if_type == COM_IF_RSA98III) { rsa_buf_status = inb(com->rsabase + rsa_srr) & 0xc9; if ((rsa_buf_status & 0xc8) || !(rsa_buf_status & 0x01)) { siointr1(com); if (rsa_buf_status != (inb(com->rsabase + rsa_srr) & 0xc9)) possibly_more_intrs = TRUE; } } else #endif if (com != NULL && !com->gone && (inb(com->int_id_port) & IIR_IMASK) != IIR_NOPEND) { siointr1(com); possibly_more_intrs = TRUE; } /* XXX COM_UNLOCK(); */ } } while (possibly_more_intrs); COM_UNLOCK(); #endif /* COM_MULTIPORT */ } static void siointr1(com) struct com_s *com; { u_char line_status; u_char modem_status; u_char *ioptr; u_char recv_data; u_char int_ctl; u_char int_ctl_new; struct timecounter *tc; u_int count; #ifdef PC98 u_char tmp = 0; u_char rsa_buf_status = 0; int rsa_tx_fifo_size = 0; #endif /* PC98 */ int_ctl = inb(com->intr_ctl_port); int_ctl_new = int_ctl; while (!com->gone) { #ifdef PC98 status_read:; if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) tmp = inb(I8251F_lsr); else tmp = inb(com->sts_port); more_intr: line_status = 0; if (com->pc98_8251fifo_enable) { if (tmp & STS8251F_TxRDY) line_status |= LSR_TXRDY; if (tmp & STS8251F_RxRDY) line_status |= LSR_RXRDY; if (tmp & STS8251F_TxEMP) line_status |= LSR_TSRE; if (tmp & STS8251F_PE) line_status |= LSR_PE; if (tmp & STS8251F_OE) line_status |= LSR_OE; if (tmp & STS8251F_BD_SD) line_status |= LSR_BI; } else { if (tmp & STS8251_TxRDY) line_status |= LSR_TXRDY; if (tmp & STS8251_RxRDY) line_status |= LSR_RXRDY; if (tmp & STS8251_TxEMP) line_status |= LSR_TSRE; if (tmp & STS8251_PE) line_status |= LSR_PE; if (tmp & STS8251_OE) line_status |= LSR_OE; if (tmp & STS8251_FE) line_status |= LSR_FE; if (tmp & STS8251_BD_SD) line_status |= LSR_BI; } } else { #endif /* PC98 */ if (com->pps.ppsparam.mode & PPS_CAPTUREBOTH) { modem_status = inb(com->modem_status_port); if ((modem_status ^ com->last_modem_status) & MSR_DCD) { tc = timecounter; count = tc->tc_get_timecount(tc); pps_event(&com->pps, tc, count, (modem_status & MSR_DCD) ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR); } } line_status = inb(com->line_status_port); #ifdef PC98 } if (com->pc98_if_type == COM_IF_RSA98III) rsa_buf_status = inb(com->rsabase + rsa_srr); #endif /* PC98 */ /* input event? (check first to help avoid overruns) */ #ifndef PC98 while (line_status & LSR_RCV_MASK) { #else while ((line_status & LSR_RCV_MASK) || (com->pc98_if_type == COM_IF_RSA98III && (rsa_buf_status & 0x08))) { #endif /* PC98 */ /* break/unnattached error bits or real input? */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) { recv_data = inb(I8251F_data); if (tmp & (STS8251F_PE | STS8251F_OE | STS8251F_BD_SD)) { pc98_i8251_or_cmd(com, CMD8251_ER); recv_data = 0; } } else { recv_data = inb(com->data_port); if (tmp & (STS8251_PE | STS8251_OE | STS8251_FE | STS8251_BD_SD)) { pc98_i8251_or_cmd(com, CMD8251_ER); recv_data = 0; } } } else if (com->pc98_if_type == COM_IF_RSA98III) { if (!(rsa_buf_status & 0x08)) recv_data = 0; else recv_data = inb(com->data_port); } else #endif if (!(line_status & LSR_RXRDY)) recv_data = 0; else recv_data = inb(com->data_port); if (line_status & (LSR_BI | LSR_FE | LSR_PE)) { /* * Don't store BI if IGNBRK or FE/PE if IGNPAR. * Otherwise, push the work to a higher level * (to handle PARMRK) if we're bypassing. * Otherwise, convert BI/FE and PE+INPCK to 0. * * This makes bypassing work right in the * usual "raw" case (IGNBRK set, and IGNPAR * and INPCK clear). * * Note: BI together with FE/PE means just BI. */ if (line_status & LSR_BI) { #if defined(DDB) && defined(BREAK_TO_DEBUGGER) if (com->unit == comconsole) { breakpoint(); goto cont; } #endif if (com->tp == NULL || com->tp->t_iflag & IGNBRK) goto cont; } else { if (com->tp == NULL || com->tp->t_iflag & IGNPAR) goto cont; } if (com->tp->t_state & TS_CAN_BYPASS_L_RINT && (line_status & (LSR_BI | LSR_FE) || com->tp->t_iflag & INPCK)) recv_data = 0; } ++com->bytes_in; if (com->hotchar != 0 && recv_data == com->hotchar) setsofttty(); ioptr = com->iptr; if (ioptr >= com->ibufend) CE_RECORD(com, CE_INTERRUPT_BUF_OVERFLOW); else { if (com->do_timestamp) microtime(&com->timestamp); ++com_events; schedsofttty(); #if 0 /* for testing input latency vs efficiency */ if (com->iptr - com->ibuf == 8) setsofttty(); #endif ioptr[0] = recv_data; ioptr[com->ierroff] = line_status; com->iptr = ++ioptr; if (ioptr == com->ihighwater && com->state & CS_RTS_IFLOW) #ifdef PC98 IS_8251(com->pc98_if_type) ? com_tiocm_bic(com, TIOCM_RTS) : #endif outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); if (line_status & LSR_OE) CE_RECORD(com, CE_OVERRUN); } cont: /* * "& 0x7F" is to avoid the gcc-1.40 generating a slow * jump from the top of the loop to here */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) goto status_read; else #endif line_status = inb(com->line_status_port) & 0x7F; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) rsa_buf_status = inb(com->rsabase + rsa_srr); #endif /* PC98 */ } /* modem status change? (always check before doing output) */ #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif modem_status = inb(com->modem_status_port); if (modem_status != com->last_modem_status) { if (com->do_dcd_timestamp && !(com->last_modem_status & MSR_DCD) && modem_status & MSR_DCD) microtime(&com->dcd_timestamp); /* * Schedule high level to handle DCD changes. Note * that we don't use the delta bits anywhere. Some * UARTs mess them up, and it's easy to remember the * previous bits and calculate the delta. */ com->last_modem_status = modem_status; if (!(com->state & CS_CHECKMSR)) { com_events += LOTS_OF_EVENTS; com->state |= CS_CHECKMSR; setsofttty(); } /* handle CTS change immediately for crisp flow ctl */ if (com->state & CS_CTS_OFLOW) { if (modem_status & MSR_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } } #ifdef PC98 } #endif /* output queued and everything ready? */ #ifndef PC98 if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { #else if (((com->pc98_if_type == COM_IF_RSA98III) ? (rsa_buf_status & 0x02) : (line_status & LSR_TXRDY)) && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { #endif #ifdef PC98 Port_t tmp_data_port; if (IS_8251(com->pc98_if_type) && com->pc98_8251fifo_enable) tmp_data_port = I8251F_data; else tmp_data_port = com->data_port; #endif ioptr = com->obufq.l_head; if (com->tx_fifo_size > 1) { u_int ocount; ocount = com->obufq.l_tail - ioptr; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { rsa_buf_status = inb(com->rsabase + rsa_srr); rsa_tx_fifo_size = 1024; if (!(rsa_buf_status & 0x01)) rsa_tx_fifo_size = 2048; if (ocount > rsa_tx_fifo_size) ocount = rsa_tx_fifo_size; } else #endif if (ocount > com->tx_fifo_size) ocount = com->tx_fifo_size; com->bytes_out += ocount; do #ifdef PC98 outb(tmp_data_port, *ioptr++); #else outb(com->data_port, *ioptr++); #endif while (--ocount != 0); } else { #ifdef PC98 outb(tmp_data_port, *ioptr++); #else outb(com->data_port, *ioptr++); #endif ++com->bytes_out; } #ifdef PC98 if (IS_8251(com->pc98_if_type)) if (!(pc98_check_i8251_interrupt(com) & IEN_TxFLAG)) com_int_Tx_enable(com); #endif com->obufq.l_head = ioptr; if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl_new = int_ctl | IER_ETXRDY; } if (ioptr >= com->obufq.l_tail) { struct lbq *qp; qp = com->obufq.l_next; qp->l_queued = FALSE; qp = qp->l_next; if (qp != NULL) { com->obufq.l_head = qp->l_head; com->obufq.l_tail = qp->l_tail; com->obufq.l_next = qp; } else { /* output just completed */ if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl_new = int_ctl & ~IER_ETXRDY; } com->state &= ~CS_BUSY; #if defined(PC98) if (IS_8251(com->pc98_if_type) && pc98_check_i8251_interrupt(com) & IEN_TxFLAG) com_int_Tx_disable(com); #endif } if (!(com->state & CS_ODONE)) { com_events += LOTS_OF_EVENTS; com->state |= CS_ODONE; setsofttty(); /* handle at high level ASAP */ } } if (COM_IIR_TXRDYBUG(com->flags) && (int_ctl != int_ctl_new)) { #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { int_ctl_new &= ~(IER_ETXRDY | IER_ERXRDY); outb(com->intr_ctl_port, int_ctl_new); outb(com->rsabase + rsa_ier, 0x1d); } else #endif outb(com->intr_ctl_port, int_ctl_new); } } #ifdef PC98 else if (line_status & LSR_TXRDY) { if (IS_8251(com->pc98_if_type)) if (pc98_check_i8251_interrupt(com) & IEN_TxFLAG) com_int_Tx_disable(com); } if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) { if ((tmp = inb(I8251F_lsr)) & STS8251F_RxRDY) goto more_intr; } else { if ((tmp = inb(com->sts_port)) & STS8251_RxRDY) goto more_intr; } } #endif /* finished? */ #ifndef COM_MULTIPORT #ifdef PC98 if (IS_8251(com->pc98_if_type)) return; #endif if ((inb(com->int_id_port) & IIR_IMASK) == IIR_NOPEND) #endif /* COM_MULTIPORT */ return; } } static int sioioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { struct com_s *com; int error; int mynor; int s; struct tty *tp; #if defined(COMPAT_43) || defined(COMPAT_SUNOS) u_long oldcmd; struct termios term; #endif mynor = minor(dev); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL || com->gone) return (ENODEV); if (mynor & CONTROL_MASK) { struct termios *ct; switch (mynor & CONTROL_MASK) { case CONTROL_INIT_STATE: ct = mynor & CALLOUT_MASK ? &com->it_out : &com->it_in; break; case CONTROL_LOCK_STATE: ct = mynor & CALLOUT_MASK ? &com->lt_out : &com->lt_in; break; default: return (ENODEV); /* /dev/nodev */ } switch (cmd) { case TIOCSETA: error = suser(p); if (error != 0) return (error); *ct = *(struct termios *)data; return (0); case TIOCGETA: *(struct termios *)data = *ct; return (0); case TIOCGETD: *(int *)data = TTYDISC; return (0); case TIOCGWINSZ: bzero(data, sizeof(struct winsize)); return (0); default: return (ENOTTY); } } tp = com->tp; #if defined(COMPAT_43) || defined(COMPAT_SUNOS) term = tp->t_termios; oldcmd = cmd; error = ttsetcompat(tp, &cmd, data, &term); if (error != 0) return (error); if (cmd != oldcmd) data = (caddr_t)&term; #endif if (cmd == TIOCSETA || cmd == TIOCSETAW || cmd == TIOCSETAF) { int cc; struct termios *dt = (struct termios *)data; struct termios *lt = mynor & CALLOUT_MASK ? &com->lt_out : &com->lt_in; dt->c_iflag = (tp->t_iflag & lt->c_iflag) | (dt->c_iflag & ~lt->c_iflag); dt->c_oflag = (tp->t_oflag & lt->c_oflag) | (dt->c_oflag & ~lt->c_oflag); dt->c_cflag = (tp->t_cflag & lt->c_cflag) | (dt->c_cflag & ~lt->c_cflag); dt->c_lflag = (tp->t_lflag & lt->c_lflag) | (dt->c_lflag & ~lt->c_lflag); for (cc = 0; cc < NCCS; ++cc) if (lt->c_cc[cc] != 0) dt->c_cc[cc] = tp->t_cc[cc]; if (lt->c_ispeed != 0) dt->c_ispeed = tp->t_ispeed; if (lt->c_ospeed != 0) dt->c_ospeed = tp->t_ospeed; } error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); if (error != ENOIOCTL) return (error); s = spltty(); error = ttioctl(tp, cmd, data, flag); disc_optim(tp, &tp->t_termios, com); if (error != ENOIOCTL) { splx(s); return (error); } #ifdef PC98 if (IS_8251(com->pc98_if_type)) { switch (cmd) { case TIOCSBRK: com_send_break_on(com); break; case TIOCCBRK: com_send_break_off(com); break; case TIOCSDTR: com_tiocm_bis(com, TIOCM_DTR | TIOCM_RTS); break; case TIOCCDTR: com_tiocm_bic(com, TIOCM_DTR); break; /* * XXX should disallow changing MCR_RTS if CS_RTS_IFLOW is set. The * changes get undone on the next call to comparam(). */ case TIOCMSET: com_tiocm_set(com, *(int *)data); break; case TIOCMBIS: com_tiocm_bis(com, *(int *)data); break; case TIOCMBIC: com_tiocm_bic(com, *(int *)data); break; case TIOCMGET: *(int *)data = com_tiocm_get(com); break; case TIOCMSDTRWAIT: /* must be root since the wait applies to following logins */ error = suser(p); if (error != 0) { splx(s); return (error); } com->dtr_wait = *(int *)data * hz / 100; break; case TIOCMGDTRWAIT: *(int *)data = com->dtr_wait * 100 / hz; break; case TIOCTIMESTAMP: com->do_timestamp = TRUE; *(struct timeval *)data = com->timestamp; break; case TIOCDCDTIMESTAMP: com->do_dcd_timestamp = TRUE; *(struct timeval *)data = com->dcd_timestamp; break; default: splx(s); error = pps_ioctl(cmd, data, &com->pps); if (error == ENODEV) error = ENOTTY; return (error); } } else { #endif switch (cmd) { case TIOCSBRK: sio_setreg(com, com_cfcr, com->cfcr_image |= CFCR_SBREAK); break; case TIOCCBRK: sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); break; case TIOCSDTR: (void)commctl(com, TIOCM_DTR, DMBIS); break; case TIOCCDTR: (void)commctl(com, TIOCM_DTR, DMBIC); break; /* * XXX should disallow changing MCR_RTS if CS_RTS_IFLOW is set. The * changes get undone on the next call to comparam(). */ case TIOCMSET: (void)commctl(com, *(int *)data, DMSET); break; case TIOCMBIS: (void)commctl(com, *(int *)data, DMBIS); break; case TIOCMBIC: (void)commctl(com, *(int *)data, DMBIC); break; case TIOCMGET: *(int *)data = commctl(com, 0, DMGET); break; case TIOCMSDTRWAIT: /* must be root since the wait applies to following logins */ error = suser(p); if (error != 0) { splx(s); return (error); } com->dtr_wait = *(int *)data * hz / 100; break; case TIOCMGDTRWAIT: *(int *)data = com->dtr_wait * 100 / hz; break; case TIOCTIMESTAMP: com->do_timestamp = TRUE; *(struct timeval *)data = com->timestamp; break; case TIOCDCDTIMESTAMP: com->do_dcd_timestamp = TRUE; *(struct timeval *)data = com->dcd_timestamp; break; default: splx(s); error = pps_ioctl(cmd, data, &com->pps); if (error == ENODEV) error = ENOTTY; return (error); } #ifdef PC98 } #endif splx(s); return (0); } static void siopoll() { int unit; if (com_events == 0) return; repeat: for (unit = 0; unit < sio_numunits; ++unit) { struct com_s *com; int incc; struct tty *tp; com = com_addr(unit); if (com == NULL) continue; tp = com->tp; if (tp == NULL || com->gone) { /* * Discard any events related to never-opened or * going-away devices. */ disable_intr(); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { incc += LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; } com_events -= incc; enable_intr(); continue; } if (com->iptr != com->ibuf) { disable_intr(); sioinput(com); enable_intr(); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif disable_intr(); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; enable_intr(); if (delta_modem_status & MSR_DCD) (*linesw[tp->t_line].l_modem) (tp, com->prev_modem_status & MSR_DCD); #ifdef PC98 } #endif } if (com->state & CS_ODONE) { disable_intr(); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; enable_intr(); if (!(com->state & CS_BUSY) && !(com->extra_state & CSE_BUSYCHECK)) { timeout(siobusycheck, com, hz / 100); com->extra_state |= CSE_BUSYCHECK; } (*linesw[tp->t_line].l_start)(tp); } if (com_events == 0) break; } if (com_events >= LOTS_OF_EVENTS) goto repeat; } static int comparam(tp, t) struct tty *tp; struct termios *t; { u_int cfcr; int cflag; struct com_s *com; int divisor; u_char dlbh; u_char dlbl; int s; int unit; #ifdef PC98 u_char param = 0; #endif #ifdef PC98 unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); cfcr = 0; if (IS_8251(com->pc98_if_type)) { divisor = pc98_ttspeedtab(com, t->c_ospeed); } else { /* do historical conversions */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; /* check requested parameters */ divisor = ttspeedtab(t->c_ospeed, if_16550a_type[com->pc98_if_type & 0x0f].speedtab); } #else /* do historical conversions */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; /* check requested parameters */ divisor = ttspeedtab(t->c_ospeed, comspeedtab); #endif if (divisor < 0 || (divisor > 0 && t->c_ispeed != t->c_ospeed)) return (EINVAL); #ifndef PC98 /* parameters are OK, convert them to the com struct and the device */ unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); if (com == NULL) return (ENODEV); #endif s = spltty(); #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (divisor == 0) com_tiocm_bic(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); else com_tiocm_bis(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); } else #endif if (divisor == 0) (void)commctl(com, TIOCM_DTR, DMBIC); /* hang up line */ else (void)commctl(com, TIOCM_DTR, DMBIS); cflag = t->c_cflag; #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif switch (cflag & CSIZE) { case CS5: cfcr = CFCR_5BITS; break; case CS6: cfcr = CFCR_6BITS; break; case CS7: cfcr = CFCR_7BITS; break; default: cfcr = CFCR_8BITS; break; } if (cflag & PARENB) { cfcr |= CFCR_PENAB; if (!(cflag & PARODD)) cfcr |= CFCR_PEVEN; } if (cflag & CSTOPB) cfcr |= CFCR_STOPB; if (com->hasfifo && divisor != 0) { /* * Use a fifo trigger level low enough so that the input * latency from the fifo is less than about 16 msec and * the total latency is less than about 30 msec. These * latencies are reasonable for humans. Serial comms * protocols shouldn't expect anything better since modem * latencies are larger. */ com->fifo_image = t->c_ospeed <= 4800 ? FIFO_ENABLE : FIFO_ENABLE | FIFO_RX_HIGH; #ifdef COM_ESP /* * The Hayes ESP card needs the fifo DMA mode bit set * in compatibility mode. If not, it will interrupt * for each character received. */ if (com->esp) com->fifo_image |= FIFO_DMA_MODE; #endif sio_setreg(com, com_fifo, com->fifo_image); } #ifdef PC98 } #endif /* * This returns with interrupts disabled so that we can complete * the speed change atomically. Keeping interrupts disabled is * especially important while com_data is hidden. */ (void) siosetwater(com, t->c_ispeed); #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_cflag_and_speed_set(com, cflag, t->c_ospeed); else { #endif if (divisor != 0) { sio_setreg(com, com_cfcr, cfcr | CFCR_DLAB); /* * Only set the divisor registers if they would change, * since on some 16550 incompatibles (UMC8669F), setting * them while input is arriving them loses sync until * data stops arriving. */ dlbl = divisor & 0xFF; if (sio_getreg(com, com_dlbl) != dlbl) sio_setreg(com, com_dlbl, dlbl); dlbh = (u_int) divisor >> 8; if (sio_getreg(com, com_dlbh) != dlbh) sio_setreg(com, com_dlbh, dlbh); } sio_setreg(com, com_cfcr, com->cfcr_image = cfcr); #ifdef PC98 } #endif if (!(tp->t_state & TS_TTSTOP)) com->state |= CS_TTGO; if (cflag & CRTS_IFLOW) { #ifndef PC98 if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) | 0x40); } #endif com->state |= CS_RTS_IFLOW; /* * If CS_RTS_IFLOW just changed from off to on, the change * needs to be propagated to MCR_RTS. This isn't urgent, * so do it later by calling comstart() instead of repeating * a lot of code from comstart() here. */ } else if (com->state & CS_RTS_IFLOW) { com->state &= ~CS_RTS_IFLOW; /* * CS_RTS_IFLOW just changed from on to off. Force MCR_RTS * on here, since comstart() won't do it later. */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_tiocm_bis(com, TIOCM_RTS); else outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #else outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) & ~0x40); } #endif } /* * Set up state to handle output flow control. * XXX - worth handling MDMBUF (DCD) flow control at the lowest level? * Now has 10+ msec latency, while CTS flow has 50- usec latency. */ com->state |= CS_ODEVREADY; com->state &= ~CS_CTS_OFLOW; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { param = inb(com->rsabase + rsa_msr); outb(com->rsabase + rsa_msr, param & 0x14); } #endif if (cflag & CCTS_OFLOW) { com->state |= CS_CTS_OFLOW; #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (!(pc98_get_modem_status(com) & TIOCM_CTS)) com->state &= ~CS_ODEVREADY; } else { if (com->pc98_if_type == COM_IF_RSA98III) { /* Set automatic flow control mode */ outb(com->rsabase + rsa_msr, param | 0x08); } else #endif if (!(com->last_modem_status & MSR_CTS)) com->state &= ~CS_ODEVREADY; #ifdef PC98 } #else if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) | 0x80); } } else { if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) & ~0x80); } #endif } #ifdef PC98 if (!IS_8251(com->pc98_if_type)) #endif sio_setreg(com, com_cfcr, com->cfcr_image); /* XXX shouldn't call functions while intrs are disabled. */ disc_optim(tp, t, com); /* * Recover from fiddling with CS_TTGO. We used to call siointr1() * unconditionally, but that defeated the careful discarding of * stale input in sioopen(). */ if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); enable_intr(); splx(s); comstart(tp); if (com->ibufold != NULL) { free(com->ibufold, M_DEVBUF); com->ibufold = NULL; } return (0); } static int siosetwater(com, speed) struct com_s *com; speed_t speed; { int cp4ticks; u_char *ibuf; int ibufsize; struct tty *tp; /* * Make the buffer size large enough to handle a softtty interrupt * latency of about 2 ticks without loss of throughput or data * (about 3 ticks if input flow control is not used or not honoured, * but a bit less for CS5-CS7 modes). */ cp4ticks = speed / 10 / hz * 4; for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) ibufsize = 2048; #endif if (ibufsize == com->ibufsize) { disable_intr(); return (0); } /* * Allocate input buffer. The extra factor of 2 in the size is * to allow for an error byte for each input byte. */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { disable_intr(); return (ENOMEM); } /* Initialize non-critical variables. */ com->ibufold = com->ibuf; com->ibufsize = ibufsize; tp = com->tp; if (tp != NULL) { tp->t_ififosize = 2 * ibufsize; tp->t_ispeedwat = (speed_t)-1; tp->t_ospeedwat = (speed_t)-1; } /* * Read current input buffer, if any. Continue with interrupts * disabled. */ disable_intr(); if (com->iptr != com->ibuf) sioinput(com); /*- * Initialize critical variables, including input buffer watermarks. * The external device is asked to stop sending when the buffer * exactly reaches high water, or when the high level requests it. * The high level is notified immediately (rather than at a later * clock tick) when this watermark is reached. * The buffer size is chosen so the watermark should almost never * be reached. * The low watermark is invisibly 0 since the buffer is always * emptied all at once. */ com->iptr = com->ibuf = ibuf; com->ibufend = ibuf + ibufsize; com->ierroff = ibufsize; com->ihighwater = ibuf + 3 * ibufsize / 4; return (0); } static void comstart(tp) struct tty *tp; { struct com_s *com; int s; int unit; unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); if (com == NULL) return; s = spltty(); disable_intr(); if (tp->t_state & TS_TTSTOP) com->state &= ~CS_TTGO; else com->state |= CS_TTGO; if (tp->t_state & TS_TBLOCK) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if ((com_tiocm_get(com) & TIOCM_RTS) && (com->state & CS_RTS_IFLOW)) com_tiocm_bic(com, TIOCM_RTS); } else { if ((com->mcr_image & MCR_RTS) && (com->state & CS_RTS_IFLOW)) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); } #else if (com->mcr_image & MCR_RTS && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); #endif } else { #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (!(com_tiocm_get(com) & TIOCM_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) com_tiocm_bis(com, TIOCM_RTS); } else { if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } #else if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #endif } enable_intr(); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); return; } if (tp->t_outq.c_cc != 0) { struct lbq *qp; struct lbq *next; if (!com->obufs[0].l_queued) { com->obufs[0].l_tail = com->obuf1 + q_to_b(&tp->t_outq, com->obuf1, #ifdef PC98 com->obufsize); #else sizeof com->obuf1); #endif com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; disable_intr(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[0]; } else { com->obufq.l_head = com->obufs[0].l_head; com->obufq.l_tail = com->obufs[0].l_tail; com->obufq.l_next = &com->obufs[0]; com->state |= CS_BUSY; } enable_intr(); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { com->obufs[1].l_tail = com->obuf2 + q_to_b(&tp->t_outq, com->obuf2, #ifdef PC98 com->obufsize); #else sizeof com->obuf2); #endif com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; disable_intr(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[1]; } else { com->obufq.l_head = com->obufs[1].l_head; com->obufq.l_tail = com->obufs[1].l_tail; com->obufq.l_next = &com->obufs[1]; com->state |= CS_BUSY; } enable_intr(); } tp->t_state |= TS_BUSY; } disable_intr(); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ enable_intr(); ttwwakeup(tp); splx(s); } static void comstop(tp, rw) struct tty *tp; int rw; { struct com_s *com; #ifdef PC98 int rsa98_tmp = 0; #endif com = com_addr(DEV_TO_UNIT(tp->t_dev)); if (com == NULL || com->gone) return; disable_intr(); if (rw & FWRITE) { #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) for (rsa98_tmp = 0; rsa98_tmp < 2048; rsa98_tmp++) sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); } #endif com->obufs[0].l_queued = FALSE; com->obufs[1].l_queued = FALSE; if (com->state & CS_ODONE) com_events -= LOTS_OF_EVENTS; com->state &= ~(CS_ODONE | CS_BUSY); com->tp->t_state &= ~TS_BUSY; } if (rw & FREAD) { #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { if (com->pc98_if_type == COM_IF_RSA98III) for (rsa98_tmp = 0; rsa98_tmp < 2048; rsa98_tmp++) sio_getreg(com, com_data); #endif if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_RCV_RST | com->fifo_image); #ifdef PC98 } #endif com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } enable_intr(); comstart(tp); } static int commctl(com, bits, how) struct com_s *com; int bits; int how; { int mcr; int msr; if (how == DMGET) { bits = TIOCM_LE; /* XXX - always enabled while open */ mcr = com->mcr_image; if (mcr & MCR_DTR) bits |= TIOCM_DTR; if (mcr & MCR_RTS) bits |= TIOCM_RTS; msr = com->prev_modem_status; if (msr & MSR_CTS) bits |= TIOCM_CTS; if (msr & MSR_DCD) bits |= TIOCM_CD; if (msr & MSR_DSR) bits |= TIOCM_DSR; /* * XXX - MSR_RI is naturally volatile, and we make MSR_TERI * more volatile by reading the modem status a lot. Perhaps * we should latch both bits until the status is read here. */ if (msr & (MSR_RI | MSR_TERI)) bits |= TIOCM_RI; return (bits); } mcr = 0; if (bits & TIOCM_DTR) mcr |= MCR_DTR; if (bits & TIOCM_RTS) mcr |= MCR_RTS; if (com->gone) return(0); disable_intr(); switch (how) { case DMSET: outb(com->modem_ctl_port, com->mcr_image = mcr | (com->mcr_image & MCR_IENABLE)); break; case DMBIS: outb(com->modem_ctl_port, com->mcr_image |= mcr); break; case DMBIC: outb(com->modem_ctl_port, com->mcr_image &= ~mcr); break; } enable_intr(); return (0); } static void siosettimeout() { struct com_s *com; bool_t someopen; int unit; /* * Set our timeout period to 1 second if no polled devices are open. * Otherwise set it to max(1/200, 1/hz). * Enable timeouts iff some device is open. */ untimeout(comwakeup, (void *)NULL, sio_timeout_handle); sio_timeout = hz; someopen = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && com->tp != NULL && com->tp->t_state & TS_ISOPEN && !com->gone) { someopen = TRUE; if (com->poll || com->poll_output) { sio_timeout = hz > 200 ? hz / 200 : 1; break; } } } if (someopen) { sio_timeouts_until_log = hz / sio_timeout; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); } else { /* Flush error messages, if any. */ sio_timeouts_until_log = 1; comwakeup((void *)NULL); untimeout(comwakeup, (void *)NULL, sio_timeout_handle); } } static void comwakeup(chan) void *chan; { struct com_s *com; int unit; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); /* * Recover from lost output interrupts. * Poll any lines that don't use interrupts. */ for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && !com->gone && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { disable_intr(); siointr1(com); enable_intr(); } } /* * Check for and log errors, but not too often. */ if (--sio_timeouts_until_log > 0) return; sio_timeouts_until_log = hz / sio_timeout; for (unit = 0; unit < sio_numunits; ++unit) { int errnum; com = com_addr(unit); if (com == NULL) continue; if (com->gone) continue; for (errnum = 0; errnum < CE_NTYPES; ++errnum) { u_int delta; u_long total; disable_intr(); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; enable_intr(); if (delta == 0) continue; total = com->error_counts[errnum] += delta; log(LOG_ERR, "sio%d: %u more %s%s (total %lu)\n", unit, delta, error_desc[errnum], delta == 1 ? "" : "s", total); } } } #ifdef PC98 /* commint is called when modem control line changes */ static void commint(dev_t dev) { register struct tty *tp; int stat,delta; struct com_s *com; int mynor,unit; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); tp = com->tp; stat = com_tiocm_get(com); delta = com_tiocm_get_delta(com); if (com->state & CS_CTS_OFLOW) { if (stat & TIOCM_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } if ((delta & TIOCM_CAR) && (mynor & CALLOUT_MASK) == 0) { if (stat & TIOCM_CAR ) (void)(*linesw[tp->t_line].l_modem)(tp, 1); else if ((*linesw[tp->t_line].l_modem)(tp, 0) == 0) { /* negate DTR, RTS */ com_tiocm_bic(com, (tp->t_cflag & HUPCL) ? TIOCM_DTR|TIOCM_RTS|TIOCM_LE : TIOCM_LE ); /* disable IENABLE */ com_int_TxRx_disable( com ); } } } #endif static void disc_optim(tp, t, com) struct tty *tp; struct termios *t; struct com_s *com; { if (!(t->c_iflag & (ICRNL | IGNCR | IMAXBEL | INLCR | ISTRIP | IXON)) && (!(t->c_iflag & BRKINT) || (t->c_iflag & IGNBRK)) && (!(t->c_iflag & PARMRK) || (t->c_iflag & (IGNPAR | IGNBRK)) == (IGNPAR | IGNBRK)) && !(t->c_lflag & (ECHO | ICANON | IEXTEN | ISIG | PENDIN)) && linesw[tp->t_line].l_rint == ttyinput) tp->t_state |= TS_CAN_BYPASS_L_RINT; else tp->t_state &= ~TS_CAN_BYPASS_L_RINT; com->hotchar = linesw[tp->t_line].l_hotchar; } /* * Following are all routines needed for SIO to act as console */ #include struct siocnstate { u_char dlbl; u_char dlbh; u_char ier; u_char cfcr; u_char mcr; }; static speed_t siocngetspeed __P((Port_t, struct speedtab *)); static void siocnclose __P((struct siocnstate *sp, Port_t iobase)); static void siocnopen __P((struct siocnstate *sp, Port_t iobase, int speed)); static void siocntxwait __P((Port_t iobase)); static cn_probe_t siocnprobe; static cn_init_t siocninit; static cn_checkc_t siocncheckc; static cn_getc_t siocngetc; static cn_putc_t siocnputc; #ifdef __i386__ CONS_DRIVER(sio, siocnprobe, siocninit, NULL, siocngetc, siocncheckc, siocnputc, NULL); #endif /* To get the GDB related variables */ #if DDB > 0 #include #endif static void siocntxwait(iobase) Port_t iobase; { int timo; /* * Wait for any pending transmission to finish. Required to avoid * the UART lockup bug when the speed is changed, and for normal * transmits. */ timo = 100000; while ((inb(iobase + com_lsr) & (LSR_TSRE | LSR_TXRDY)) != (LSR_TSRE | LSR_TXRDY) && --timo != 0) ; } /* * Read the serial port specified and try to figure out what speed * it's currently running at. We're assuming the serial port has * been initialized and is basicly idle. This routine is only intended * to be run at system startup. * * If the value read from the serial port doesn't make sense, return 0. */ static speed_t siocngetspeed(iobase, table) Port_t iobase; struct speedtab *table; { int code; u_char dlbh; u_char dlbl; u_char cfcr; cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); dlbl = inb(iobase + com_dlbl); dlbh = inb(iobase + com_dlbh); outb(iobase + com_cfcr, cfcr); code = dlbh << 8 | dlbl; for (; table->sp_speed != -1; table++) if (table->sp_code == code) return (table->sp_speed); return (0); /* didn't match anything sane */ } static void siocnopen(sp, iobase, speed) struct siocnstate *sp; Port_t iobase; int speed; { int divisor; u_char dlbh; u_char dlbl; /* * Save all the device control registers except the fifo register * and set our default ones (cs8 -parenb speed=comdefaultrate). * We can't save the fifo register since it is read-only. */ sp->ier = inb(iobase + com_ier); outb(iobase + com_ier, 0); /* spltty() doesn't stop siointr() */ siocntxwait(iobase); sp->cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); sp->dlbl = inb(iobase + com_dlbl); sp->dlbh = inb(iobase + com_dlbh); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (Startech), setting them clears the * data input register. This also reduces the effects of the * UMC8669F bug. */ divisor = ttspeedtab(speed, comspeedtab); dlbl = divisor & 0xFF; if (sp->dlbl != dlbl) outb(iobase + com_dlbl, dlbl); dlbh = (u_int) divisor >> 8; if (sp->dlbh != dlbh) outb(iobase + com_dlbh, dlbh); outb(iobase + com_cfcr, CFCR_8BITS); sp->mcr = inb(iobase + com_mcr); /* * We don't want interrupts, but must be careful not to "disable" * them by clearing the MCR_IENABLE bit, since that might cause * an interrupt by floating the IRQ line. */ outb(iobase + com_mcr, (sp->mcr & MCR_IENABLE) | MCR_DTR | MCR_RTS); } static void siocnclose(sp, iobase) struct siocnstate *sp; Port_t iobase; { /* * Restore the device control registers. */ siocntxwait(iobase); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); if (sp->dlbl != inb(iobase + com_dlbl)) outb(iobase + com_dlbl, sp->dlbl); if (sp->dlbh != inb(iobase + com_dlbh)) outb(iobase + com_dlbh, sp->dlbh); outb(iobase + com_cfcr, sp->cfcr); /* * XXX damp oscillations of MCR_DTR and MCR_RTS by not restoring them. */ outb(iobase + com_mcr, sp->mcr | MCR_DTR | MCR_RTS); outb(iobase + com_ier, sp->ier); } static void siocnprobe(cp) struct consdev *cp; { speed_t boot_speed; u_char cfcr; int s, unit; struct siocnstate sp; /* * Find our first enabled console, if any. If it is a high-level * console device, then initialize it and return successfully. * If it is a low-level console device, then initialize it and * return unsuccessfully. It must be initialized in both cases * for early use by console drivers and debuggers. Initializing * the hardware is not necessary in all cases, since the i/o * routines initialize it on the fly, but it is necessary if * input might arrive while the hardware is switched back to an * uninitialized state. We can't handle multiple console devices * yet because our low-level routines don't take a device arg. * We trust the user to set the console flags properly so that we * don't need to probe. */ cp->cn_pri = CN_DEAD; for (unit = 0; unit < 16; unit++) { /* XXX need to know how many */ int flags; int disabled; if (resource_int_value("sio", unit, "disabled", &disabled) == 0) { if (disabled) continue; } if (resource_int_value("sio", unit, "flags", &flags)) continue; if (COM_CONSOLE(flags) || COM_DEBUGGER(flags)) { int port; Port_t iobase; if (resource_int_value("sio", unit, "port", &port)) continue; iobase = port; s = spltty(); if (boothowto & RB_SERIAL) { boot_speed = siocngetspeed(iobase, comspeedtab); if (boot_speed) comdefaultrate = boot_speed; } /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); outb(iobase + com_dlbl, COMBRD(comdefaultrate) & 0xff); outb(iobase + com_dlbh, (u_int) COMBRD(comdefaultrate) >> 8); outb(iobase + com_cfcr, cfcr); siocnopen(&sp, iobase, comdefaultrate); splx(s); if (COM_CONSOLE(flags) && !COM_LLCONSOLE(flags)) { cp->cn_dev = makedev(CDEV_MAJOR, unit); cp->cn_pri = COM_FORCECONSOLE(flags) || boothowto & RB_SERIAL ? CN_REMOTE : CN_NORMAL; siocniobase = iobase; siocnunit = unit; } if (COM_DEBUGGER(flags)) { printf("sio%d: gdb debugging port\n", unit); siogdbiobase = iobase; siogdbunit = unit; #if DDB > 0 gdbdev = makedev(CDEV_MAJOR, unit); gdb_getc = siocngetc; gdb_putc = siocnputc; #endif } } } #ifdef __i386__ #if DDB > 0 /* * XXX Ugly Compatability. * If no gdb port has been specified, set it to be the console * as some configuration files don't specify the gdb port. */ if (gdbdev == NODEV && (boothowto & RB_GDB)) { printf("Warning: no GDB port specified. Defaulting to sio%d.\n", siocnunit); printf("Set flag 0x80 on desired GDB port in your\n"); printf("configuration file (currently sio only).\n"); siogdbiobase = siocniobase; siogdbunit = siocnunit; gdbdev = makedev(CDEV_MAJOR, siocnunit); gdb_getc = siocngetc; gdb_putc = siocnputc; } #endif #endif } #ifdef __alpha__ CONS_DRIVER(sio, NULL, NULL, NULL, siocngetc, siocncheckc, siocnputc, NULL); int siocnattach(port, speed) int port; int speed; { int s; u_char cfcr; struct siocnstate sp; siocniobase = port; comdefaultrate = speed; sio_consdev.cn_pri = CN_NORMAL; sio_consdev.cn_dev = makedev(CDEV_MAJOR, 0); s = spltty(); /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(siocniobase + com_cfcr); outb(siocniobase + com_cfcr, CFCR_DLAB | cfcr); outb(siocniobase + com_dlbl, COMBRD(comdefaultrate) & 0xff); outb(siocniobase + com_dlbh, (u_int) COMBRD(comdefaultrate) >> 8); outb(siocniobase + com_cfcr, cfcr); siocnopen(&sp, siocniobase, comdefaultrate); splx(s); cn_tab = &sio_consdev; return (0); } int siogdbattach(port, speed) int port; int speed; { int s; u_char cfcr; struct siocnstate sp; siogdbiobase = port; gdbdefaultrate = speed; s = spltty(); /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(siogdbiobase + com_cfcr); outb(siogdbiobase + com_cfcr, CFCR_DLAB | cfcr); outb(siogdbiobase + com_dlbl, COMBRD(gdbdefaultrate) & 0xff); outb(siogdbiobase + com_dlbh, (u_int) COMBRD(gdbdefaultrate) >> 8); outb(siogdbiobase + com_cfcr, cfcr); siocnopen(&sp, siogdbiobase, gdbdefaultrate); splx(s); return (0); } #endif static void siocninit(cp) struct consdev *cp; { comconsole = DEV_TO_UNIT(cp->cn_dev); } static int siocncheckc(dev) dev_t dev; { int c; Port_t iobase; int s; struct siocnstate sp; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); if (inb(iobase + com_lsr) & LSR_RXRDY) c = inb(iobase + com_data); else c = -1; siocnclose(&sp, iobase); splx(s); return (c); } int siocngetc(dev) dev_t dev; { int c; Port_t iobase; int s; struct siocnstate sp; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); while (!(inb(iobase + com_lsr) & LSR_RXRDY)) ; c = inb(iobase + com_data); siocnclose(&sp, iobase); splx(s); return (c); } void siocnputc(dev, c) dev_t dev; int c; { int s; struct siocnstate sp; Port_t iobase; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); siocntxwait(iobase); outb(iobase + com_data, c); siocnclose(&sp, iobase); splx(s); } #ifdef __alpha__ int siogdbgetc() { int c; Port_t iobase; int s; struct siocnstate sp; iobase = siogdbiobase; s = spltty(); siocnopen(&sp, iobase, gdbdefaultrate); while (!(inb(iobase + com_lsr) & LSR_RXRDY)) ; c = inb(iobase + com_data); siocnclose(&sp, iobase); splx(s); return (c); } void siogdbputc(c) int c; { int s; struct siocnstate sp; s = spltty(); siocnopen(&sp, siogdbiobase, gdbdefaultrate); siocntxwait(siogdbiobase); outb(siogdbiobase + com_data, c); siocnclose(&sp, siogdbiobase); splx(s); } #endif DRIVER_MODULE(sio, isa, sio_isa_driver, sio_devclass, 0, 0); #if NCARD > 0 DRIVER_MODULE(sio, pccard, sio_pccard_driver, sio_devclass, 0, 0); #endif #if NPCI > 0 DRIVER_MODULE(sio, pci, sio_pci_driver, sio_devclass, 0, 0); #endif #ifdef PC98 /* * pc98 local function */ static void com_tiocm_set(struct com_s *com, int msr) { int s; int tmp = 0; int mask = CMD8251_TxEN|CMD8251_RxEN|CMD8251_DTR|CMD8251_RTS; s=spltty(); com->pc98_prev_modem_status = ( msr & (TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ) | ( com->pc98_prev_modem_status & ~(TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ); tmp |= (CMD8251_TxEN|CMD8251_RxEN); if ( msr & TIOCM_DTR ) tmp |= CMD8251_DTR; if ( msr & TIOCM_RTS ) tmp |= CMD8251_RTS; pc98_i8251_clear_or_cmd( com, mask, tmp ); splx(s); } static void com_tiocm_bis(struct com_s *com, int msr) { int s; int tmp = 0; s=spltty(); com->pc98_prev_modem_status |= ( msr & (TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ); tmp |= CMD8251_TxEN|CMD8251_RxEN; if ( msr & TIOCM_DTR ) tmp |= CMD8251_DTR; if ( msr & TIOCM_RTS ) tmp |= CMD8251_RTS; pc98_i8251_or_cmd( com, tmp ); splx(s); } static void com_tiocm_bic(struct com_s *com, int msr) { int s; int tmp = msr; s=spltty(); com->pc98_prev_modem_status &= ~( msr & (TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ); if ( msr & TIOCM_DTR ) tmp |= CMD8251_DTR; if ( msr & TIOCM_RTS ) tmp |= CMD8251_RTS; pc98_i8251_clear_cmd( com, tmp ); splx(s); } static int com_tiocm_get(struct com_s *com) { return( com->pc98_prev_modem_status ); } static int com_tiocm_get_delta(struct com_s *com) { int tmp; tmp = com->pc98_modem_delta; com->pc98_modem_delta = 0; return( tmp ); } /* convert to TIOCM_?? ( ioctl.h ) */ static int pc98_get_modem_status(struct com_s *com) { register int msr; msr = com->pc98_prev_modem_status & ~(TIOCM_CAR|TIOCM_RI|TIOCM_DSR|TIOCM_CTS); if (com->pc98_8251fifo_enable) { int stat2; stat2 = inb(I8251F_msr); if ( stat2 & CICSCDF_CD ) msr |= TIOCM_CAR; if ( stat2 & CICSCDF_CI ) msr |= TIOCM_RI; if ( stat2 & CICSCDF_DR ) msr |= TIOCM_DSR; if ( stat2 & CICSCDF_CS ) msr |= TIOCM_CTS; #if COM_CARRIER_DETECT_EMULATE if ( msr & (TIOCM_DSR|TIOCM_CTS) ) { msr |= TIOCM_CAR; } #endif } else { int stat, stat2; stat = inb(com->sts_port); stat2 = inb(com->in_modem_port); if ( !(stat2 & CICSCD_CD) ) msr |= TIOCM_CAR; if ( !(stat2 & CICSCD_CI) ) msr |= TIOCM_RI; if ( stat & STS8251_DSR ) msr |= TIOCM_DSR; if ( !(stat2 & CICSCD_CS) ) msr |= TIOCM_CTS; #if COM_CARRIER_DETECT_EMULATE if ( msr & (TIOCM_DSR|TIOCM_CTS) ) { msr |= TIOCM_CAR; } #endif } return(msr); } static void pc98_check_msr(void* chan) { int msr, delta; int s; register struct tty *tp; struct com_s *com; int mynor; int unit; dev_t dev; dev=(dev_t)chan; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); tp = com->tp; s = spltty(); msr = pc98_get_modem_status(com); /* make change flag */ delta = msr ^ com->pc98_prev_modem_status; if ( delta & TIOCM_CAR ) { if ( com->modem_car_chg_timer ) { if ( -- com->modem_car_chg_timer ) msr ^= TIOCM_CAR; } else { if ((com->modem_car_chg_timer = (msr & TIOCM_CAR) ? DCD_ON_RECOGNITION : DCD_OFF_TOLERANCE) != 0) msr ^= TIOCM_CAR; } } else com->modem_car_chg_timer = 0; delta = ( msr ^ com->pc98_prev_modem_status ) & (TIOCM_CAR|TIOCM_RI|TIOCM_DSR|TIOCM_CTS); com->pc98_prev_modem_status = msr; delta = ( com->pc98_modem_delta |= delta ); splx(s); if ( com->modem_checking || (tp->t_state & (TS_ISOPEN)) ) { if ( delta ) { commint(dev); } timeout(pc98_check_msr, (caddr_t)dev, PC98_CHECK_MODEM_INTERVAL); } else { com->modem_checking = 0; } } static void pc98_msrint_start(dev_t dev) { struct com_s *com; int mynor; int unit; int s = spltty(); mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); /* modem control line check routine envoke interval is 1/10 sec */ if ( com->modem_checking == 0 ) { com->pc98_prev_modem_status = pc98_get_modem_status(com); com->pc98_modem_delta = 0; timeout(pc98_check_msr, (caddr_t)dev, PC98_CHECK_MODEM_INTERVAL); com->modem_checking = 1; } splx(s); } static void pc98_disable_i8251_interrupt(struct com_s *com, int mod) { /* disable interrupt */ register int tmp; mod |= ~(IEN_Tx|IEN_TxEMP|IEN_Rx); COM_INT_DISABLE tmp = inb( com->intr_ctrl_port ) & ~(IEN_Tx|IEN_TxEMP|IEN_Rx); outb( com->intr_ctrl_port, (com->intr_enable&=~mod) | tmp ); COM_INT_ENABLE } static void pc98_enable_i8251_interrupt(struct com_s *com, int mod) { register int tmp; COM_INT_DISABLE tmp = inb( com->intr_ctrl_port ) & ~(IEN_Tx|IEN_TxEMP|IEN_Rx); outb( com->intr_ctrl_port, (com->intr_enable|=mod) | tmp ); COM_INT_ENABLE } static int pc98_check_i8251_interrupt(struct com_s *com) { return ( com->intr_enable & 0x07 ); } static void pc98_i8251_clear_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE tmp = com->pc98_prev_siocmd & ~(x); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_or_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = com->pc98_prev_siocmd | (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_set_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_clear_or_cmd(struct com_s *com, int clr, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = com->pc98_prev_siocmd & ~(clr); tmp |= (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static int pc98_i8251_get_cmd(struct com_s *com) { return com->pc98_prev_siocmd; } static int pc98_i8251_get_mod(struct com_s *com) { return com->pc98_prev_siomod; } static void pc98_i8251_reset(struct com_s *com, int mode, int command) { if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, CMD8251_RESET); /* internal reset */ DELAY(2); outb(com->cmd_port, mode ); /* mode register */ com->pc98_prev_siomod = mode; DELAY(2); pc98_i8251_set_cmd( com, (command|CMD8251_ER) ); DELAY(10); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE | CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); } static void pc98_check_sysclock(void) { /* get system clock from port */ if ( pc98_machine_type & M_8M ) { /* 8 MHz system & H98 */ sysclock = 8; } else { /* 5 MHz system */ sysclock = 5; } } static void com_cflag_and_speed_set( struct com_s *com, int cflag, int speed) { int cfcr=0, count; int previnterrupt; count = pc98_ttspeedtab( com, speed ); if ( count < 0 ) return; previnterrupt = pc98_check_i8251_interrupt(com); pc98_disable_i8251_interrupt( com, IEN_Tx|IEN_TxEMP|IEN_Rx ); switch ( cflag&CSIZE ) { case CS5: cfcr = MOD8251_5BITS; break; case CS6: cfcr = MOD8251_6BITS; break; case CS7: cfcr = MOD8251_7BITS; break; case CS8: cfcr = MOD8251_8BITS; break; } if ( cflag&PARENB ) { if ( cflag&PARODD ) cfcr |= MOD8251_PODD; else cfcr |= MOD8251_PEVEN; } else cfcr |= MOD8251_PDISAB; if ( cflag&CSTOPB ) cfcr |= MOD8251_STOP2; else cfcr |= MOD8251_STOP1; if ( count & 0x10000 ) cfcr |= MOD8251_CLKX1; else cfcr |= MOD8251_CLKX16; if (epson_machine_id != 0x20) { /* XXX */ int tmp; while (!((tmp = inb(com->sts_port)) & STS8251_TxEMP)) ; } /* set baud rate from ospeed */ pc98_set_baud_rate( com, count ); if ( cfcr != pc98_i8251_get_mod(com) ) pc98_i8251_reset(com, cfcr, pc98_i8251_get_cmd(com) ); pc98_enable_i8251_interrupt( com, previnterrupt ); } static int pc98_ttspeedtab(struct com_s *com, int speed) { int if_type, effect_sp, count = -1, mod; if_type = com->pc98_if_type & 0x0f; switch (com->pc98_if_type) { case COM_IF_INTERNAL: if (PC98SIO_baud_rate_port(if_type) != -1) { count = ttspeedtab(speed, if_8251_type[if_type].speedtab); if (count > 0) { count |= COM1_EXT_CLOCK; break; } } /* for *1CLK asynchronous! mode, TEFUTEFU */ mod = (sysclock == 5) ? 2457600 : 1996800; effect_sp = ttspeedtab( speed, pc98speedtab ); if ( effect_sp < 0 ) /* XXX */ effect_sp = ttspeedtab( (speed - 1), pc98speedtab ); if ( effect_sp <= 0 ) return effect_sp; if ( effect_sp == speed ) mod /= 16; if ( mod % effect_sp ) return(-1); count = mod / effect_sp; if ( count > 65535 ) return(-1); if ( effect_sp != speed ) count |= 0x10000; break; case COM_IF_PC9861K_1: case COM_IF_PC9861K_2: count = 1; break; case COM_IF_IND_SS_1: case COM_IF_IND_SS_2: case COM_IF_PIO9032B_1: case COM_IF_PIO9032B_2: if ( speed == 0 ) return 0; count = ttspeedtab( speed, if_8251_type[if_type].speedtab ); break; case COM_IF_B98_01_1: case COM_IF_B98_01_2: if ( speed == 0 ) return 0; count = ttspeedtab( speed, if_8251_type[if_type].speedtab ); #ifdef B98_01_OLD if (count == 0 || count == 1) { count += 4; count |= 0x20000; /* x1 mode for 76800 and 153600 */ } #endif break; } return count; } static void pc98_set_baud_rate( struct com_s *com, int count ) { int if_type, io, s; if_type = com->pc98_if_type & 0x0f; io = rman_get_start(com->ioportres) & 0xff00; switch (com->pc98_if_type) { case COM_IF_INTERNAL: if (PC98SIO_baud_rate_port(if_type) != -1) { if (count & COM1_EXT_CLOCK) { outb((Port_t)PC98SIO_baud_rate_port(if_type), count & 0xff); break; } else { outb((Port_t)PC98SIO_baud_rate_port(if_type), 0x09); } } if ( count < 0 ) { printf( "[ Illegal count : %d ]", count ); return; } else if ( count == 0 ) return; /* set i8253 */ s = splclock(); if (count != 3) outb( 0x77, 0xb6 ); else outb( 0x77, 0xb4 ); outb( 0x5f, 0); outb( 0x75, count & 0xff ); outb( 0x5f, 0); outb( 0x75, (count >> 8) & 0xff ); splx(s); break; case COM_IF_IND_SS_1: case COM_IF_IND_SS_2: outb(io | PC98SIO_intr_ctrl_port(if_type), 0); outb(io | PC98SIO_baud_rate_port(if_type), 0); outb(io | PC98SIO_baud_rate_port(if_type), 0xc0); outb(io | PC98SIO_baud_rate_port(if_type), (count >> 8) | 0x80); outb(io | PC98SIO_baud_rate_port(if_type), count & 0xff); break; case COM_IF_PIO9032B_1: case COM_IF_PIO9032B_2: outb(io | PC98SIO_baud_rate_port(if_type), count); break; case COM_IF_B98_01_1: case COM_IF_B98_01_2: outb(io | PC98SIO_baud_rate_port(if_type), count & 0x0f); #ifdef B98_01_OLD /* * Some old B98_01 board should be controlled * in different way, but this hasn't been tested yet. */ outb(io | PC98SIO_func_port(if_type), (count & 0x20000) ? 0xf0 : 0xf2); #endif break; } } static int pc98_check_if_type(device_t dev, struct siodev *iod) { int irr, io, if_type, tmp; static short irq_tab[2][8] = { { 3, 5, 6, 9, 10, 12, 13, -1}, { 3, 10, 12, 13, 5, 6, 9, -1} }; if_type = iod->if_type & 0x0f; iod->irq = 0; io = isa_get_port(dev) & 0xff00; if (IS_8251(iod->if_type)) { if (PC98SIO_func_port(if_type) != -1) { outb(io | PC98SIO_func_port(if_type), 0xf2); tmp = ttspeedtab(9600, if_8251_type[if_type].speedtab); if (tmp != -1 && PC98SIO_baud_rate_port(if_type) != -1) outb(io | PC98SIO_baud_rate_port(if_type), tmp); } iod->cmd = io | PC98SIO_cmd_port(if_type); iod->sts = io | PC98SIO_sts_port(if_type); iod->mod = io | PC98SIO_in_modem_port(if_type); iod->ctrl = io | PC98SIO_intr_ctrl_port(if_type); if (iod->if_type == COM_IF_INTERNAL) { iod->irq = 4; if (pc98_check_8251vfast()) { PC98SIO_baud_rate_port(if_type) = I8251F_div; if_8251_type[if_type].speedtab = pc98fast_speedtab; } } else { tmp = inb( iod->mod ) & if_8251_type[if_type].irr_mask; if ((isa_get_port(dev) & 0xff) == IO_COM2) iod->irq = irq_tab[0][tmp]; else iod->irq = irq_tab[1][tmp]; } } else { irr = if_16550a_type[if_type].irr_read; #ifdef COM_MULTIPORT if (!COM_ISMULTIPORT(device_get_flags(dev)) || device_get_unit(dev) == COM_MPMASTER(device_get_flags(dev))) #endif if (irr != -1) { tmp = inb(io | irr); if (isa_get_port(dev) & 0x01) /* XXX depend on RSB-384 */ iod->irq = irq_tab[1][tmp >> 3]; else iod->irq = irq_tab[0][tmp & 0x07]; } } if ( iod->irq == -1 ) return -1; return 0; } static void pc98_set_ioport(struct com_s *com) { int if_type = com->pc98_if_type & 0x0f; Port_t io = rman_get_start(com->ioportres) & 0xff00; pc98_check_sysclock(); com->data_port = io | PC98SIO_data_port(if_type); com->cmd_port = io | PC98SIO_cmd_port(if_type); com->sts_port = io | PC98SIO_sts_port(if_type); com->in_modem_port = io | PC98SIO_in_modem_port(if_type); com->intr_ctrl_port = io | PC98SIO_intr_ctrl_port(if_type); } static int pc98_check_8251vfast(void) { int i; outb(I8251F_div, 0x8c); DELAY(10); for (i = 0; i < 100; i++) { if ((inb(I8251F_div) & 0x80) != 0) { i = 0; break; } DELAY(1); } outb(I8251F_div, 0); DELAY(10); for (; i < 100; i++) { if ((inb(I8251F_div) & 0x80) == 0) return 1; DELAY(1); } return 0; } static int pc98_check_8251fifo(void) { u_char tmp1, tmp2; tmp1 = inb(I8251F_iir); DELAY(10); tmp2 = inb(I8251F_iir); if (((tmp1 ^ tmp2) & 0x40) != 0 && ((tmp1 | tmp2) & 0x20) == 0) return 1; return 0; } #endif /* PC98 defined */ Index: head/sys/pc98/i386/machdep.c =================================================================== --- head/sys/pc98/i386/machdep.c (revision 62572) +++ head/sys/pc98/i386/machdep.c (revision 62573) @@ -1,2731 +1,2731 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "apm.h" #include "npx.h" #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" #include "opt_smp.h" #include "opt_user_ldt.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #ifdef SMP #include #include #endif #ifdef PERFMON #include #endif #ifdef OLD_BUS_ARCH #include #endif #include #ifdef PC98 #include #include #else #include #endif #include #include #include extern void init386 __P((int first)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ #endif int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif #ifdef PC98 static int ispc98 = 1; #else static int ispc98 = 0; #endif SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); int physmem = 0; int cold = 1; static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code)); static int -sysctl_hw_physmem (SYSCTL_HANDLER_ARGS) +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int -sysctl_hw_usermem (SYSCTL_HANDLER_ARGS) +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); static int -sysctl_hw_availpages (SYSCTL_HANDLER_ARGS) +sysctl_hw_availpages(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, i386_btop(avail_end - avail_start), req); return (error); } SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_availpages, "I", ""); static int -sysctl_machdep_msgbuf (SYSCTL_HANDLER_ARGS) +sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) { int error; /* Unwind the buffer, so that it's linear (possibly starting with * some initial nulls). */ error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, msgbufp->msg_size-msgbufp->msg_bufr,req); if(error) return(error); if(msgbufp->msg_bufr>0) { error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, msgbufp->msg_bufr,req); } return(error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); static int msgbuf_clear; static int -sysctl_machdep_msgbuf_clear (SYSCTL_HANDLER_ARGS) +sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) { /* Clear the buffer and reset write pointer */ bzero(msgbufp->msg_ptr,msgbufp->msg_size); msgbufp->msg_bufr=msgbufp->msg_bufx=0; msgbuf_clear=0; } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", "Clear kernel message buffer"); int bootverbose = 0, Maxmem = 0; #ifdef PC98 int Maxmem_under16M = 0; #endif long dumplo; vm_offset_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; #define offsetof(type, member) ((size_t)(&((type *)0)->member)) static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); earlysetcpuclass(); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %u (%uK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08x - 0x%08x, %u bytes (%u pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } /* * Calculate callout wheel size */ for (callwheelsize = 1, callwheelbits = 0; callwheelsize < ncallout; callwheelsize <<= 1, ++callwheelbits) ; callwheelmask = callwheelsize - 1; /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); valloc(callwheel, struct callout_tailq, callwheelsize); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/20 of our ram over 64MB. * * factor represents the 1/4 x ram conversion. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / PAGE_SIZE; nbuf = 50; if (physmem > 1024) nbuf += min((physmem - 1024) / factor, 16384 / factor); if (physmem > 16384) nbuf += (physmem - 16384) * 2 / (factor * 5); } /* * Do not allow the buffer_map to be more then 1/2 the size of the * kernel_map. */ if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2)) { nbuf = (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2); printf("Warning: nbufs capped at %d\n", nbuf); } nswbuf = max(min(nbuf/4, 256), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); v = bufhashinit(v); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE)); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size); pager_map->system_map = 1; exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size); mb_map->system_map = 1; } /* * Initialize callouts */ SLIST_INIT(&callfree); for (i = 0; i < ncallout; i++) { callout_init(&callout[i]); callout[i].c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle); } for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&callwheel[i]); } #if defined(USERCONFIG) userconfig(); cninit(); /* the preferred console may have changed */ #endif printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! */ mp_start(); /* fire up the APs and APICs */ mp_announce(); #endif /* SMP */ } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf; struct osigframe *fp; struct proc *p; struct sigacts *psp; struct trapframe *regs; int oonstack; p = curproc; psp = p->p_sigacts; regs = p->p_md.md_regs; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else fp = (struct osigframe *)regs->tf_esp - 1; /* * grow_stack() will return 0 if *fp does not fit inside the stack * and the stack can not be grown. * useracc() will return FALSE if access is denied. */ if (grow_stack(p, (int)fp) == 0 || !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = oonstack; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf; struct proc *p; struct sigacts *psp; struct trapframe *regs; struct sigframe *sfp; int oonstack; p = curproc; psp = p->p_sigacts; if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } regs = p->p_md.md_regs; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sfp = (struct sigframe *)regs->tf_esp - 1; /* * grow_stack() will return 0 if *sfp does not fit inside the stack * and the stack can not be grown. * useracc() will return FALSE if access is denied. */ if (grow_stack(p, (int)sfp) == 0 || !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG printf("process %d has trashed its stack\n", p->p_pid); #endif SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill siginfo structure. */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * We should never have PSL_T set when returning from vm86 * mode. It may be set here if we deliver a signal before * getting to vm86 mode, so turn it off. * * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ int osigreturn(p, uap) struct proc *p; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct trapframe *regs; struct osigcontext *scp; int eflags; regs = p->p_md.md_regs; scp = uap->sigcntxp; if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ)) return (EFAULT); eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; if (scp->sc_onstack & 01) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; return (EJUSTRETURN); } int sigreturn(p, uap) struct proc *p; struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap; { struct trapframe *regs; ucontext_t *ucp; int cs, eflags; ucp = uap->sigcntxp; if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ)) return (EFAULT); if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516) return (osigreturn(p, (struct osigreturn_args *)uap)); /* * Since ucp is not an osigcontext but a ucontext_t, we have to * check again if all of it is accessible. A ucontext_t is * much larger, so instead of just checking for the pointer * being valid for the size of an osigcontext, now check for * it being valid for a whole, new-style ucontext_t. */ if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ)) return (EFAULT); regs = p->p_md.md_regs; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Clear registers on exec */ void setregs(p, entry, stack, ps_strings) struct proc *p; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = p->p_md.md_regs; struct pcb *pcb = &p->p_addr->u_pcb; #ifdef USER_LDT /* was i386_user_cleanup() in NetBSD */ user_ldt_free(pcb); #endif bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* reset %gs as well */ if (pcb == curpcb) load_gs(_udatasel); else pcb->pcb_gs = _udatasel; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); #if NNPX > 0 /* Initialize the npx (if any) for the current process. */ npxinit(__INITIAL_NPXCW__); #endif /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ p->p_retval[1] = 0; } static int -sysctl_machdep_adjkerntz (SYSCTL_HANDLER_ARGS) +sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; #ifdef SMP union descriptor gdt[NGDT * NCPU]; /* global descriptor table */ #else union descriptor gdt[NGDT]; /* global descriptor table */ #endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif #ifndef SMP extern struct segment_descriptor common_tssd, *tss_gdt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. */ #ifdef PC98 static void getmemsize(int first) { u_int biosbasemem, biosextmem; u_int pagesinbase, pagesinext; int pa_indx; int pg_n; int speculative_mprobe; #if NNPX > 0 int msize; #endif unsigned under16; vm_offset_t target_page; pc98_getmemsize(&biosbasemem, &biosextmem, &under16); #ifdef SMP /* make hole for AP bootstrap code */ pagesinbase = mp_bootaddress(biosbasemem) / PAGE_SIZE; #else pagesinbase = biosbasemem * 1024 / PAGE_SIZE; #endif pagesinext = biosextmem * 1024 / PAGE_SIZE; Maxmem_under16M = under16 * 1024 / PAGE_SIZE; #ifndef MAXMEM /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = pagesinext + 0x100000/PAGE_SIZE; /* * Indicate that we wish to do a speculative search for memory beyond * the end of the reported size if the indicated amount is 64MB (0x4000 * pages) - which is the largest amount that the BIOS/bootblocks can * currently report. If a specific amount of memory is indicated via * the MAXMEM option or the npx0 "msize", then don't do the speculative * memory probe. */ if (Maxmem >= 0x4000) speculative_mprobe = TRUE; else speculative_mprobe = FALSE; #else Maxmem = MAXMEM/4; speculative_mprobe = FALSE; #endif #if NNPX > 0 if (resource_int_value("npx", 0, "msize", &msize) == 0) { if (msize != 0) { Maxmem = msize / 4; speculative_mprobe = FALSE; } } #endif #ifdef SMP /* look for the MP hardware - needed for apic addresses */ mp_probe(); #endif /* call pmap initialization to make new kernel address space */ pmap_bootstrap (first, 0); /* * Size up each available chunk of physical memory. */ /* * We currently don't bother testing base memory. * XXX ...but we probably should. */ pa_indx = 0; if (pagesinbase > 1) { phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ physmem = pagesinbase - 1; } else { /* point at first chunk end */ pa_indx++; } /* XXX - some of EPSON machines can't use PG_N */ pg_n = PG_N; if (pc98_machine_type & M_EPSON_PC98) { switch (epson_machine_id) { #ifdef WB_CACHE default: #endif case 0x34: /* PC-486HX */ case 0x35: /* PC-486HG */ case 0x3B: /* PC-486HA */ pg_n = 0; break; } } speculative_mprobe = FALSE; #ifdef notdef /* XXX - see below */ /* * Certain 'CPU accelerator' supports over 16MB memory on the machines * whose BIOS doesn't store true size. * To support this, we don't trust BIOS values if Maxmem < 16MB (0x1000 * pages) - which is the largest amount that the OLD PC-98 can report. * * OK: PC-9801NS/R(9.6M) * OK: PC-9801DA(5.6M)+EUD-H(32M)+Cyrix 5x86 * OK: PC-9821Ap(14.6M)+EUA-T(8M)+Cyrix 5x86-100 * NG: PC-9821Ap(14.6M)+EUA-T(8M)+AMD DX4-100 -> freeze */ if (Maxmem < 0x1000) { int tmp, page_bad; page_bad = FALSE; /* * For Max14.6MB machines, the 0x10f0 page is same as 0x00f0, * which is BIOS ROM, by overlapping. * So, we check that page's ability of writing. */ target_page = ptoa(0x10f0); /* * map page into kernel: valid, read/write, non-cacheable */ *(int *)CMAP1 = PG_V | PG_RW | pg_n | target_page; invltlb(); tmp = *(int *)CADDR1; /* * Test for alternating 1's and 0's */ *(volatile int *)CADDR1 = 0xaaaaaaaa; if (*(volatile int *)CADDR1 != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)CADDR1 = 0x55555555; if (*(volatile int *)CADDR1 != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)CADDR1 = 0xffffffff; if (*(volatile int *)CADDR1 != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)CADDR1 = 0x0; if (*(volatile int *)CADDR1 != 0x0) { /* * test of page failed */ page_bad = TRUE; } /* * Restore original value. */ *(int *)CADDR1 = tmp; /* * Adjust Maxmem if valid/good page. */ if (page_bad == FALSE) { /* '+ 2' is needed to make speculative_mprobe sure */ Maxmem = 0x1000 + 2; speculative_mprobe = TRUE; } } #endif for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { int tmp, page_bad; page_bad = FALSE; /* skip system area */ if (target_page >= ptoa(Maxmem_under16M) && target_page < ptoa(4096)) continue; /* * map page into kernel: valid, read/write, non-cacheable */ *(int *)CMAP1 = PG_V | PG_RW | pg_n | target_page; invltlb(); tmp = *(int *)CADDR1; /* * Test for alternating 1's and 0's */ *(volatile int *)CADDR1 = 0xaaaaaaaa; if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)CADDR1 = 0x55555555; if (*(volatile int *)CADDR1 != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)CADDR1 = 0xffffffff; if (*(volatile int *)CADDR1 != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)CADDR1 = 0x0; if (*(volatile int *)CADDR1 != 0x0) { /* * test of page failed */ page_bad = TRUE; } /* * Restore original value. */ *(int *)CADDR1 = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == FALSE) { /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == target_page) { phys_avail[pa_indx] += PAGE_SIZE; if (speculative_mprobe == TRUE && phys_avail[pa_indx] >= (16*1024*1024)) Maxmem++; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = target_page; /* start */ phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ } physmem++; } } *(int *)CMAP1 = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } #else static void getmemsize(int first) { int i, physmap_idx, pa_indx; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_offset_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t pte; const char *cp; struct { u_int64_t base; u_int64_t length; u_int32_t type; } *smap; bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { pte = (pt_entry_t)vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. */ pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT)); *pte = (1 << PAGE_SHIFT) | PG_RW | PG_V; /* * get memory map with INT 15:E820 */ #define SMAPSIZ sizeof(*smap) #define SMAP_SIG 0x534D4150 /* 'SMAP' */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = SMAPSIZ; i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n", smap->type, *(u_int32_t *)((char *)&smap->base + 4), (u_int32_t)smap->base, *(u_int32_t *)((char *)&smap->length + 4), (u_int32_t)smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: } while (vmf.vmf_ebx != 0); if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.maxmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %uK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa(Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; #if 0 pte = (pt_entry_t)vtopte(KERNBASE); #else pte = (pt_entry_t)CMAP1; #endif /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_offset_t end; end = ptoa(Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; #if 0 int *ptr = 0; #else int *ptr = (int *)CADDR1; #endif /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } #endif void init386(first) int first; { int x; struct gate_descriptor *gdp; int gsel_tss; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif int off; /* * Prevent lowering of the ipl if we call tsleep() early. */ safepri = cpl; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; #ifdef PC98 /* * Initialize DMAC */ pc98_init_dmac(); #endif if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; #ifdef SMP gdt_segs[GPRIV_SEL].ssd_limit = i386_btop(sizeof(struct privatespace)) - 1; gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0]; gdt_segs[GPROC0_SEL].ssd_base = (int) &SMP_prvspace[0].globaldata.gd_common_tss; SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0]; #else gdt_segs[GPRIV_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GPROC0_SEL].ssd_base = (int) &common_tss; #endif for (x = 0; x < NGDT; x++) { #ifdef BDE_DEBUGGER /* avoid overwriting db entries with APM ones */ if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL) continue; #endif ssdtosd(&gdt_segs[x], &gdt[x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. */ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); #ifdef USER_LDT currentldt = _default_ldt; #endif /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; tss_gdt = &gdt[GPROC0_SEL].sd; common_tssd = *tss_gdt; common_tss.tss_ioopt = (sizeof common_tss) << 16; ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = (int)IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; #ifdef SMP proc0.p_addr->u_pcb.pcb_mpnest = 1; #endif proc0.p_addr->u_pcb.pcb_ext = 0; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(p, addr) struct proc *p; unsigned long addr; { p->p_md.md_regs->tf_eip = addr; return (0); } int ptrace_single_step(p) struct proc *p; { p->p_md.md_regs->tf_eflags |= PSL_T; return (0); } int ptrace_read_u_check(p, addr, len) struct proc *p; vm_offset_t addr; size_t len; { vm_offset_t gap; if ((vm_offset_t) (addr + len) < addr) return EPERM; if ((vm_offset_t) (addr + len) <= sizeof(struct user)) return 0; gap = (char *) p->p_md.md_regs - (char *) p->p_addr; if ((vm_offset_t) addr < gap) return EPERM; if ((vm_offset_t) (addr + len) <= (vm_offset_t) (gap + sizeof(struct trapframe))) return 0; return EPERM; } int ptrace_write_u(p, off, data) struct proc *p; vm_offset_t off; long data; { struct trapframe frame_copy; vm_offset_t min; struct trapframe *tp; /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_regs - (char *)p->p_addr; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = p->p_md.md_regs; frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = &p->p_addr->u_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = &p->p_addr->u_pcb; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs); return (0); } int set_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs); return (0); } int fill_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; pcb = &p->p_addr->u_pcb; dbregs->dr0 = pcb->pcb_dr0; dbregs->dr1 = pcb->pcb_dr1; dbregs->dr2 = pcb->pcb_dr2; dbregs->dr3 = pcb->pcb_dr3; dbregs->dr4 = 0; dbregs->dr5 = 0; dbregs->dr6 = pcb->pcb_dr6; dbregs->dr7 = pcb->pcb_dr7; return (0); } int set_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; pcb = &p->p_addr->u_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (p->p_ucred->cr_uid != 0) { if (dbregs->dr7 & 0x3) { /* dr0 is enabled */ if (dbregs->dr0 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr1 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr2 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr3 >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr0; pcb->pcb_dr1 = dbregs->dr1; pcb->pcb_dr2 = dbregs->dr2; pcb->pcb_dr3 = dbregs->dr3; pcb->pcb_dr6 = dbregs->dr6; pcb->pcb_dr7 = dbregs->dr7; pcb->pcb_flags |= PCB_DBREGS; return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i=0; i /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel) { struct partition *p = lp->d_partitions + dkpart(bp->bio_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR && (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->bio_blkno == maxsz) { bp->bio_resid = bp->bio_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->bio_blkno; if (sz <= 0) { bp->bio_error = EINVAL; goto bad; } bp->bio_bcount = sz << DEV_BSHIFT; } bp->bio_pblkno = bp->bio_blkno + p->p_offset; return(1); bad: bp->bio_flags |= BIO_ERROR; return(-1); } #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/pc98/i386/userconfig.c =================================================================== --- head/sys/pc98/i386/userconfig.c (revision 62572) +++ head/sys/pc98/i386/userconfig.c (revision 62573) @@ -1,3475 +1,3475 @@ /** ** Copyright (c) 1995 ** Michael Smith, msmith@freebsd.org. All rights reserved. ** ** This code contains a module marked : * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 Jordan K. Hubbard * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * Many additional changes by Bruce Evans * * This code is derived from software contributed by the * University of California Berkeley, Jordan K. Hubbard, * David Greenman and Bruce Evans. ** As such, it contains code subject to the above copyrights. ** The module and its copyright can be found below. ** ** Redistribution and use in source and binary forms, with or without ** modification, are permitted provided that the following conditions ** are met: ** 1. Redistributions of source code must retain the above copyright ** notice, this list of conditions and the following disclaimer as ** the first lines of this file unmodified. ** 2. Redistributions in binary form must reproduce the above copyright ** notice, this list of conditions and the following disclaimer in the ** documentation and/or other materials provided with the distribution. ** 3. All advertising materials mentioning features or use of this software ** must display the following acknowledgment: ** This product includes software developed by Michael Smith. ** 4. The name of the author may not be used to endorse or promote products ** derived from this software without specific prior written permission. ** ** THIS SOFTWARE IS PROVIDED BY MICHAEL SMITH ``AS IS'' AND ANY EXPRESS OR ** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ** IN NO EVENT SHALL MICHAEL SMITH BE LIABLE FOR ANY DIRECT, INDIRECT, ** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ** ** $FreeBSD$ **/ /** ** USERCONFIG ** ** Kernel boot-time configuration manipulation tool for FreeBSD. ** ** Two modes of operation are supported : the default is the line-editor mode, ** the command "visual" invokes the fullscreen mode. ** ** The line-editor mode is the old favorite from FreeBSD 2.0/20.05 &c., the ** fullscreen mode requires syscons or a minimal-ansi serial console. **/ /** ** USERCONFIG, visual mode. ** ** msmith@freebsd.org ** ** Look for "EDIT THIS LIST" to add to the list of known devices ** ** ** There are a number of assumptions made in this code. ** ** - That the console supports a minimal set of ANSI escape sequences ** (See the screen manipulation section for a summary) ** and has at least 24 rows. ** - That values less than or equal to zero for any of the device ** parameters indicate that the driver does not use the parameter. ** - That flags are _always_ editable. ** ** Devices marked as disabled are imported as such. ** ** For this tool to be useful, the list of devices below _MUST_ be updated ** when a new driver is brought into the kernel. It is not possible to ** extract this information from the drivers in the kernel. ** ** XXX - TODO: ** ** - Display _what_ a device conflicts with. ** - Implement page up/down (as what?) ** - Wizard mode (no restrictions) ** - Find out how to put syscons back into low-intensity mode so that the ** !b escape is useful on the console. (It seems to be that it actually ** gets low/high intensity backwards. That looks OK.) ** ** - Only display headings with devices under them. (difficult) **/ /* * PC-9801 port by KATO Takenori */ #include "opt_userconfig.h" #define COMPAT_OLDISA /* get the definitions */ #include #include #include #include #include #include #include #include #include #include #include #define _I386_ISA_ISA_DEVICE_H_ #undef NPNP #define NPNP 0 #if NPNP > 0 #include #endif static MALLOC_DEFINE(M_DEVL, "uc_devlist", "uc_device lists in userconfig()"); #include static struct uc_device *uc_devlist; /* list read by kget to extract changes */ static struct uc_device *uc_devtab; /* fake uc_device table */ static int userconfig_boot_parsing; /* set if we are reading from the boot instructions */ #define putchar(x) cnputc(x) static void load_devtab(void); static void free_devtab(void); static void save_resource(struct uc_device *); static int -sysctl_machdep_uc_devlist (SYSCTL_HANDLER_ARGS) +sysctl_machdep_uc_devlist(SYSCTL_HANDLER_ARGS) { struct uc_device *id; int error=0; char name[8]; if(!req->oldptr) { /* Only sizing */ id=uc_devlist; while(id) { error+=sizeof(struct uc_device)+8; id=id->id_next; } return(SYSCTL_OUT(req,0,error)); } else { /* Output the data. The buffer is filled with consecutive * struct uc_device and char buf[8], containing the name * (not guaranteed to end with '\0'). */ id=uc_devlist; while(id) { error=sysctl_handle_opaque(oidp,id, sizeof(struct uc_device),req); if(error) return(error); strncpy(name,id->id_name,8); error=sysctl_handle_opaque(oidp,name, 8,req); if(error) return(error); id=id->id_next; } return(0); } } SYSCTL_PROC( _machdep, OID_AUTO, uc_devlist, CTLFLAG_RD, 0, 0, sysctl_machdep_uc_devlist, "A", "List of ISA devices changed in UserConfig"); /* ** Obtain command input. ** ** Initially, input is read from a possibly-loaded script. ** At the end of the script, or if no script is supplied, ** behaviour is determined by the RB_CONFIG (-c) flag. If ** the flag is set, user input is read from the console; if ** unset, the 'quit' command is invoked and userconfig ** will exit. ** ** Note that quit commands encountered in the script will be ** ignored if the RB_CONFIG flag is supplied. */ static const char *config_script; static int config_script_size; /* use of int for -ve magic value */ #define has_config_script() (config_script_size > 0) static int init_config_script(void) { caddr_t autoentry, autoattr; /* Look for loaded userconfig script */ autoentry = preload_search_by_type("userconfig_script"); if (autoentry != NULL) { /* We have one, get size and data */ config_script_size = 0; if ((autoattr = preload_search_info(autoentry, MODINFO_SIZE)) != NULL) config_script_size = (size_t)*(u_int32_t *)autoattr; config_script = NULL; if ((autoattr = preload_search_info(autoentry, MODINFO_ADDR)) != NULL) config_script = *(const char **)autoattr; /* sanity check */ if ((config_script_size == 0) || (config_script == NULL)) { config_script_size = 0; config_script = NULL; } } return has_config_script(); } static int getchar(void) { int c = -1; #ifdef INTRO_USERCONFIG static int intro = 0; #endif if (has_config_script()) { /* Consume character from loaded userconfig script, display */ userconfig_boot_parsing = 1; c = *config_script; config_script++; config_script_size--; } else { #ifdef INTRO_USERCONFIG if (userconfig_boot_parsing) { if (!(boothowto & RB_CONFIG)) { /* userconfig_script, !RB_CONFIG -> quit */ if (intro == 0) { c = 'q'; config_script = "uit\n"; config_script_size = strlen(config_script); /* userconfig_script will be 1 on the next pass */ } } else { /* userconfig_script, RB_CONFIG -> cngetc() */ } } else { if (!(boothowto & RB_CONFIG)) { /* no userconfig_script, !RB_CONFIG -> show intro */ if (intro == 0) { intro = 1; c = 'i'; config_script = "ntro\n"; config_script_size = strlen(config_script); /* userconfig_script will be 1 on the next pass */ } } else { /* no userconfig_script, RB_CONFIG -> cngetc() */ } } #else /* !INTRO_USERCONFIG */ /* assert(boothowto & RB_CONFIG) */ #endif /* INTRO_USERCONFIG */ userconfig_boot_parsing = 0; if (c <= 0) c = cngetc(); } return(c); } #ifndef FALSE #define FALSE (0) #define TRUE (!FALSE) #endif #ifdef VISUAL_USERCONFIG typedef struct { char dev[16]; /* device basename */ char name[60]; /* long name */ int attrib; /* things to do with the device */ int class; /* device classification */ } DEV_INFO; #define FLG_INVISIBLE (1<<0) /* device should not be shown */ #define FLG_MANDATORY (1<<1) /* device can be edited but not disabled */ #define FLG_FIXIRQ (1<<2) /* device IRQ cannot be changed */ #define FLG_FIXIOBASE (1<<3) /* device iobase cannot be changed */ #define FLG_FIXMADDR (1<<4) /* device maddr cannot be changed */ #define FLG_FIXMSIZE (1<<5) /* device msize cannot be changed */ #define FLG_FIXDRQ (1<<6) /* device DRQ cannot be changed */ #define FLG_FIXED (FLG_FIXIRQ|FLG_FIXIOBASE|FLG_FIXMADDR|FLG_FIXMSIZE|FLG_FIXDRQ) #define FLG_IMMUTABLE (FLG_FIXED|FLG_MANDATORY) #define CLS_STORAGE 1 /* storage devices */ #define CLS_NETWORK 2 /* network interfaces */ #define CLS_COMMS 3 /* serial, parallel ports */ #define CLS_INPUT 4 /* user input : mice, keyboards, joysticks etc */ #define CLS_MMEDIA 5 /* "multimedia" devices (sound, video, etc) */ #define CLS_MISC 255 /* none of the above */ typedef struct { char name[60]; int number; } DEVCLASS_INFO; static DEVCLASS_INFO devclass_names[] = { { "Storage : ", CLS_STORAGE}, { "Network : ", CLS_NETWORK}, { "Communications : ", CLS_COMMS}, { "Input : ", CLS_INPUT}, { "Multimedia : ", CLS_MMEDIA}, { "Miscellaneous : ", CLS_MISC}, { "",0}}; /********************* EDIT THIS LIST **********************/ /** Notes : ** ** - Devices that shouldn't be seen or removed should be marked FLG_INVISIBLE. ** - XXX The list below should be reviewed by the driver authors to verify ** that the correct flags have been set for each driver, and that the ** descriptions are accurate. **/ static DEV_INFO device_info[] = { /*---Name----- ---Description---------------------------------------------- */ #ifdef PC98 {"bs", "PC-9801-55 SCSI Interface", 0, CLS_STORAGE}, #endif {"adv", "AdvanSys SCSI narrow controller", 0, CLS_STORAGE}, {"bt", "Buslogic SCSI controller", 0, CLS_STORAGE}, {"aha", "Adaptec 154x SCSI controller", 0, CLS_STORAGE}, {"aic", "Adaptec 152x SCSI and compatible SCSI cards", 0, CLS_STORAGE}, {"nca", "ProAudio Spectrum SCSI and compatibles", 0, CLS_STORAGE}, {"sea", "Seagate ST01/ST02 SCSI and compatibles", 0, CLS_STORAGE}, {"wdc", "IDE/ESDI/MFM disk controller", 0, CLS_STORAGE}, {"ata", "ATA/ATAPI compatible disk controller", 0, CLS_STORAGE}, {"fdc", "Floppy disk controller", FLG_FIXED, CLS_STORAGE}, {"mcd", "Mitsumi CD-ROM", 0, CLS_STORAGE}, {"scd", "Sony CD-ROM", 0, CLS_STORAGE}, {"matcd", "Matsushita/Panasonic/Creative CDROM", 0, CLS_STORAGE}, {"wt", "Wangtek/Archive QIC-02 Tape drive", 0, CLS_STORAGE}, {"wd", "IDE or ST506 compatible storage device", FLG_INVISIBLE, CLS_STORAGE}, {"ad", "ATA/ATAPI compatible storage device", FLG_INVISIBLE, CLS_STORAGE}, {"fd", "Floppy disk device", FLG_INVISIBLE, CLS_STORAGE}, {"cs", "IBM EtherJet, CS89x0-based Ethernet adapters",0, CLS_NETWORK}, #ifdef PC98 {"ed", "NS8390 Ethernet adapters", 0, CLS_NETWORK}, #else {"ed", "NE1000,NE2000,3C503,WD/SMC80xx Ethernet adapters",0, CLS_NETWORK}, #endif {"el", "3C501 Ethernet adapter", 0, CLS_NETWORK}, {"ep", "3C509 Ethernet adapter", 0, CLS_NETWORK}, {"ex", "Intel EtherExpress Pro/10 Ethernet adapter", 0, CLS_NETWORK}, {"fe", "Fujitsu MD86960A/MB869685A Ethernet adapters", 0, CLS_NETWORK}, {"ie", "AT&T Starlan 10 and EN100, 3C507, NI5210 Ethernet adapters",0,CLS_NETWORK}, {"le", "DEC Etherworks 2 and 3 Ethernet adapters", 0, CLS_NETWORK}, {"lnc", "Isolan, Novell NE2100/NE32-VL Ethernet adapters", 0,CLS_NETWORK}, {"sn", "SMC/Megahertz Ethernet adapters", 0,CLS_NETWORK}, {"xe", "Xircom PC Card Ethernet adapter", 0, CLS_NETWORK}, {"rdp", "RealTek RTL8002 Pocket Ethernet", 0, CLS_NETWORK}, {"sio", "8250/16450/16550 Serial port", 0, CLS_COMMS}, {"cx", "Cronyx/Sigma multiport sync/async adapter",0, CLS_COMMS}, {"rc", "RISCom/8 multiport async adapter", 0, CLS_COMMS}, {"cy", "Cyclades multiport async adapter", 0, CLS_COMMS}, {"dgb", "Digiboard PC/Xe, PC/Xi async adapter", 0, CLS_COMMS}, {"si", "Specialix SI/XIO/SX async adapter", 0, CLS_COMMS}, {"stl", "Stallion EasyIO/Easy Connection 8/32 async adapter",0, CLS_COMMS}, {"stli", "Stallion intelligent async adapter" ,0, CLS_COMMS}, #ifdef PC98 {"olpt", "Parallel printer port", 0, CLS_COMMS}, #endif {"ppc", "Parallel Port chipset", 0, CLS_COMMS}, {"gp", "National Instruments AT-GPIB/TNT driver", 0, CLS_COMMS}, {"atkbdc", "Keyboard controller", FLG_INVISIBLE, CLS_INPUT}, {"atkbd", "Keyboard", FLG_FIXED, CLS_INPUT}, #ifdef PC98 {"pckbd", "Keyboard", FLG_FIXED, CLS_INPUT}, #endif {"mse", "Microsoft Bus Mouse", 0, CLS_INPUT}, {"psm", "PS/2 Mouse", FLG_FIXED, CLS_INPUT}, {"joy", "Joystick", FLG_FIXED, CLS_INPUT}, {"vt", "PCVT console driver", FLG_IMMUTABLE, CLS_INPUT}, {"sc", "Syscons console driver", FLG_IMMUTABLE, CLS_INPUT}, #ifdef PC98 {"nss", "PC-9801-86 Sound Board", 0, CLS_MMEDIA}, #endif {"sbc", "PCM Creative SoundBlaster/ESS/Avance sounce cards", 0,CLS_MMEDIA}, {"gusc", "PCM Gravis UltraSound sound cards", 0, CLS_MMEDIA}, {"pcm", "PCM Generic soundcard support", 0, CLS_MMEDIA}, {"sb", "VOXWARE Soundblaster PCM (SB/Pro/16, ProAudio Spectrum)",0,CLS_MMEDIA}, {"sbxvi", "VOXWARE Soundblaster 16", 0, CLS_MMEDIA}, {"sbmidi", "VOXWARE Soundblaster MIDI interface", 0, CLS_MMEDIA}, {"awe", "VOXWARE AWE32 MIDI", 0, CLS_MMEDIA}, {"pas", "VOXWARE ProAudio Spectrum PCM and MIDI", 0, CLS_MMEDIA}, {"gus", "VOXWARE Gravis Ultrasound, Ultrasound 16 and Ultrasound MAX",0,CLS_MMEDIA}, {"gusxvi", "VOXWARE Gravis Ultrasound 16-bit PCM", 0, CLS_MMEDIA}, {"gusmax", "VOXWARE Gravis Ultrasound MAX", 0, CLS_MMEDIA}, {"mss", "VOXWARE Microsoft Sound System", 0, CLS_MMEDIA}, {"opl", "VOXWARE OPL-2/3 FM, SB/Pro/16, ProAudio Spectrum",0,CLS_MMEDIA}, {"mpu", "VOXWARE Roland MPU401 MIDI", 0, CLS_MMEDIA}, {"sscape", "VOXWARE Ensoniq Soundscape MIDI interface", 0, CLS_MMEDIA}, {"sscape_mss", "VOXWARE Ensoniq Soundscape PCM", 0, CLS_MMEDIA}, {"uart", "VOXWARE 6850 MIDI UART", 0, CLS_MMEDIA}, {"pca", "PC speaker PCM audio driver", FLG_FIXED, CLS_MMEDIA}, {"ctx", "Coretex-I frame grabber", 0, CLS_MMEDIA}, {"spigot", "Creative Labs Video Spigot video capture", 0, CLS_MMEDIA}, {"scc", "IBM Smart Capture Card", 0, CLS_MMEDIA}, {"gsc", "Genius GS-4500 hand scanner", 0, CLS_MMEDIA}, {"asc", "AmiScan scanner", 0, CLS_MMEDIA}, {"apm", "Advanced Power Management", FLG_FIXED, CLS_MISC}, {"labpc", "National Instruments Lab-PC/Lab-PC+", 0, CLS_MISC}, {"pcic", "PC-card controller", 0, CLS_MISC}, {"npx", "Math coprocessor", FLG_IMMUTABLE, CLS_MISC}, #ifdef PC98 {"gdc", "Graphic Display Controller", FLG_INVISIBLE, CLS_MISC}, #endif {"vga", "Catchall PCI VGA driver", FLG_INVISIBLE, CLS_MISC}, {"","",0,0}}; typedef struct _devlist_struct { char name[80]; int attrib; /* flag values as per the FLG_* defines above */ int class; /* disk, etc as per the CLS_* defines above */ char dev[16]; int iobase,irq,drq,maddr,msize,unit,flags,id; int comment; /* 0 = device, 1 = comment, 2 = collapsed comment */ int conflicts; /* set/reset by findconflict, count of conflicts */ int changed; /* nonzero if the device has been edited */ struct uc_device *device; struct _devlist_struct *prev,*next; } DEV_LIST; #define DEV_DEVICE 0 #define DEV_COMMENT 1 #define DEV_ZOOMED 2 #define LIST_CURRENT (1<<0) #define LIST_SELECTED (1<<1) #define KEY_EXIT 0 /* return codes from dolist() and friends */ #define KEY_DO 1 #define KEY_DEL 2 #define KEY_TAB 3 #define KEY_REDRAW 4 #define KEY_UP 5 /* these only returned from editval() */ #define KEY_DOWN 6 #define KEY_LEFT 7 #define KEY_RIGHT 8 #define KEY_NULL 9 /* this allows us to spin & redraw */ #define KEY_ZOOM 10 /* these for zoom all/collapse all */ #define KEY_UNZOOM 11 #define KEY_HELP 12 /* duh? */ static void redraw(void); static void insdev(DEV_LIST *dev, DEV_LIST *list); static int devinfo(DEV_LIST *dev); static int visuserconfig(void); static DEV_LIST *active = NULL,*inactive = NULL; /* driver lists */ static DEV_LIST *alist,*ilist; /* visible heads of the driver lists */ static DEV_LIST scratch; /* scratch record */ static int conflicts; /* total conflict count */ static char lines[] = "--------------------------------------------------------------------------------"; static char spaces[] = " "; /** ** Device manipulation stuff : find, describe, configure. **/ /** ** setdev ** ** Sets the device referenced by (*dev) to the parameters in the struct, ** and the enable flag according to (enabled) **/ static void setdev(DEV_LIST *dev, int enabled) { dev->device->id_iobase = dev->iobase; /* copy happy */ dev->device->id_irq = (u_short)(dev->irq < 16 ? 1<irq : 0); /* IRQ is bitfield */ dev->device->id_drq = (short)dev->drq; dev->device->id_maddr = (caddr_t)dev->maddr; dev->device->id_msize = dev->msize; dev->device->id_flags = dev->flags; dev->device->id_enabled = enabled; } /** ** getdevs ** ** Walk the kernel device tables and build the active and inactive lists **/ static void getdevs(void) { int i; struct uc_device *ap; ap = uc_devtab; /* pointer to array of devices */ for (i = 0; ap[i].id_id; i++) /* for each device in this table */ { scratch.unit = ap[i].id_unit; /* device parameters */ strcpy(scratch.dev,ap[i].id_name); scratch.iobase = ap[i].id_iobase; scratch.irq = ffs(ap[i].id_irq)-1; scratch.drq = ap[i].id_drq; scratch.maddr = (int)ap[i].id_maddr; scratch.msize = ap[i].id_msize; scratch.flags = ap[i].id_flags; scratch.comment = DEV_DEVICE; /* admin stuff */ scratch.conflicts = 0; scratch.device = &ap[i]; /* save pointer for later reference */ scratch.changed = 0; if (!devinfo(&scratch)) /* get more info on the device */ insdev(&scratch,ap[i].id_enabled?active:inactive); } } /** ** Devinfo ** ** Fill in (dev->name), (dev->attrib) and (dev->type) from the device_info array. ** If the device is unknown, put it in the CLS_MISC class, with no flags. ** ** If the device is marked "invisible", return nonzero; the caller should ** not insert any such device into either list. ** **/ static int devinfo(DEV_LIST *dev) { int i; for (i = 0; device_info[i].class; i++) { if (!strcmp(dev->dev,device_info[i].dev)) { if (device_info[i].attrib & FLG_INVISIBLE) /* forget we ever saw this one */ return(1); strcpy(dev->name,device_info[i].name); /* get the name */ dev->attrib = device_info[i].attrib; dev->class = device_info[i].class; return(0); } } strcpy(dev->name,"Unknown device"); dev->attrib = 0; dev->class = CLS_MISC; return(0); } /** ** List manipulation stuff : add, move, initialise, free, traverse ** ** Note that there are assumptions throughout this code that ** the first entry in a list will never move. (assumed to be ** a comment). **/ /** ** Adddev ** ** appends a copy of (dev) to the end of (*list) **/ static void addev(DEV_LIST *dev, DEV_LIST **list) { DEV_LIST *lp,*ap; lp = (DEV_LIST *)malloc(sizeof(DEV_LIST),M_DEVL,M_WAITOK); bcopy(dev,lp,sizeof(DEV_LIST)); /* create copied record */ if (*list) /* list exists */ { ap = *list; while(ap->next) ap = ap->next; /* scoot to end of list */ lp->prev = ap; lp->next = NULL; ap->next = lp; }else{ /* list does not yet exist */ *list = lp; lp->prev = lp->next = NULL; /* list now exists */ } } /** ** Findspot ** ** Finds the 'appropriate' place for (dev) in (list) ** ** 'Appropriate' means in numeric order with other devices of the same type, ** or in alphabetic order following a comment of the appropriate type. ** or at the end of the list if an appropriate comment is not found. (this should ** never happen) ** (Note that the appropriate point is never the top, but may be the bottom) **/ static DEV_LIST * findspot(DEV_LIST *dev, DEV_LIST *list) { DEV_LIST *ap = NULL; /* search for a previous instance of the same device */ for (ap = list; ap; ap = ap->next) { if (ap->comment != DEV_DEVICE) /* ignore comments */ continue; if (!strcmp(dev->dev,ap->dev)) /* same base device */ { if ((dev->unit <= ap->unit) /* belongs before (equal is bad) */ || !ap->next) /* or end of list */ { ap = ap->prev; /* back up one */ break; /* done here */ } if (ap->next) /* if the next item exists */ { if (ap->next->comment != DEV_DEVICE) /* next is a comment */ break; if (strcmp(dev->dev,ap->next->dev)) /* next is a different device */ break; } } } if (!ap) /* not sure yet */ { /* search for a class that the device might belong to */ for (ap = list; ap; ap = ap->next) { if (ap->comment != DEV_DEVICE) /* look for simlar devices */ continue; if (dev->class != ap->class) /* of same class too 8) */ continue; if (strcmp(dev->dev,ap->dev) < 0) /* belongs before the current entry */ { ap = ap->prev; /* back up one */ break; /* done here */ } if (ap->next) /* if the next item exists */ if (ap->next->comment != DEV_DEVICE) /* next is a comment, go here */ break; } } if (!ap) /* didn't find a match */ { for (ap = list; ap->next; ap = ap->next) /* try for a matching comment */ if ((ap->comment != DEV_DEVICE) && (ap->class == dev->class)) /* appropriate place? */ break; } /* or just put up with last */ return(ap); } /** ** Insdev ** ** Inserts a copy of (dev) at the appropriate point in (list) **/ static void insdev(DEV_LIST *dev, DEV_LIST *list) { DEV_LIST *lp,*ap; lp = (DEV_LIST *)malloc(sizeof(DEV_LIST),M_DEVL,M_WAITOK); bcopy(dev,lp,sizeof(DEV_LIST)); /* create copied record */ ap = findspot(lp,list); /* find appropriate spot */ lp->next = ap->next; /* point to next */ if (ap->next) ap->next->prev = lp; /* point next to new */ lp->prev = ap; /* point new to current */ ap->next = lp; /* and current to new */ } /** ** Movedev ** ** Moves (dev) from its current list to an appropriate place in (list) ** (dev) may not come from the top of a list, but it may from the bottom. **/ static void movedev(DEV_LIST *dev, DEV_LIST *list) { DEV_LIST *ap; ap = findspot(dev,list); dev->prev->next = dev->next; /* remove from old list */ if (dev->next) dev->next->prev = dev->prev; dev->next = ap->next; /* insert in new list */ if (ap->next) ap->next->prev = dev; /* point next to new */ dev->prev = ap; /* point new to current */ ap->next = dev; /* and current to new */ } /** ** Initlist ** ** Initialises (*list) with the basic headings **/ static void initlist(DEV_LIST **list) { int i; for(i = 0; devclass_names[i].name[0]; i++) /* for each devtype name */ { strcpy(scratch.name,devclass_names[i].name); scratch.comment = DEV_ZOOMED; scratch.class = devclass_names[i].number; scratch.attrib = FLG_MANDATORY; /* can't be moved */ addev(&scratch,list); /* add to the list */ } } /** ** savelist ** ** Walks (list) and saves the settings of any entry marked as changed. ** ** The device's active field is set according to (active). ** ** Builds the uc_devlist used by kget to extract the changed device information. ** The code for this was taken almost verbatim from the original module. **/ static void savelist(DEV_LIST *list, int active) { struct uc_device *id_p,*id_pn; char *name; while (list) { if ((list->comment == DEV_DEVICE) && /* is a device */ (list->changed) && /* has been changed */ (list->device != NULL)) { /* has an uc_device structure */ setdev(list,active); /* set the device itself */ id_pn = NULL; for (id_p=uc_devlist; id_p; id_p=id_p->id_next) { /* look on the list for it */ if (id_p->id_id == list->device->id_id) { name = list->device->id_name; id_pn = id_p->id_next; if (id_p->id_name) free(id_p->id_name, M_DEVL); bcopy(list->device,id_p,sizeof(struct uc_device)); save_resource(list->device); id_p->id_name = malloc(strlen(name) + 1, M_DEVL,M_WAITOK); strcpy(id_p->id_name, name); id_pn->id_next = uc_devlist; id_p->id_next = id_pn; break; } } if (!id_pn) /* not already on the list */ { name = list->device->id_name; id_pn = malloc(sizeof(struct uc_device),M_DEVL,M_WAITOK); bcopy(list->device,id_pn,sizeof(struct uc_device)); save_resource(list->device); id_pn->id_name = malloc(strlen(name) + 1, M_DEVL,M_WAITOK); strcpy(id_pn->id_name, name); id_pn->id_next = uc_devlist; uc_devlist = id_pn; /* park at top of list */ } } list = list->next; } } /** ** nukelist ** ** Frees all storage in use by a (list). **/ static void nukelist(DEV_LIST *list) { DEV_LIST *dp; if (!list) return; while(list->prev) /* walk to head of list */ list = list->prev; while(list) { dp = list; list = list->next; free(dp,M_DEVL); } } /** ** prevent ** ** Returns the previous entry in (list), skipping zoomed regions. Returns NULL ** if there is no previous entry. (Only possible if list->prev == NULL given the ** premise that there is always a comment at the head of the list) **/ static DEV_LIST * prevent(DEV_LIST *list) { DEV_LIST *dp; if (!list) return(NULL); dp = list->prev; /* start back one */ while(dp) { if (dp->comment == DEV_ZOOMED) /* previous section is zoomed */ return(dp); /* so skip to comment */ if (dp->comment == DEV_COMMENT) /* not zoomed */ return(list->prev); /* one back as normal */ dp = dp->prev; /* backpedal */ } return(dp); /* NULL, we can assume */ } /** ** nextent ** ** Returns the next entry in (list), skipping zoomed regions. Returns NULL ** if there is no next entry. (Possible if the current entry is last, or ** if the current entry is the last heading and it's collapsed) **/ static DEV_LIST * nextent(DEV_LIST *list) { DEV_LIST *dp; if (!list) return(NULL); if (list->comment != DEV_ZOOMED) /* no reason to skip */ return(list->next); dp = list->next; while(dp) { if (dp->comment != DEV_DEVICE) /* found another heading */ break; dp = dp->next; } return(dp); /* back we go */ } /** ** ofsent ** ** Returns the (ofs)th entry down from (list), or NULL if it doesn't exist **/ static DEV_LIST * ofsent(int ofs, DEV_LIST *list) { while (ofs-- && list) list = nextent(list); return(list); } /** ** findconflict ** ** Scans every element of (list) and sets the conflict tags appropriately ** Returns the number of conflicts found. **/ static int findconflict(DEV_LIST *list) { int count = 0; /* number of conflicts found */ DEV_LIST *dp,*sp; for (dp = list; dp; dp = dp->next) /* over the whole list */ { if (dp->comment != DEV_DEVICE) /* comments don't usually conflict */ continue; dp->conflicts = 0; /* assume the best */ for (sp = list; sp; sp = sp->next) /* scan the entire list for conflicts */ { if (sp->comment != DEV_DEVICE) /* likewise */ continue; if (sp == dp) /* always conflict with itself */ continue; if ((dp->iobase > 0) && /* iobase conflict? */ (dp->iobase == sp->iobase)) dp->conflicts = 1; if ((dp->irq > 0) && /* irq conflict? */ (dp->irq == sp->irq)) dp->conflicts = 1; if ((dp->drq > 0) && /* drq conflict? */ (dp->drq == sp->drq)) dp->conflicts = 1; if ((sp->maddr > 0) && /* maddr/msize conflict? */ (dp->maddr > 0) && (sp->maddr + ((sp->msize == 0) ? 1 : sp->msize) > dp->maddr) && (dp->maddr + ((dp->msize == 0) ? 1 : dp->msize) > sp->maddr)) dp->conflicts = 1; } count += dp->conflicts; /* count conflicts */ } return(count); } /** ** expandlist ** ** Unzooms all headings in (list) **/ static void expandlist(DEV_LIST *list) { while(list) { if (list->comment == DEV_COMMENT) list->comment = DEV_ZOOMED; list = list->next; } } /** ** collapselist ** ** Zooms all headings in (list) **/ static void collapselist(DEV_LIST *list) { while(list) { if (list->comment == DEV_ZOOMED) list->comment = DEV_COMMENT; list = list->next; } } /** ** Screen-manipulation stuff ** ** This is the basic screen layout : ** ** 0 5 10 15 20 25 30 35 40 45 50 55 60 67 70 75 ** |....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|.... ** +--------------------------------------------------------------------------------+ ** 0 -|---Active Drivers----------------------------xx Conflicts------Dev---IRQ--Port--| ** 1 -| ........................ ....... .. 0x....| ** 2 -| ........................ ....... .. 0x....| ** 3 -| ........................ ....... .. 0x....| ** 4 -| ........................ ....... .. 0x....| ** 5 -| ........................ ....... .. 0x....| ** 6 -| ........................ ....... .. 0x....| ** 7 -| ........................ ....... .. 0x....| ** 8 -| ........................ ....... .. 0x....| ** 9 -|---Inactive Drivers--------------------------------------------Dev--------------| ** 10-| ........................ ....... | ** 11-| ........................ ....... | ** 12-| ........................ ....... | ** 13-| ........................ ....... | ** 14-| ........................ ....... | ** 15-| ........................ ....... | ** 16-| ........................ ....... | ** 17-|------------------------------------------------------UP-DOWN-------------------| ** 18-| Relevant parameters for the current device | ** 19-| | ** 20-| | ** 21-|--------------------------------------------------------------------------------| ** 22-| Help texts go here | ** 23-| | ** +--------------------------------------------------------------------------------+ ** ** Help texts ** ** On a collapsed comment : ** ** [Enter] Expand device list [z] Expand all lists ** [TAB] Change fields [Q] Save and Exit ** ** On an expanded comment : ** ** [Enter] Collapse device list [Z] Collapse all lists ** [TAB] Change fields [Q] Save and Exit ** ** On a comment with no followers ** ** ** [TAB] Change fields [Q] Save and Exit ** ** On a device in the active list ** ** [Enter] Edit device parameters [DEL] Disable device ** [TAB] Change fields [Q] Save and Exit [?] Help ** ** On a device in the inactive list ** ** [Enter] Enable device ** [TAB] Change fields [Q] Save and Exit [?] Help ** ** While editing parameters ** ** ** [TAB] Change fields [Q] Save device parameters **/ /** ** ** The base-level screen primitives : ** ** bold() - enter bold mode \E[1m (md) ** inverse() - enter inverse mode \E[7m (so) ** normal() - clear bold/inverse mode \E[m (se) ** clear() - clear the screen \E[H\E[J (ce) ** move(x,y) - move the cursor to x,y \E[y;xH: (cm) **/ static void bold(void) { printf("\033[1m"); } static void inverse(void) { printf("\033[7m"); } static void normal(void) { printf("\033[m"); } static void clear(void) { normal(); printf("\033[H\033[J"); } static void move(int x, int y) { printf("\033[%d;%dH",y+1,x+1); } /** ** ** High-level screen primitives : ** ** putxyl(x,y,str,len) - put (len) bytes of (str) at (x,y), supports embedded formatting ** putxy(x,y,str) - put (str) at (x,y), supports embedded formatting ** erase(x,y,w,h) - clear the box (x,y,w,h) ** txtbox(x,y,w,y,str) - put (str) in a region at (x,y,w,h) ** putmsg(str) - put (str) in the message area ** puthelp(str) - put (str) in the upper helpline ** pad(str,len) - pad (str) to (len) with spaces ** drawline(row,detail,list,inverse,*dhelp) ** - draws a line for (*list) at (row) onscreen. If (detail) is ** nonzero, include port, IRQ and maddr, if (inverse) is nonzero, ** draw the line in inverse video, and display (*dhelp) on the ** helpline. ** drawlist(row,num,detail,list) ** - draw (num) entries from (list) at (row) onscreen, passile (detail) ** through to drawline(). ** showparams(dev) - displays the relevant parameters for (dev) below the lists onscreen. ** yesno(str) - displays (str) in the message area, and returns nonzero on 'y' or 'Y' ** redraw(); - Redraws the entire screen layout, including the ** - two list panels. **/ /** ** putxy ** writes (str) at x,y onscreen ** putxyl ** writes up to (len) of (str) at x,y onscreen. ** ** Supports embedded formatting : ** !i - inverse mode. ** !b - bold mode. ** !n - normal mode. **/ static void putxyl(int x, int y, char *str, int len) { move(x,y); normal(); while((*str) && (len--)) { if (*str == '!') /* format escape? */ { switch(*(str+1)) /* depending on the next character */ { case 'i': inverse(); str +=2; /* skip formatting */ len++; /* doesn't count for length */ break; case 'b': bold(); str +=2; /* skip formatting */ len++; /* doesn't count for length */ break; case 'n': normal(); str +=2; /* skip formatting */ len++; /* doesn't count for length */ break; default: putchar(*str++); /* not an escape */ } }else{ putchar(*str++); /* emit the character */ } } } #define putxy(x,y,str) putxyl(x,y,str,-1) /** ** erase ** ** Erases the region (x,y,w,h) **/ static void erase(int x, int y, int w, int h) { int i; normal(); for (i = 0; i < h; i++) putxyl(x,y++,spaces,w); } /** ** txtbox ** ** Writes (str) into the region (x,y,w,h), supports embedded formatting using ** putxy. Lines are not wrapped, newlines must be forced with \n. **/ static void txtbox(int x, int y, int w, int h, char *str) { int i = 0; h--; while((str[i]) && h) { if (str[i] == '\n') /* newline */ { putxyl(x,y,str,(i= len) /* no padding needed */ return; while(i < len) /* pad */ str[i++] = ' '; str[i] = '\0'; } /** ** drawline ** ** Displays entry (ofs) of (list) in region at (row) onscreen, optionally displaying ** the port and IRQ fields if (detail) is nonzero. If (inverse), in inverse video. ** ** The text (dhelp) is displayed if the item is a normal device, otherwise ** help is shown for normal or zoomed comments **/ static void drawline(int row, int detail, DEV_LIST *list, int inverse, char *dhelp) { char lbuf[90],nb[70],db[20],ib[16],pb[16]; if (list->comment == DEV_DEVICE) { nb[0] = ' '; strncpy(nb+1,list->name,57); }else{ strncpy(nb,list->name,58); if ((list->comment == DEV_ZOOMED) && (list->next)) if (list->next->comment == DEV_DEVICE) /* only mention if there's something hidden */ strcat(nb," (Collapsed)"); } nb[58] = '\0'; pad(nb,60); if (list->conflicts) /* device in conflict? */ { if (inverse) { strcpy(nb+54," !nCONF!i "); /* tag conflict, careful of length */ }else{ strcpy(nb+54," !iCONF!n "); /* tag conflict, careful of length */ } } if (list->comment == DEV_DEVICE) { sprintf(db,"%s%d",list->dev,list->unit); pad(db,8); }else{ strcpy(db," "); } if ((list->irq > 0) && detail && (list->comment == DEV_DEVICE)) { sprintf(ib," %d",list->irq); pad(ib,4); }else{ strcpy(ib," "); } if ((list->iobase > 0) && detail && (list->comment == DEV_DEVICE)) { sprintf(pb,"0x%x",list->iobase); pad(pb,7); }else{ strcpy(pb," "); } sprintf(lbuf," %s%s%s%s%s",inverse?"!i":"",nb,db,ib,pb); putxyl(0,row,lbuf,80); if (dhelp) { switch(list->comment) { case DEV_DEVICE: /* ordinary device */ puthelp(dhelp); break; case DEV_COMMENT: puthelp(""); if (list->next) if (list->next->comment == DEV_DEVICE) puthelp(" [!bEnter!n] Collapse device list [!bC!n] Collapse all lists"); break; case DEV_ZOOMED: puthelp(""); if (list->next) if (list->next->comment == DEV_DEVICE) puthelp(" [!bEnter!n] Expand device list [!bX!n] Expand all lists"); break; default: puthelp(" WARNING: This list entry corrupted!"); break; } } move(0,row); /* put the cursor somewhere relevant */ } /** ** drawlist ** ** Displays (num) lines of the contents of (list) at (row), optionally displaying the ** port and IRQ fields as well if (detail) is nonzero ** ** printf in the kernel is essentially useless, so we do most of the hard work ourselves here. **/ static void drawlist(int row, int num, int detail, DEV_LIST *list) { int ofs; for(ofs = 0; ofs < num; ofs++) { if (list) { drawline(row+ofs,detail,list,0,NULL); /* NULL -> don't draw empty help string */ list = nextent(list); /* move down visible list */ }else{ erase(0,row+ofs,80,1); } } } /** ** redrawactive ** ** Redraws the active list **/ static void redrawactive(void) { char cbuf[16]; if (conflicts) { sprintf(cbuf,"!i%d conflict%s-",conflicts,(conflicts>1)?"s":""); putxy(45,0,cbuf); }else{ putxyl(45,0,lines,16); } drawlist(1,8,1,alist); /* draw device lists */ } /** ** redrawinactive ** ** Redraws the inactive list **/ static void redrawinactive(void) { drawlist(10,7,0,ilist); /* draw device lists */ } /** ** redraw ** ** Clear the screen and redraw the entire layout **/ static void redraw(void) { clear(); putxy(0,0,lines); putxy(3,0,"!bActive!n-!bDrivers"); putxy(63,0,"!bDev!n---!bIRQ!n--!bPort"); putxy(0,9,lines); putxy(3,9,"!bInactive!n-!bDrivers"); putxy(63,9,"!bDev"); putxy(0,17,lines); putxy(0,21,lines); masterhelp(" [!bTAB!n] Change fields [!bQ!n] Save and Exit [!b?!n] Help"); redrawactive(); redrawinactive(); } /** ** yesnocancel ** ** Put (str) in the message area, and return 1 if the user hits 'y' or 'Y', ** 2 if they hit 'c' or 'C', or 0 for 'n' or 'N'. **/ static int yesnocancel(char *str) { putmsg(str); for(;;) switch(getchar()) { case -1: case 'n': case 'N': return(0); case 'y': case 'Y': return(1); case 'c': case 'C': return(2); } } /** ** showparams ** ** Show device parameters in the region below the lists ** ** 0 5 10 15 20 25 30 35 40 45 50 55 60 67 70 75 ** |....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|.... ** +--------------------------------------------------------------------------------+ ** 17-|--------------------------------------------------------------------------------| ** 18-| Port address : 0x0000 Memory address : 0x00000 Conflict allowed | ** 19-| IRQ number : 00 Memory size : 0x0000 | ** 20-| Flags : 0x0000 DRQ number : 00 | ** 21-|--------------------------------------------------------------------------------| **/ static void showparams(DEV_LIST *dev) { char buf[80]; erase(0,18,80,3); /* clear area */ if (!dev) return; if (dev->comment != DEV_DEVICE) return; if (dev->iobase > 0) { sprintf(buf,"Port address : 0x%x",dev->iobase); putxy(1,18,buf); } if (dev->irq > 0) { sprintf(buf,"IRQ number : %d",dev->irq); putxy(1,19,buf); } sprintf(buf,"Flags : 0x%x",dev->flags); putxy(1,20,buf); if (dev->maddr > 0) { sprintf(buf,"Memory address : 0x%x",dev->maddr); putxy(26,18,buf); } if (dev->msize > 0) { sprintf(buf,"Memory size : 0x%x",dev->msize); putxy(26,19,buf); } if (dev->drq > 0) { sprintf(buf,"DRQ number : %d",dev->drq); putxy(26,20,buf); } } /** ** Editing functions for device parameters ** ** editval(x,y,width,hex,min,max,val) - Edit (*val) in a field (width) wide at (x,y) ** onscreen. Refuse values outsise (min) and (max). ** editparams(dev) - Edit the parameters for (dev) **/ #define VetRet(code) \ { \ if ((i >= min) && (i <= max)) /* legit? */ \ { \ *val = i; \ sprintf(buf,hex?"0x%x":"%d",i); \ putxy(hex?x-2:x,y,buf); \ return(code); /* all done and exit */ \ } \ i = *val; /* restore original value */ \ delta = 1; /* restore other stuff */ \ } /** ** editval ** ** Edit (*val) at (x,y) in (hex)?hex:decimal mode, allowing values between (min) and (max) ** in a field (width) wide. (Allow one space) ** If (ro) is set, we're in "readonly" mode, so disallow edits. ** ** Return KEY_TAB on \t, KEY_EXIT on 'q' **/ static int editval(int x, int y, int width, int hex, int min, int max, int *val, int ro) { int i = *val; /* work with copy of the value */ char buf[2+11+1],tc[11+1]; /* display buffer, text copy */ int xp = 0; /* cursor offset into text copy */ int delta = 1; /* force redraw first time in */ int c; int extended = 0; /* stage counter for extended key sequences */ if (hex) /* we presume there's a leading 0x onscreen */ putxy(x-2,y,"!i0x"); /* coz there sure is now */ for (;;) { if (delta) /* only update if necessary */ { sprintf(tc,hex?"%x":"%d",i); /* make a text copy of the value */ sprintf(buf,"!i%s",tc); /* format for printing */ erase(x,y,width,1); /* clear the area */ putxy(x,y,buf); /* write */ xp = strlen(tc); /* cursor always at end */ move(x+xp,y); /* position the cursor */ } c = getchar(); switch(extended) /* escape handling */ { case 0: if (c == 0x1b) /* esc? */ { extended = 1; /* flag and spin */ continue; } extended = 0; break; /* nope, drop through */ case 1: /* there was an escape prefix */ if (c == '[' || c == 'O') /* second character in sequence */ { extended = 2; continue; } if (c == 0x1b) return(KEY_EXIT); /* double esc exits */ extended = 0; break; /* nup, not a sequence. */ case 2: extended = 0; switch(c) /* looks like the real McCoy */ { case 'A': VetRet(KEY_UP); /* leave if OK */ continue; case 'B': VetRet(KEY_DOWN); /* leave if OK */ continue; case 'C': VetRet(KEY_RIGHT); /* leave if OK */ continue; case 'D': VetRet(KEY_LEFT); /* leave if OK */ continue; default: continue; } } switch(c) { case '\t': /* trying to tab off */ VetRet(KEY_TAB); /* verify and maybe return */ break; case -1: case 'q': case 'Q': VetRet(KEY_EXIT); break; case '\b': case '\177': /* BS or DEL */ if (ro) /* readonly? */ { puthelp(" !iThis value cannot be edited (Press ESC)"); while(getchar() != 0x1b); /* wait for key */ return(KEY_NULL); /* spin */ } if (xp) /* still something left to delete */ { i = (hex ? i/0x10u : i/10); /* strip last digit */ delta = 1; /* force update */ } break; case 588: VetRet(KEY_UP); break; case '\r': case '\n': case 596: VetRet(KEY_DOWN); break; case 591: VetRet(KEY_LEFT); break; case 593: VetRet(KEY_RIGHT); break; default: if (ro) /* readonly? */ { puthelp(" !iThis value cannot be edited (Press ESC)"); while(getchar() != 0x1b); /* wait for key */ return(KEY_NULL); /* spin */ } if (xp >= width) /* no room for more characters anyway */ break; if (hex) { if ((c >= '0') && (c <= '9')) { i = i*0x10 + (c-'0'); /* update value */ delta = 1; break; } if ((c >= 'a') && (c <= 'f')) { i = i*0x10 + (c-'a'+0xa); delta = 1; break; } if ((c >= 'A') && (c <= 'F')) { i = i*0x10 + (c-'A'+0xa); delta = 1; break; } }else{ if ((c >= '0') && (c <= '9')) { i = i*10 + (c-'0'); /* update value */ delta = 1; /* force redraw */ break; } } break; } } } /** ** editparams ** ** Edit the parameters for (dev) ** ** Note that it's _always_ possible to edit the flags, otherwise it might be ** possible for this to spin in an endless loop... ** 0 5 10 15 20 25 30 35 40 45 50 55 60 67 70 75 ** |....|....|....|....|....|....|....|....|....|....|....|....|....|....|....|.... ** +--------------------------------------------------------------------------------+ ** 17-|--------------------------------------------------------------------------------| ** 18-| Port address : 0x0000 Memory address : 0x00000 Conflict allowed | ** 19-| IRQ number : 00 Memory size : 0x0000 | ** 20-| Flags : 0x0000 DRQ number : 00 | ** 21-|--------------------------------------------------------------------------------| ** ** The "intelligence" in this function that hops around based on the directional ** returns from editval isn't very smart, and depends on the layout above. **/ static void editparams(DEV_LIST *dev) { int ret; char buf[16]; /* needs to fit the device name */ putxy(2,17,"!bParameters!n-!bfor!n-!bdevice!n-"); sprintf(buf,"!b%s",dev->dev); putxy(24,17,buf); erase(1,22,80,1); for (;;) { ep_iobase: if (dev->iobase > 0) { puthelp(" IO Port address (Hexadecimal, 0x1-0xffff)"); ret = editval(18,18,5,1,0x1,0xffff,&(dev->iobase),(dev->attrib & FLG_FIXIOBASE)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_RIGHT: if (dev->maddr > 0) goto ep_maddr; break; case KEY_TAB: case KEY_DOWN: goto ep_irq; } goto ep_iobase; } ep_irq: if (dev->irq > 0) { puthelp(" Interrupt number (Decimal, 1-15)"); ret = editval(16,19,3,0,1,15,&(dev->irq),(dev->attrib & FLG_FIXIRQ)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_RIGHT: if (dev->msize > 0) goto ep_msize; break; case KEY_UP: if (dev->iobase > 0) goto ep_iobase; break; case KEY_TAB: case KEY_DOWN: goto ep_flags; } goto ep_irq; } ep_flags: puthelp(" Device-specific flag values."); ret = editval(18,20,8,1,INT_MIN,INT_MAX,&(dev->flags),0); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_RIGHT: if (dev->drq > 0) goto ep_drq; break; case KEY_UP: if (dev->irq > 0) goto ep_irq; if (dev->iobase > 0) goto ep_iobase; break; case KEY_DOWN: if (dev->maddr > 0) goto ep_maddr; if (dev->msize > 0) goto ep_msize; if (dev->drq > 0) goto ep_drq; break; case KEY_TAB: goto ep_maddr; } goto ep_flags; ep_maddr: if (dev->maddr > 0) { puthelp(" Device memory start address (Hexadecimal, 0x1-0xfffff)"); ret = editval(45,18,6,1,0x1,0xfffff,&(dev->maddr),(dev->attrib & FLG_FIXMADDR)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_LEFT: if (dev->iobase > 0) goto ep_iobase; break; case KEY_UP: goto ep_flags; case KEY_DOWN: if (dev->msize > 0) goto ep_msize; if (dev->drq > 0) goto ep_drq; break; case KEY_TAB: goto ep_msize; } goto ep_maddr; } ep_msize: if (dev->msize > 0) { puthelp(" Device memory size (Hexadecimal, 0x1-0x10000)"); ret = editval(45,19,5,1,0x1,0x10000,&(dev->msize),(dev->attrib & FLG_FIXMSIZE)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_LEFT: if (dev->irq > 0) goto ep_irq; break; case KEY_UP: if (dev->maddr > 0) goto ep_maddr; goto ep_flags; case KEY_DOWN: if (dev->drq > 0) goto ep_drq; break; case KEY_TAB: goto ep_drq; } goto ep_msize; } ep_drq: if (dev->drq > 0) { puthelp(" Device DMA request number (Decimal, 1-7)"); ret = editval(43,20,2,0,1,7,&(dev->drq),(dev->attrib & FLG_FIXDRQ)); switch(ret) { case KEY_EXIT: goto ep_exit; case KEY_LEFT: goto ep_flags; case KEY_UP: if (dev->msize > 0) goto ep_msize; if (dev->maddr > 0) goto ep_maddr; goto ep_flags; case KEY_TAB: goto ep_iobase; } goto ep_drq; } } ep_exit: dev->changed = 1; /* mark as changed */ } static char *helptext[] = { " Using the UserConfig kernel settings editor", " -------------------------------------------", "", "VISUAL MODE:", "", "- - Layout -", "", "The screen displays a list of available drivers, divided into two", "scrolling lists: Active Drivers, and Inactive Drivers. Each list is", "by default collapsed and can be expanded to show all the drivers", "available in each category. The parameters for the currently selected", "driver are shown at the bottom of the screen.", "", "- - Moving around -", "", "To move in the current list, use the UP and DOWN cursor keys to select", "an item (the selected item will be highlighted). If the item is a", "category name, you may alternatively expand or collapse the list of", "drivers for that category by pressing [!bENTER!n]. Once the category is", "expanded, you can select each driver in the same manner and either:", "", " - change its parameters using [!bENTER!n]", " - move it to the Inactive list using [!bDEL!n]", "", "Use the [!bTAB!n] key to toggle between the Active and Inactive list; if", "you need to move a driver from the Inactive list back to the Active", "one, select it in the Inactive list, using [!bTAB!n] to change lists if", "necessary, and press [!bENTER!n] -- the device will be moved back to", "its place in the Active list.", "", "- - Altering the list/parameters -", "", "Any drivers for devices not installed in your system should be moved", "to the Inactive list, until there are no remaining parameter conflicts", "between the drivers, as indicated at the top.", "", "Once the list of Active drivers only contains entries for the devices", "present in your system, you can set their parameters (Interrupt, DMA", "channel, I/O addresses). To do this, select the driver and press", "[!bENTER!n]: it is now possible to edit the settings at the", "bottom of the screen. Use [!bTAB!n] to change fields, and when you are", "finished, use [!bQ!n] to return to the list.", "", "- - Saving changes -", "", "When all settings seem correct, and you wish to proceed with the", "kernel device probing and boot, press [!bQ!n] -- you will be asked to", "confirm your choice.", "", NULL }; /** ** helpscreen ** ** Displays help text onscreen for people that are confused, using a simple ** pager. **/ static void helpscreen(void) { int topline = 0; /* where we are in the text */ int c, delta = 1; char prompt[80]; for (;;) /* loop until user quits */ { int line = 0; /* display help text */ if (delta) { clear(); /* remove everything else */ for (line = topline; (line < (topline + 24)) && (helptext[line]); line++) putxy(0,line-topline,helptext[line]); delta = 0; } /* prompt */ sprintf(prompt,"!i --%s-- [U]p [D]own [Q]uit !n",helptext[line] ? "MORE" : "END"); putxy(0,24,prompt); c = getchar(); /* so what do they say? */ switch (c) { case 'u': case 'U': case 'b': case 'B': /* wired into 'more' users' fingers */ if (topline > 0) /* room to go up? */ { topline -= 24; if (topline < 0) /* don't go too far */ topline = 0; delta = 1; } break; case 'd': case 'D': case ' ': /* expected by most people */ if (helptext[line]) /* maybe more below? */ { topline += 24; delta = 1; } break; case 'q': case 'Q': redraw(); /* restore the screen */ return; } } } /** ** High-level control functions **/ /** ** dolist ** ** Handle user movement within (*list) in the region starting at (row) onscreen with ** (num) lines, starting at (*ofs) offset from row onscreen. ** Pass (detail) on to drawing routines. ** ** If the user hits a key other than a cursor key, maybe return a code. ** ** (*list) points to the device at the top line in the region, (*ofs) is the ** position of the highlight within the region. All routines below ** this take only a device and an absolute row : use ofsent() to find the ** device, and add (*ofs) to (row) to find the absolute row. **/ static int dolist(int row, int num, int detail, int *ofs, DEV_LIST **list, char *dhelp) { int extended = 0; int c; DEV_LIST *lp; int delta = 1; for(;;) { if (delta) { showparams(ofsent(*ofs,*list)); /* show device parameters */ drawline(row+*ofs,detail,ofsent(*ofs,*list),1,dhelp); /* highlight current line */ delta = 0; } c = getchar(); /* get a character */ if ((extended == 2) || (c==588) || (c==596)) /* console gives "alternative" codes */ { extended = 0; /* no longer */ switch(c) { case 588: /* syscons' idea of 'up' */ case 'A': /* up */ if (*ofs) /* just a move onscreen */ { drawline(row+*ofs,detail,ofsent(*ofs,*list),0,dhelp);/* unhighlight current line */ (*ofs)--; /* move up */ }else{ lp = prevent(*list); /* can we go up? */ if (!lp) /* no */ break; *list = lp; /* yes, move up list */ drawlist(row,num,detail,*list); } delta = 1; break; case 596: /* dooby-do */ case 'B': /* down */ lp = ofsent(*ofs,*list); /* get current item */ if (!nextent(lp)) break; /* nothing more to move to */ drawline(row+*ofs,detail,ofsent(*ofs,*list),0,dhelp); /* unhighlight current line */ if (*ofs < (num-1)) /* room to move onscreen? */ { (*ofs)++; }else{ *list = nextent(*list); /* scroll region down */ drawlist(row,num,detail,*list); } delta = 1; break; } }else{ switch(c) { case '\033': extended=1; break; case '[': /* cheat : always preceeds cursor move */ case 'O': /* ANSI application key mode */ if (extended==1) extended=2; else extended=0; break; case 'Q': case 'q': return(KEY_EXIT); /* user requests exit */ case '\r': case '\n': return(KEY_DO); /* "do" something */ case '\b': case '\177': case 599: return(KEY_DEL); /* "delete" response */ case 'X': case 'x': return(KEY_UNZOOM); /* expand everything */ case 'C': case 'c': return(KEY_ZOOM); /* collapse everything */ case '\t': drawline(row+*ofs,detail,ofsent(*ofs,*list),0,dhelp); /* unhighlight current line */ return(KEY_TAB); /* "move" response */ case '\014': /* ^L, redraw */ return(KEY_REDRAW); case '?': /* helptext */ return(KEY_HELP); } } } } /** ** visuserconfig ** ** Do the fullscreen config thang **/ static int visuserconfig(void) { int actofs = 0, inactofs = 0, mode = 0, ret = -1, i; DEV_LIST *dp; initlist(&active); initlist(&inactive); alist = active; ilist = inactive; getdevs(); conflicts = findconflict(active); /* find conflicts in the active list only */ redraw(); for(;;) { switch(mode) { case 0: /* active devices */ ret = dolist(1,8,1,&actofs,&alist, " [!bEnter!n] Edit device parameters [!bDEL!n] Disable device"); switch(ret) { case KEY_TAB: mode = 1; /* swap lists */ break; case KEY_REDRAW: redraw(); break; case KEY_ZOOM: alist = active; actofs = 0; expandlist(active); redrawactive(); break; case KEY_UNZOOM: alist = active; actofs = 0; collapselist(active); redrawactive(); break; case KEY_DEL: dp = ofsent(actofs,alist); /* get current device */ if (dp) /* paranoia... */ { if (dp->attrib & FLG_MANDATORY) /* can't be deleted */ break; if (dp == alist) /* moving top item on list? */ { if (dp->next) { alist = dp->next; /* point list to non-moving item */ }else{ alist = dp->prev; /* end of list, go back instead */ } }else{ if (!dp->next) /* moving last item on list? */ actofs--; } dp->conflicts = 0; /* no conflicts on the inactive list */ movedev(dp,inactive); /* shift to inactive list */ conflicts = findconflict(active); /* update conflict tags */ dp->changed = 1; redrawactive(); /* redraw */ redrawinactive(); } break; case KEY_DO: /* edit device parameters */ dp = ofsent(actofs,alist); /* get current device */ if (dp) /* paranoia... */ { if (dp->comment == DEV_DEVICE) /* can't edit comments, zoom? */ { masterhelp(" [!bTAB!n] Change fields [!bQ!n] Save device parameters"); editparams(dp); masterhelp(" [!bTAB!n] Change fields [!bQ!n] Save and Exit [!b?!n] Help"); putxy(0,17,lines); conflicts = findconflict(active); /* update conflict tags */ }else{ /* DO on comment = zoom */ switch(dp->comment) /* Depends on current state */ { case DEV_COMMENT: /* not currently zoomed */ dp->comment = DEV_ZOOMED; break; case DEV_ZOOMED: dp->comment = DEV_COMMENT; break; } } redrawactive(); } break; } break; case 1: /* inactive devices */ ret = dolist(10,7,0,&inactofs,&ilist, " [!bEnter!n] Enable device "); switch(ret) { case KEY_TAB: mode = 0; break; case KEY_REDRAW: redraw(); break; case KEY_ZOOM: ilist = inactive; inactofs = 0; expandlist(inactive); redrawinactive(); break; case KEY_UNZOOM: ilist = inactive; inactofs = 0; collapselist(inactive); redrawinactive(); break; case KEY_DO: dp = ofsent(inactofs,ilist); /* get current device */ if (dp) /* paranoia... */ { if (dp->comment == DEV_DEVICE) /* can't move comments, zoom? */ { if (dp == ilist) /* moving top of list? */ { if (dp->next) { ilist = dp->next; /* point list to non-moving item */ }else{ ilist = dp->prev; /* can't go down, go up instead */ } }else{ if (!dp->next) /* last entry on list? */ inactofs--; /* shift cursor up one */ } movedev(dp,active); /* shift to active list */ conflicts = findconflict(active); /* update conflict tags */ dp->changed = 1; alist = dp; /* put at top and current */ actofs = 0; while(dp->comment == DEV_DEVICE) dp = dp->prev; /* forcibly unzoom section */ dp ->comment = DEV_COMMENT; mode = 0; /* and swap modes to follow it */ }else{ /* DO on comment = zoom */ switch(dp->comment) /* Depends on current state */ { case DEV_COMMENT: /* not currently zoomed */ dp->comment = DEV_ZOOMED; break; case DEV_ZOOMED: dp->comment = DEV_COMMENT; break; } } redrawactive(); /* redraw */ redrawinactive(); } break; default: /* nothing else relevant here */ break; } break; default: mode = 0; /* shouldn't happen... */ } /* handle returns that are the same for both modes */ switch (ret) { case KEY_HELP: helpscreen(); break; case KEY_EXIT: i = yesnocancel(" Save these parameters before exiting? ([!bY!n]es/[!bN!n]o/[!bC!n]ancel) "); switch(i) { case 2: /* cancel */ redraw(); break; case 1: /* save and exit */ savelist(active,1); savelist(inactive,0); case 0: /* exit */ nukelist(active); /* clean up after ourselves */ nukelist(inactive); normal(); clear(); return(1); } break; } } } #endif /* VISUAL_USERCONFIG */ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 Jordan K. Hubbard * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * Many additional changes by Bruce Evans * * This code is derived from software contributed by the * University of California Berkeley, Jordan K. Hubbard, * David Greenman and Bruce Evans. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #define PARM_DEVSPEC 0x1 #define PARM_INT 0x2 #define PARM_ADDR 0x3 #define PARM_STRING 0x4 typedef struct _cmdparm { int type; union { struct uc_device *dparm; int iparm; union { void *aparm; const char *sparm; } u; } parm; } CmdParm; typedef int (*CmdFunc)(CmdParm *); typedef struct _cmd { char *name; CmdFunc handler; CmdParm *parms; } Cmd; #if 0 static void lsscsi(void); static int list_scsi(CmdParm *); #endif static int lsdevtab(struct uc_device *); static struct uc_device *find_device(char *, int); static struct uc_device *search_devtable(struct uc_device *, char *, int); static void cngets(char *, int); static Cmd *parse_cmd(char *); static int parse_args(const char *, CmdParm *); static int save_dev(struct uc_device *); static int list_devices(CmdParm *); static int set_device_ioaddr(CmdParm *); static int set_device_irq(CmdParm *); static int set_device_drq(CmdParm *); static int set_device_iosize(CmdParm *); static int set_device_mem(CmdParm *); static int set_device_flags(CmdParm *); static int set_device_enable(CmdParm *); static int set_device_disable(CmdParm *); static int quitfunc(CmdParm *); static int helpfunc(CmdParm *); #if defined(INTRO_USERCONFIG) static int introfunc(CmdParm *); #endif #if NPNP > 0 static int lspnp(void); static int set_pnp_parms(CmdParm *); #endif static int lineno; #include "eisa.h" #if NEISA > 0 #include static int set_num_eisa_slots(CmdParm *); #endif /* NEISA > 0 */ static CmdParm addr_parms[] = { { PARM_DEVSPEC, {} }, { PARM_ADDR, {} }, { -1, {} }, }; static CmdParm int_parms[] = { { PARM_DEVSPEC, {} }, { PARM_INT, {} }, { -1, {} }, }; static CmdParm dev_parms[] = { { PARM_DEVSPEC, {} }, { -1, {} }, }; #if NPNP > 0 static CmdParm string_arg[] = { { PARM_STRING, {} }, { -1, {} }, }; #endif #if NEISA > 0 static CmdParm int_arg[] = { { PARM_INT, {} }, { -1, {} }, }; #endif /* NEISA > 0 */ static Cmd CmdList[] = { { "?", helpfunc, NULL }, /* ? (help) */ { "di", set_device_disable, dev_parms }, /* disable dev */ { "dr", set_device_drq, int_parms }, /* drq dev # */ #if NEISA > 0 { "ei", set_num_eisa_slots, int_arg }, /* # EISA slots */ #endif /* NEISA > 0 */ { "en", set_device_enable, dev_parms }, /* enable dev */ { "ex", quitfunc, NULL }, /* exit (quit) */ { "f", set_device_flags, int_parms }, /* flags dev mask */ { "h", helpfunc, NULL }, /* help */ #if defined(INTRO_USERCONFIG) { "intro", introfunc, NULL }, /* intro screen */ #endif { "iom", set_device_mem, addr_parms }, /* iomem dev addr */ { "ios", set_device_iosize, int_parms }, /* iosize dev size */ { "ir", set_device_irq, int_parms }, /* irq dev # */ { "l", list_devices, NULL }, /* ls, list */ #if NPNP > 0 { "pn", set_pnp_parms, string_arg }, /* pnp ... */ #endif { "po", set_device_ioaddr, int_parms }, /* port dev addr */ { "res", (CmdFunc)cpu_reset, NULL }, /* reset CPU */ { "q", quitfunc, NULL }, /* quit */ #if 0 { "s", list_scsi, NULL }, /* scsi */ #endif #ifdef VISUAL_USERCONFIG { "v", (CmdFunc)visuserconfig, NULL }, /* visual mode */ #endif { NULL, NULL, NULL }, }; void userconfig(void) { static char banner = 1; char input[80]; int rval; Cmd *cmd; load_devtab(); init_config_script(); while (1) { /* Only display signon banner if we are about to go interactive */ if (!has_config_script()) { if (!(boothowto & RB_CONFIG)) #ifdef INTRO_USERCONFIG banner = 0; #else return; #endif if (banner) { banner = 0; printf("FreeBSD Kernel Configuration Utility - Version 1.2\n" " Type \"help\" for help" #ifdef VISUAL_USERCONFIG " or \"visual\" to go to the visual\n" " configuration interface (requires MGA/VGA display or\n" " serial terminal capable of displaying ANSI graphics)" #endif ".\n"); } } printf("config> "); cngets(input, 80); if (input[0] == '\0') continue; cmd = parse_cmd(input); if (!cmd) { printf("Invalid command or syntax. Type `?' for help.\n"); continue; } rval = (*cmd->handler)(cmd->parms); if (rval) { free_devtab(); return; } } } static Cmd * parse_cmd(char *cmd) { Cmd *cp; for (cp = CmdList; cp->name; cp++) { int len = strlen(cp->name); if (!strncmp(cp->name, cmd, len)) { while (*cmd && *cmd != ' ' && *cmd != '\t') ++cmd; if (parse_args(cmd, cp->parms)) return NULL; else return cp; } } return NULL; } static int parse_args(const char *cmd, CmdParm *parms) { while (1) { char *ptr; if (*cmd == ' ' || *cmd == '\t') { ++cmd; continue; } if (parms == NULL || parms->type == -1) { if (*cmd == '\0') return 0; printf("Extra arg(s): %s\n", cmd); return 1; } if (parms->type == PARM_DEVSPEC) { int i = 0; char devname[64]; int unit = 0; while (*cmd && !(*cmd == ' ' || *cmd == '\t' || (*cmd >= '0' && *cmd <= '9'))) devname[i++] = *(cmd++); devname[i] = '\0'; if (*cmd >= '0' && *cmd <= '9') { unit = strtoul(cmd, &ptr, 10); if (cmd == ptr) { printf("Invalid device number\n"); /* XXX should print invalid token here and elsewhere. */ return 1; } /* XXX else should require end of token. */ cmd = ptr; } if ((parms->parm.dparm = find_device(devname, unit)) == NULL) { printf("No such device: %s%d\n", devname, unit); return 1; } ++parms; continue; } if (parms->type == PARM_INT) { parms->parm.iparm = strtoul(cmd, &ptr, 0); if (cmd == ptr) { printf("Invalid numeric argument\n"); return 1; } cmd = ptr; ++parms; continue; } if (parms->type == PARM_ADDR) { parms->parm.u.aparm = (void *)(uintptr_t)strtoul(cmd, &ptr, 0); if (cmd == ptr) { printf("Invalid address argument\n"); return 1; } cmd = ptr; ++parms; continue; } if (parms->type == PARM_STRING) { parms->parm.u.sparm = cmd; return 0; } } return 0; } static int list_devices(CmdParm *parms) { lineno = 0; if (lsdevtab(uc_devtab)) return 0; #if NPNP > 0 if (lspnp()) return 0; #endif #if NEISA > 0 printf("\nNumber of EISA slots to probe: %d\n", num_eisa_slots); #endif /* NEISA > 0 */ return 0; } static int set_device_ioaddr(CmdParm *parms) { parms[0].parm.dparm->id_iobase = parms[1].parm.iparm; save_dev(parms[0].parm.dparm); return 0; } static int set_device_irq(CmdParm *parms) { unsigned irq; irq = parms[1].parm.iparm; #ifndef PC98 if (irq == 2) { printf("Warning: Remapping IRQ 2 to IRQ 9\n"); irq = 9; } else if (irq != -1 && irq > 15) { #else if (irq != -1 && irq > 15) { #endif printf("An IRQ > 15 would be invalid.\n"); return 0; } parms[0].parm.dparm->id_irq = (irq < 16 ? 1 << irq : 0); save_dev(parms[0].parm.dparm); return 0; } static int set_device_drq(CmdParm *parms) { unsigned drq; /* * The bounds checking is just to ensure that the value can be printed * in 5 characters. 32768 gets converted to -32768 and doesn't fit. */ drq = parms[1].parm.iparm; parms[0].parm.dparm->id_drq = (drq < 32768 ? drq : -1); save_dev(parms[0].parm.dparm); return 0; } static int set_device_iosize(CmdParm *parms) { parms[0].parm.dparm->id_msize = parms[1].parm.iparm; save_dev(parms[0].parm.dparm); return 0; } static int set_device_mem(CmdParm *parms) { parms[0].parm.dparm->id_maddr = parms[1].parm.u.aparm; save_dev(parms[0].parm.dparm); return 0; } static int set_device_flags(CmdParm *parms) { parms[0].parm.dparm->id_flags = parms[1].parm.iparm; save_dev(parms[0].parm.dparm); return 0; } static int set_device_enable(CmdParm *parms) { parms[0].parm.dparm->id_enabled = TRUE; save_dev(parms[0].parm.dparm); return 0; } static int set_device_disable(CmdParm *parms) { parms[0].parm.dparm->id_enabled = FALSE; save_dev(parms[0].parm.dparm); return 0; } #if NPNP > 0 static int -sysctl_machdep_uc_pnplist (SYSCTL_HANDLER_ARGS) +sysctl_machdep_uc_pnplist(SYSCTL_HANDLER_ARGS) { int error=0; if(!req->oldptr) { /* Only sizing */ return(SYSCTL_OUT(req,0,sizeof(struct pnp_cinfo)*MAX_PNP_LDN)); } else { /* * Output the pnp_ldn_overrides[] table. */ error=sysctl_handle_opaque(oidp,&pnp_ldn_overrides, sizeof(struct pnp_cinfo)*MAX_PNP_LDN,req); if(error) return(error); return(0); } } SYSCTL_PROC( _machdep, OID_AUTO, uc_pnplist, CTLFLAG_RD, 0, 0, sysctl_machdep_uc_pnplist, "A", "List of PnP overrides changed in UserConfig"); /* * this function sets the kernel table to override bios PnP * configuration. */ static int set_pnp_parms(CmdParm *parms) { u_long idx, val, ldn, csn; int i; char *q; const char *p = parms[0].parm.u.sparm; struct pnp_cinfo d; csn=strtoul(p,&q, 0); ldn=strtoul(q,&q, 0); for (p=q; *p && (*p==' ' || *p=='\t'); p++) ; if (csn < 1 || csn > MAX_PNP_CARDS || ldn >= MAX_PNP_LDN) { printf("bad csn/ldn %ld:%ld\n", csn, ldn); return 0; } for (i=0; i < MAX_PNP_LDN; i++) { if (pnp_ldn_overrides[i].csn == csn && pnp_ldn_overrides[i].ldn == ldn) break; } if (i==MAX_PNP_LDN) { for (i=0; i < MAX_PNP_LDN; i++) { if (pnp_ldn_overrides[i].csn <1 || pnp_ldn_overrides[i].csn > MAX_PNP_CARDS) break; } } if (i==MAX_PNP_LDN) { printf("sorry, no PnP entries available, try delete one\n"); return 0 ; } d = pnp_ldn_overrides[i] ; d.csn = csn; d.ldn = ldn ; while (*p) { idx = 0; val = 0; if (!strncmp(p,"irq",3)) { idx=strtoul(p+3,&q, 0); val=strtoul(q,&q, 0); if (idx >=0 && idx < 2) d.irq[idx] = val; } else if (!strncmp(p,"flags",5)) { idx=strtoul(p+5,&q, 0); d.flags = idx; } else if (!strncmp(p,"drq",3)) { idx=strtoul(p+3,&q, 0); val=strtoul(q,&q, 0); if (idx >=0 && idx < 2) d.drq[idx] = val; } else if (!strncmp(p,"port",4)) { idx=strtoul(p+4,&q, 0); val=strtoul(q,&q, 0); if (idx >=0 && idx < 8) d.port[idx] = val; } else if (!strncmp(p,"mem",3)) { idx=strtoul(p+3,&q, 0); val=strtoul(q,&q, 0); if (idx >=0 && idx < 4) d.mem[idx].base = val; } else if (!strncmp(p,"bios",4)) { q = p+ 4; d.override = 0 ; } else if (!strncmp(p,"os",2)) { q = p+2 ; d.override = 1 ; } else if (!strncmp(p,"disable",7)) { q = p+7 ; d.enable = 0 ; } else if (!strncmp(p,"enable",6)) { q = p+6; d.enable = 1 ; } else if (!strncmp(p,"delete",6)) { bzero(&pnp_ldn_overrides[i], sizeof (pnp_ldn_overrides[i])); if (i==0) pnp_ldn_overrides[i].csn = 255;/* not reinit */ return 0; } else { printf("unknown command <%s>\n", p); break; } for (p=q; *p && (*p==' ' || *p=='\t'); p++) ; } pnp_ldn_overrides[i] = d ; return 0; } #endif /* NPNP */ #if NEISA > 0 static int set_num_eisa_slots(CmdParm *parms) { int num_slots; num_slots = parms[0].parm.iparm; num_eisa_slots = (num_slots <= 16 ? num_slots : 10); return 0; } #endif /* NEISA > 0 */ static int quitfunc(CmdParm *parms) { /* * If kernel config supplied, and we are parsing it, and -c also supplied, * ignore a quit command, This provides a safety mechanism to allow * recovery from a damaged/buggy kernel config. */ if ((boothowto & RB_CONFIG) && userconfig_boot_parsing) return 0; return 1; } static int helpfunc(CmdParm *parms) { printf( "Command\t\t\tDescription\n" "-------\t\t\t-----------\n" "ls\t\t\tList currently configured devices\n" "port \tSet device port (i/o address)\n" "irq \tSet device irq\n" "drq \tSet device drq\n" "iomem \tSet device maddr (memory address)\n" "iosize \tSet device memory size\n" "flags \tSet device flags\n" "enable \tEnable device\n" "disable \tDisable device (will not be probed)\n"); #if NPNP > 0 printf( "pnp [enable|disable]\tenable/disable device\n" "pnp [os|bios]\tset parameters using FreeBSD or BIOS\n" "pnp [portX ]\tset addr for port X (0..7)\n" "pnp [memX ]\tset addr for memory range X (0..3)\n" "pnp [irqX ]\tset irq X (0..1) to number, 0=unused\n" "pnp [drqX ]\tset drq X (0..1) to number, 4=unused\n"); #endif #if NEISA > 0 printf("eisa \t\tSet the number of EISA slots to probe\n"); #endif /* NEISA > 0 */ printf( "quit\t\t\tExit this configuration utility\n" "reset\t\t\tReset CPU\n"); #ifdef VISUAL_USERCONFIG printf("visual\t\t\tGo to fullscreen mode.\n"); #endif printf( "help\t\t\tThis message\n\n" "Commands may be abbreviated to a unique prefix\n"); return 0; } #if defined(INTRO_USERCONFIG) #if defined (VISUAL_USERCONFIG) static void center(int y, char *str) { putxy((80 - strlen(str)) / 2, y, str); } #endif static int introfunc(CmdParm *parms) { #if defined (VISUAL_USERCONFIG) int curr_item, first_time, extended = 0; static char *choices[] = { " Skip kernel configuration and continue with installation ", " Start kernel configuration in full-screen visual mode ", " Start kernel configuration in CLI mode ", }; clear(); center(2, "!bKernel Configuration Menu!n"); curr_item = 0; first_time = 1; while (1) { char tmp[80]; int c, i; if (!extended) { for (i = 0; i < 3; i++) { tmp[0] = '\0'; if (curr_item == i) strcpy(tmp, "!i"); strcat(tmp, choices[i]); if (curr_item == i) strcat(tmp, "!n"); putxy(10, 5 + i, tmp); } if (first_time) { putxy(2, 10, "Here you have the chance to go into kernel configuration mode, making"); putxy(2, 11, "any changes which may be necessary to properly adjust the kernel to"); putxy(2, 12, "match your hardware configuration."); putxy(2, 14, "If you are installing FreeBSD for the first time, select Visual Mode"); putxy(2, 15, "(press Down-Arrow then ENTER)."); putxy(2, 17, "If you need to do more specialized kernel configuration and are an"); putxy(2, 18, "experienced FreeBSD user, select CLI mode."); putxy(2, 20, "If you are !icertain!n that you do not need to configure your kernel"); putxy(2, 21, "then simply press ENTER or Q now."); first_time = 0; } move(0, 0); /* move the cursor out of the way */ } c = getchar(); if ((extended == 2) || (c == 588) || (c == 596)) { /* console gives "alternative" codes */ extended = 0; /* no longer */ switch (c) { case 588: case 'A': /* up */ if (curr_item > 0) --curr_item; break; case 596: case 'B': /* down */ if (curr_item < 2) ++curr_item; break; } } else { switch(c) { case '\033': extended = 1; break; case '[': /* cheat : always preceeds cursor move */ case 'O': /* ANSI application key mode */ if (extended == 1) extended = 2; else extended = 0; break; case -1: case 'Q': case 'q': clear(); return 1; /* user requests exit */ case '1': /* select an item */ case 'S': case 's': curr_item = 0; break; case '2': case 'V': case 'v': curr_item = 1; break; case '3': case 'C': case 'c': curr_item = 2; break; case 'U': /* up */ case 'u': case 'P': case 'p': if (curr_item > 0) --curr_item; break; case 'D': /* down */ case 'd': case 'N': case 'n': if (curr_item < 2) ++curr_item; break; case '\r': case '\n': clear(); if (!curr_item) return 1; else if (curr_item == 1) return visuserconfig(); else { putxy(0, 1, "Type \"help\" for help or \"quit\" to exit."); /* enable quitfunc */ userconfig_boot_parsing=0; move (0, 3); boothowto |= RB_CONFIG; /* force -c */ return 0; } break; } } } #endif } #endif #if NPNP > 0 static int lspnp () { struct pnp_cinfo *c; int i, first = 1; for (i=0; i< MAX_PNP_LDN; i++) { c = &pnp_ldn_overrides[i]; if (c->csn >0 && c->csn != 255) { int pmax, mmax; static char pfmt[] = "port 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x "; static char mfmt[] = "mem 0x%x 0x%x 0x%x 0x%x"; char buf[256]; if (lineno >= 23) { if (!userconfig_boot_parsing) { printf(" "); if (getchar() == 'q') { printf("quit\n"); return (1); } printf("\n"); } lineno = 0; } if (lineno == 0 || first) printf("CSN LDN conf en irqs drqs others (PnP devices)\n"); first = 0 ; printf("%3d %3d %4s %2s %2d %-2d %2d %-2d ", c->csn, c->ldn, c->override ? "OS ":"BIOS", c->enable ? "Y":"N", c->irq[0], c->irq[1], c->drq[0], c->drq[1]); if (c->flags) printf("flags 0x%08lx ",c->flags); for (pmax = 7; pmax >=0 ; pmax--) if (c->port[pmax]!=0) break; for (mmax = 3; mmax >=0 ; mmax--) if (c->mem[mmax].base!=0) break; if (pmax>=0) { strcpy(buf, pfmt); buf[10 + 5*pmax]='\0'; printf(buf, c->port[0], c->port[1], c->port[2], c->port[3], c->port[4], c->port[5], c->port[6], c->port[7]); } if (mmax>=0) { strcpy(buf, mfmt); buf[8 + 5*mmax]='\0'; printf(buf, c->mem[0].base, c->mem[1].base, c->mem[2].base, c->mem[3].base); } printf("\n"); } } return 0 ; } #endif /* NPNP */ static int lsdevtab(struct uc_device *dt) { for (; dt->id_id != 0; dt++) { char dname[80]; if (lineno >= 23) { printf(" "); if (!userconfig_boot_parsing) { if (getchar() == 'q') { printf("quit\n"); return (1); } printf("\n"); } lineno = 0; } if (lineno == 0) { printf( "Device port irq drq iomem iosize unit flags enab\n" ); ++lineno; } sprintf(dname, "%s%d", dt->id_name, dt->id_unit); printf("%-9.9s%-#11x%-6d%-6d%-8p%-9d%-6d%-#11x%-5s\n", dname, /* dt->id_id, dt->id_driver(by name), */ dt->id_iobase, ffs(dt->id_irq) - 1, dt->id_drq, dt->id_maddr, dt->id_msize, /* dt->id_intr(by name), */ dt->id_unit, dt->id_flags, dt->id_enabled ? "Yes" : "No"); ++lineno; } return(0); } static void load_devtab(void) { int i, val; int count = resource_count(); int id = 1; int dt; char *name; int unit; uc_devtab = malloc(sizeof(struct uc_device)*(count + 1),M_DEVL,M_WAITOK); bzero(uc_devtab, sizeof(struct uc_device) * (count + 1)); dt = 0; for (i = 0; i < count; i++) { name = resource_query_name(i); unit = resource_query_unit(i); if (unit < 0) continue; /* skip wildcards */ uc_devtab[dt].id_id = id++; resource_int_value(name, unit, "port", &uc_devtab[dt].id_iobase); val = 0; resource_int_value(name, unit, "irq", &val); uc_devtab[dt].id_irq = (1 << val); resource_int_value(name, unit, "drq", &uc_devtab[dt].id_drq); resource_int_value(name, unit, "maddr",(int *)&uc_devtab[dt].id_maddr); resource_int_value(name, unit, "msize", &uc_devtab[dt].id_msize); uc_devtab[dt].id_unit = unit; resource_int_value(name, unit, "flags", &uc_devtab[dt].id_flags); val = 0; resource_int_value(name, unit, "disabled", &val); uc_devtab[dt].id_enabled = !val; uc_devtab[dt].id_name = malloc(strlen(name) + 1, M_DEVL,M_WAITOK); strcpy(uc_devtab[dt].id_name, name); dt++; } } static void free_devtab(void) { int i; int count = resource_count(); for (i = 0; i < count; i++) if (uc_devtab[i].id_name) free(uc_devtab[i].id_name, M_DEVL); free(uc_devtab, M_DEVL); } static struct uc_device * find_device(char *devname, int unit) { struct uc_device *ret; if ((ret = search_devtable(uc_devtab, devname, unit)) != NULL) return ret; return NULL; } static struct uc_device * search_devtable(struct uc_device *dt, char *devname, int unit) { int i; for (i = 0; dt->id_id != 0; dt++) if (!strcmp(dt->id_name, devname) && dt->id_unit == unit) return dt; return NULL; } static void cngets(char *input, int maxin) { int c, nchars = 0; while (1) { c = getchar(); /* Treat ^H or ^? as backspace */ if ((c == '\010' || c == '\177')) { if (nchars) { printf("\010 \010"); *--input = '\0', --nchars; } continue; } /* Treat ^U or ^X as kill line */ else if ((c == '\025' || c == '\030')) { while (nchars) { printf("\010 \010"); *--input = '\0', --nchars; } continue; } printf("%c", c); if ((++nchars == maxin) || (c == '\n') || (c == '\r') || ( c == -1)) { *input = '\0'; break; } *input++ = (u_char)c; } } #if 0 /* scsi: Support for displaying configured SCSI devices. * There is no way to edit them, and this is inconsistent * with the ISA method. This is here as a basis for further work. */ static char * type_text(char *name) /* XXX: This is bogus */ { if (strcmp(name, "sd") == 0) return "disk"; if (strcmp(name, "st") == 0) return "tape"; return "device"; } id_put(char *desc, int id) { if (id != SCCONF_UNSPEC) { if (desc) printf("%s", desc); if (id == SCCONF_ANY) printf("?"); else printf("%d", id); } } static void lsscsi(void) { int i; printf("scsi: (can't be edited):\n"); for (i = 0; scsi_cinit[i].driver; i++) { id_put("controller scbus", scsi_cinit[i].scbus); if (scsi_cinit[i].unit != -1) { printf(" at "); id_put(scsi_cinit[i].driver, scsi_cinit[i].unit); } printf("\n"); } for (i = 0; scsi_dinit[i].name; i++) { printf("%s ", type_text(scsi_dinit[i].name)); id_put(scsi_dinit[i].name, scsi_dinit[i].unit); id_put(" at scbus", scsi_dinit[i].cunit); id_put(" target ", scsi_dinit[i].target); id_put(" lun ", scsi_dinit[i].lun); if (scsi_dinit[i].flags) printf(" flags 0x%x\n", scsi_dinit[i].flags); printf("\n"); } } static int list_scsi(CmdParm *parms) { lineno = 0; lsscsi(); return 0; } #endif static void save_resource(struct uc_device *idev) { char *name; int unit; name = idev->id_name; unit = idev->id_unit; resource_set_int(name, unit, "port", idev->id_iobase); resource_set_int(name, unit, "irq", ffs(idev->id_irq) - 1); resource_set_int(name, unit, "drq", idev->id_drq); resource_set_int(name, unit, "maddr", (int)idev->id_maddr); resource_set_int(name, unit, "msize", idev->id_msize); resource_set_int(name, unit, "flags", idev->id_flags); resource_set_int(name, unit, "disabled", !idev->id_enabled); } static int save_dev(idev) struct uc_device *idev; { struct uc_device *id_p,*id_pn; char *name = idev->id_name; for (id_p = uc_devlist; id_p; id_p = id_p->id_next) { if (id_p->id_id == idev->id_id) { id_pn = id_p->id_next; if (id_p->id_name) free(id_p->id_name, M_DEVL); bcopy(idev,id_p,sizeof(struct uc_device)); save_resource(idev); id_p->id_name = malloc(strlen(name)+1, M_DEVL,M_WAITOK); strcpy(id_p->id_name, name); id_p->id_next = id_pn; return 1; } } id_pn = malloc(sizeof(struct uc_device),M_DEVL,M_WAITOK); bcopy(idev,id_pn,sizeof(struct uc_device)); save_resource(idev); id_pn->id_name = malloc(strlen(name) + 1, M_DEVL,M_WAITOK); strcpy(id_pn->id_name, name); id_pn->id_next = uc_devlist; uc_devlist = id_pn; return 0; } Index: head/sys/pc98/pc98/clock.c =================================================================== --- head/sys/pc98/pc98/clock.c (revision 62572) +++ head/sys/pc98/pc98/clock.c (revision 62573) @@ -1,1621 +1,1621 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Routines to handle clock hardware. */ /* * inittodr, settodr and support routines written * by Christoph Robitschko * * reintroduced and updated by Chris Stenton 8/10/94 */ /* * modified for PC98 by Kakefuda */ #include "opt_clock.h" #include "apm.h" #include #include #include #include #include #include #ifndef SMP #include #endif #include #include #include #ifdef CLK_CALIBRATION_LOOP #endif #include #include #include #include #include #include #ifdef APIC_IO #include #endif #if defined(SMP) || defined(APIC_IO) #include #endif /* SMP || APIC_IO */ #include #include #ifdef PC98 #include #include #include #else #include #include #endif #include #include #include #include "mca.h" #if NMCA > 0 #include #endif #ifdef SMP #define disable_intr() CLOCK_DISABLE_INTR() #define enable_intr() CLOCK_ENABLE_INTR() #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif #endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) #define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x)) /* * Time in timer cycles that it takes for microtime() to disable interrupts * and latch the count. microtime() currently uses "cli; outb ..." so it * normally takes less than 2 timer cycles. Add a few for cache misses. * Add a few more to allow for latency in bogus calls to microtime() with * interrupts already disabled. */ #define TIMER0_LATCH_COUNT 20 /* * Maximum frequency that we are willing to allow for timer0. Must be * low enough to guarantee that the timer interrupt handler returns * before the next timer interrupt. */ #define TIMER0_MAX_FREQ 20000 int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int disable_rtc_set; /* disable resettodr() if != 0 */ volatile u_int idelayed; int statclock_disable; u_int stat_imask = SWI_LOW_MASK; #ifndef TIMER_FREQ #ifdef PC98 #define TIMER_FREQ 2457600; #else /* IBM-PC */ #define TIMER_FREQ 1193182; #endif /* PC98 */ #endif u_int timer_freq = TIMER_FREQ; int timer0_max_count; u_int tsc_freq; int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; static u_int clk_imask = HWI_MASK | SWI_MASK; static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int i8254_ticked; /* * XXX new_function and timer_func should not handle clockframes, but * timer_func currently needs to hold hardclock to handle the * timer0_state == 0 case. We should use inthand_add()/inthand_remove() * to switch between clkintr() and a slightly different timerintr(). */ static void (*new_function) __P((struct clockframe *frame)); static u_int new_rate; #ifndef PC98 static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; #endif static u_int timer0_prescaler_count; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer0_state; #ifdef PC98 static u_char timer1_state; #endif static u_char timer2_state; static void (*timer_func) __P((struct clockframe *frame)) = hardclock; #ifdef PC98 static void rtc_serialcombit __P((int)); static void rtc_serialcom __P((int)); static int rtc_inb __P((void)); static void rtc_outb __P((int)); #endif static u_int tsc_present; static unsigned i8254_get_timecount __P((struct timecounter *tc)); static unsigned tsc_get_timecount __P((struct timecounter *tc)); static void set_timer_freq(u_int freq, int intr_freq); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "TSC" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, &tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", ""); static struct timecounter i8254_timecounter = { i8254_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "i8254" /* name */ }; SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", ""); static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { disable_intr(); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += timer0_max_count; i8254_lastcount = 0; } clkintr_pending = 0; enable_intr(); } timer_func(&frame); switch (timer0_state) { case RELEASED: setdelayed(); break; case ACQUIRED: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); setdelayed(); } break; case ACQUIRE_PENDING: disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); break; case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { disable_intr(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); enable_intr(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); setdelayed(); } break; } #if NMCA > 0 /* Reset clock interrupt by asserting bit 7 of port 0x61 */ if (MCA_system) outb(0x61, inb(0x61) | 0x80); #endif } /* * The acquire and release functions must be called at ipl >= splclock(). */ int acquire_timer0(int rate, void (*function) __P((struct clockframe *frame))) { static int old_rate; if (rate <= 0 || rate > TIMER0_MAX_FREQ) return (-1); switch (timer0_state) { case RELEASED: timer0_state = ACQUIRE_PENDING; break; case RELEASE_PENDING: if (rate != old_rate) return (-1); /* * The timer has been released recently, but is being * re-acquired before the release completed. In this * case, we simply reclaim it as if it had not been * released at all. */ timer0_state = ACQUIRED; break; default: return (-1); /* busy */ } new_function = function; old_rate = new_rate = rate; return (0); } #ifdef PC98 int acquire_timer1(int mode) { if (timer1_state != RELEASED) return (-1); timer1_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL1 | (mode & 0x3f)); return (0); } #endif int acquire_timer2(int mode) { if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. */ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); return (0); } int release_timer0() { switch (timer0_state) { case ACQUIRED: timer0_state = RELEASE_PENDING; break; case ACQUIRE_PENDING: /* Nothing happened yet, release quickly. */ timer0_state = RELEASED; break; default: return (-1); } return (0); } #ifdef PC98 int release_timer1() { if (timer1_state != ACQUIRED) return (-1); timer1_state = RELEASED; outb(TIMER_MODE, TIMER_SEL1 | TIMER_SQWAVE | TIMER_16BIT); return (0); } #endif int release_timer2() { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); return (0); } #ifndef PC98 /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static void rtcintr(struct clockframe frame) { while (rtcin(RTC_INTR) & RTCIR_PERIOD) statclock(&frame); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(rtc, rtc) { printf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); } #endif /* DDB */ #endif /* for PC98 */ static int getit(void) { u_long ef; int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); write_eflags(ef); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Guard against the timer being uninitialized if we are called * early for console i/o. */ if (timer0_max_count == 0) set_timer_freq(timer_freq, hz); /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += timer0_max_count; /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void sysbeepstop(void *chan) { #ifdef PC98 /* PC98 */ outb(IO_PPI, inb(IO_PPI)|0x08); /* disable counter1 output to speaker */ release_timer1(); #else outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ release_timer2(); #endif beeping = 0; } int sysbeep(int pitch, int period) { int x = splclock(); #ifdef PC98 if (acquire_timer1(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(0x3fdb, pitch); outb(0x3fdb, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter1 output to speaker */ outb(IO_PPI, (inb(IO_PPI) & 0xf7)); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } #else if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) if (!beeping) { /* Something else owns it. */ splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } disable_intr(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); enable_intr(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); beeping = period; timeout(sysbeepstop, (void *)NULL, period); } #endif splx(x); return (0); } #ifndef PC98 /* * RTC support routines */ int rtcin(reg) int reg; { int s; u_char val; s = splhigh(); outb(IO_RTC, reg); inb(0x84); val = inb(IO_RTC + 1); inb(0x84); splx(s); return (val); } static __inline void writertc(u_char reg, u_char val) { int s; s = splhigh(); inb(0x84); outb(IO_RTC, reg); inb(0x84); outb(IO_RTC + 1, val); inb(0x84); /* XXX work around wrong order in rtcin() */ splx(s); } static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } #endif #ifdef PC98 unsigned int delaycount; #define FIRST_GUESS 0x2000 static void findcpuspeed(void) { int i; int remainder; /* Put counter in count down mode */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_16BIT | TIMER_RATEGEN); outb(TIMER_CNTR0, 0xff); outb(TIMER_CNTR0, 0xff); for (i = FIRST_GUESS; i; i--) ; remainder = getit(); delaycount = (FIRST_GUESS * TIMER_DIV(1000)) / (0xffff - remainder); } #endif #ifdef PC98 static u_int calibrate_clocks(void) { int timeout; u_int count, prev_count, tot_count; u_short sec, start_sec; if (bootverbose) printf("Calibrating clock(s) ... "); /* Check ARTIC. */ if (!(PC98_SYSTEM_PARAMETER(0x458) & 0x80) && !(PC98_SYSTEM_PARAMETER(0x45b) & 0x04)) goto fail; timeout = 100000000; /* Read the ARTIC. */ sec = inw(0x5e); /* Wait for the ARTIC to changes. */ start_sec = sec; for (;;) { sec = inw(0x5e); if (sec != start_sec) break; if (--timeout == 0) goto fail; } prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) wrmsr(0x10, 0LL); /* XXX 0x10 is the MSR for the TSC */ start_sec = sec; for (;;) { sec = inw(0x5e); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if ((sec == start_sec + 1200) || (sec < start_sec && (u_int)sec + 0x10000 == (u_int)start_sec + 1200)) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc(); if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } #else static u_int calibrate_clocks(void) { u_int64_t old_tsc; u_int count, prev_count, tot_count; int sec, start_sec, timeout; if (bootverbose) printf("Calibrating clock(s) ... "); if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto fail; timeout = 100000000; /* Read the mc146818A seconds counter. */ for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); break; } if (--timeout == 0) goto fail; } /* Wait for the mC146818A seconds counter to change. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { sec = rtcin(RTC_SEC); if (sec != start_sec) break; } if (--timeout == 0) goto fail; } /* Start keeping track of the i8254 counter. */ prev_count = getit(); if (prev_count == 0 || prev_count > timer0_max_count) goto fail; tot_count = 0; if (tsc_present) old_tsc = rdtsc(); else old_tsc = 0; /* shut up gcc */ /* * Wait for the mc146818A seconds counter to change. Read the i8254 * counter for each iteration since this is convenient and only * costs a few usec of inaccuracy. The timing of the final reads * of the counters almost matches the timing of the initial reads, * so the main cause of inaccuracy is the varying latency from * inside getit() or rtcin(RTC_STATUSA) to the beginning of the * rtcin(RTC_SEC) that returns a changed seconds count. The * maximum inaccuracy from this cause is < 10 usec on 486's. */ start_sec = sec; for (;;) { if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) sec = rtcin(RTC_SEC); count = getit(); if (count == 0 || count > timer0_max_count) goto fail; if (count > prev_count) tot_count += prev_count - (count - timer0_max_count); else tot_count += prev_count - count; prev_count = count; if (sec != start_sec) break; if (--timeout == 0) goto fail; } /* * Read the cpu cycle counter. The timing considerations are * similar to those for the i8254 clock. */ if (tsc_present) tsc_freq = rdtsc() - old_tsc; if (bootverbose) { if (tsc_present) printf("TSC clock: %u Hz, ", tsc_freq); printf("i8254 clock: %u Hz\n", tot_count); } return (tot_count); fail: if (bootverbose) printf("failed, using default i8254 clock of %u Hz\n", timer_freq); return (timer_freq); } #endif /* !PC98 */ static void set_timer_freq(u_int freq, int intr_freq) { u_long ef; int new_timer0_max_count; ef = read_eflags(); disable_intr(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { timer0_max_count = new_timer0_max_count; outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); write_eflags(ef); } /* * i8254_restore is called from apm_default_resume() to reload * the countdown register. * this should not be necessary but there are broken laptops that * do not restore the countdown register on resume. * when it happnes, it messes up the hardclock interval and system clock, * which leads to the infamous "calcru: negative time" problem. */ void i8254_restore(void) { u_long ef; ef = read_eflags(); disable_intr(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); write_eflags(ef); } /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. */ void startrtclock() { u_int delta, freq; #ifdef PC98 findcpuspeed(); if (pc98_machine_type & M_8M) timer_freq = 1996800L; /* 1.9968 MHz */ else timer_freq = 2457600L; /* 2.4576 MHz */ #endif /* PC98 */ if (cpu_feature & CPUID_TSC) tsc_present = 1; else tsc_present = 0; #ifndef PC98 writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); #endif set_timer_freq(timer_freq, hz); freq = calibrate_clocks(); #ifdef CLK_CALIBRATION_LOOP if (bootverbose) { printf( "Press a key on the console to abort clock calibration\n"); while (cncheckc() == -1) calibrate_clocks(); } #endif /* * Use the calibrated i8254 frequency if it seems reasonable. * Otherwise use the default, and don't use the calibrated i586 * frequency. */ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq; if (delta < timer_freq / 100) { #ifndef CLK_USE_I8254_CALIBRATION if (bootverbose) printf( "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); freq = timer_freq; #endif timer_freq = freq; } else { if (bootverbose) printf( "%d Hz differs from default of %d Hz by more than 1%%\n", freq, timer_freq); tsc_freq = 0; } set_timer_freq(timer_freq, hz); i8254_timecounter.tc_frequency = timer_freq; tc_init(&i8254_timecounter); #ifndef CLK_USE_TSC_CALIBRATION if (tsc_freq != 0) { if (bootverbose) printf( "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); tsc_freq = 0; } #endif if (tsc_present && tsc_freq == 0) { /* * Calibration of the i586 clock relative to the mc146818A * clock failed. Do a less accurate calibration relative * to the i8254 clock. */ u_int64_t old_tsc = rdtsc(); DELAY(1000000); tsc_freq = rdtsc() - old_tsc; #ifdef CLK_USE_TSC_CALIBRATION if (bootverbose) printf("TSC clock: %u Hz (Method B)\n", tsc_freq); #endif } #if !defined(SMP) /* * We can not use the TSC in SMP mode, until we figure out a * cheap (impossible), reliable and precise (yeah right!) way * to synchronize the TSCs of all the CPUs. * Curse Intel for leaving the counter out of the I/O APIC. */ #if NAPM > 0 /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 looses too of course * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */ return; #endif /* NAPM > 0 */ if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } #endif /* !defined(SMP) */ } #ifdef PC98 static void rtc_serialcombit(int i) { outb(IO_RTC, ((i&0x01)<<5)|0x07); DELAY(1); outb(IO_RTC, ((i&0x01)<<5)|0x17); DELAY(1); outb(IO_RTC, ((i&0x01)<<5)|0x07); DELAY(1); } static void rtc_serialcom(int i) { rtc_serialcombit(i&0x01); rtc_serialcombit((i&0x02)>>1); rtc_serialcombit((i&0x04)>>2); rtc_serialcombit((i&0x08)>>3); outb(IO_RTC, 0x07); DELAY(1); outb(IO_RTC, 0x0f); DELAY(1); outb(IO_RTC, 0x07); DELAY(1); } static void rtc_outb(int val) { int s; int sa = 0; for (s=0;s<8;s++) { sa = ((val >> s) & 0x01) ? 0x27 : 0x07; outb(IO_RTC, sa); /* set DI & CLK 0 */ DELAY(1); outb(IO_RTC, sa | 0x10); /* CLK 1 */ DELAY(1); } outb(IO_RTC, sa & 0xef); /* CLK 0 */ } static int rtc_inb(void) { int s; int sa = 0; for (s=0;s<8;s++) { sa |= ((inb(0x33) & 0x01) << s); outb(IO_RTC, 0x17); /* CLK 1 */ DELAY(1); outb(IO_RTC, 0x07); /* CLK 0 */ DELAY(2); } return sa; } #endif /* PC-98 */ /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { unsigned long sec, days; #ifndef PC98 int yd; #endif int year, month; int y, m, s; struct timespec ts; #ifdef PC98 int second, min, hour; #endif if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } #ifdef PC98 rtc_serialcom(0x03); /* Time Read */ rtc_serialcom(0x01); /* Register shift command. */ DELAY(20); second = bcd2bin(rtc_inb() & 0xff); /* sec */ min = bcd2bin(rtc_inb() & 0xff); /* min */ hour = bcd2bin(rtc_inb() & 0xff); /* hour */ days = bcd2bin(rtc_inb() & 0xff) - 1; /* date */ month = (rtc_inb() >> 4) & 0x0f; /* month */ for (m = 1; m < month; m++) days += daysinmonth[m-1]; year = bcd2bin(rtc_inb() & 0xff) + 1900; /* year */ /* 2000 year problem */ if (year < 1995) year += 100; if (year < 1970) goto wrong_time; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); if ((month > 2) && LEAPYEAR(year)) days ++; sec = ((( days * 24 + hour) * 60 + min) * 60 + second); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ #else /* IBM-PC */ /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; yd = days; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ #endif sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; #ifdef PC98 int wd; #endif if (disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); #ifdef PC98 rtc_serialcom(0x01); /* Register shift command. */ /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); rtc_outb(bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ rtc_outb(bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ rtc_outb(bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ wd = (tm+4)%7; for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } m++; rtc_outb(bin2bcd(tm+1)); /* Write back Day */ rtc_outb((m << 4) | wd); /* Write back Month & Weekday */ rtc_outb(bin2bcd(y%100)); /* Write back Year */ rtc_serialcom(0x02); /* Time set & Counter hold command. */ rtc_serialcom(0x00); /* Register hold command. */ #else /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. */ writertc(RTC_STATUSB, rtc_statusb); #endif } /* * Start both clocks running. */ void cpu_initclocks() { #ifdef APIC_IO int apic_8254_trial; struct intrec *clkdesc; #endif /* APIC_IO */ #ifndef PC98 int diag; if (statclock_disable) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt * flag which would normally cause the RTC to generate * interrupts. */ stat_imask = HWI_MASK | SWI_MASK; rtc_statusb = RTCSB_24HR; } else { /* Setting stathz to nonzero early helps avoid races. */ stathz = RTC_NOPROFRATE; profhz = RTC_PROFRATE; } #endif /* Finish initializing 8253 timer 0. */ #ifdef APIC_IO apic_8254_intr = isa_apic_irq(0); apic_8254_trial = 0; if (apic_8254_intr >= 0 ) { if (apic_int_type(0, 0) == 3) apic_8254_trial = 1; } else { /* look for ExtInt on pin 0 */ if (apic_int_type(0, 0) == 3) { apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); #else /* APIC_IO */ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(IRQ0); #endif /* APIC_IO */ #ifndef PC98 /* Initialize RTC. */ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ if (statclock_disable) return; diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); #endif /* APIC_IO */ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, INTR_EXCL); #ifdef APIC_IO INTREN(APIC_IRQ8); #else INTREN(IRQ8); #endif /* APIC_IO */ writertc(RTC_STATUSB, rtc_statusb); #endif /* !PC98 */ #ifdef APIC_IO if (apic_8254_trial) { printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ if (read_intr_count(apic_8254_intr) < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ INTRDIS(1 << apic_8254_intr); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); /* * Revoke current ISA IRQ 0 assignment and * configure a fallback interrupt routing from * the 8254 Timer via the 8259 PIC to the * an ExtInt interrupt line on IOAPIC #0 intpin 0. * We reuse the low level interrupt handler number. */ if (apic_irq(0, 0) < 0) { revoke_apic_irq(apic_8254_intr); assign_apic_irq(0, 0, apic_8254_intr); } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, &clk_imask, INTR_EXCL); INTREN(1 << apic_8254_intr); } } if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", int_to_apicintpin[apic_8254_intr].ioapic, int_to_apicintpin[apic_8254_intr].int_pin); else printf("APIC_IO: " "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); #endif } #ifdef APIC_IO static u_long read_intr_count(int vec) { u_long *up; up = intr_countp[vec]; if (up) return *up; return 0UL; } static void setup_8254_mixed_mode() { /* * Allow 8254 timer to INTerrupt 8259: * re-initialize master 8259: * reset; prog 4 bytes, single ICU, edge triggered */ outb(IO_ICU1, 0x13); #ifdef PC98 outb(IO_ICU1 + 2, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 2, 0x00); /* ignore slave */ outb(IO_ICU1 + 2, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 2, 0xfe); /* unmask INT0 */ #else outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ outb(IO_ICU1 + 1, 0x00); /* ignore slave */ outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ #endif /* program IO APIC for type 3 INT on INT0 */ if (ext_int_setup(0, 0) < 0) panic("8254 redirect via APIC pin0 impossible!"); } #endif void setstatclockrate(int newhz) { #ifndef PC98 if (newhz == RTC_PROFRATE) rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; else rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); #endif } static int -sysctl_machdep_i8254_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is is too generic. Should use it everywhere. */ freq = timer_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { if (timer0_state != RELEASED) return (EBUSY); /* too much trouble to handle */ set_timer_freq(freq, hz); i8254_timecounter.tc_frequency = freq; tc_update(&i8254_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", ""); static int -sysctl_machdep_tsc_freq (SYSCTL_HANDLER_ARGS) +sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; if (tsc_timecounter.tc_frequency == 0) return (EOPNOTSUPP); freq = tsc_freq; error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); if (error == 0 && req->newptr != NULL) { tsc_freq = freq; tsc_timecounter.tc_frequency = tsc_freq; tc_update(&tsc_timecounter); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", ""); static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; u_long ef; u_int high, low; ef = read_eflags(); disable_intr(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. */ (lapic_irr1 & (1 << apic_8254_intr)))) #else (inb(IO_ICU1) & 1))) #endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; } i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); write_eflags(ef); return (count); } static unsigned tsc_get_timecount(struct timecounter *tc) { return (rdtsc()); } /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static int attimer_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids)) <= 0) device_quiet(dev); return(result); } static int attimer_attach(device_t dev) { return(0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, bus_generic_resume), /* XXX restart statclock? */ { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, 1, /* no softc */ }; static devclass_t attimer_devclass; DRIVER_MODULE(attimer, isa, attimer_driver, attimer_devclass, 0, 0); Index: head/sys/pc98/pc98/machdep.c =================================================================== --- head/sys/pc98/pc98/machdep.c (revision 62572) +++ head/sys/pc98/pc98/machdep.c (revision 62573) @@ -1,2731 +1,2731 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "apm.h" #include "npx.h" #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" #include "opt_smp.h" #include "opt_user_ldt.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #ifdef SMP #include #include #endif #ifdef PERFMON #include #endif #ifdef OLD_BUS_ARCH #include #endif #include #ifdef PC98 #include #include #else #include #endif #include #include #include extern void init386 __P((int first)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ #endif int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif #ifdef PC98 static int ispc98 = 1; #else static int ispc98 = 0; #endif SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); int physmem = 0; int cold = 1; static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code)); static int -sysctl_hw_physmem (SYSCTL_HANDLER_ARGS) +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int -sysctl_hw_usermem (SYSCTL_HANDLER_ARGS) +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); static int -sysctl_hw_availpages (SYSCTL_HANDLER_ARGS) +sysctl_hw_availpages(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, i386_btop(avail_end - avail_start), req); return (error); } SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_availpages, "I", ""); static int -sysctl_machdep_msgbuf (SYSCTL_HANDLER_ARGS) +sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) { int error; /* Unwind the buffer, so that it's linear (possibly starting with * some initial nulls). */ error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, msgbufp->msg_size-msgbufp->msg_bufr,req); if(error) return(error); if(msgbufp->msg_bufr>0) { error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, msgbufp->msg_bufr,req); } return(error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); static int msgbuf_clear; static int -sysctl_machdep_msgbuf_clear (SYSCTL_HANDLER_ARGS) +sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) { /* Clear the buffer and reset write pointer */ bzero(msgbufp->msg_ptr,msgbufp->msg_size); msgbufp->msg_bufr=msgbufp->msg_bufx=0; msgbuf_clear=0; } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", "Clear kernel message buffer"); int bootverbose = 0, Maxmem = 0; #ifdef PC98 int Maxmem_under16M = 0; #endif long dumplo; vm_offset_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; #define offsetof(type, member) ((size_t)(&((type *)0)->member)) static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf(version); earlysetcpuclass(); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %u (%uK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08x - 0x%08x, %u bytes (%u pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } /* * Calculate callout wheel size */ for (callwheelsize = 1, callwheelbits = 0; callwheelsize < ncallout; callwheelsize <<= 1, ++callwheelbits) ; callwheelmask = callwheelsize - 1; /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); valloc(callwheel, struct callout_tailq, callwheelsize); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/20 of our ram over 64MB. * * factor represents the 1/4 x ram conversion. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / PAGE_SIZE; nbuf = 50; if (physmem > 1024) nbuf += min((physmem - 1024) / factor, 16384 / factor); if (physmem > 16384) nbuf += (physmem - 16384) * 2 / (factor * 5); } /* * Do not allow the buffer_map to be more then 1/2 the size of the * kernel_map. */ if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2)) { nbuf = (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2); printf("Warning: nbufs capped at %d\n", nbuf); } nswbuf = max(min(nbuf/4, 256), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); v = bufhashinit(v); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE)); pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size); pager_map->system_map = 1; exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size); mb_map->system_map = 1; } /* * Initialize callouts */ SLIST_INIT(&callfree); for (i = 0; i < ncallout; i++) { callout_init(&callout[i]); callout[i].c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle); } for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&callwheel[i]); } #if defined(USERCONFIG) userconfig(); cninit(); /* the preferred console may have changed */ #endif printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! */ mp_start(); /* fire up the APs and APICs */ mp_announce(); #endif /* SMP */ } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf; struct osigframe *fp; struct proc *p; struct sigacts *psp; struct trapframe *regs; int oonstack; p = curproc; psp = p->p_sigacts; regs = p->p_md.md_regs; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else fp = (struct osigframe *)regs->tf_esp - 1; /* * grow_stack() will return 0 if *fp does not fit inside the stack * and the stack can not be grown. * useracc() will return FALSE if access is denied. */ if (grow_stack(p, (int)fp) == 0 || !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = oonstack; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf; struct proc *p; struct sigacts *psp; struct trapframe *regs; struct sigframe *sfp; int oonstack; p = curproc; psp = p->p_sigacts; if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } regs = p->p_md.md_regs; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sfp = (struct sigframe *)regs->tf_esp - 1; /* * grow_stack() will return 0 if *sfp does not fit inside the stack * and the stack can not be grown. * useracc() will return FALSE if access is denied. */ if (grow_stack(p, (int)sfp) == 0 || !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG printf("process %d has trashed its stack\n", p->p_pid); #endif SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill siginfo structure. */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * We should never have PSL_T set when returning from vm86 * mode. It may be set here if we deliver a signal before * getting to vm86 mode, so turn it off. * * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ int osigreturn(p, uap) struct proc *p; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct trapframe *regs; struct osigcontext *scp; int eflags; regs = p->p_md.md_regs; scp = uap->sigcntxp; if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ)) return (EFAULT); eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; if (scp->sc_onstack & 01) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; return (EJUSTRETURN); } int sigreturn(p, uap) struct proc *p; struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap; { struct trapframe *regs; ucontext_t *ucp; int cs, eflags; ucp = uap->sigcntxp; if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ)) return (EFAULT); if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516) return (osigreturn(p, (struct osigreturn_args *)uap)); /* * Since ucp is not an osigcontext but a ucontext_t, we have to * check again if all of it is accessible. A ucontext_t is * much larger, so instead of just checking for the pointer * being valid for the size of an osigcontext, now check for * it being valid for a whole, new-style ucontext_t. */ if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ)) return (EFAULT); regs = p->p_md.md_regs; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Clear registers on exec */ void setregs(p, entry, stack, ps_strings) struct proc *p; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = p->p_md.md_regs; struct pcb *pcb = &p->p_addr->u_pcb; #ifdef USER_LDT /* was i386_user_cleanup() in NetBSD */ user_ldt_free(pcb); #endif bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* reset %gs as well */ if (pcb == curpcb) load_gs(_udatasel); else pcb->pcb_gs = _udatasel; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); #if NNPX > 0 /* Initialize the npx (if any) for the current process. */ npxinit(__INITIAL_NPXCW__); #endif /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ p->p_retval[1] = 0; } static int -sysctl_machdep_adjkerntz (SYSCTL_HANDLER_ARGS) +sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; #ifdef SMP union descriptor gdt[NGDT * NCPU]; /* global descriptor table */ #else union descriptor gdt[NGDT]; /* global descriptor table */ #endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif #ifndef SMP extern struct segment_descriptor common_tssd, *tss_gdt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. */ #ifdef PC98 static void getmemsize(int first) { u_int biosbasemem, biosextmem; u_int pagesinbase, pagesinext; int pa_indx; int pg_n; int speculative_mprobe; #if NNPX > 0 int msize; #endif unsigned under16; vm_offset_t target_page; pc98_getmemsize(&biosbasemem, &biosextmem, &under16); #ifdef SMP /* make hole for AP bootstrap code */ pagesinbase = mp_bootaddress(biosbasemem) / PAGE_SIZE; #else pagesinbase = biosbasemem * 1024 / PAGE_SIZE; #endif pagesinext = biosextmem * 1024 / PAGE_SIZE; Maxmem_under16M = under16 * 1024 / PAGE_SIZE; #ifndef MAXMEM /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = pagesinext + 0x100000/PAGE_SIZE; /* * Indicate that we wish to do a speculative search for memory beyond * the end of the reported size if the indicated amount is 64MB (0x4000 * pages) - which is the largest amount that the BIOS/bootblocks can * currently report. If a specific amount of memory is indicated via * the MAXMEM option or the npx0 "msize", then don't do the speculative * memory probe. */ if (Maxmem >= 0x4000) speculative_mprobe = TRUE; else speculative_mprobe = FALSE; #else Maxmem = MAXMEM/4; speculative_mprobe = FALSE; #endif #if NNPX > 0 if (resource_int_value("npx", 0, "msize", &msize) == 0) { if (msize != 0) { Maxmem = msize / 4; speculative_mprobe = FALSE; } } #endif #ifdef SMP /* look for the MP hardware - needed for apic addresses */ mp_probe(); #endif /* call pmap initialization to make new kernel address space */ pmap_bootstrap (first, 0); /* * Size up each available chunk of physical memory. */ /* * We currently don't bother testing base memory. * XXX ...but we probably should. */ pa_indx = 0; if (pagesinbase > 1) { phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ physmem = pagesinbase - 1; } else { /* point at first chunk end */ pa_indx++; } /* XXX - some of EPSON machines can't use PG_N */ pg_n = PG_N; if (pc98_machine_type & M_EPSON_PC98) { switch (epson_machine_id) { #ifdef WB_CACHE default: #endif case 0x34: /* PC-486HX */ case 0x35: /* PC-486HG */ case 0x3B: /* PC-486HA */ pg_n = 0; break; } } speculative_mprobe = FALSE; #ifdef notdef /* XXX - see below */ /* * Certain 'CPU accelerator' supports over 16MB memory on the machines * whose BIOS doesn't store true size. * To support this, we don't trust BIOS values if Maxmem < 16MB (0x1000 * pages) - which is the largest amount that the OLD PC-98 can report. * * OK: PC-9801NS/R(9.6M) * OK: PC-9801DA(5.6M)+EUD-H(32M)+Cyrix 5x86 * OK: PC-9821Ap(14.6M)+EUA-T(8M)+Cyrix 5x86-100 * NG: PC-9821Ap(14.6M)+EUA-T(8M)+AMD DX4-100 -> freeze */ if (Maxmem < 0x1000) { int tmp, page_bad; page_bad = FALSE; /* * For Max14.6MB machines, the 0x10f0 page is same as 0x00f0, * which is BIOS ROM, by overlapping. * So, we check that page's ability of writing. */ target_page = ptoa(0x10f0); /* * map page into kernel: valid, read/write, non-cacheable */ *(int *)CMAP1 = PG_V | PG_RW | pg_n | target_page; invltlb(); tmp = *(int *)CADDR1; /* * Test for alternating 1's and 0's */ *(volatile int *)CADDR1 = 0xaaaaaaaa; if (*(volatile int *)CADDR1 != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)CADDR1 = 0x55555555; if (*(volatile int *)CADDR1 != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)CADDR1 = 0xffffffff; if (*(volatile int *)CADDR1 != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)CADDR1 = 0x0; if (*(volatile int *)CADDR1 != 0x0) { /* * test of page failed */ page_bad = TRUE; } /* * Restore original value. */ *(int *)CADDR1 = tmp; /* * Adjust Maxmem if valid/good page. */ if (page_bad == FALSE) { /* '+ 2' is needed to make speculative_mprobe sure */ Maxmem = 0x1000 + 2; speculative_mprobe = TRUE; } } #endif for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { int tmp, page_bad; page_bad = FALSE; /* skip system area */ if (target_page >= ptoa(Maxmem_under16M) && target_page < ptoa(4096)) continue; /* * map page into kernel: valid, read/write, non-cacheable */ *(int *)CMAP1 = PG_V | PG_RW | pg_n | target_page; invltlb(); tmp = *(int *)CADDR1; /* * Test for alternating 1's and 0's */ *(volatile int *)CADDR1 = 0xaaaaaaaa; if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)CADDR1 = 0x55555555; if (*(volatile int *)CADDR1 != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)CADDR1 = 0xffffffff; if (*(volatile int *)CADDR1 != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)CADDR1 = 0x0; if (*(volatile int *)CADDR1 != 0x0) { /* * test of page failed */ page_bad = TRUE; } /* * Restore original value. */ *(int *)CADDR1 = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == FALSE) { /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == target_page) { phys_avail[pa_indx] += PAGE_SIZE; if (speculative_mprobe == TRUE && phys_avail[pa_indx] >= (16*1024*1024)) Maxmem++; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = target_page; /* start */ phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ } physmem++; } } *(int *)CMAP1 = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } #else static void getmemsize(int first) { int i, physmap_idx, pa_indx; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_offset_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t pte; const char *cp; struct { u_int64_t base; u_int64_t length; u_int32_t type; } *smap; bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { pte = (pt_entry_t)vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. */ pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT)); *pte = (1 << PAGE_SHIFT) | PG_RW | PG_V; /* * get memory map with INT 15:E820 */ #define SMAPSIZ sizeof(*smap) #define SMAP_SIG 0x534D4150 /* 'SMAP' */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = SMAPSIZ; i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n", smap->type, *(u_int32_t *)((char *)&smap->base + 4), (u_int32_t)smap->base, *(u_int32_t *)((char *)&smap->length + 4), (u_int32_t)smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: } while (vmf.vmf_ebx != 0); if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.maxmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %uK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa(Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; #if 0 pte = (pt_entry_t)vtopte(KERNBASE); #else pte = (pt_entry_t)CMAP1; #endif /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_offset_t end; end = ptoa(Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; #if 0 int *ptr = 0; #else int *ptr = (int *)CADDR1; #endif /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } #endif void init386(first) int first; { int x; struct gate_descriptor *gdp; int gsel_tss; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif int off; /* * Prevent lowering of the ipl if we call tsleep() early. */ safepri = cpl; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; #ifdef PC98 /* * Initialize DMAC */ pc98_init_dmac(); #endif if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; #ifdef SMP gdt_segs[GPRIV_SEL].ssd_limit = i386_btop(sizeof(struct privatespace)) - 1; gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0]; gdt_segs[GPROC0_SEL].ssd_base = (int) &SMP_prvspace[0].globaldata.gd_common_tss; SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0]; #else gdt_segs[GPRIV_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GPROC0_SEL].ssd_base = (int) &common_tss; #endif for (x = 0; x < NGDT; x++) { #ifdef BDE_DEBUGGER /* avoid overwriting db entries with APM ones */ if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL) continue; #endif ssdtosd(&gdt_segs[x], &gdt[x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we * don't want the user area to be writable in copyout() etc. (page * level protection is lost in kernel mode on 386's). Also, we * don't want the user area to be writable directly (page level * protection of the user area is not available on 486's with * CR0_WP set, because there is no user-read/kernel-write mode). * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... */ #define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS /* * The code segment limit has to cover the user area until we move * the signal trampoline out of the user area. This is safe because * the code segment cannot be written to directly. */ #define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); #ifdef USER_LDT currentldt = _default_ldt; #endif /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; tss_gdt = &gdt[GPROC0_SEL].sd; common_tssd = *tss_gdt; common_tss.tss_ioopt = (sizeof common_tss) << 16; ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = (int)IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; #ifdef SMP proc0.p_addr->u_pcb.pcb_mpnest = 1; #endif proc0.p_addr->u_pcb.pcb_ext = 0; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(p, addr) struct proc *p; unsigned long addr; { p->p_md.md_regs->tf_eip = addr; return (0); } int ptrace_single_step(p) struct proc *p; { p->p_md.md_regs->tf_eflags |= PSL_T; return (0); } int ptrace_read_u_check(p, addr, len) struct proc *p; vm_offset_t addr; size_t len; { vm_offset_t gap; if ((vm_offset_t) (addr + len) < addr) return EPERM; if ((vm_offset_t) (addr + len) <= sizeof(struct user)) return 0; gap = (char *) p->p_md.md_regs - (char *) p->p_addr; if ((vm_offset_t) addr < gap) return EPERM; if ((vm_offset_t) (addr + len) <= (vm_offset_t) (gap + sizeof(struct trapframe))) return 0; return EPERM; } int ptrace_write_u(p, off, data) struct proc *p; vm_offset_t off; long data; { struct trapframe frame_copy; vm_offset_t min; struct trapframe *tp; /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_regs - (char *)p->p_addr; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = p->p_md.md_regs; frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = &p->p_addr->u_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = &p->p_addr->u_pcb; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs); return (0); } int set_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs); return (0); } int fill_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; pcb = &p->p_addr->u_pcb; dbregs->dr0 = pcb->pcb_dr0; dbregs->dr1 = pcb->pcb_dr1; dbregs->dr2 = pcb->pcb_dr2; dbregs->dr3 = pcb->pcb_dr3; dbregs->dr4 = 0; dbregs->dr5 = 0; dbregs->dr6 = pcb->pcb_dr6; dbregs->dr7 = pcb->pcb_dr7; return (0); } int set_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; pcb = &p->p_addr->u_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (p->p_ucred->cr_uid != 0) { if (dbregs->dr7 & 0x3) { /* dr0 is enabled */ if (dbregs->dr0 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr1 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr2 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr3 >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr0; pcb->pcb_dr1 = dbregs->dr1; pcb->pcb_dr2 = dbregs->dr2; pcb->pcb_dr3 = dbregs->dr3; pcb->pcb_dr6 = dbregs->dr6; pcb->pcb_dr7 = dbregs->dr7; pcb->pcb_flags |= PCB_DBREGS; return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i=0; i /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel) { struct partition *p = lp->d_partitions + dkpart(bp->bio_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR && (bp->bio_cmd == BIO_WRITE) && wlabel == 0) { bp->bio_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->bio_blkno == maxsz) { bp->bio_resid = bp->bio_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->bio_blkno; if (sz <= 0) { bp->bio_error = EINVAL; goto bad; } bp->bio_bcount = sz << DEV_BSHIFT; } bp->bio_pblkno = bp->bio_blkno + p->p_offset; return(1); bad: bp->bio_flags |= BIO_ERROR; return(-1); } #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/pc98/pc98/sio.c =================================================================== --- head/sys/pc98/pc98/sio.c (revision 62572) +++ head/sys/pc98/pc98/sio.c (revision 62573) @@ -1,5185 +1,5185 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * from: @(#)com.c 7.5 (Berkeley) 5/16/91 * from: i386/isa sio.c,v 1.234 */ #include "opt_comconsole.h" #include "opt_compat.h" #include "opt_ddb.h" #include "opt_sio.h" #include "card.h" #include "pci.h" #include "sio.h" /* * Serial driver, based on 386BSD-0.1 com driver. * Mostly rewritten to use pseudo-DMA. * Works for National Semiconductor NS8250-NS16550AF UARTs. * COM driver, based on HP dca driver. * * Changes for PC-Card integration: * - Added PC-Card driver table and handlers */ /*=============================================================== * 386BSD(98),FreeBSD-1.1x(98) com driver. * ----- * modified for PC9801 by M.Ishii * Kyoto University Microcomputer Club (KMC) * Chou "TEFUTEFU" Hirotomi * Kyoto Univ. the faculty of medicine *=============================================================== * FreeBSD-2.0.1(98) sio driver. * ----- * modified for pc98 Internal i8251 and MICRO CORE MC16550II * T.Koike(hfc01340@niftyserve.or.jp) * implement kernel device configuration * aizu@orient.center.nitech.ac.jp * * Notes. * ----- * PC98 localization based on 386BSD(98) com driver. Using its PC98 local * functions. * This driver is under debugging,has bugs. * * 1) config * options COM_MULTIPORT #if using MC16550II * device sio0 at nec? port 0x30 tty irq 4 #internal * device sio1 at nec? port 0xd2 tty irq 5 flags 0x101 #mc1 * device sio2 at nec? port 0x8d2 tty flags 0x101 #mc2 * # ~~~~~iobase ~~multi port flag * # ~ master device is sio1 * 2) device * cd /dev; MAKEDEV ttyd0 ttyd1 .. * 3) /etc/rc.serial * 57600bps is too fast for sio0(internal8251) * my ex. * #set default speed 9600 * modem() * : * stty last update: 15 Sep.1995 * * How to configure... * # options COM_MULTIPORT # support for MICROCORE MC16550II * ... comment-out this line, which will conflict with B98_01. * options "B98_01" # support for AIWA B98-01 * device sio1 at nec? port 0x00d1 tty irq ? * device sio2 at nec? port 0x00d5 tty irq ? * ... you can leave these lines `irq ?', irq will be autodetected. */ /* * Modified by Y.Takahashi of Kogakuin University. */ /* * modified for 8251(FIFO) by Seigo TANIMURA */ #ifdef PC98 #define COM_IF_INTERNAL 0x00 #define COM_IF_PC9861K_1 0x01 #define COM_IF_PC9861K_2 0x02 #define COM_IF_IND_SS_1 0x03 #define COM_IF_IND_SS_2 0x04 #define COM_IF_PIO9032B_1 0x05 #define COM_IF_PIO9032B_2 0x06 #define COM_IF_B98_01_1 0x07 #define COM_IF_B98_01_2 0x08 #define COM_IF_END1 COM_IF_B98_01_2 #define COM_IF_RSA98 0x10 /* same as COM_IF_NS16550 */ #define COM_IF_NS16550 0x11 #define COM_IF_SECOND_CCU 0x12 /* same as COM_IF_NS16550 */ #define COM_IF_MC16550II 0x13 #define COM_IF_MCRS98 0x14 /* same as COM_IF_MC16550II */ #define COM_IF_RSB3000 0x15 #define COM_IF_RSB384 0x16 #define COM_IF_MODEM_CARD 0x17 /* same as COM_IF_NS16550 */ #define COM_IF_RSA98III 0x18 #define COM_IF_ESP98 0x19 #define COM_IF_END2 COM_IF_ESP98 #endif /* PC98 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PC98 #include #include #include #else #include #endif #include #if NPCI > 0 #include #include #endif #include #include #include #ifndef SMP #include #endif #include #include #ifdef COM_ESP #include #endif #include #ifdef PC98 #include #endif #ifndef __i386__ #define disable_intr() #define enable_intr() #endif #ifdef SMP #define disable_intr() COM_DISABLE_INTR() #define enable_intr() COM_ENABLE_INTR() #endif /* SMP */ #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ #define CALLOUT_MASK 0x80 #define CONTROL_MASK 0x60 #define CONTROL_INIT_STATE 0x20 #define CONTROL_LOCK_STATE 0x40 #define DEV_TO_UNIT(dev) (MINOR_TO_UNIT(minor(dev))) #define MINOR_MAGIC_MASK (CALLOUT_MASK | CONTROL_MASK) #define MINOR_TO_UNIT(mynor) ((mynor) & ~MINOR_MAGIC_MASK) #ifdef COM_MULTIPORT /* checks in flags for multiport and which is multiport "master chip" * for a given card */ #define COM_ISMULTIPORT(flags) ((flags) & 0x01) #define COM_MPMASTER(flags) (((flags) >> 8) & 0x0ff) #define COM_NOTAST4(flags) ((flags) & 0x04) #endif /* COM_MULTIPORT */ #define COM_CONSOLE(flags) ((flags) & 0x10) #define COM_FORCECONSOLE(flags) ((flags) & 0x20) #define COM_LLCONSOLE(flags) ((flags) & 0x40) #define COM_DEBUGGER(flags) ((flags) & 0x80) #define COM_LOSESOUTINTS(flags) ((flags) & 0x08) #define COM_NOFIFO(flags) ((flags) & 0x02) #define COM_ST16650A(flags) ((flags) & 0x20000) #define COM_C_NOPROBE (0x40000) #define COM_NOPROBE(flags) ((flags) & COM_C_NOPROBE) #define COM_C_IIR_TXRDYBUG (0x80000) #define COM_IIR_TXRDYBUG(flags) ((flags) & COM_C_IIR_TXRDYBUG) #define COM_FIFOSIZE(flags) (((flags) & 0xff000000) >> 24) #ifdef PC98 #define com_emr com_msr /* Extension mode register for RSB-2000/3000 */ #endif #define com_scr 7 /* scratch register for 16450-16550 (R/W) */ #define sio_getreg(com, off) \ (bus_space_read_1((com)->bst, (com)->bsh, (off))) #define sio_setreg(com, off, value) \ (bus_space_write_1((com)->bst, (com)->bsh, (off), (value))) /* * com state bits. * (CS_BUSY | CS_TTGO) and (CS_BUSY | CS_TTGO | CS_ODEVREADY) must be higher * than the other bits so that they can be tested as a group without masking * off the low bits. * * The following com and tty flags correspond closely: * CS_BUSY = TS_BUSY (maintained by comstart(), siopoll() and * comstop()) * CS_TTGO = ~TS_TTSTOP (maintained by comparam() and comstart()) * CS_CTS_OFLOW = CCTS_OFLOW (maintained by comparam()) * CS_RTS_IFLOW = CRTS_IFLOW (maintained by comparam()) * TS_FLUSH is not used. * XXX I think TIOCSETA doesn't clear TS_TTSTOP when it clears IXON. * XXX CS_*FLOW should be CF_*FLOW in com->flags (control flags not state). */ #define CS_BUSY 0x80 /* output in progress */ #define CS_TTGO 0x40 /* output not stopped by XOFF */ #define CS_ODEVREADY 0x20 /* external device h/w ready (CTS) */ #define CS_CHECKMSR 1 /* check of MSR scheduled */ #define CS_CTS_OFLOW 2 /* use CTS output flow control */ #define CS_DTR_OFF 0x10 /* DTR held off */ #define CS_ODONE 4 /* output completed */ #define CS_RTS_IFLOW 8 /* use RTS input flow control */ #define CSE_BUSYCHECK 1 /* siobusycheck() scheduled */ static char const * const error_desc[] = { #define CE_OVERRUN 0 "silo overflow", #define CE_INTERRUPT_BUF_OVERFLOW 1 "interrupt-level buffer overflow", #define CE_TTY_BUF_OVERFLOW 2 "tty-level buffer overflow", }; #define CE_NTYPES 3 #define CE_RECORD(com, errnum) (++(com)->delta_error_counts[errnum]) /* types. XXX - should be elsewhere */ typedef u_int Port_t; /* hardware port */ typedef u_char bool_t; /* boolean */ /* queue of linear buffers */ struct lbq { u_char *l_head; /* next char to process */ u_char *l_tail; /* one past the last char to process */ struct lbq *l_next; /* next in queue */ bool_t l_queued; /* nonzero if queued */ }; /* com device structure */ struct com_s { u_int flags; /* Copy isa device flags */ u_char state; /* miscellaneous flag bits */ bool_t active_out; /* nonzero if the callout device is open */ u_char cfcr_image; /* copy of value written to CFCR */ #ifdef COM_ESP bool_t esp; /* is this unit a hayes esp board? */ #endif u_char extra_state; /* more flag bits, separate for order trick */ u_char fifo_image; /* copy of value written to FIFO */ bool_t hasfifo; /* nonzero for 16550 UARTs */ bool_t st16650a; /* Is a Startech 16650A or RTS/CTS compat */ bool_t loses_outints; /* nonzero if device loses output interrupts */ u_char mcr_image; /* copy of value written to MCR */ #ifdef COM_MULTIPORT bool_t multiport; /* is this unit part of a multiport device? */ #endif /* COM_MULTIPORT */ bool_t no_irq; /* nonzero if irq is not attached */ bool_t gone; /* hardware disappeared */ bool_t poll; /* nonzero if polling is required */ bool_t poll_output; /* nonzero if polling for output is required */ int unit; /* unit number */ int dtr_wait; /* time to hold DTR down on close (* 1/hz) */ u_int tx_fifo_size; u_int wopeners; /* # processes waiting for DCD in open() */ /* * The high level of the driver never reads status registers directly * because there would be too many side effects to handle conveniently. * Instead, it reads copies of the registers stored here by the * interrupt handler. */ u_char last_modem_status; /* last MSR read by intr handler */ u_char prev_modem_status; /* last MSR handled by high level */ u_char hotchar; /* ldisc-specific char to be handled ASAP */ u_char *ibuf; /* start of input buffer */ u_char *ibufend; /* end of input buffer */ u_char *ibufold; /* old input buffer, to be freed */ u_char *ihighwater; /* threshold in input buffer */ u_char *iptr; /* next free spot in input buffer */ int ibufsize; /* size of ibuf (not include error bytes) */ int ierroff; /* offset of error bytes in ibuf */ struct lbq obufq; /* head of queue of output buffers */ struct lbq obufs[2]; /* output buffers */ bus_space_tag_t bst; bus_space_handle_t bsh; #ifdef PC98 Port_t cmd_port; Port_t sts_port; Port_t in_modem_port; Port_t intr_ctrl_port; Port_t rsabase; /* iobase address of a I/O-DATA RSA board */ int intr_enable; int pc98_prev_modem_status; int pc98_modem_delta; int modem_car_chg_timer; int pc98_prev_siocmd; int pc98_prev_siomod; int modem_checking; int pc98_if_type; bool_t pc98_8251fifo; bool_t pc98_8251fifo_enable; #endif /* PC98 */ Port_t data_port; /* i/o ports */ #ifdef COM_ESP Port_t esp_port; #endif Port_t int_id_port; Port_t modem_ctl_port; Port_t line_status_port; Port_t modem_status_port; Port_t intr_ctl_port; /* Ports of IIR register */ struct tty *tp; /* cross reference */ /* Initial state. */ struct termios it_in; /* should be in struct tty */ struct termios it_out; /* Lock state. */ struct termios lt_in; /* should be in struct tty */ struct termios lt_out; bool_t do_timestamp; bool_t do_dcd_timestamp; struct timeval timestamp; struct timeval dcd_timestamp; struct pps_state pps; u_long bytes_in; /* statistics */ u_long bytes_out; u_int delta_error_counts[CE_NTYPES]; u_long error_counts[CE_NTYPES]; struct resource *irqres; struct resource *ioportres; void *cookie; /* * Data area for output buffers. Someday we should build the output * buffer queue without copying data. */ #ifdef PC98 int obufsize; u_char *obuf1; u_char *obuf2; #else u_char obuf1[256]; u_char obuf2[256]; #endif }; #ifdef COM_ESP static int espattach __P((struct com_s *com, Port_t esp_port)); #endif static int sioattach __P((device_t dev, int rid)); static int sio_isa_attach __P((device_t dev)); static timeout_t siobusycheck; static timeout_t siodtrwakeup; static void comhardclose __P((struct com_s *com)); static void sioinput __P((struct com_s *com)); static void siointr1 __P((struct com_s *com)); static void siointr __P((void *arg)); static int commctl __P((struct com_s *com, int bits, int how)); static int comparam __P((struct tty *tp, struct termios *t)); static swihand_t siopoll; static int sioprobe __P((device_t dev, int xrid)); static int sio_isa_probe __P((device_t dev)); static void siosettimeout __P((void)); static int siosetwater __P((struct com_s *com, speed_t speed)); static void comstart __P((struct tty *tp)); static void comstop __P((struct tty *tp, int rw)); static timeout_t comwakeup; static void disc_optim __P((struct tty *tp, struct termios *t, struct com_s *com)); #if NCARD > 0 static int sio_pccard_attach __P((device_t dev)); static int sio_pccard_detach __P((device_t dev)); static int sio_pccard_probe __P((device_t dev)); #endif /* NCARD > 0 */ #if NPCI > 0 static int sio_pci_attach __P((device_t dev)); static void sio_pci_kludge_unit __P((device_t dev)); static int sio_pci_probe __P((device_t dev)); #endif /* NPCI > 0 */ static char driver_name[] = "sio"; /* table and macro for fast conversion from a unit number to its com struct */ static devclass_t sio_devclass; #define com_addr(unit) ((struct com_s *) \ devclass_get_softc(sio_devclass, unit)) static device_method_t sio_isa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_isa_probe), DEVMETHOD(device_attach, sio_isa_attach), { 0, 0 } }; static driver_t sio_isa_driver = { driver_name, sio_isa_methods, sizeof(struct com_s), }; #if NCARD > 0 static device_method_t sio_pccard_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_pccard_probe), DEVMETHOD(device_attach, sio_pccard_attach), DEVMETHOD(device_detach, sio_pccard_detach), { 0, 0 } }; static driver_t sio_pccard_driver = { driver_name, sio_pccard_methods, sizeof(struct com_s), }; #endif /* NCARD > 0 */ #if NPCI > 0 static device_method_t sio_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sio_pci_probe), DEVMETHOD(device_attach, sio_pci_attach), { 0, 0 } }; static driver_t sio_pci_driver = { driver_name, sio_pci_methods, sizeof(struct com_s), }; #endif /* NPCI > 0 */ static d_open_t sioopen; static d_close_t sioclose; static d_read_t sioread; static d_write_t siowrite; static d_ioctl_t sioioctl; #define CDEV_MAJOR 28 static struct cdevsw sio_cdevsw = { /* open */ sioopen, /* close */ sioclose, /* read */ sioread, /* write */ siowrite, /* ioctl */ sioioctl, /* poll */ ttypoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ driver_name, /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ D_TTY, /* bmaj */ -1 }; int comconsole = -1; static volatile speed_t comdefaultrate = CONSPEED; #ifdef __alpha__ static volatile speed_t gdbdefaultrate = CONSPEED; #endif static u_int com_events; /* input chars + weighted output completions */ static Port_t siocniobase; static int siocnunit; static Port_t siogdbiobase; static int siogdbunit = -1; static bool_t sio_registered; static int sio_timeout; static int sio_timeouts_until_log; static struct callout_handle sio_timeout_handle = CALLOUT_HANDLE_INITIALIZER(&sio_timeout_handle); static int sio_numunits; #ifdef PC98 struct siodev { short if_type; short irq; Port_t cmd, sts, ctrl, mod; }; static int sysclock; #define COM_INT_DISABLE {int previpri; previpri=spltty(); #define COM_INT_ENABLE splx(previpri);} #define IEN_TxFLAG IEN_Tx #define COM_CARRIER_DETECT_EMULATE 0 #define PC98_CHECK_MODEM_INTERVAL (hz/10) #define DCD_OFF_TOLERANCE 2 #define DCD_ON_RECOGNITION 2 #define GET_IFTYPE(flags) ((flags >> 24) & 0x1f) #define SET_IFTYPE(type) (type << 24) #define IS_8251(if_type) (!(if_type & 0x10)) #define COM1_EXT_CLOCK 0x40000 static void commint __P((dev_t dev)); static void com_tiocm_set __P((struct com_s *com, int msr)); static void com_tiocm_bis __P((struct com_s *com, int msr)); static void com_tiocm_bic __P((struct com_s *com, int msr)); static int com_tiocm_get __P((struct com_s *com)); static int com_tiocm_get_delta __P((struct com_s *com)); static void pc98_msrint_start __P((dev_t dev)); static void com_cflag_and_speed_set __P((struct com_s *com, int cflag, int speed)); static int pc98_ttspeedtab __P((struct com_s *com, int speed)); static int pc98_get_modem_status __P((struct com_s *com)); static timeout_t pc98_check_msr; static void pc98_set_baud_rate __P((struct com_s *com, int count)); static void pc98_i8251_reset __P((struct com_s *com, int mode, int command)); static void pc98_disable_i8251_interrupt __P((struct com_s *com, int mod)); static void pc98_enable_i8251_interrupt __P((struct com_s *com, int mod)); static int pc98_check_i8251_interrupt __P((struct com_s *com)); static int pc98_i8251_get_cmd __P((struct com_s *com)); static int pc98_i8251_get_mod __P((struct com_s *com)); static void pc98_i8251_set_cmd __P((struct com_s *com, int x)); static void pc98_i8251_or_cmd __P((struct com_s *com, int x)); static void pc98_i8251_clear_cmd __P((struct com_s *com, int x)); static void pc98_i8251_clear_or_cmd __P((struct com_s *com, int clr, int x)); static int pc98_check_if_type __P((device_t dev, struct siodev *iod)); static int pc98_check_8251vfast __P((void)); static int pc98_check_8251fifo __P((void)); static void pc98_check_sysclock __P((void)); static void pc98_set_ioport __P((struct com_s *com)); #define com_int_Tx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Tx|IEN_TxEMP) #define com_int_Tx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_TxFLAG) #define com_int_Rx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Rx) #define com_int_Rx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_Rx) #define com_int_TxRx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Tx|IEN_TxEMP|IEN_Rx) #define com_int_TxRx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_TxFLAG|IEN_Rx) #define com_send_break_on(com) \ pc98_i8251_or_cmd(com,CMD8251_SBRK) #define com_send_break_off(com) \ pc98_i8251_clear_cmd(com,CMD8251_SBRK) static struct speedtab pc98speedtab[] = { /* internal RS232C interface */ { 0, 0, }, { 50, 50, }, { 75, 75, }, { 150, 150, }, { 200, 200, }, { 300, 300, }, { 600, 600, }, { 1200, 1200, }, { 2400, 2400, }, { 4800, 4800, }, { 9600, 9600, }, { 19200, 19200, }, { 38400, 38400, }, { 51200, 51200, }, { 76800, 76800, }, { 20800, 20800, }, { 31200, 31200, }, { 41600, 41600, }, { 62400, 62400, }, { -1, -1 } }; static struct speedtab pc98fast_speedtab[] = { { 9600, 0x80 | COMBRD(9600), }, { 19200, 0x80 | COMBRD(19200), }, { 38400, 0x80 | COMBRD(38400), }, { 57600, 0x80 | COMBRD(57600), }, { 115200, 0x80 | COMBRD(115200), }, { -1, -1 } }; static struct speedtab comspeedtab_pio9032b[] = { { 300, 6, }, { 600, 5, }, { 1200, 4, }, { 2400, 3, }, { 4800, 2, }, { 9600, 1, }, { 19200, 0, }, { 38400, 7, }, { -1, -1 } }; static struct speedtab comspeedtab_b98_01[] = { { 75, 11, }, { 150, 10, }, { 300, 9, }, { 600, 8, }, { 1200, 7, }, { 2400, 6, }, { 4800, 5, }, { 9600, 4, }, { 19200, 3, }, { 38400, 2, }, { 76800, 1, }, { 153600, 0, }, { -1, -1 } }; static struct speedtab comspeedtab_mc16550[] = { { 300, 1536, }, { 600, 768, }, { 1200, 384, }, { 2400, 192, }, { 4800, 96, }, { 9600, 48, }, { 19200, 24, }, { 38400, 12, }, { 57600, 8, }, { 115200, 4, }, { 153600, 3, }, { 230400, 2, }, { 460800, 1, }, { -1, -1 } }; static struct speedtab comspeedtab_rsb384[] = { { 300, 3840, }, { 600, 1920, }, { 1200, 960, }, { 2400, 480, }, { 4800, 240, }, { 9600, 120, }, { 19200, 60, }, { 38400, 30, }, { 57600, 20, }, { 115200, 10, }, { 128000, 9, }, { 144000, 8, }, { 192000, 6, }, { 230400, 5, }, { 288000, 4, }, { 384000, 3, }, { 576000, 2, }, { 1152000, 1, }, { -1, -1 } }; static struct speedtab comspeedtab_rsa[] = { { 0, 0 }, { 50, COMBRD_RSA(50) }, { 75, COMBRD_RSA(75) }, { 110, COMBRD_RSA(110) }, { 134, COMBRD_RSA(134) }, { 150, COMBRD_RSA(150) }, { 200, COMBRD_RSA(200) }, { 300, COMBRD_RSA(300) }, { 600, COMBRD_RSA(600) }, { 1200, COMBRD_RSA(1200) }, { 1800, COMBRD_RSA(1800) }, { 2400, COMBRD_RSA(2400) }, { 4800, COMBRD_RSA(4800) }, { 9600, COMBRD_RSA(9600) }, { 19200, COMBRD_RSA(19200) }, { 38400, COMBRD_RSA(38400) }, { 57600, COMBRD_RSA(57600) }, { 115200, COMBRD_RSA(115200) }, { 230400, COMBRD_RSA(230400) }, { 460800, COMBRD_RSA(460800) }, { 921600, COMBRD_RSA(921600) }, { -1, -1 } }; #endif /* PC98 */ static struct speedtab comspeedtab[] = { { 0, 0 }, { 50, COMBRD(50) }, { 75, COMBRD(75) }, { 110, COMBRD(110) }, { 134, COMBRD(134) }, { 150, COMBRD(150) }, { 200, COMBRD(200) }, { 300, COMBRD(300) }, { 600, COMBRD(600) }, { 1200, COMBRD(1200) }, { 1800, COMBRD(1800) }, { 2400, COMBRD(2400) }, { 4800, COMBRD(4800) }, { 9600, COMBRD(9600) }, { 19200, COMBRD(19200) }, { 38400, COMBRD(38400) }, { 57600, COMBRD(57600) }, { 115200, COMBRD(115200) }, { -1, -1 } }; #ifdef PC98 struct { char *name; short port_table[7]; short irr_mask; struct speedtab *speedtab; short check_irq; } if_8251_type[] = { /* COM_IF_INTERNAL */ { " (internal)", {0x30, 0x32, 0x32, 0x33, 0x35, -1, -1}, -1, pc98speedtab, 1 }, /* COM_IF_PC9861K_1 */ { " (PC9861K)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, -1, -1}, 3, NULL, 1 }, /* COM_IF_PC9861K_2 */ { " (PC9861K)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, -1, -1}, 3, NULL, 1 }, /* COM_IF_IND_SS_1 */ { " (IND-SS)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xb3, -1}, 3, comspeedtab_mc16550, 1 }, /* COM_IF_IND_SS_2 */ { " (IND-SS)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xbb, -1}, 3, comspeedtab_mc16550, 1 }, /* COM_IF_PIO9032B_1 */ { " (PIO9032B)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xb8, -1}, 7, comspeedtab_pio9032b, 1 }, /* COM_IF_PIO9032B_2 */ { " (PIO9032B)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xba, -1}, 7, comspeedtab_pio9032b, 1 }, /* COM_IF_B98_01_1 */ { " (B98-01)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xd1, 0xd3}, 7, comspeedtab_b98_01, 0 }, /* COM_IF_B98_01_2 */ { " (B98-01)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xd5, 0xd7}, 7, comspeedtab_b98_01, 0 }, }; #define PC98SIO_data_port(type) (if_8251_type[type].port_table[0]) #define PC98SIO_cmd_port(type) (if_8251_type[type].port_table[1]) #define PC98SIO_sts_port(type) (if_8251_type[type].port_table[2]) #define PC98SIO_in_modem_port(type) (if_8251_type[type].port_table[3]) #define PC98SIO_intr_ctrl_port(type) (if_8251_type[type].port_table[4]) #define PC98SIO_baud_rate_port(type) (if_8251_type[type].port_table[5]) #define PC98SIO_func_port(type) (if_8251_type[type].port_table[6]) #define I8251F_data 0x130 #define I8251F_lsr 0x132 #define I8251F_msr 0x134 #define I8251F_iir 0x136 #define I8251F_fcr 0x138 #define I8251F_div 0x13a static bus_addr_t port_table_0[] = {0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007}; static bus_addr_t port_table_1[] = {0x000, 0x002, 0x004, 0x006, 0x008, 0x00a, 0x00c, 0x00e}; static bus_addr_t port_table_8[] = {0x000, 0x100, 0x200, 0x300, 0x400, 0x500, 0x600, 0x700}; static bus_addr_t port_table_rsa[] = { 0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f, 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007 }; struct { char *name; short irr_read; short irr_write; bus_addr_t *iat; bus_size_t iatsz; struct speedtab *speedtab; } if_16550a_type[] = { /* COM_IF_RSA98 */ {" (RSA-98)", -1, -1, port_table_0, IO_COMSIZE, comspeedtab}, /* COM_IF_NS16550 */ {"", -1, -1, port_table_0, IO_COMSIZE, comspeedtab}, /* COM_IF_SECOND_CCU */ {"", -1, -1, port_table_0, IO_COMSIZE, comspeedtab}, /* COM_IF_MC16550II */ {" (MC16550II)", -1, 0x1000, port_table_8, IO_COMSIZE, comspeedtab_mc16550}, /* COM_IF_MCRS98 */ {" (MC-RS98)", -1, 0x1000, port_table_8, IO_COMSIZE, comspeedtab_mc16550}, /* COM_IF_RSB3000 */ {" (RSB-3000)", 0xbf, -1, port_table_1, IO_COMSIZE, comspeedtab_rsb384}, /* COM_IF_RSB384 */ {" (RSB-384)", 0xbf, -1, port_table_1, IO_COMSIZE, comspeedtab_rsb384}, /* COM_IF_MODEM_CARD */ {"", -1, -1, port_table_0, IO_COMSIZE, comspeedtab}, /* COM_IF_RSA98III */ {" (RSA-98III)", -1, -1, port_table_rsa, 16, comspeedtab_rsa}, /* COM_IF_ESP98 */ {" (ESP98)", -1, -1, port_table_1, IO_COMSIZE, comspeedtab_mc16550}, }; #endif /* PC98 */ #ifdef COM_ESP #ifdef PC98 /* XXX configure this properly. */ static Port_t likely_com_ports[] = { 0, 0xb0, 0xb1, 0 }; static Port_t likely_esp_ports[] = { 0xc0d0, 0 }; #define ESP98_CMD1 (ESP_CMD1 * 0x100) #define ESP98_CMD2 (ESP_CMD2 * 0x100) #define ESP98_STATUS1 (ESP_STATUS1 * 0x100) #define ESP98_STATUS2 (ESP_STATUS2 * 0x100) #else /* PC98 */ /* XXX configure this properly. */ static Port_t likely_com_ports[] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, }; static Port_t likely_esp_ports[] = { 0x140, 0x180, 0x280, 0 }; #endif /* PC98 */ #endif /* * handle sysctl read/write requests for console speed * * In addition to setting comdefaultrate for I/O through /dev/console, * also set the initial and lock values for the /dev/ttyXX device * if there is one associated with the console. Finally, if the /dev/tty * device has already been open, change the speed on the open running port * itself. */ static int -sysctl_machdep_comdefaultrate (SYSCTL_HANDLER_ARGS) +sysctl_machdep_comdefaultrate(SYSCTL_HANDLER_ARGS) { int error, s; speed_t newspeed; struct com_s *com; struct tty *tp; newspeed = comdefaultrate; error = sysctl_handle_opaque(oidp, &newspeed, sizeof newspeed, req); if (error || !req->newptr) return (error); comdefaultrate = newspeed; if (comconsole < 0) /* serial console not selected? */ return (0); com = com_addr(comconsole); if (com == NULL) return (ENXIO); /* * set the initial and lock rates for /dev/ttydXX and /dev/cuaXX * (note, the lock rates really are boolean -- if non-zero, disallow * speed changes) */ com->it_in.c_ispeed = com->it_in.c_ospeed = com->lt_in.c_ispeed = com->lt_in.c_ospeed = com->it_out.c_ispeed = com->it_out.c_ospeed = com->lt_out.c_ispeed = com->lt_out.c_ospeed = comdefaultrate; /* * if we're open, change the running rate too */ tp = com->tp; if (tp && (tp->t_state & TS_ISOPEN)) { tp->t_termios.c_ispeed = tp->t_termios.c_ospeed = comdefaultrate; s = spltty(); error = comparam(tp, &tp->t_termios); splx(s); } return error; } SYSCTL_PROC(_machdep, OID_AUTO, conspeed, CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_machdep_comdefaultrate, "I", ""); #define SET_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) | (bit)) #define CLR_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) & ~(bit)) #if NCARD > 0 static int sio_pccard_probe(dev) device_t dev; { /* Do not probe IRQ - pccard doesn't turn on the interrupt line */ /* until bus_setup_intr */ SET_FLAG(dev, COM_C_NOPROBE); return (sioprobe(dev, 0)); } static int sio_pccard_attach(dev) device_t dev; { return (sioattach(dev, 0)); } /* * sio_detach - unload the driver and clear the table. * XXX TODO: * This is usually called when the card is ejected, but * can be caused by a modunload of a controller driver. * The idea is to reset the driver's view of the device * and ensure that any driver entry points such as * read and write do not hang. */ static int sio_pccard_detach(dev) device_t dev; { struct com_s *com; com = (struct com_s *) device_get_softc(dev); if (com == NULL) { device_printf(dev, "NULL com in siounload\n"); return (0); } com->gone = 1; if (com->irqres) { bus_teardown_intr(dev, com->irqres, com->cookie); bus_release_resource(dev, SYS_RES_IRQ, 0, com->irqres); } if (com->ioportres) bus_release_resource(dev, SYS_RES_IOPORT, 0, com->ioportres); if (com->tp && (com->tp->t_state & TS_ISOPEN)) { device_printf(dev, "still open, forcing close\n"); com->tp->t_gen++; ttyclose(com->tp); ttwakeup(com->tp); ttwwakeup(com->tp); } else { if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); } device_printf(dev, "unloaded\n"); return (0); } #endif /* NCARD > 0 */ #if NPCI > 0 struct pci_ids { u_int32_t type; const char *desc; int rid; }; static struct pci_ids pci_ids[] = { { 0x100812b9, "3COM PCI FaxModem", 0x10 }, { 0x048011c1, "ActionTec 56k FAX PCI Modem", 0x14 }, { 0x00000000, NULL, 0 } }; static int sio_pci_attach(dev) device_t dev; { u_int32_t type; struct pci_ids *id; type = pci_get_devid(dev); id = pci_ids; while (id->type && id->type != type) id++; if (id->desc == NULL) return (ENXIO); sio_pci_kludge_unit(dev); return (sioattach(dev, id->rid)); } /* * Don't cut and paste this to other drivers. It is a horrible kludge * which will fail to work and also be unnecessary in future versions. */ static void sio_pci_kludge_unit(dev) device_t dev; { devclass_t dc; int err; int start; int unit; unit = 0; start = 0; while (resource_int_value("sio", unit, "port", &start) == 0 && start > 0) unit++; if (device_get_unit(dev) < unit) { dc = device_get_devclass(dev); while (devclass_get_device(dc, unit)) unit++; device_printf(dev, "moving to sio%d\n", unit); err = device_set_unit(dev, unit); /* EVIL DO NOT COPY */ if (err) device_printf(dev, "error moving device %d\n", err); } } static int sio_pci_probe(dev) device_t dev; { u_int32_t type; struct pci_ids *id; type = pci_get_devid(dev); id = pci_ids; while (id->type && id->type != type) id++; if (id->desc == NULL) return (ENXIO); device_set_desc(dev, id->desc); return (sioprobe(dev, id->rid)); } #endif /* NPCI > 0 */ static struct isa_pnp_id sio_ids[] = { {0x0005d041, "Standard PC COM port"}, /* PNP0500 */ {0x0105d041, "16550A-compatible COM port"}, /* PNP0501 */ {0x0205d041, "Multiport serial device (non-intelligent 16550)"}, /* PNP0502 */ {0x1005d041, "Generic IRDA-compatible device"}, /* PNP0510 */ {0x1105d041, "Generic IRDA-compatible device"}, /* PNP0511 */ /* Devices that do not have a compatid */ {0x7602a904, NULL}, /* AEI0276 - 56K v.90 Fax Modem (LKT) */ {0x00007905, NULL}, /* AKY0000 - 56K Plug&Play Modem */ {0x01405407, NULL}, /* AZT4001 - AZT3000 PnP SOUND DEVICE, MODEM */ {0x56039008, NULL}, /* BDP0356 - Best Data 56x2 */ {0x36339008, NULL}, /* BDP3336 - Best Data Prods. 336F */ {0x0014490a, NULL}, /* BRI1400 - Boca 33.6 PnP */ {0x0015490a, NULL}, /* BRI1500 - Internal Fax Data */ {0x0034490a, NULL}, /* BRI3400 - Internal ACF Modem */ {0x00b4490a, NULL}, /* BRIB400 - Boca 56k PnP */ {0x0030320d, NULL}, /* CIR3000 - Cirrus Logic V43 */ {0x0100440e, NULL}, /* CRD0001 - Cardinal MVP288IV ? */ {0x1200c31e, NULL}, /* GVC0012 - VF1128HV-R9 (win modem?) */ {0x0303c31e, NULL}, /* GVC0303 - MaxTech 33.6 PnP D/F/V */ {0x0505c31e, NULL}, /* GVC0505 - GVC 56k Faxmodem */ {0x0050c31e, NULL}, /* GVC5000 - some GVC modem */ {0x3800f91e, NULL}, /* GWY0038 - Telepath with v.90 */ {0x9062f91e, NULL}, /* GWY6290 - Telepath with x2 Technology */ {0x0000f435, NULL}, /* MOT0000 - Motorola ModemSURFR 33.6 Intern */ {0x5015f435, NULL}, /* MOT1550 - Motorola ModemSURFR 56K Modem */ {0xf015f435, NULL}, /* MOT15F0 - Motorola VoiceSURFR 56K Modem */ {0x6045f435, NULL}, /* MOT4560 - Motorola ? */ {0x61e7a338, NULL}, /* NECE761 - 33.6Modem */ {0x39804f3f, NULL}, /* OZO8039 - Zoom 56k flex */ {0x3024a341, NULL}, /* PMC2430 - Pace 56 Voice Internal Modem */ {0x1000eb49, NULL}, /* ROK0010 - Rockwell ? */ {0x5002734a, NULL}, /* RSS0250 - 5614Jx3(G) Internal Modem */ {0xc100ad4d, NULL}, /* SMM00C1 - Leopard 56k PnP */ {0x9012b04e, NULL}, /* SUP1290 - Supra ? */ {0x1013b04e, NULL}, /* SUP1310 - SupraExpress 336i PnP */ {0x8013b04e, NULL}, /* SUP1380 - SupraExpress 288i PnP Voice */ {0x8113b04e, NULL}, /* SUP1381 - SupraExpress 336i PnP Voice */ {0x5016b04e, NULL}, /* SUP1650 - Supra 336i Sp Intl */ {0x7420b04e, NULL}, /* SUP2070 - Supra ? */ {0x8020b04e, NULL}, /* SUP2080 - Supra ? */ {0x8420b04e, NULL}, /* SUP2084 - SupraExpress 56i PnP */ {0x7121b04e, NULL}, /* SUP2171 - SupraExpress 56i Sp? */ {0x8024b04e, NULL}, /* SUP2480 - Supra ? */ {0x01007256, NULL}, /* USR0001 - U.S. Robotics Inc., Sportster W */ {0x02007256, NULL}, /* USR0002 - U.S. Robotics Inc. Sportster 33. */ {0x04007256, NULL}, /* USR0004 - USR Sportster 14.4k */ {0x06007256, NULL}, /* USR0006 - USR Sportster 33.6k */ {0x11007256, NULL}, /* USR0011 - USR ? */ {0x01017256, NULL}, /* USR0101 - USR ? */ {0x30207256, NULL}, /* USR2030 - U.S.Robotics Inc. Sportster 560 */ {0x50207256, NULL}, /* USR2050 - U.S.Robotics Inc. Sportster 33. */ {0x70207256, NULL}, /* USR2070 - U.S.Robotics Inc. Sportster 560 */ {0x30307256, NULL}, /* USR3030 - U.S. Robotics 56K FAX INT */ {0x31307256, NULL}, /* USR3031 - U.S. Robotics 56K FAX INT */ {0x50307256, NULL}, /* USR3050 - U.S. Robotics 56K FAX INT */ {0x70307256, NULL}, /* USR3070 - U.S. Robotics 56K Voice INT */ {0x90307256, NULL}, /* USR3090 - USR ? */ {0x90917256, NULL}, /* USR9190 - USR 56k Voice INT */ {0x0300695c, NULL}, /* WCI0003 - Fax/Voice/Modem/Speakphone/Asvd */ {0x61f7896a, NULL}, /* ZTIF761 - Zoom ComStar 33.6 */ #ifdef PC98 {0x0100e4a5, "RSA-98III"}, #endif {0} }; static int sio_isa_probe(dev) device_t dev; { #ifdef PC98 int logical_id; #endif /* Check isapnp ids */ if (ISA_PNP_PROBE(device_get_parent(dev), dev, sio_ids) == ENXIO) return (ENXIO); #ifdef PC98 logical_id = isa_get_logicalid(dev); if (logical_id == 0x0100e4a5) /* RSA-98III */ device_set_flags(dev, SET_IFTYPE(COM_IF_RSA98III)); #endif return (sioprobe(dev, 0)); } static int sioprobe(dev, xrid) device_t dev; int xrid; { #if 0 static bool_t already_init; device_t xdev; #endif struct com_s *com; bool_t failures[10]; int fn; device_t idev; Port_t iobase; intrmask_t irqmap[4]; intrmask_t irqs; u_char mcr_image; int result; u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; #ifdef PC98 int tmp; struct siodev iod; #endif #ifdef PC98 iod.if_type = GET_IFTYPE(flags); if ((iod.if_type < 0 || iod.if_type > COM_IF_END1) && (iod.if_type < 0x10 || iod.if_type > COM_IF_END2)) return ENXIO; #endif rid = xrid; #ifdef PC98 if (IS_8251(iod.if_type)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, 1, RF_ACTIVE); } else if (iod.if_type == COM_IF_RSA98III || isa_get_vendorid(dev)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, if_16550a_type[iod.if_type & 0x0f].iatsz, RF_ACTIVE); } else { port = isa_alloc_resourcev(dev, SYS_RES_IOPORT, &rid, if_16550a_type[iod.if_type & 0x0f].iat, if_16550a_type[iod.if_type & 0x0f].iatsz, RF_ACTIVE); } #else port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); #endif if (!port) return (ENXIO); #ifdef PC98 if (!IS_8251(iod.if_type)) { if (isa_load_resourcev(port, if_16550a_type[iod.if_type & 0x0f].iat, if_16550a_type[iod.if_type & 0x0f].iatsz) != 0) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } } #endif com = device_get_softc(dev); com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); #if 0 /* * XXX this is broken - when we are first called, there are no * previously configured IO ports. We could hard code * 0x3f8, 0x2f8, 0x3e8, 0x2e8 etc but that's probably worse. * This code has been doing nothing since the conversion since * "count" is zero the first time around. */ if (!already_init) { /* * Turn off MCR_IENABLE for all likely serial ports. An unused * port with its MCR_IENABLE gate open will inhibit interrupts * from any used port that shares the interrupt vector. * XXX the gate enable is elsewhere for some multiports. */ device_t *devs; int count, i, xioport; #ifdef PC98 int xiftype; #endif devclass_get_devices(sio_devclass, &devs, &count); #ifdef PC98 for (i = 0; i < count; i++) { xdev = devs[i]; xioport = bus_get_resource_start(xdev, SYS_RES_IOPORT, 0); xiftype = GET_IFTYPE(device_get_flags(xdev)); if (device_is_enabled(xdev) && xioport > 0) { if (IS_8251(xiftype)) outb((xioport & 0xff00) | PC98SIO_cmd_port(xiftype & 0x0f), 0xf2); else outb(xioport + if_16550a_type[xiftype & 0x0f].iat[com_mcr], 0); } } #else for (i = 0; i < count; i++) { xdev = devs[i]; if (device_is_enabled(xdev) && bus_get_resource(xdev, SYS_RES_IOPORT, 0, &xioport, NULL) == 0) outb(xioport + com_mcr, 0); } #endif free(devs, M_TEMP); already_init = TRUE; } #endif if (COM_LLCONSOLE(flags)) { printf("sio%d: reserved for low-level i/o\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } #ifdef PC98 DELAY(10); /* * If the port is i8251 UART (internal, B98_01) */ if (pc98_check_if_type(dev, &iod) == -1) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } if (iod.irq > 0) bus_set_resource(dev, SYS_RES_IRQ, 0, iod.irq, 1); if (IS_8251(iod.if_type)) { outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, CMD8251_RESET); DELAY(1000); /* for a while...*/ outb(iod.cmd, 0xf2); /* MODE (dummy) */ DELAY(10); outb(iod.cmd, 0x01); /* CMD (dummy) */ DELAY(1000); /* for a while...*/ if (( inb(iod.sts) & STS8251_TxEMP ) == 0 ) { result = (ENXIO); } if (if_8251_type[iod.if_type & 0x0f].check_irq) { COM_INT_DISABLE tmp = ( inb( iod.ctrl ) & ~(IEN_Rx|IEN_TxEMP|IEN_Tx)); outb( iod.ctrl, tmp|IEN_TxEMP ); DELAY(10); result = isa_irq_pending() ? 0 : ENXIO; outb( iod.ctrl, tmp ); COM_INT_ENABLE } else { /* * B98_01 doesn't activate TxEMP interrupt line * when being reset, so we can't check irq pending. */ result = 0; } if (epson_machine_id==0x20) { /* XXX */ result = 0; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return result; } #endif /* PC98 */ /* * If the device is on a multiport card and has an AST/4 * compatible interrupt control register, initialize this * register and prepare to leave MCR_IENABLE clear in the mcr. * Otherwise, prepare to set MCR_IENABLE in the mcr. * Point idev to the device struct giving the correct id_irq. * This is the struct for the master device if there is one. */ idev = dev; mcr_image = MCR_IENABLE; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { #ifndef PC98 Port_t xiobase; u_long io; #endif idev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); if (idev == NULL) { printf("sio%d: master device %d not configured\n", device_get_unit(dev), COM_MPMASTER(flags)); idev = dev; } #ifndef PC98 if (!COM_NOTAST4(flags)) { if (bus_get_resource(idev, SYS_RES_IOPORT, 0, &io, NULL) == 0) { xiobase = io; if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) == 0) outb(xiobase + com_scr, 0x80); else outb(xiobase + com_scr, 0); } mcr_image = 0; } #endif } #endif /* COM_MULTIPORT */ if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) mcr_image = 0; bzero(failures, sizeof failures); iobase = rman_get_start(port); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) { mcr_image = 0; outb(iobase + rsa_msr, 0x04); outb(iobase + rsa_frr, 0x00); if ((inb(iobase + rsa_srr) & 0x36) != 0x36) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } outb(iobase + rsa_ier, 0x00); outb(iobase + rsa_frr, 0x00); outb(iobase + rsa_tivsr, 0x00); outb(iobase + rsa_tcr, 0x00); } tmp = if_16550a_type[iod.if_type & 0x0f].irr_write; if (tmp != -1) { /* MC16550II */ int irqout; switch (isa_get_irq(idev)) { case 3: irqout = 4; break; case 5: irqout = 5; break; case 6: irqout = 6; break; case 12: irqout = 7; break; default: printf("sio%d: irq configuration error\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENXIO); } outb((iobase & 0x00ff) | tmp, irqout); } #endif /* * We don't want to get actual interrupts, just masked ones. * Interrupts from this line should already be masked in the ICU, * but mask them in the processor as well in case there are some * (misconfigured) shared interrupts. */ disable_intr(); /* EXTRA DELAY? */ /* * Initialize the speed and the word size and wait long enough to * drain the maximum of 16 bytes of junk in device output queues. * The speed is undefined after a master reset and must be set * before relying on anything related to output. There may be * junk after a (very fast) soft reboot and (apparently) after * master reset. * XXX what about the UART bug avoided by waiting in comparam()? * We don't want to to wait long enough to drain at 2 bps. */ if (iobase == siocniobase) DELAY((16 + 1) * 1000000 / (comdefaultrate / 10)); else { #ifdef PC98 tmp = ttspeedtab(SIO_TEST_SPEED, if_16550a_type[iod.if_type & 0x0f].speedtab); sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); sio_setreg(com, com_dlbl, tmp & 0xff); sio_setreg(com, com_dlbh, (tmp >> 8) & 0xff); sio_setreg(com, com_cfcr, CFCR_8BITS); #else sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); sio_setreg(com, com_dlbl, COMBRD(SIO_TEST_SPEED) & 0xff); sio_setreg(com, com_dlbh, (u_int) COMBRD(SIO_TEST_SPEED) >> 8); sio_setreg(com, com_cfcr, CFCR_8BITS); #endif DELAY((16 + 1) * 1000000 / (SIO_TEST_SPEED / 10)); } /* * Enable the interrupt gate and disable device interupts. This * should leave the device driving the interrupt line low and * guarantee an edge trigger if an interrupt can be generated. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ irqmap[0] = isa_irq_pending(); /* * Attempt to set loopback mode so that we can send a null byte * without annoying any external device. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image | MCR_LOOPBACK); /* * Attempt to generate an output interrupt. On 8250's, setting * IER_ETXRDY generates an interrupt independent of the current * setting and independent of whether the THR is empty. On 16450's, * setting IER_ETXRDY generates an interrupt independent of the * current setting. On 16550A's, setting IER_ETXRDY only * generates an interrupt when IER_ETXRDY is not already set. */ sio_setreg(com, com_ier, IER_ETXRDY); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) outb(iobase + rsa_ier, 0x04); #endif /* * On some 16x50 incompatibles, setting IER_ETXRDY doesn't generate * an interrupt. They'd better generate one for actually doing * output. Loopback may be broken on the same incompatibles but * it's unlikely to do more than allow the null byte out. */ sio_setreg(com, com_data, 0); DELAY((1 + 2) * 1000000 / (SIO_TEST_SPEED / 10)); /* * Turn off loopback mode so that the interrupt gate works again * (MCR_IENABLE was hidden). This should leave the device driving * an interrupt line high. It doesn't matter if the interrupt * line oscillates while we are not looking at it, since interrupts * are disabled. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); /* * Some pcmcia cards have the "TXRDY bug", so we check everyone * for IIR_TXRDY implementation ( Palido 321s, DC-1S... ) */ if (COM_NOPROBE(flags)) { /* Reading IIR register twice */ for (fn = 0; fn < 2; fn ++) { DELAY(10000); failures[6] = sio_getreg(com, com_iir); } /* Check IIR_TXRDY clear ? */ result = 0; if (failures[6] & IIR_TXRDY) { /* Nop, Double check with clearing IER */ sio_setreg(com, com_ier, 0); if (sio_getreg(com, com_iir) & IIR_NOPEND) { /* Ok. we're familia this gang */ SET_FLAG(dev, COM_C_IIR_TXRDYBUG); } else { /* Unknown, Just omit this chip.. XXX */ result = ENXIO; } } else { /* OK. this is well-known guys */ CLR_FLAG(dev, COM_C_IIR_TXRDYBUG); } sio_setreg(com, com_cfcr, CFCR_8BITS); enable_intr(); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 0 : result); } /* * Check that * o the CFCR, IER and MCR in UART hold the values written to them * (the values happen to be all distinct - this is good for * avoiding false positive tests from bus echoes). * o an output interrupt is generated and its vector is correct. * o the interrupt goes away when the IIR in the UART is read. */ /* EXTRA DELAY? */ failures[0] = sio_getreg(com, com_cfcr) - CFCR_8BITS; failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ irqmap[1] = isa_irq_pending(); failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) inb(iobase + rsa_srr); #endif DELAY(1000); /* XXX */ irqmap[2] = isa_irq_pending(); failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) inb(iobase + rsa_srr); #endif /* * Turn off all device interrupts and check that they go off properly. * Leave MCR_IENABLE alone. For ports without a master port, it gates * the OUT2 output of the UART to * the ICU input. Closing the gate would give a floating ICU input * (unless there is another device driving it) and spurious interrupts. * (On the system that this was first tested on, the input floats high * and gives a (masked) interrupt as soon as the gate is closed.) */ sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); /* dummy to avoid bus echo */ failures[7] = sio_getreg(com, com_ier); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) outb(iobase + rsa_ier, 0x00); #endif DELAY(1000); /* XXX */ irqmap[3] = isa_irq_pending(); failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) { inb(iobase + rsa_srr); outb(iobase + rsa_frr, 0x00); } #endif enable_intr(); irqs = irqmap[1] & ~irqmap[0]; if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && ((1 << xirq) & irqs) == 0) printf( "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", device_get_unit(dev), xirq, irqs); if (bootverbose) printf("sio%d: irq maps: %#x %#x %#x %#x\n", device_get_unit(dev), irqmap[0], irqmap[1], irqmap[2], irqmap[3]); result = 0; for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) { sio_setreg(com, com_mcr, 0); result = ENXIO; if (bootverbose) { printf("sio%d: probe failed test(s):", device_get_unit(dev)); for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) printf(" %d", fn); printf("\n"); } break; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 0 : result); } #ifdef COM_ESP static int espattach(com, esp_port) struct com_s *com; Port_t esp_port; { u_char dips; u_char val; /* * Check the ESP-specific I/O port to see if we're an ESP * card. If not, return failure immediately. */ if ((inb(esp_port) & 0xf3) == 0) { printf(" port 0x%x is not an ESP board?\n", esp_port); return (0); } /* * We've got something that claims to be a Hayes ESP card. * Let's hope so. */ /* Get the dip-switch configuration */ #ifdef PC98 outb(esp_port + ESP98_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP98_STATUS1); #else outb(esp_port + ESP_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP_STATUS1); #endif /* * Bits 0,1 of dips say which COM port we are. */ #ifdef PC98 if ((rman_get_start(com->ioportres) & 0xff) == likely_com_ports[dips & 0x03]) #else if (rman_get_start(com->ioportres) == likely_com_ports[dips & 0x03]) #endif printf(" : ESP"); else { printf(" esp_port has com %d\n", dips & 0x03); return (0); } /* * Check for ESP version 2.0 or later: bits 4,5,6 = 010. */ #ifdef PC98 outb(esp_port + ESP98_CMD1, ESP_GETTEST); val = inb(esp_port + ESP98_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP98_STATUS2); #else outb(esp_port + ESP_CMD1, ESP_GETTEST); val = inb(esp_port + ESP_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP_STATUS2); #endif if ((val & 0x70) < 0x20) { printf("-old (%o)", val & 0x70); return (0); } /* * Check for ability to emulate 16550: bit 7 == 1 */ if ((dips & 0x80) == 0) { printf(" slave"); return (0); } /* * Okay, we seem to be a Hayes ESP card. Whee. */ com->esp = TRUE; com->esp_port = esp_port; return (1); } #endif /* COM_ESP */ static int sio_isa_attach(dev) device_t dev; { return (sioattach(dev, 0)); } static int sioattach(dev, xrid) device_t dev; int xrid; { struct com_s *com; #ifdef COM_ESP Port_t *espp; #endif Port_t iobase; int unit; u_int flags; int rid; struct resource *port; int ret; #ifdef PC98 u_char *obuf; u_long obufsize; int if_type = GET_IFTYPE(device_get_flags(dev)); #endif rid = xrid; #ifdef PC98 if (IS_8251(if_type)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, 1, RF_ACTIVE); } else if (if_type == COM_IF_RSA98III || isa_get_vendorid(dev)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, if_16550a_type[if_type & 0x0f].iatsz, RF_ACTIVE); } else { port = isa_alloc_resourcev(dev, SYS_RES_IOPORT, &rid, if_16550a_type[if_type & 0x0f].iat, if_16550a_type[if_type & 0x0f].iatsz, RF_ACTIVE); } #else port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); #endif if (!port) return (ENXIO); #ifdef PC98 if (!IS_8251(if_type)) { if (isa_load_resourcev(port, if_16550a_type[if_type & 0x0f].iat, if_16550a_type[if_type & 0x0f].iatsz) != 0) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } } #endif iobase = rman_get_start(port); unit = device_get_unit(dev); com = device_get_softc(dev); flags = device_get_flags(dev); if (unit >= sio_numunits) sio_numunits = unit + 1; #ifdef PC98 obufsize = 256; if (if_type == COM_IF_RSA98III) obufsize = 2048; if ((obuf = malloc(obufsize * 2, M_DEVBUF, M_NOWAIT)) == NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } bzero(obuf, obufsize * 2); #endif /* * sioprobe() has initialized the device registers as follows: * o cfcr = CFCR_8BITS. * It is most important that CFCR_DLAB is off, so that the * data port is not hidden when we enable interrupts. * o ier = 0. * Interrupts are only enabled when the line is open. * o mcr = MCR_IENABLE, or 0 if the port has AST/4 compatible * interrupt control register or the config specifies no irq. * Keeping MCR_DTR and MCR_RTS off might stop the external * device from sending before we are ready. */ bzero(com, sizeof *com); com->unit = unit; com->ioportres = port; com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); com->cfcr_image = CFCR_8BITS; com->dtr_wait = 3 * hz; com->loses_outints = COM_LOSESOUTINTS(flags) != 0; com->no_irq = bus_get_resource(dev, SYS_RES_IRQ, 0, NULL, NULL) != 0; com->tx_fifo_size = 1; #ifdef PC98 com->obufsize = obufsize; com->obuf1 = obuf; com->obuf2 = obuf + obufsize; #endif com->obufs[0].l_head = com->obuf1; com->obufs[1].l_head = com->obuf2; #ifdef PC98 com->pc98_if_type = if_type; if (IS_8251(if_type)) { pc98_set_ioport(com); if (if_type == COM_IF_INTERNAL && pc98_check_8251fifo()) { com->pc98_8251fifo = 1; com->pc98_8251fifo_enable = 0; } } else { bus_addr_t *iat = if_16550a_type[if_type & 0x0f].iat; com->data_port = iobase + iat[com_data]; com->int_id_port = iobase + iat[com_iir]; com->modem_ctl_port = iobase + iat[com_mcr]; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + iat[com_lsr]; com->modem_status_port = iobase + iat[com_msr]; com->intr_ctl_port = iobase + iat[com_ier]; } #else /* not PC98 */ com->data_port = iobase + com_data; com->int_id_port = iobase + com_iir; com->modem_ctl_port = iobase + com_mcr; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + com_lsr; com->modem_status_port = iobase + com_msr; com->intr_ctl_port = iobase + com_ier; #endif /* * We don't use all the flags from since they * are only relevant for logins. It's important to have echo off * initially so that the line doesn't start blathering before the * echo flag can be turned off. */ com->it_in.c_iflag = 0; com->it_in.c_oflag = 0; com->it_in.c_cflag = TTYDEF_CFLAG; com->it_in.c_lflag = 0; if (unit == comconsole) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) DELAY(100000); #endif com->it_in.c_iflag = TTYDEF_IFLAG; com->it_in.c_oflag = TTYDEF_OFLAG; com->it_in.c_cflag = TTYDEF_CFLAG | CLOCAL; com->it_in.c_lflag = TTYDEF_LFLAG; com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL; com->lt_out.c_ispeed = com->lt_out.c_ospeed = com->lt_in.c_ispeed = com->lt_in.c_ospeed = com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate; } else com->it_in.c_ispeed = com->it_in.c_ospeed = TTYDEF_SPEED; if (siosetwater(com, com->it_in.c_ispeed) != 0) { enable_intr(); /* * Leave i/o resources allocated if this is a `cn'-level * console, so that other devices can't snarf them. */ if (iobase != siocniobase) bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } enable_intr(); termioschars(&com->it_in); com->it_out = com->it_in; /* attempt to determine UART type */ printf("sio%d: type", unit); #ifndef PC98 #ifdef COM_MULTIPORT if (!COM_ISMULTIPORT(flags) && !COM_IIR_TXRDYBUG(flags)) #else if (!COM_IIR_TXRDYBUG(flags)) #endif { u_char scr; u_char scr1; u_char scr2; scr = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0xa5); scr1 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0x5a); scr2 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, scr); if (scr1 != 0xa5 || scr2 != 0x5a) { printf(" 8250"); goto determined_type; } } #endif /* !PC98 */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo && !COM_NOFIFO(flags)) com->tx_fifo_size = 16; com_int_TxRx_disable( com ); com_cflag_and_speed_set( com, com->it_in.c_cflag, comdefaultrate ); com_tiocm_bic( com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE ); com_send_break_off( com ); if (com->pc98_if_type == COM_IF_INTERNAL) { printf(" (internal%s%s)", com->pc98_8251fifo ? " fifo" : "", PC98SIO_baud_rate_port(com->pc98_if_type) != -1 ? " v-fast" : ""); } else { printf(" 8251%s", if_8251_type[com->pc98_if_type & 0x0f].name); } } else { #endif /* PC98 */ sio_setreg(com, com_fifo, FIFO_ENABLE | FIFO_RX_HIGH); DELAY(100); com->st16650a = 0; switch (inb(com->int_id_port) & IIR_FIFO_MASK) { case FIFO_RX_LOW: printf(" 16450"); break; case FIFO_RX_MEDL: printf(" 16450?"); break; case FIFO_RX_MEDH: printf(" 16550?"); break; case FIFO_RX_HIGH: if (COM_NOFIFO(flags)) { printf(" 16550A fifo disabled"); } else { com->hasfifo = TRUE; #ifdef PC98 com->tx_fifo_size = 0; /* XXX flag conflicts. */ printf(" 16550A"); #else if (COM_ST16650A(flags)) { com->st16650a = 1; com->tx_fifo_size = 32; printf(" ST16650A"); } else { com->tx_fifo_size = COM_FIFOSIZE(flags); printf(" 16550A"); } #endif } #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { com->tx_fifo_size = 2048; com->rsabase = iobase; outb(com->rsabase + rsa_ier, 0x00); outb(com->rsabase + rsa_frr, 0x00); } #endif #ifdef COM_ESP #ifdef PC98 if (com->pc98_if_type == COM_IF_ESP98) #endif for (espp = likely_esp_ports; *espp != 0; espp++) if (espattach(com, *espp)) { com->tx_fifo_size = 1024; break; } #endif if (!com->st16650a) { if (!com->tx_fifo_size) com->tx_fifo_size = 16; else printf(" lookalike with %d bytes FIFO", com->tx_fifo_size); } break; } #ifdef PC98 if (com->pc98_if_type == COM_IF_RSB3000) { /* Set RSB-2000/3000 Extended Buffer mode. */ u_char lcr; lcr = sio_getreg(com, com_cfcr); sio_setreg(com, com_cfcr, lcr | CFCR_DLAB); sio_setreg(com, com_emr, EMR_EXBUFF | EMR_EFMODE); sio_setreg(com, com_cfcr, lcr); } #endif #ifdef COM_ESP if (com->esp) { /* * Set 16550 compatibility mode. * We don't use the ESP_MODE_SCALE bit to increase the * fifo trigger levels because we can't handle large * bursts of input. * XXX flow control should be set in comparam(), not here. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETMODE); outb(com->esp_port + ESP98_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); #else outb(com->esp_port + ESP_CMD1, ESP_SETMODE); outb(com->esp_port + ESP_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); #endif /* Set RTS/CTS flow control. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP98_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP98_CMD2, ESP_FLOW_CTS); #else outb(com->esp_port + ESP_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP_CMD2, ESP_FLOW_CTS); #endif /* Set flow-control levels. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP98_CMD2, HIBYTE(768)); outb(com->esp_port + ESP98_CMD2, LOBYTE(768)); outb(com->esp_port + ESP98_CMD2, HIBYTE(512)); outb(com->esp_port + ESP98_CMD2, LOBYTE(512)); #else outb(com->esp_port + ESP_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP_CMD2, HIBYTE(768)); outb(com->esp_port + ESP_CMD2, LOBYTE(768)); outb(com->esp_port + ESP_CMD2, HIBYTE(512)); outb(com->esp_port + ESP_CMD2, LOBYTE(512)); #endif #ifdef PC98 /* Set UART clock prescaler. */ outb(com->esp_port + ESP98_CMD1, ESP_SETCLOCK); outb(com->esp_port + ESP98_CMD2, 2); /* 4 times */ #endif } #endif /* COM_ESP */ sio_setreg(com, com_fifo, 0); #ifdef PC98 printf("%s", if_16550a_type[com->pc98_if_type & 0x0f].name); #else determined_type: ; #endif #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { device_t masterdev; com->multiport = TRUE; printf(" (multiport"); if (unit == COM_MPMASTER(flags)) printf(" master"); printf(")"); masterdev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); com->no_irq = (masterdev == NULL || bus_get_resource(masterdev, SYS_RES_IRQ, 0, NULL, NULL) != 0); } #endif /* COM_MULTIPORT */ #ifdef PC98 } #endif if (unit == comconsole) printf(", console"); if (COM_IIR_TXRDYBUG(flags)) printf(" with a bogus IIR_TXRDY register"); printf("\n"); if (!sio_registered) { register_swi(SWI_TTY, siopoll); sio_registered = TRUE; } make_dev(&sio_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, "ttyd%r", unit); make_dev(&sio_cdevsw, unit | CONTROL_INIT_STATE, UID_ROOT, GID_WHEEL, 0600, "ttyid%r", unit); make_dev(&sio_cdevsw, unit | CONTROL_LOCK_STATE, UID_ROOT, GID_WHEEL, 0600, "ttyld%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK, UID_UUCP, GID_DIALER, 0660, "cuaa%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK | CONTROL_INIT_STATE, UID_UUCP, GID_DIALER, 0660, "cuaia%r", unit); make_dev(&sio_cdevsw, unit | CALLOUT_MASK | CONTROL_LOCK_STATE, UID_UUCP, GID_DIALER, 0660, "cuala%r", unit); com->flags = flags; com->pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR; pps_init(&com->pps); rid = 0; com->irqres = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0ul, ~0ul, 1, RF_ACTIVE); if (com->irqres) { ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres, INTR_TYPE_TTY | INTR_TYPE_FAST, siointr, com, &com->cookie); if (ret) { ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres, INTR_TYPE_TTY, siointr, com, &com->cookie); if (ret == 0) device_printf(dev, "unable to activate interrupt in fast mode - using normal mode"); } if (ret) device_printf(dev, "could not activate interrupt\n"); } return (0); } static int sioopen(dev, flag, mode, p) dev_t dev; int flag; int mode; struct proc *p; { struct com_s *com; int error; int mynor; int s; struct tty *tp; int unit; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); if (com == NULL) return (ENXIO); if (com->gone) return (ENXIO); if (mynor & CONTROL_MASK) return (0); tp = dev->si_tty = com->tp = ttymalloc(com->tp); s = spltty(); /* * We jump to this label after all non-interrupted sleeps to pick * up any changes of the device state. */ open_top: while (com->state & CS_DTR_OFF) { error = tsleep(&com->dtr_wait, TTIPRI | PCATCH, "siodtr", 0); if (com_addr(unit) == NULL) return (ENXIO); if (error != 0 || com->gone) goto out; } if (tp->t_state & TS_ISOPEN) { /* * The device is open, so everything has been initialized. * Handle conflicts. */ if (mynor & CALLOUT_MASK) { if (!com->active_out) { error = EBUSY; goto out; } } else { if (com->active_out) { if (flag & O_NONBLOCK) { error = EBUSY; goto out; } error = tsleep(&com->active_out, TTIPRI | PCATCH, "siobi", 0); if (com_addr(unit) == NULL) return (ENXIO); if (error != 0 || com->gone) goto out; goto open_top; } } if (tp->t_state & TS_XCLUDE && suser(p)) { error = EBUSY; goto out; } } else { /* * The device isn't open, so there are no conflicts. * Initialize it. Initialization is done twice in many * cases: to preempt sleeping callin opens if we are * callout, and to complete a callin open after DCD rises. */ tp->t_oproc = comstart; tp->t_param = comparam; tp->t_stop = comstop; tp->t_dev = dev; tp->t_termios = mynor & CALLOUT_MASK ? com->it_out : com->it_in; #ifdef PC98 if (!IS_8251(com->pc98_if_type)) #endif (void)commctl(com, TIOCM_DTR | TIOCM_RTS, DMSET); com->poll = com->no_irq; com->poll_output = com->loses_outints; ++com->wopeners; error = comparam(tp, &tp->t_termios); --com->wopeners; if (error != 0) goto out; #ifdef PC98 if (IS_8251(com->pc98_if_type)) { com_tiocm_bis(com, TIOCM_DTR|TIOCM_RTS); pc98_msrint_start(dev); if (com->pc98_8251fifo) { com->pc98_8251fifo_enable = 1; outb(I8251F_fcr, CTRL8251F_ENABLE | CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); } } #endif /* * XXX we should goto open_top if comparam() slept. */ if (com->hasfifo) { /* * (Re)enable and drain fifos. * * Certain SMC chips cause problems if the fifos * are enabled while input is ready. Turn off the * fifo if necessary to clear the input. We test * the input ready bit after enabling the fifos * since we've already enabled them in comparam() * and to handle races between enabling and fresh * input. */ while (TRUE) { sio_setreg(com, com_fifo, FIFO_RCV_RST | FIFO_XMT_RST | com->fifo_image); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) outb(com->rsabase + rsa_frr , 0x00); #endif /* * XXX the delays are for superstitious * historical reasons. It must be less than * the character time at the maximum * supported speed (87 usec at 115200 bps * 8N1). Otherwise we might loop endlessly * if data is streaming in. We used to use * delays of 100. That usually worked * because DELAY(100) used to usually delay * for about 85 usec instead of 100. */ DELAY(50); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III ? !(inb(com->rsabase + rsa_srr) & 0x08) : !(inb(com->line_status_port) & LSR_RXRDY)) break; #else if (!(inb(com->line_status_port) & LSR_RXRDY)) break; #endif sio_setreg(com, com_fifo, 0); DELAY(50); (void) inb(com->data_port); } } disable_intr(); #ifdef PC98 if (IS_8251(com->pc98_if_type)) { com_tiocm_bis(com, TIOCM_LE); com->pc98_prev_modem_status = pc98_get_modem_status(com); com_int_Rx_enable(com); } else { #endif (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status = inb(com->modem_status_port); if (COM_IIR_TXRDYBUG(com->flags)) { outb(com->intr_ctl_port, IER_ERXRDY | IER_ERLS | IER_EMSC); } else { outb(com->intr_ctl_port, IER_ERXRDY | IER_ETXRDY | IER_ERLS | IER_EMSC); } #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { outb(com->rsabase + rsa_ier, 0x1d); outb(com->intr_ctl_port, IER_ERLS | IER_EMSC); } #endif #ifdef PC98 } #endif enable_intr(); /* * Handle initial DCD. Callout devices get a fake initial * DCD (trapdoor DCD). If we are callout, then any sleeping * callin opens get woken up and resume sleeping on "siobi" * instead of "siodcd". */ /* * XXX `mynor & CALLOUT_MASK' should be * `tp->t_cflag & (SOFT_CARRIER | TRAPDOOR_CARRIER) where * TRAPDOOR_CARRIER is the default initial state for callout * devices and SOFT_CARRIER is like CLOCAL except it hides * the true carrier. */ #ifdef PC98 if ((IS_8251(com->pc98_if_type) && (pc98_get_modem_status(com) & TIOCM_CAR)) || (!IS_8251(com->pc98_if_type) && (com->prev_modem_status & MSR_DCD)) || mynor & CALLOUT_MASK) (*linesw[tp->t_line].l_modem)(tp, 1); #else if (com->prev_modem_status & MSR_DCD || mynor & CALLOUT_MASK) (*linesw[tp->t_line].l_modem)(tp, 1); #endif } /* * Wait for DCD if necessary. */ if (!(tp->t_state & TS_CARR_ON) && !(mynor & CALLOUT_MASK) && !(tp->t_cflag & CLOCAL) && !(flag & O_NONBLOCK)) { ++com->wopeners; error = tsleep(TSA_CARR_ON(tp), TTIPRI | PCATCH, "siodcd", 0); if (com_addr(unit) == NULL) return (ENXIO); --com->wopeners; if (error != 0 || com->gone) goto out; goto open_top; } error = (*linesw[tp->t_line].l_open)(dev, tp); disc_optim(tp, &tp->t_termios, com); if (tp->t_state & TS_ISOPEN && mynor & CALLOUT_MASK) com->active_out = TRUE; siosettimeout(); out: splx(s); if (!(tp->t_state & TS_ISOPEN) && com->wopeners == 0) comhardclose(com); return (error); } static int sioclose(dev, flag, mode, p) dev_t dev; int flag; int mode; struct proc *p; { struct com_s *com; int mynor; int s; struct tty *tp; mynor = minor(dev); if (mynor & CONTROL_MASK) return (0); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL) return (ENODEV); tp = com->tp; s = spltty(); (*linesw[tp->t_line].l_close)(tp, flag); #ifdef PC98 com->modem_checking = 0; #endif disc_optim(tp, &tp->t_termios, com); comstop(tp, FREAD | FWRITE); comhardclose(com); ttyclose(tp); siosettimeout(); splx(s); if (com->gone) { printf("sio%d: gone\n", com->unit); s = spltty(); if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); bzero(tp, sizeof *tp); splx(s); } return (0); } static void comhardclose(com) struct com_s *com; { int s; struct tty *tp; int unit; unit = com->unit; s = spltty(); com->poll = FALSE; com->poll_output = FALSE; com->do_timestamp = FALSE; com->do_dcd_timestamp = FALSE; com->pps.ppsparam.mode = 0; #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_send_break_off(com); else #endif sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); { #ifdef PC98 int tmp; if (IS_8251(com->pc98_if_type)) com_int_TxRx_disable(com); else sio_setreg(com, com_ier, 0); if (com->pc98_if_type == COM_IF_RSA98III) outb(com->rsabase + rsa_ier, 0x00); #else sio_setreg(com, com_ier, 0); #endif tp = com->tp; #ifdef PC98 if (IS_8251(com->pc98_if_type)) tmp = pc98_get_modem_status(com) & TIOCM_CAR; else tmp = com->prev_modem_status & MSR_DCD; #endif if (tp->t_cflag & HUPCL /* * XXX we will miss any carrier drop between here and the * next open. Perhaps we should watch DCD even when the * port is closed; it is not sufficient to check it at * the next open because it might go up and down while * we're not watching. */ || (!com->active_out #ifdef PC98 && !(tmp) #else && !(com->prev_modem_status & MSR_DCD) #endif && !(com->it_in.c_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN)) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_tiocm_bic(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); else #endif (void)commctl(com, TIOCM_DTR, DMBIC); if (com->dtr_wait != 0 && !(com->state & CS_DTR_OFF)) { timeout(siodtrwakeup, com, com->dtr_wait); com->state |= CS_DTR_OFF; } } #ifdef PC98 else { if (IS_8251(com->pc98_if_type)) com_tiocm_bic(com, TIOCM_LE); } #endif } #ifdef PC98 if (com->pc98_8251fifo) { if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); com->pc98_8251fifo_enable = 0; } #endif if (com->hasfifo) { /* * Disable fifos so that they are off after controlled * reboots. Some BIOSes fail to detect 16550s when the * fifos are enabled. */ sio_setreg(com, com_fifo, 0); } com->active_out = FALSE; wakeup(&com->active_out); wakeup(TSA_CARR_ON(tp)); /* restart any wopeners */ splx(s); } static int sioread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int mynor; struct com_s *com; mynor = minor(dev); if (mynor & CONTROL_MASK) return (ENODEV); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL || com->gone) return (ENODEV); return ((*linesw[com->tp->t_line].l_read)(com->tp, uio, flag)); } static int siowrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int mynor; struct com_s *com; int unit; mynor = minor(dev); if (mynor & CONTROL_MASK) return (ENODEV); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); if (com == NULL || com->gone) return (ENODEV); /* * (XXX) We disallow virtual consoles if the physical console is * a serial port. This is in case there is a display attached that * is not the console. In that situation we don't need/want the X * server taking over the console. */ if (constty != NULL && unit == comconsole) constty = NULL; return ((*linesw[com->tp->t_line].l_write)(com->tp, uio, flag)); } static void siobusycheck(chan) void *chan; { struct com_s *com; int s; com = (struct com_s *)chan; /* * Clear TS_BUSY if low-level output is complete. * spl locking is sufficient because siointr1() does not set CS_BUSY. * If siointr1() clears CS_BUSY after we look at it, then we'll get * called again. Reading the line status port outside of siointr1() * is safe because CS_BUSY is clear so there are no output interrupts * to lose. */ s = spltty(); if (com->state & CS_BUSY) com->extra_state &= ~CSE_BUSYCHECK; /* False alarm. */ #ifdef PC98 else if ((IS_8251(com->pc98_if_type) && ((com->pc98_8251fifo_enable && (inb(I8251F_lsr) & (STS8251F_TxRDY | STS8251F_TxEMP)) == (STS8251F_TxRDY | STS8251F_TxEMP)) || (!com->pc98_8251fifo_enable && (inb(com->sts_port) & (STS8251_TxRDY | STS8251_TxEMP)) == (STS8251_TxRDY | STS8251_TxEMP)))) || ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY))) { #else else if ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY)) { #endif com->tp->t_state &= ~TS_BUSY; ttwwakeup(com->tp); com->extra_state &= ~CSE_BUSYCHECK; } else timeout(siobusycheck, com, hz / 100); splx(s); } static void siodtrwakeup(chan) void *chan; { struct com_s *com; com = (struct com_s *)chan; com->state &= ~CS_DTR_OFF; wakeup(&com->dtr_wait); } static void sioinput(com) struct com_s *com; { u_char *buf; int incc; u_char line_status; int recv_data; struct tty *tp; buf = com->ibuf; tp = com->tp; if (!(tp->t_state & TS_ISOPEN) || !(tp->t_cflag & CREAD)) { com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; return; } if (tp->t_state & TS_CAN_BYPASS_L_RINT) { /* * Avoid the grotesquely inefficient lineswitch routine * (ttyinput) in "raw" mode. It usually takes about 450 * instructions (that's without canonical processing or echo!). * slinput is reasonably fast (usually 40 instructions plus * call overhead). */ do { enable_intr(); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat && (com->state & CS_RTS_IFLOW || tp->t_iflag & IXOFF) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); com->delta_error_counts[CE_TTY_BUF_OVERFLOW] += b_to_q((char *)buf, incc, &tp->t_rawq); buf += incc; tk_nin += incc; tk_rawcc += incc; tp->t_rawcc += incc; ttwakeup(tp); if (tp->t_state & TS_TTSTOP && (tp->t_iflag & IXANY || tp->t_cc[VSTART] == tp->t_cc[VSTOP])) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; comstart(tp); } disable_intr(); } while (buf < com->iptr); } else { do { enable_intr(); line_status = buf[com->ierroff]; recv_data = *buf++; if (line_status & (LSR_BI | LSR_FE | LSR_OE | LSR_PE)) { if (line_status & LSR_BI) recv_data |= TTY_BI; if (line_status & LSR_FE) recv_data |= TTY_FE; if (line_status & LSR_OE) recv_data |= TTY_OE; if (line_status & LSR_PE) recv_data |= TTY_PE; } (*linesw[tp->t_line].l_rint)(recv_data, tp); disable_intr(); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; /* * There is now room for another low-level buffer full of input, * so enable RTS if it is now disabled and there is room in the * high-level buffer. */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if ((com->state & CS_RTS_IFLOW) && !(com_tiocm_get(com) & TIOCM_RTS) && !(tp->t_state & TS_TBLOCK)) com_tiocm_bis(com, TIOCM_RTS); } else { if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } #else if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #endif } void siointr(arg) void *arg; { #ifndef COM_MULTIPORT COM_LOCK(); siointr1((struct com_s *) arg); COM_UNLOCK(); #else /* COM_MULTIPORT */ bool_t possibly_more_intrs; int unit; struct com_s *com; #ifdef PC98 u_char rsa_buf_status; #endif /* * Loop until there is no activity on any port. This is necessary * to get an interrupt edge more than to avoid another interrupt. * If the IRQ signal is just an OR of the IRQ signals from several * devices, then the edge from one may be lost because another is * on. */ COM_LOCK(); do { possibly_more_intrs = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); /* * XXX COM_LOCK(); * would it work here, or be counter-productive? */ #ifdef PC98 if (com != NULL && !com->gone && IS_8251(com->pc98_if_type)) { siointr1(com); } else if (com != NULL && !com->gone && com->pc98_if_type == COM_IF_RSA98III) { rsa_buf_status = inb(com->rsabase + rsa_srr) & 0xc9; if ((rsa_buf_status & 0xc8) || !(rsa_buf_status & 0x01)) { siointr1(com); if (rsa_buf_status != (inb(com->rsabase + rsa_srr) & 0xc9)) possibly_more_intrs = TRUE; } } else #endif if (com != NULL && !com->gone && (inb(com->int_id_port) & IIR_IMASK) != IIR_NOPEND) { siointr1(com); possibly_more_intrs = TRUE; } /* XXX COM_UNLOCK(); */ } } while (possibly_more_intrs); COM_UNLOCK(); #endif /* COM_MULTIPORT */ } static void siointr1(com) struct com_s *com; { u_char line_status; u_char modem_status; u_char *ioptr; u_char recv_data; u_char int_ctl; u_char int_ctl_new; struct timecounter *tc; u_int count; #ifdef PC98 u_char tmp = 0; u_char rsa_buf_status = 0; int rsa_tx_fifo_size = 0; #endif /* PC98 */ int_ctl = inb(com->intr_ctl_port); int_ctl_new = int_ctl; while (!com->gone) { #ifdef PC98 status_read:; if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) tmp = inb(I8251F_lsr); else tmp = inb(com->sts_port); more_intr: line_status = 0; if (com->pc98_8251fifo_enable) { if (tmp & STS8251F_TxRDY) line_status |= LSR_TXRDY; if (tmp & STS8251F_RxRDY) line_status |= LSR_RXRDY; if (tmp & STS8251F_TxEMP) line_status |= LSR_TSRE; if (tmp & STS8251F_PE) line_status |= LSR_PE; if (tmp & STS8251F_OE) line_status |= LSR_OE; if (tmp & STS8251F_BD_SD) line_status |= LSR_BI; } else { if (tmp & STS8251_TxRDY) line_status |= LSR_TXRDY; if (tmp & STS8251_RxRDY) line_status |= LSR_RXRDY; if (tmp & STS8251_TxEMP) line_status |= LSR_TSRE; if (tmp & STS8251_PE) line_status |= LSR_PE; if (tmp & STS8251_OE) line_status |= LSR_OE; if (tmp & STS8251_FE) line_status |= LSR_FE; if (tmp & STS8251_BD_SD) line_status |= LSR_BI; } } else { #endif /* PC98 */ if (com->pps.ppsparam.mode & PPS_CAPTUREBOTH) { modem_status = inb(com->modem_status_port); if ((modem_status ^ com->last_modem_status) & MSR_DCD) { tc = timecounter; count = tc->tc_get_timecount(tc); pps_event(&com->pps, tc, count, (modem_status & MSR_DCD) ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR); } } line_status = inb(com->line_status_port); #ifdef PC98 } if (com->pc98_if_type == COM_IF_RSA98III) rsa_buf_status = inb(com->rsabase + rsa_srr); #endif /* PC98 */ /* input event? (check first to help avoid overruns) */ #ifndef PC98 while (line_status & LSR_RCV_MASK) { #else while ((line_status & LSR_RCV_MASK) || (com->pc98_if_type == COM_IF_RSA98III && (rsa_buf_status & 0x08))) { #endif /* PC98 */ /* break/unnattached error bits or real input? */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) { recv_data = inb(I8251F_data); if (tmp & (STS8251F_PE | STS8251F_OE | STS8251F_BD_SD)) { pc98_i8251_or_cmd(com, CMD8251_ER); recv_data = 0; } } else { recv_data = inb(com->data_port); if (tmp & (STS8251_PE | STS8251_OE | STS8251_FE | STS8251_BD_SD)) { pc98_i8251_or_cmd(com, CMD8251_ER); recv_data = 0; } } } else if (com->pc98_if_type == COM_IF_RSA98III) { if (!(rsa_buf_status & 0x08)) recv_data = 0; else recv_data = inb(com->data_port); } else #endif if (!(line_status & LSR_RXRDY)) recv_data = 0; else recv_data = inb(com->data_port); if (line_status & (LSR_BI | LSR_FE | LSR_PE)) { /* * Don't store BI if IGNBRK or FE/PE if IGNPAR. * Otherwise, push the work to a higher level * (to handle PARMRK) if we're bypassing. * Otherwise, convert BI/FE and PE+INPCK to 0. * * This makes bypassing work right in the * usual "raw" case (IGNBRK set, and IGNPAR * and INPCK clear). * * Note: BI together with FE/PE means just BI. */ if (line_status & LSR_BI) { #if defined(DDB) && defined(BREAK_TO_DEBUGGER) if (com->unit == comconsole) { breakpoint(); goto cont; } #endif if (com->tp == NULL || com->tp->t_iflag & IGNBRK) goto cont; } else { if (com->tp == NULL || com->tp->t_iflag & IGNPAR) goto cont; } if (com->tp->t_state & TS_CAN_BYPASS_L_RINT && (line_status & (LSR_BI | LSR_FE) || com->tp->t_iflag & INPCK)) recv_data = 0; } ++com->bytes_in; if (com->hotchar != 0 && recv_data == com->hotchar) setsofttty(); ioptr = com->iptr; if (ioptr >= com->ibufend) CE_RECORD(com, CE_INTERRUPT_BUF_OVERFLOW); else { if (com->do_timestamp) microtime(&com->timestamp); ++com_events; schedsofttty(); #if 0 /* for testing input latency vs efficiency */ if (com->iptr - com->ibuf == 8) setsofttty(); #endif ioptr[0] = recv_data; ioptr[com->ierroff] = line_status; com->iptr = ++ioptr; if (ioptr == com->ihighwater && com->state & CS_RTS_IFLOW) #ifdef PC98 IS_8251(com->pc98_if_type) ? com_tiocm_bic(com, TIOCM_RTS) : #endif outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); if (line_status & LSR_OE) CE_RECORD(com, CE_OVERRUN); } cont: /* * "& 0x7F" is to avoid the gcc-1.40 generating a slow * jump from the top of the loop to here */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) goto status_read; else #endif line_status = inb(com->line_status_port) & 0x7F; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) rsa_buf_status = inb(com->rsabase + rsa_srr); #endif /* PC98 */ } /* modem status change? (always check before doing output) */ #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif modem_status = inb(com->modem_status_port); if (modem_status != com->last_modem_status) { if (com->do_dcd_timestamp && !(com->last_modem_status & MSR_DCD) && modem_status & MSR_DCD) microtime(&com->dcd_timestamp); /* * Schedule high level to handle DCD changes. Note * that we don't use the delta bits anywhere. Some * UARTs mess them up, and it's easy to remember the * previous bits and calculate the delta. */ com->last_modem_status = modem_status; if (!(com->state & CS_CHECKMSR)) { com_events += LOTS_OF_EVENTS; com->state |= CS_CHECKMSR; setsofttty(); } /* handle CTS change immediately for crisp flow ctl */ if (com->state & CS_CTS_OFLOW) { if (modem_status & MSR_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } } #ifdef PC98 } #endif /* output queued and everything ready? */ #ifndef PC98 if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { #else if (((com->pc98_if_type == COM_IF_RSA98III) ? (rsa_buf_status & 0x02) : (line_status & LSR_TXRDY)) && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { #endif #ifdef PC98 Port_t tmp_data_port; if (IS_8251(com->pc98_if_type) && com->pc98_8251fifo_enable) tmp_data_port = I8251F_data; else tmp_data_port = com->data_port; #endif ioptr = com->obufq.l_head; if (com->tx_fifo_size > 1) { u_int ocount; ocount = com->obufq.l_tail - ioptr; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { rsa_buf_status = inb(com->rsabase + rsa_srr); rsa_tx_fifo_size = 1024; if (!(rsa_buf_status & 0x01)) rsa_tx_fifo_size = 2048; if (ocount > rsa_tx_fifo_size) ocount = rsa_tx_fifo_size; } else #endif if (ocount > com->tx_fifo_size) ocount = com->tx_fifo_size; com->bytes_out += ocount; do #ifdef PC98 outb(tmp_data_port, *ioptr++); #else outb(com->data_port, *ioptr++); #endif while (--ocount != 0); } else { #ifdef PC98 outb(tmp_data_port, *ioptr++); #else outb(com->data_port, *ioptr++); #endif ++com->bytes_out; } #ifdef PC98 if (IS_8251(com->pc98_if_type)) if (!(pc98_check_i8251_interrupt(com) & IEN_TxFLAG)) com_int_Tx_enable(com); #endif com->obufq.l_head = ioptr; if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl_new = int_ctl | IER_ETXRDY; } if (ioptr >= com->obufq.l_tail) { struct lbq *qp; qp = com->obufq.l_next; qp->l_queued = FALSE; qp = qp->l_next; if (qp != NULL) { com->obufq.l_head = qp->l_head; com->obufq.l_tail = qp->l_tail; com->obufq.l_next = qp; } else { /* output just completed */ if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl_new = int_ctl & ~IER_ETXRDY; } com->state &= ~CS_BUSY; #if defined(PC98) if (IS_8251(com->pc98_if_type) && pc98_check_i8251_interrupt(com) & IEN_TxFLAG) com_int_Tx_disable(com); #endif } if (!(com->state & CS_ODONE)) { com_events += LOTS_OF_EVENTS; com->state |= CS_ODONE; setsofttty(); /* handle at high level ASAP */ } } if (COM_IIR_TXRDYBUG(com->flags) && (int_ctl != int_ctl_new)) { #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { int_ctl_new &= ~(IER_ETXRDY | IER_ERXRDY); outb(com->intr_ctl_port, int_ctl_new); outb(com->rsabase + rsa_ier, 0x1d); } else #endif outb(com->intr_ctl_port, int_ctl_new); } } #ifdef PC98 else if (line_status & LSR_TXRDY) { if (IS_8251(com->pc98_if_type)) if (pc98_check_i8251_interrupt(com) & IEN_TxFLAG) com_int_Tx_disable(com); } if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) { if ((tmp = inb(I8251F_lsr)) & STS8251F_RxRDY) goto more_intr; } else { if ((tmp = inb(com->sts_port)) & STS8251_RxRDY) goto more_intr; } } #endif /* finished? */ #ifndef COM_MULTIPORT #ifdef PC98 if (IS_8251(com->pc98_if_type)) return; #endif if ((inb(com->int_id_port) & IIR_IMASK) == IIR_NOPEND) #endif /* COM_MULTIPORT */ return; } } static int sioioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { struct com_s *com; int error; int mynor; int s; struct tty *tp; #if defined(COMPAT_43) || defined(COMPAT_SUNOS) u_long oldcmd; struct termios term; #endif mynor = minor(dev); com = com_addr(MINOR_TO_UNIT(mynor)); if (com == NULL || com->gone) return (ENODEV); if (mynor & CONTROL_MASK) { struct termios *ct; switch (mynor & CONTROL_MASK) { case CONTROL_INIT_STATE: ct = mynor & CALLOUT_MASK ? &com->it_out : &com->it_in; break; case CONTROL_LOCK_STATE: ct = mynor & CALLOUT_MASK ? &com->lt_out : &com->lt_in; break; default: return (ENODEV); /* /dev/nodev */ } switch (cmd) { case TIOCSETA: error = suser(p); if (error != 0) return (error); *ct = *(struct termios *)data; return (0); case TIOCGETA: *(struct termios *)data = *ct; return (0); case TIOCGETD: *(int *)data = TTYDISC; return (0); case TIOCGWINSZ: bzero(data, sizeof(struct winsize)); return (0); default: return (ENOTTY); } } tp = com->tp; #if defined(COMPAT_43) || defined(COMPAT_SUNOS) term = tp->t_termios; oldcmd = cmd; error = ttsetcompat(tp, &cmd, data, &term); if (error != 0) return (error); if (cmd != oldcmd) data = (caddr_t)&term; #endif if (cmd == TIOCSETA || cmd == TIOCSETAW || cmd == TIOCSETAF) { int cc; struct termios *dt = (struct termios *)data; struct termios *lt = mynor & CALLOUT_MASK ? &com->lt_out : &com->lt_in; dt->c_iflag = (tp->t_iflag & lt->c_iflag) | (dt->c_iflag & ~lt->c_iflag); dt->c_oflag = (tp->t_oflag & lt->c_oflag) | (dt->c_oflag & ~lt->c_oflag); dt->c_cflag = (tp->t_cflag & lt->c_cflag) | (dt->c_cflag & ~lt->c_cflag); dt->c_lflag = (tp->t_lflag & lt->c_lflag) | (dt->c_lflag & ~lt->c_lflag); for (cc = 0; cc < NCCS; ++cc) if (lt->c_cc[cc] != 0) dt->c_cc[cc] = tp->t_cc[cc]; if (lt->c_ispeed != 0) dt->c_ispeed = tp->t_ispeed; if (lt->c_ospeed != 0) dt->c_ospeed = tp->t_ospeed; } error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); if (error != ENOIOCTL) return (error); s = spltty(); error = ttioctl(tp, cmd, data, flag); disc_optim(tp, &tp->t_termios, com); if (error != ENOIOCTL) { splx(s); return (error); } #ifdef PC98 if (IS_8251(com->pc98_if_type)) { switch (cmd) { case TIOCSBRK: com_send_break_on(com); break; case TIOCCBRK: com_send_break_off(com); break; case TIOCSDTR: com_tiocm_bis(com, TIOCM_DTR | TIOCM_RTS); break; case TIOCCDTR: com_tiocm_bic(com, TIOCM_DTR); break; /* * XXX should disallow changing MCR_RTS if CS_RTS_IFLOW is set. The * changes get undone on the next call to comparam(). */ case TIOCMSET: com_tiocm_set(com, *(int *)data); break; case TIOCMBIS: com_tiocm_bis(com, *(int *)data); break; case TIOCMBIC: com_tiocm_bic(com, *(int *)data); break; case TIOCMGET: *(int *)data = com_tiocm_get(com); break; case TIOCMSDTRWAIT: /* must be root since the wait applies to following logins */ error = suser(p); if (error != 0) { splx(s); return (error); } com->dtr_wait = *(int *)data * hz / 100; break; case TIOCMGDTRWAIT: *(int *)data = com->dtr_wait * 100 / hz; break; case TIOCTIMESTAMP: com->do_timestamp = TRUE; *(struct timeval *)data = com->timestamp; break; case TIOCDCDTIMESTAMP: com->do_dcd_timestamp = TRUE; *(struct timeval *)data = com->dcd_timestamp; break; default: splx(s); error = pps_ioctl(cmd, data, &com->pps); if (error == ENODEV) error = ENOTTY; return (error); } } else { #endif switch (cmd) { case TIOCSBRK: sio_setreg(com, com_cfcr, com->cfcr_image |= CFCR_SBREAK); break; case TIOCCBRK: sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); break; case TIOCSDTR: (void)commctl(com, TIOCM_DTR, DMBIS); break; case TIOCCDTR: (void)commctl(com, TIOCM_DTR, DMBIC); break; /* * XXX should disallow changing MCR_RTS if CS_RTS_IFLOW is set. The * changes get undone on the next call to comparam(). */ case TIOCMSET: (void)commctl(com, *(int *)data, DMSET); break; case TIOCMBIS: (void)commctl(com, *(int *)data, DMBIS); break; case TIOCMBIC: (void)commctl(com, *(int *)data, DMBIC); break; case TIOCMGET: *(int *)data = commctl(com, 0, DMGET); break; case TIOCMSDTRWAIT: /* must be root since the wait applies to following logins */ error = suser(p); if (error != 0) { splx(s); return (error); } com->dtr_wait = *(int *)data * hz / 100; break; case TIOCMGDTRWAIT: *(int *)data = com->dtr_wait * 100 / hz; break; case TIOCTIMESTAMP: com->do_timestamp = TRUE; *(struct timeval *)data = com->timestamp; break; case TIOCDCDTIMESTAMP: com->do_dcd_timestamp = TRUE; *(struct timeval *)data = com->dcd_timestamp; break; default: splx(s); error = pps_ioctl(cmd, data, &com->pps); if (error == ENODEV) error = ENOTTY; return (error); } #ifdef PC98 } #endif splx(s); return (0); } static void siopoll() { int unit; if (com_events == 0) return; repeat: for (unit = 0; unit < sio_numunits; ++unit) { struct com_s *com; int incc; struct tty *tp; com = com_addr(unit); if (com == NULL) continue; tp = com->tp; if (tp == NULL || com->gone) { /* * Discard any events related to never-opened or * going-away devices. */ disable_intr(); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { incc += LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; } com_events -= incc; enable_intr(); continue; } if (com->iptr != com->ibuf) { disable_intr(); sioinput(com); enable_intr(); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif disable_intr(); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; enable_intr(); if (delta_modem_status & MSR_DCD) (*linesw[tp->t_line].l_modem) (tp, com->prev_modem_status & MSR_DCD); #ifdef PC98 } #endif } if (com->state & CS_ODONE) { disable_intr(); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; enable_intr(); if (!(com->state & CS_BUSY) && !(com->extra_state & CSE_BUSYCHECK)) { timeout(siobusycheck, com, hz / 100); com->extra_state |= CSE_BUSYCHECK; } (*linesw[tp->t_line].l_start)(tp); } if (com_events == 0) break; } if (com_events >= LOTS_OF_EVENTS) goto repeat; } static int comparam(tp, t) struct tty *tp; struct termios *t; { u_int cfcr; int cflag; struct com_s *com; int divisor; u_char dlbh; u_char dlbl; int s; int unit; #ifdef PC98 u_char param = 0; #endif #ifdef PC98 unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); cfcr = 0; if (IS_8251(com->pc98_if_type)) { divisor = pc98_ttspeedtab(com, t->c_ospeed); } else { /* do historical conversions */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; /* check requested parameters */ divisor = ttspeedtab(t->c_ospeed, if_16550a_type[com->pc98_if_type & 0x0f].speedtab); } #else /* do historical conversions */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; /* check requested parameters */ divisor = ttspeedtab(t->c_ospeed, comspeedtab); #endif if (divisor < 0 || (divisor > 0 && t->c_ispeed != t->c_ospeed)) return (EINVAL); #ifndef PC98 /* parameters are OK, convert them to the com struct and the device */ unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); if (com == NULL) return (ENODEV); #endif s = spltty(); #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (divisor == 0) com_tiocm_bic(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); else com_tiocm_bis(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); } else #endif if (divisor == 0) (void)commctl(com, TIOCM_DTR, DMBIC); /* hang up line */ else (void)commctl(com, TIOCM_DTR, DMBIS); cflag = t->c_cflag; #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif switch (cflag & CSIZE) { case CS5: cfcr = CFCR_5BITS; break; case CS6: cfcr = CFCR_6BITS; break; case CS7: cfcr = CFCR_7BITS; break; default: cfcr = CFCR_8BITS; break; } if (cflag & PARENB) { cfcr |= CFCR_PENAB; if (!(cflag & PARODD)) cfcr |= CFCR_PEVEN; } if (cflag & CSTOPB) cfcr |= CFCR_STOPB; if (com->hasfifo && divisor != 0) { /* * Use a fifo trigger level low enough so that the input * latency from the fifo is less than about 16 msec and * the total latency is less than about 30 msec. These * latencies are reasonable for humans. Serial comms * protocols shouldn't expect anything better since modem * latencies are larger. */ com->fifo_image = t->c_ospeed <= 4800 ? FIFO_ENABLE : FIFO_ENABLE | FIFO_RX_HIGH; #ifdef COM_ESP /* * The Hayes ESP card needs the fifo DMA mode bit set * in compatibility mode. If not, it will interrupt * for each character received. */ if (com->esp) com->fifo_image |= FIFO_DMA_MODE; #endif sio_setreg(com, com_fifo, com->fifo_image); } #ifdef PC98 } #endif /* * This returns with interrupts disabled so that we can complete * the speed change atomically. Keeping interrupts disabled is * especially important while com_data is hidden. */ (void) siosetwater(com, t->c_ispeed); #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_cflag_and_speed_set(com, cflag, t->c_ospeed); else { #endif if (divisor != 0) { sio_setreg(com, com_cfcr, cfcr | CFCR_DLAB); /* * Only set the divisor registers if they would change, * since on some 16550 incompatibles (UMC8669F), setting * them while input is arriving them loses sync until * data stops arriving. */ dlbl = divisor & 0xFF; if (sio_getreg(com, com_dlbl) != dlbl) sio_setreg(com, com_dlbl, dlbl); dlbh = (u_int) divisor >> 8; if (sio_getreg(com, com_dlbh) != dlbh) sio_setreg(com, com_dlbh, dlbh); } sio_setreg(com, com_cfcr, com->cfcr_image = cfcr); #ifdef PC98 } #endif if (!(tp->t_state & TS_TTSTOP)) com->state |= CS_TTGO; if (cflag & CRTS_IFLOW) { #ifndef PC98 if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) | 0x40); } #endif com->state |= CS_RTS_IFLOW; /* * If CS_RTS_IFLOW just changed from off to on, the change * needs to be propagated to MCR_RTS. This isn't urgent, * so do it later by calling comstart() instead of repeating * a lot of code from comstart() here. */ } else if (com->state & CS_RTS_IFLOW) { com->state &= ~CS_RTS_IFLOW; /* * CS_RTS_IFLOW just changed from on to off. Force MCR_RTS * on here, since comstart() won't do it later. */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_tiocm_bis(com, TIOCM_RTS); else outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #else outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) & ~0x40); } #endif } /* * Set up state to handle output flow control. * XXX - worth handling MDMBUF (DCD) flow control at the lowest level? * Now has 10+ msec latency, while CTS flow has 50- usec latency. */ com->state |= CS_ODEVREADY; com->state &= ~CS_CTS_OFLOW; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { param = inb(com->rsabase + rsa_msr); outb(com->rsabase + rsa_msr, param & 0x14); } #endif if (cflag & CCTS_OFLOW) { com->state |= CS_CTS_OFLOW; #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (!(pc98_get_modem_status(com) & TIOCM_CTS)) com->state &= ~CS_ODEVREADY; } else { if (com->pc98_if_type == COM_IF_RSA98III) { /* Set automatic flow control mode */ outb(com->rsabase + rsa_msr, param | 0x08); } else #endif if (!(com->last_modem_status & MSR_CTS)) com->state &= ~CS_ODEVREADY; #ifdef PC98 } #else if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) | 0x80); } } else { if (com->st16650a) { sio_setreg(com, com_cfcr, 0xbf); sio_setreg(com, com_fifo, sio_getreg(com, com_fifo) & ~0x80); } #endif } #ifdef PC98 if (!IS_8251(com->pc98_if_type)) #endif sio_setreg(com, com_cfcr, com->cfcr_image); /* XXX shouldn't call functions while intrs are disabled. */ disc_optim(tp, t, com); /* * Recover from fiddling with CS_TTGO. We used to call siointr1() * unconditionally, but that defeated the careful discarding of * stale input in sioopen(). */ if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); enable_intr(); splx(s); comstart(tp); if (com->ibufold != NULL) { free(com->ibufold, M_DEVBUF); com->ibufold = NULL; } return (0); } static int siosetwater(com, speed) struct com_s *com; speed_t speed; { int cp4ticks; u_char *ibuf; int ibufsize; struct tty *tp; /* * Make the buffer size large enough to handle a softtty interrupt * latency of about 2 ticks without loss of throughput or data * (about 3 ticks if input flow control is not used or not honoured, * but a bit less for CS5-CS7 modes). */ cp4ticks = speed / 10 / hz * 4; for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) ibufsize = 2048; #endif if (ibufsize == com->ibufsize) { disable_intr(); return (0); } /* * Allocate input buffer. The extra factor of 2 in the size is * to allow for an error byte for each input byte. */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { disable_intr(); return (ENOMEM); } /* Initialize non-critical variables. */ com->ibufold = com->ibuf; com->ibufsize = ibufsize; tp = com->tp; if (tp != NULL) { tp->t_ififosize = 2 * ibufsize; tp->t_ispeedwat = (speed_t)-1; tp->t_ospeedwat = (speed_t)-1; } /* * Read current input buffer, if any. Continue with interrupts * disabled. */ disable_intr(); if (com->iptr != com->ibuf) sioinput(com); /*- * Initialize critical variables, including input buffer watermarks. * The external device is asked to stop sending when the buffer * exactly reaches high water, or when the high level requests it. * The high level is notified immediately (rather than at a later * clock tick) when this watermark is reached. * The buffer size is chosen so the watermark should almost never * be reached. * The low watermark is invisibly 0 since the buffer is always * emptied all at once. */ com->iptr = com->ibuf = ibuf; com->ibufend = ibuf + ibufsize; com->ierroff = ibufsize; com->ihighwater = ibuf + 3 * ibufsize / 4; return (0); } static void comstart(tp) struct tty *tp; { struct com_s *com; int s; int unit; unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); if (com == NULL) return; s = spltty(); disable_intr(); if (tp->t_state & TS_TTSTOP) com->state &= ~CS_TTGO; else com->state |= CS_TTGO; if (tp->t_state & TS_TBLOCK) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if ((com_tiocm_get(com) & TIOCM_RTS) && (com->state & CS_RTS_IFLOW)) com_tiocm_bic(com, TIOCM_RTS); } else { if ((com->mcr_image & MCR_RTS) && (com->state & CS_RTS_IFLOW)) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); } #else if (com->mcr_image & MCR_RTS && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); #endif } else { #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (!(com_tiocm_get(com) & TIOCM_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) com_tiocm_bis(com, TIOCM_RTS); } else { if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } #else if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #endif } enable_intr(); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); return; } if (tp->t_outq.c_cc != 0) { struct lbq *qp; struct lbq *next; if (!com->obufs[0].l_queued) { com->obufs[0].l_tail = com->obuf1 + q_to_b(&tp->t_outq, com->obuf1, #ifdef PC98 com->obufsize); #else sizeof com->obuf1); #endif com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; disable_intr(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[0]; } else { com->obufq.l_head = com->obufs[0].l_head; com->obufq.l_tail = com->obufs[0].l_tail; com->obufq.l_next = &com->obufs[0]; com->state |= CS_BUSY; } enable_intr(); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { com->obufs[1].l_tail = com->obuf2 + q_to_b(&tp->t_outq, com->obuf2, #ifdef PC98 com->obufsize); #else sizeof com->obuf2); #endif com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; disable_intr(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[1]; } else { com->obufq.l_head = com->obufs[1].l_head; com->obufq.l_tail = com->obufs[1].l_tail; com->obufq.l_next = &com->obufs[1]; com->state |= CS_BUSY; } enable_intr(); } tp->t_state |= TS_BUSY; } disable_intr(); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ enable_intr(); ttwwakeup(tp); splx(s); } static void comstop(tp, rw) struct tty *tp; int rw; { struct com_s *com; #ifdef PC98 int rsa98_tmp = 0; #endif com = com_addr(DEV_TO_UNIT(tp->t_dev)); if (com == NULL || com->gone) return; disable_intr(); if (rw & FWRITE) { #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) for (rsa98_tmp = 0; rsa98_tmp < 2048; rsa98_tmp++) sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); } #endif com->obufs[0].l_queued = FALSE; com->obufs[1].l_queued = FALSE; if (com->state & CS_ODONE) com_events -= LOTS_OF_EVENTS; com->state &= ~(CS_ODONE | CS_BUSY); com->tp->t_state &= ~TS_BUSY; } if (rw & FREAD) { #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { if (com->pc98_if_type == COM_IF_RSA98III) for (rsa98_tmp = 0; rsa98_tmp < 2048; rsa98_tmp++) sio_getreg(com, com_data); #endif if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_RCV_RST | com->fifo_image); #ifdef PC98 } #endif com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } enable_intr(); comstart(tp); } static int commctl(com, bits, how) struct com_s *com; int bits; int how; { int mcr; int msr; if (how == DMGET) { bits = TIOCM_LE; /* XXX - always enabled while open */ mcr = com->mcr_image; if (mcr & MCR_DTR) bits |= TIOCM_DTR; if (mcr & MCR_RTS) bits |= TIOCM_RTS; msr = com->prev_modem_status; if (msr & MSR_CTS) bits |= TIOCM_CTS; if (msr & MSR_DCD) bits |= TIOCM_CD; if (msr & MSR_DSR) bits |= TIOCM_DSR; /* * XXX - MSR_RI is naturally volatile, and we make MSR_TERI * more volatile by reading the modem status a lot. Perhaps * we should latch both bits until the status is read here. */ if (msr & (MSR_RI | MSR_TERI)) bits |= TIOCM_RI; return (bits); } mcr = 0; if (bits & TIOCM_DTR) mcr |= MCR_DTR; if (bits & TIOCM_RTS) mcr |= MCR_RTS; if (com->gone) return(0); disable_intr(); switch (how) { case DMSET: outb(com->modem_ctl_port, com->mcr_image = mcr | (com->mcr_image & MCR_IENABLE)); break; case DMBIS: outb(com->modem_ctl_port, com->mcr_image |= mcr); break; case DMBIC: outb(com->modem_ctl_port, com->mcr_image &= ~mcr); break; } enable_intr(); return (0); } static void siosettimeout() { struct com_s *com; bool_t someopen; int unit; /* * Set our timeout period to 1 second if no polled devices are open. * Otherwise set it to max(1/200, 1/hz). * Enable timeouts iff some device is open. */ untimeout(comwakeup, (void *)NULL, sio_timeout_handle); sio_timeout = hz; someopen = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && com->tp != NULL && com->tp->t_state & TS_ISOPEN && !com->gone) { someopen = TRUE; if (com->poll || com->poll_output) { sio_timeout = hz > 200 ? hz / 200 : 1; break; } } } if (someopen) { sio_timeouts_until_log = hz / sio_timeout; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); } else { /* Flush error messages, if any. */ sio_timeouts_until_log = 1; comwakeup((void *)NULL); untimeout(comwakeup, (void *)NULL, sio_timeout_handle); } } static void comwakeup(chan) void *chan; { struct com_s *com; int unit; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); /* * Recover from lost output interrupts. * Poll any lines that don't use interrupts. */ for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && !com->gone && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { disable_intr(); siointr1(com); enable_intr(); } } /* * Check for and log errors, but not too often. */ if (--sio_timeouts_until_log > 0) return; sio_timeouts_until_log = hz / sio_timeout; for (unit = 0; unit < sio_numunits; ++unit) { int errnum; com = com_addr(unit); if (com == NULL) continue; if (com->gone) continue; for (errnum = 0; errnum < CE_NTYPES; ++errnum) { u_int delta; u_long total; disable_intr(); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; enable_intr(); if (delta == 0) continue; total = com->error_counts[errnum] += delta; log(LOG_ERR, "sio%d: %u more %s%s (total %lu)\n", unit, delta, error_desc[errnum], delta == 1 ? "" : "s", total); } } } #ifdef PC98 /* commint is called when modem control line changes */ static void commint(dev_t dev) { register struct tty *tp; int stat,delta; struct com_s *com; int mynor,unit; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); tp = com->tp; stat = com_tiocm_get(com); delta = com_tiocm_get_delta(com); if (com->state & CS_CTS_OFLOW) { if (stat & TIOCM_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } if ((delta & TIOCM_CAR) && (mynor & CALLOUT_MASK) == 0) { if (stat & TIOCM_CAR ) (void)(*linesw[tp->t_line].l_modem)(tp, 1); else if ((*linesw[tp->t_line].l_modem)(tp, 0) == 0) { /* negate DTR, RTS */ com_tiocm_bic(com, (tp->t_cflag & HUPCL) ? TIOCM_DTR|TIOCM_RTS|TIOCM_LE : TIOCM_LE ); /* disable IENABLE */ com_int_TxRx_disable( com ); } } } #endif static void disc_optim(tp, t, com) struct tty *tp; struct termios *t; struct com_s *com; { if (!(t->c_iflag & (ICRNL | IGNCR | IMAXBEL | INLCR | ISTRIP | IXON)) && (!(t->c_iflag & BRKINT) || (t->c_iflag & IGNBRK)) && (!(t->c_iflag & PARMRK) || (t->c_iflag & (IGNPAR | IGNBRK)) == (IGNPAR | IGNBRK)) && !(t->c_lflag & (ECHO | ICANON | IEXTEN | ISIG | PENDIN)) && linesw[tp->t_line].l_rint == ttyinput) tp->t_state |= TS_CAN_BYPASS_L_RINT; else tp->t_state &= ~TS_CAN_BYPASS_L_RINT; com->hotchar = linesw[tp->t_line].l_hotchar; } /* * Following are all routines needed for SIO to act as console */ #include struct siocnstate { u_char dlbl; u_char dlbh; u_char ier; u_char cfcr; u_char mcr; }; static speed_t siocngetspeed __P((Port_t, struct speedtab *)); static void siocnclose __P((struct siocnstate *sp, Port_t iobase)); static void siocnopen __P((struct siocnstate *sp, Port_t iobase, int speed)); static void siocntxwait __P((Port_t iobase)); static cn_probe_t siocnprobe; static cn_init_t siocninit; static cn_checkc_t siocncheckc; static cn_getc_t siocngetc; static cn_putc_t siocnputc; #ifdef __i386__ CONS_DRIVER(sio, siocnprobe, siocninit, NULL, siocngetc, siocncheckc, siocnputc, NULL); #endif /* To get the GDB related variables */ #if DDB > 0 #include #endif static void siocntxwait(iobase) Port_t iobase; { int timo; /* * Wait for any pending transmission to finish. Required to avoid * the UART lockup bug when the speed is changed, and for normal * transmits. */ timo = 100000; while ((inb(iobase + com_lsr) & (LSR_TSRE | LSR_TXRDY)) != (LSR_TSRE | LSR_TXRDY) && --timo != 0) ; } /* * Read the serial port specified and try to figure out what speed * it's currently running at. We're assuming the serial port has * been initialized and is basicly idle. This routine is only intended * to be run at system startup. * * If the value read from the serial port doesn't make sense, return 0. */ static speed_t siocngetspeed(iobase, table) Port_t iobase; struct speedtab *table; { int code; u_char dlbh; u_char dlbl; u_char cfcr; cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); dlbl = inb(iobase + com_dlbl); dlbh = inb(iobase + com_dlbh); outb(iobase + com_cfcr, cfcr); code = dlbh << 8 | dlbl; for (; table->sp_speed != -1; table++) if (table->sp_code == code) return (table->sp_speed); return (0); /* didn't match anything sane */ } static void siocnopen(sp, iobase, speed) struct siocnstate *sp; Port_t iobase; int speed; { int divisor; u_char dlbh; u_char dlbl; /* * Save all the device control registers except the fifo register * and set our default ones (cs8 -parenb speed=comdefaultrate). * We can't save the fifo register since it is read-only. */ sp->ier = inb(iobase + com_ier); outb(iobase + com_ier, 0); /* spltty() doesn't stop siointr() */ siocntxwait(iobase); sp->cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); sp->dlbl = inb(iobase + com_dlbl); sp->dlbh = inb(iobase + com_dlbh); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (Startech), setting them clears the * data input register. This also reduces the effects of the * UMC8669F bug. */ divisor = ttspeedtab(speed, comspeedtab); dlbl = divisor & 0xFF; if (sp->dlbl != dlbl) outb(iobase + com_dlbl, dlbl); dlbh = (u_int) divisor >> 8; if (sp->dlbh != dlbh) outb(iobase + com_dlbh, dlbh); outb(iobase + com_cfcr, CFCR_8BITS); sp->mcr = inb(iobase + com_mcr); /* * We don't want interrupts, but must be careful not to "disable" * them by clearing the MCR_IENABLE bit, since that might cause * an interrupt by floating the IRQ line. */ outb(iobase + com_mcr, (sp->mcr & MCR_IENABLE) | MCR_DTR | MCR_RTS); } static void siocnclose(sp, iobase) struct siocnstate *sp; Port_t iobase; { /* * Restore the device control registers. */ siocntxwait(iobase); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); if (sp->dlbl != inb(iobase + com_dlbl)) outb(iobase + com_dlbl, sp->dlbl); if (sp->dlbh != inb(iobase + com_dlbh)) outb(iobase + com_dlbh, sp->dlbh); outb(iobase + com_cfcr, sp->cfcr); /* * XXX damp oscillations of MCR_DTR and MCR_RTS by not restoring them. */ outb(iobase + com_mcr, sp->mcr | MCR_DTR | MCR_RTS); outb(iobase + com_ier, sp->ier); } static void siocnprobe(cp) struct consdev *cp; { speed_t boot_speed; u_char cfcr; int s, unit; struct siocnstate sp; /* * Find our first enabled console, if any. If it is a high-level * console device, then initialize it and return successfully. * If it is a low-level console device, then initialize it and * return unsuccessfully. It must be initialized in both cases * for early use by console drivers and debuggers. Initializing * the hardware is not necessary in all cases, since the i/o * routines initialize it on the fly, but it is necessary if * input might arrive while the hardware is switched back to an * uninitialized state. We can't handle multiple console devices * yet because our low-level routines don't take a device arg. * We trust the user to set the console flags properly so that we * don't need to probe. */ cp->cn_pri = CN_DEAD; for (unit = 0; unit < 16; unit++) { /* XXX need to know how many */ int flags; int disabled; if (resource_int_value("sio", unit, "disabled", &disabled) == 0) { if (disabled) continue; } if (resource_int_value("sio", unit, "flags", &flags)) continue; if (COM_CONSOLE(flags) || COM_DEBUGGER(flags)) { int port; Port_t iobase; if (resource_int_value("sio", unit, "port", &port)) continue; iobase = port; s = spltty(); if (boothowto & RB_SERIAL) { boot_speed = siocngetspeed(iobase, comspeedtab); if (boot_speed) comdefaultrate = boot_speed; } /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); outb(iobase + com_dlbl, COMBRD(comdefaultrate) & 0xff); outb(iobase + com_dlbh, (u_int) COMBRD(comdefaultrate) >> 8); outb(iobase + com_cfcr, cfcr); siocnopen(&sp, iobase, comdefaultrate); splx(s); if (COM_CONSOLE(flags) && !COM_LLCONSOLE(flags)) { cp->cn_dev = makedev(CDEV_MAJOR, unit); cp->cn_pri = COM_FORCECONSOLE(flags) || boothowto & RB_SERIAL ? CN_REMOTE : CN_NORMAL; siocniobase = iobase; siocnunit = unit; } if (COM_DEBUGGER(flags)) { printf("sio%d: gdb debugging port\n", unit); siogdbiobase = iobase; siogdbunit = unit; #if DDB > 0 gdbdev = makedev(CDEV_MAJOR, unit); gdb_getc = siocngetc; gdb_putc = siocnputc; #endif } } } #ifdef __i386__ #if DDB > 0 /* * XXX Ugly Compatability. * If no gdb port has been specified, set it to be the console * as some configuration files don't specify the gdb port. */ if (gdbdev == NODEV && (boothowto & RB_GDB)) { printf("Warning: no GDB port specified. Defaulting to sio%d.\n", siocnunit); printf("Set flag 0x80 on desired GDB port in your\n"); printf("configuration file (currently sio only).\n"); siogdbiobase = siocniobase; siogdbunit = siocnunit; gdbdev = makedev(CDEV_MAJOR, siocnunit); gdb_getc = siocngetc; gdb_putc = siocnputc; } #endif #endif } #ifdef __alpha__ CONS_DRIVER(sio, NULL, NULL, NULL, siocngetc, siocncheckc, siocnputc, NULL); int siocnattach(port, speed) int port; int speed; { int s; u_char cfcr; struct siocnstate sp; siocniobase = port; comdefaultrate = speed; sio_consdev.cn_pri = CN_NORMAL; sio_consdev.cn_dev = makedev(CDEV_MAJOR, 0); s = spltty(); /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(siocniobase + com_cfcr); outb(siocniobase + com_cfcr, CFCR_DLAB | cfcr); outb(siocniobase + com_dlbl, COMBRD(comdefaultrate) & 0xff); outb(siocniobase + com_dlbh, (u_int) COMBRD(comdefaultrate) >> 8); outb(siocniobase + com_cfcr, cfcr); siocnopen(&sp, siocniobase, comdefaultrate); splx(s); cn_tab = &sio_consdev; return (0); } int siogdbattach(port, speed) int port; int speed; { int s; u_char cfcr; struct siocnstate sp; siogdbiobase = port; gdbdefaultrate = speed; s = spltty(); /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(siogdbiobase + com_cfcr); outb(siogdbiobase + com_cfcr, CFCR_DLAB | cfcr); outb(siogdbiobase + com_dlbl, COMBRD(gdbdefaultrate) & 0xff); outb(siogdbiobase + com_dlbh, (u_int) COMBRD(gdbdefaultrate) >> 8); outb(siogdbiobase + com_cfcr, cfcr); siocnopen(&sp, siogdbiobase, gdbdefaultrate); splx(s); return (0); } #endif static void siocninit(cp) struct consdev *cp; { comconsole = DEV_TO_UNIT(cp->cn_dev); } static int siocncheckc(dev) dev_t dev; { int c; Port_t iobase; int s; struct siocnstate sp; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); if (inb(iobase + com_lsr) & LSR_RXRDY) c = inb(iobase + com_data); else c = -1; siocnclose(&sp, iobase); splx(s); return (c); } int siocngetc(dev) dev_t dev; { int c; Port_t iobase; int s; struct siocnstate sp; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); while (!(inb(iobase + com_lsr) & LSR_RXRDY)) ; c = inb(iobase + com_data); siocnclose(&sp, iobase); splx(s); return (c); } void siocnputc(dev, c) dev_t dev; int c; { int s; struct siocnstate sp; Port_t iobase; if (minor(dev) == siogdbunit) iobase = siogdbiobase; else iobase = siocniobase; s = spltty(); siocnopen(&sp, iobase, comdefaultrate); siocntxwait(iobase); outb(iobase + com_data, c); siocnclose(&sp, iobase); splx(s); } #ifdef __alpha__ int siogdbgetc() { int c; Port_t iobase; int s; struct siocnstate sp; iobase = siogdbiobase; s = spltty(); siocnopen(&sp, iobase, gdbdefaultrate); while (!(inb(iobase + com_lsr) & LSR_RXRDY)) ; c = inb(iobase + com_data); siocnclose(&sp, iobase); splx(s); return (c); } void siogdbputc(c) int c; { int s; struct siocnstate sp; s = spltty(); siocnopen(&sp, siogdbiobase, gdbdefaultrate); siocntxwait(siogdbiobase); outb(siogdbiobase + com_data, c); siocnclose(&sp, siogdbiobase); splx(s); } #endif DRIVER_MODULE(sio, isa, sio_isa_driver, sio_devclass, 0, 0); #if NCARD > 0 DRIVER_MODULE(sio, pccard, sio_pccard_driver, sio_devclass, 0, 0); #endif #if NPCI > 0 DRIVER_MODULE(sio, pci, sio_pci_driver, sio_devclass, 0, 0); #endif #ifdef PC98 /* * pc98 local function */ static void com_tiocm_set(struct com_s *com, int msr) { int s; int tmp = 0; int mask = CMD8251_TxEN|CMD8251_RxEN|CMD8251_DTR|CMD8251_RTS; s=spltty(); com->pc98_prev_modem_status = ( msr & (TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ) | ( com->pc98_prev_modem_status & ~(TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ); tmp |= (CMD8251_TxEN|CMD8251_RxEN); if ( msr & TIOCM_DTR ) tmp |= CMD8251_DTR; if ( msr & TIOCM_RTS ) tmp |= CMD8251_RTS; pc98_i8251_clear_or_cmd( com, mask, tmp ); splx(s); } static void com_tiocm_bis(struct com_s *com, int msr) { int s; int tmp = 0; s=spltty(); com->pc98_prev_modem_status |= ( msr & (TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ); tmp |= CMD8251_TxEN|CMD8251_RxEN; if ( msr & TIOCM_DTR ) tmp |= CMD8251_DTR; if ( msr & TIOCM_RTS ) tmp |= CMD8251_RTS; pc98_i8251_or_cmd( com, tmp ); splx(s); } static void com_tiocm_bic(struct com_s *com, int msr) { int s; int tmp = msr; s=spltty(); com->pc98_prev_modem_status &= ~( msr & (TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ); if ( msr & TIOCM_DTR ) tmp |= CMD8251_DTR; if ( msr & TIOCM_RTS ) tmp |= CMD8251_RTS; pc98_i8251_clear_cmd( com, tmp ); splx(s); } static int com_tiocm_get(struct com_s *com) { return( com->pc98_prev_modem_status ); } static int com_tiocm_get_delta(struct com_s *com) { int tmp; tmp = com->pc98_modem_delta; com->pc98_modem_delta = 0; return( tmp ); } /* convert to TIOCM_?? ( ioctl.h ) */ static int pc98_get_modem_status(struct com_s *com) { register int msr; msr = com->pc98_prev_modem_status & ~(TIOCM_CAR|TIOCM_RI|TIOCM_DSR|TIOCM_CTS); if (com->pc98_8251fifo_enable) { int stat2; stat2 = inb(I8251F_msr); if ( stat2 & CICSCDF_CD ) msr |= TIOCM_CAR; if ( stat2 & CICSCDF_CI ) msr |= TIOCM_RI; if ( stat2 & CICSCDF_DR ) msr |= TIOCM_DSR; if ( stat2 & CICSCDF_CS ) msr |= TIOCM_CTS; #if COM_CARRIER_DETECT_EMULATE if ( msr & (TIOCM_DSR|TIOCM_CTS) ) { msr |= TIOCM_CAR; } #endif } else { int stat, stat2; stat = inb(com->sts_port); stat2 = inb(com->in_modem_port); if ( !(stat2 & CICSCD_CD) ) msr |= TIOCM_CAR; if ( !(stat2 & CICSCD_CI) ) msr |= TIOCM_RI; if ( stat & STS8251_DSR ) msr |= TIOCM_DSR; if ( !(stat2 & CICSCD_CS) ) msr |= TIOCM_CTS; #if COM_CARRIER_DETECT_EMULATE if ( msr & (TIOCM_DSR|TIOCM_CTS) ) { msr |= TIOCM_CAR; } #endif } return(msr); } static void pc98_check_msr(void* chan) { int msr, delta; int s; register struct tty *tp; struct com_s *com; int mynor; int unit; dev_t dev; dev=(dev_t)chan; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); tp = com->tp; s = spltty(); msr = pc98_get_modem_status(com); /* make change flag */ delta = msr ^ com->pc98_prev_modem_status; if ( delta & TIOCM_CAR ) { if ( com->modem_car_chg_timer ) { if ( -- com->modem_car_chg_timer ) msr ^= TIOCM_CAR; } else { if ((com->modem_car_chg_timer = (msr & TIOCM_CAR) ? DCD_ON_RECOGNITION : DCD_OFF_TOLERANCE) != 0) msr ^= TIOCM_CAR; } } else com->modem_car_chg_timer = 0; delta = ( msr ^ com->pc98_prev_modem_status ) & (TIOCM_CAR|TIOCM_RI|TIOCM_DSR|TIOCM_CTS); com->pc98_prev_modem_status = msr; delta = ( com->pc98_modem_delta |= delta ); splx(s); if ( com->modem_checking || (tp->t_state & (TS_ISOPEN)) ) { if ( delta ) { commint(dev); } timeout(pc98_check_msr, (caddr_t)dev, PC98_CHECK_MODEM_INTERVAL); } else { com->modem_checking = 0; } } static void pc98_msrint_start(dev_t dev) { struct com_s *com; int mynor; int unit; int s = spltty(); mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); com = com_addr(unit); /* modem control line check routine envoke interval is 1/10 sec */ if ( com->modem_checking == 0 ) { com->pc98_prev_modem_status = pc98_get_modem_status(com); com->pc98_modem_delta = 0; timeout(pc98_check_msr, (caddr_t)dev, PC98_CHECK_MODEM_INTERVAL); com->modem_checking = 1; } splx(s); } static void pc98_disable_i8251_interrupt(struct com_s *com, int mod) { /* disable interrupt */ register int tmp; mod |= ~(IEN_Tx|IEN_TxEMP|IEN_Rx); COM_INT_DISABLE tmp = inb( com->intr_ctrl_port ) & ~(IEN_Tx|IEN_TxEMP|IEN_Rx); outb( com->intr_ctrl_port, (com->intr_enable&=~mod) | tmp ); COM_INT_ENABLE } static void pc98_enable_i8251_interrupt(struct com_s *com, int mod) { register int tmp; COM_INT_DISABLE tmp = inb( com->intr_ctrl_port ) & ~(IEN_Tx|IEN_TxEMP|IEN_Rx); outb( com->intr_ctrl_port, (com->intr_enable|=mod) | tmp ); COM_INT_ENABLE } static int pc98_check_i8251_interrupt(struct com_s *com) { return ( com->intr_enable & 0x07 ); } static void pc98_i8251_clear_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE tmp = com->pc98_prev_siocmd & ~(x); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_or_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = com->pc98_prev_siocmd | (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_set_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_clear_or_cmd(struct com_s *com, int clr, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = com->pc98_prev_siocmd & ~(clr); tmp |= (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static int pc98_i8251_get_cmd(struct com_s *com) { return com->pc98_prev_siocmd; } static int pc98_i8251_get_mod(struct com_s *com) { return com->pc98_prev_siomod; } static void pc98_i8251_reset(struct com_s *com, int mode, int command) { if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, CMD8251_RESET); /* internal reset */ DELAY(2); outb(com->cmd_port, mode ); /* mode register */ com->pc98_prev_siomod = mode; DELAY(2); pc98_i8251_set_cmd( com, (command|CMD8251_ER) ); DELAY(10); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE | CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); } static void pc98_check_sysclock(void) { /* get system clock from port */ if ( pc98_machine_type & M_8M ) { /* 8 MHz system & H98 */ sysclock = 8; } else { /* 5 MHz system */ sysclock = 5; } } static void com_cflag_and_speed_set( struct com_s *com, int cflag, int speed) { int cfcr=0, count; int previnterrupt; count = pc98_ttspeedtab( com, speed ); if ( count < 0 ) return; previnterrupt = pc98_check_i8251_interrupt(com); pc98_disable_i8251_interrupt( com, IEN_Tx|IEN_TxEMP|IEN_Rx ); switch ( cflag&CSIZE ) { case CS5: cfcr = MOD8251_5BITS; break; case CS6: cfcr = MOD8251_6BITS; break; case CS7: cfcr = MOD8251_7BITS; break; case CS8: cfcr = MOD8251_8BITS; break; } if ( cflag&PARENB ) { if ( cflag&PARODD ) cfcr |= MOD8251_PODD; else cfcr |= MOD8251_PEVEN; } else cfcr |= MOD8251_PDISAB; if ( cflag&CSTOPB ) cfcr |= MOD8251_STOP2; else cfcr |= MOD8251_STOP1; if ( count & 0x10000 ) cfcr |= MOD8251_CLKX1; else cfcr |= MOD8251_CLKX16; if (epson_machine_id != 0x20) { /* XXX */ int tmp; while (!((tmp = inb(com->sts_port)) & STS8251_TxEMP)) ; } /* set baud rate from ospeed */ pc98_set_baud_rate( com, count ); if ( cfcr != pc98_i8251_get_mod(com) ) pc98_i8251_reset(com, cfcr, pc98_i8251_get_cmd(com) ); pc98_enable_i8251_interrupt( com, previnterrupt ); } static int pc98_ttspeedtab(struct com_s *com, int speed) { int if_type, effect_sp, count = -1, mod; if_type = com->pc98_if_type & 0x0f; switch (com->pc98_if_type) { case COM_IF_INTERNAL: if (PC98SIO_baud_rate_port(if_type) != -1) { count = ttspeedtab(speed, if_8251_type[if_type].speedtab); if (count > 0) { count |= COM1_EXT_CLOCK; break; } } /* for *1CLK asynchronous! mode, TEFUTEFU */ mod = (sysclock == 5) ? 2457600 : 1996800; effect_sp = ttspeedtab( speed, pc98speedtab ); if ( effect_sp < 0 ) /* XXX */ effect_sp = ttspeedtab( (speed - 1), pc98speedtab ); if ( effect_sp <= 0 ) return effect_sp; if ( effect_sp == speed ) mod /= 16; if ( mod % effect_sp ) return(-1); count = mod / effect_sp; if ( count > 65535 ) return(-1); if ( effect_sp != speed ) count |= 0x10000; break; case COM_IF_PC9861K_1: case COM_IF_PC9861K_2: count = 1; break; case COM_IF_IND_SS_1: case COM_IF_IND_SS_2: case COM_IF_PIO9032B_1: case COM_IF_PIO9032B_2: if ( speed == 0 ) return 0; count = ttspeedtab( speed, if_8251_type[if_type].speedtab ); break; case COM_IF_B98_01_1: case COM_IF_B98_01_2: if ( speed == 0 ) return 0; count = ttspeedtab( speed, if_8251_type[if_type].speedtab ); #ifdef B98_01_OLD if (count == 0 || count == 1) { count += 4; count |= 0x20000; /* x1 mode for 76800 and 153600 */ } #endif break; } return count; } static void pc98_set_baud_rate( struct com_s *com, int count ) { int if_type, io, s; if_type = com->pc98_if_type & 0x0f; io = rman_get_start(com->ioportres) & 0xff00; switch (com->pc98_if_type) { case COM_IF_INTERNAL: if (PC98SIO_baud_rate_port(if_type) != -1) { if (count & COM1_EXT_CLOCK) { outb((Port_t)PC98SIO_baud_rate_port(if_type), count & 0xff); break; } else { outb((Port_t)PC98SIO_baud_rate_port(if_type), 0x09); } } if ( count < 0 ) { printf( "[ Illegal count : %d ]", count ); return; } else if ( count == 0 ) return; /* set i8253 */ s = splclock(); if (count != 3) outb( 0x77, 0xb6 ); else outb( 0x77, 0xb4 ); outb( 0x5f, 0); outb( 0x75, count & 0xff ); outb( 0x5f, 0); outb( 0x75, (count >> 8) & 0xff ); splx(s); break; case COM_IF_IND_SS_1: case COM_IF_IND_SS_2: outb(io | PC98SIO_intr_ctrl_port(if_type), 0); outb(io | PC98SIO_baud_rate_port(if_type), 0); outb(io | PC98SIO_baud_rate_port(if_type), 0xc0); outb(io | PC98SIO_baud_rate_port(if_type), (count >> 8) | 0x80); outb(io | PC98SIO_baud_rate_port(if_type), count & 0xff); break; case COM_IF_PIO9032B_1: case COM_IF_PIO9032B_2: outb(io | PC98SIO_baud_rate_port(if_type), count); break; case COM_IF_B98_01_1: case COM_IF_B98_01_2: outb(io | PC98SIO_baud_rate_port(if_type), count & 0x0f); #ifdef B98_01_OLD /* * Some old B98_01 board should be controlled * in different way, but this hasn't been tested yet. */ outb(io | PC98SIO_func_port(if_type), (count & 0x20000) ? 0xf0 : 0xf2); #endif break; } } static int pc98_check_if_type(device_t dev, struct siodev *iod) { int irr, io, if_type, tmp; static short irq_tab[2][8] = { { 3, 5, 6, 9, 10, 12, 13, -1}, { 3, 10, 12, 13, 5, 6, 9, -1} }; if_type = iod->if_type & 0x0f; iod->irq = 0; io = isa_get_port(dev) & 0xff00; if (IS_8251(iod->if_type)) { if (PC98SIO_func_port(if_type) != -1) { outb(io | PC98SIO_func_port(if_type), 0xf2); tmp = ttspeedtab(9600, if_8251_type[if_type].speedtab); if (tmp != -1 && PC98SIO_baud_rate_port(if_type) != -1) outb(io | PC98SIO_baud_rate_port(if_type), tmp); } iod->cmd = io | PC98SIO_cmd_port(if_type); iod->sts = io | PC98SIO_sts_port(if_type); iod->mod = io | PC98SIO_in_modem_port(if_type); iod->ctrl = io | PC98SIO_intr_ctrl_port(if_type); if (iod->if_type == COM_IF_INTERNAL) { iod->irq = 4; if (pc98_check_8251vfast()) { PC98SIO_baud_rate_port(if_type) = I8251F_div; if_8251_type[if_type].speedtab = pc98fast_speedtab; } } else { tmp = inb( iod->mod ) & if_8251_type[if_type].irr_mask; if ((isa_get_port(dev) & 0xff) == IO_COM2) iod->irq = irq_tab[0][tmp]; else iod->irq = irq_tab[1][tmp]; } } else { irr = if_16550a_type[if_type].irr_read; #ifdef COM_MULTIPORT if (!COM_ISMULTIPORT(device_get_flags(dev)) || device_get_unit(dev) == COM_MPMASTER(device_get_flags(dev))) #endif if (irr != -1) { tmp = inb(io | irr); if (isa_get_port(dev) & 0x01) /* XXX depend on RSB-384 */ iod->irq = irq_tab[1][tmp >> 3]; else iod->irq = irq_tab[0][tmp & 0x07]; } } if ( iod->irq == -1 ) return -1; return 0; } static void pc98_set_ioport(struct com_s *com) { int if_type = com->pc98_if_type & 0x0f; Port_t io = rman_get_start(com->ioportres) & 0xff00; pc98_check_sysclock(); com->data_port = io | PC98SIO_data_port(if_type); com->cmd_port = io | PC98SIO_cmd_port(if_type); com->sts_port = io | PC98SIO_sts_port(if_type); com->in_modem_port = io | PC98SIO_in_modem_port(if_type); com->intr_ctrl_port = io | PC98SIO_intr_ctrl_port(if_type); } static int pc98_check_8251vfast(void) { int i; outb(I8251F_div, 0x8c); DELAY(10); for (i = 0; i < 100; i++) { if ((inb(I8251F_div) & 0x80) != 0) { i = 0; break; } DELAY(1); } outb(I8251F_div, 0); DELAY(10); for (; i < 100; i++) { if ((inb(I8251F_div) & 0x80) == 0) return 1; DELAY(1); } return 0; } static int pc98_check_8251fifo(void) { u_char tmp1, tmp2; tmp1 = inb(I8251F_iir); DELAY(10); tmp2 = inb(I8251F_iir); if (((tmp1 ^ tmp2) & 0x40) != 0 && ((tmp1 | tmp2) & 0x20) == 0) return 1; return 0; } #endif /* PC98 defined */ Index: head/sys/sys/sysctl.h =================================================================== --- head/sys/sys/sysctl.h (revision 62572) +++ head/sys/sys/sysctl.h (revision 62573) @@ -1,514 +1,514 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sysctl.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _SYS_SYSCTL_H_ #define _SYS_SYSCTL_H_ #include #include /* * Definitions for sysctl call. The sysctl call uses a hierarchical name * for objects that can be examined or modified. The name is expressed as * a sequence of integers. Like a file path name, the meaning of each * component depends on its place in the hierarchy. The top-level and kern * identifiers are defined here, and other identifiers are defined in the * respective subsystem header files. */ #define CTL_MAXNAME 12 /* largest number of components supported */ /* * Each subsystem defined by sysctl defines a list of variables * for that subsystem. Each name is either a node with further * levels defined below it, or it is a leaf of some particular * type given below. Each sysctl level defines a set of name/type * pairs to be used by sysctl(1) in manipulating the subsystem. */ struct ctlname { char *ctl_name; /* subsystem name */ int ctl_type; /* type of name */ }; #define CTLTYPE 0xf /* Mask for the type */ #define CTLTYPE_NODE 1 /* name is a node */ #define CTLTYPE_INT 2 /* name describes an integer */ #define CTLTYPE_STRING 3 /* name describes a string */ #define CTLTYPE_QUAD 4 /* name describes a 64-bit number */ #define CTLTYPE_OPAQUE 5 /* name describes a structure */ #define CTLTYPE_STRUCT CTLTYPE_OPAQUE /* name describes a structure */ #define CTLFLAG_RD 0x80000000 /* Allow reads of variable */ #define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */ #define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR) #define CTLFLAG_NOLOCK 0x20000000 /* XXX Don't Lock */ #define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */ #define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */ #define CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */ /* * USE THIS instead of a hardwired number from the categories below * to get dynamically assigned sysctl entries using the linker-set * technology. This is the way nearly all new sysctl variables should * be implemented. * e.g. SYSCTL_INT(_parent, OID_AUTO, name, CTLFLAG_RW, &variable, 0, ""); */ #define OID_AUTO (-1) #ifdef _KERNEL #define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, int arg2, \ struct sysctl_req *req /* * This describes the access space for a sysctl request. This is needed * so that we can use the interface from the kernel or from user-space. */ struct sysctl_req { struct proc *p; int lock; void *oldptr; size_t oldlen; size_t oldidx; int (*oldfunc)(struct sysctl_req *, const void *, size_t); void *newptr; size_t newlen; size_t newidx; int (*newfunc)(struct sysctl_req *, void *, size_t); }; SLIST_HEAD(sysctl_oid_list, sysctl_oid); /* * This describes one "oid" in the MIB tree. Potentially more nodes can * be hidden behind it, expanded by the handler. */ struct sysctl_oid { struct sysctl_oid_list *oid_parent; SLIST_ENTRY(sysctl_oid) oid_link; int oid_number; int oid_kind; void *oid_arg1; int oid_arg2; const char *oid_name; - int (*oid_handler) (SYSCTL_HANDLER_ARGS); + int (*oid_handler)(SYSCTL_HANDLER_ARGS); const char *oid_fmt; }; #define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l) #define SYSCTL_OUT(r, p, l) (r->oldfunc)(r, p, l) -int sysctl_handle_int (SYSCTL_HANDLER_ARGS); -int sysctl_handle_long (SYSCTL_HANDLER_ARGS); -int sysctl_handle_intptr (SYSCTL_HANDLER_ARGS); -int sysctl_handle_string (SYSCTL_HANDLER_ARGS); -int sysctl_handle_opaque (SYSCTL_HANDLER_ARGS); +int sysctl_handle_int(SYSCTL_HANDLER_ARGS); +int sysctl_handle_long(SYSCTL_HANDLER_ARGS); +int sysctl_handle_intptr(SYSCTL_HANDLER_ARGS); +int sysctl_handle_string(SYSCTL_HANDLER_ARGS); +int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS); /* * These functions are used to add/remove an oid from the mib. */ void sysctl_register_oid(struct sysctl_oid *oidp); void sysctl_unregister_oid(struct sysctl_oid *oidp); /* Declare an oid to allow child oids to be added to it. */ #define SYSCTL_DECL(name) \ extern struct sysctl_oid_list sysctl_##name##_children /* This constructs a "raw" MIB oid. */ #define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ static struct sysctl_oid sysctl__##parent##_##name = { \ &sysctl_##parent##_children, { 0 }, \ nbr, kind, a1, a2, #name, handler, fmt }; \ DATA_SET(sysctl_set, sysctl__##parent##_##name); /* This constructs a node from which other oids can hang. */ #define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \ struct sysctl_oid_list sysctl_##parent##_##name##_children; \ SYSCTL_OID(parent, nbr, name, CTLTYPE_NODE|access, \ (void*)&sysctl_##parent##_##name##_children, 0, handler, \ "N", descr); /* Oid for a string. len can be 0 to indicate '\0' termination. */ #define SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|access, \ arg, len, sysctl_handle_string, "A", descr) /* Oid for an int. If ptr is NULL, val is returned. */ #define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ ptr, val, sysctl_handle_int, "I", descr) /* Oid for a long. The pointer must be non NULL. */ #define SYSCTL_LONG(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ ptr, 0, sysctl_handle_long, "L", descr) /* Oid for an opaque object. Specified by a pointer and a length. */ #define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|access, \ ptr, len, sysctl_handle_opaque, fmt, descr) /* Oid for a struct. Specified by a pointer and a type. */ #define SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|access, \ ptr, sizeof(struct type), sysctl_handle_opaque, \ "S," #type, descr) /* Oid for a procedure. Specified by a pointer and an arg. */ #define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ SYSCTL_OID(parent, nbr, name, access, \ ptr, arg, handler, fmt, descr) #endif /* _KERNEL */ /* * Top-level identifiers */ #define CTL_UNSPEC 0 /* unused */ #define CTL_KERN 1 /* "high kernel": proc, limits */ #define CTL_VM 2 /* virtual memory */ #define CTL_VFS 3 /* file system, mount type is next */ #define CTL_NET 4 /* network, see socket.h */ #define CTL_DEBUG 5 /* debugging parameters */ #define CTL_HW 6 /* generic cpu/io */ #define CTL_MACHDEP 7 /* machine dependent */ #define CTL_USER 8 /* user-level */ #define CTL_P1003_1B 9 /* POSIX 1003.1B */ #define CTL_MAXID 10 /* number of valid top-level ids */ #define CTL_NAMES { \ { 0, 0 }, \ { "kern", CTLTYPE_NODE }, \ { "vm", CTLTYPE_NODE }, \ { "vfs", CTLTYPE_NODE }, \ { "net", CTLTYPE_NODE }, \ { "debug", CTLTYPE_NODE }, \ { "hw", CTLTYPE_NODE }, \ { "machdep", CTLTYPE_NODE }, \ { "user", CTLTYPE_NODE }, \ { "p1003_1b", CTLTYPE_NODE }, \ } /* * CTL_KERN identifiers */ #define KERN_OSTYPE 1 /* string: system version */ #define KERN_OSRELEASE 2 /* string: system release */ #define KERN_OSREV 3 /* int: system revision */ #define KERN_VERSION 4 /* string: compile time info */ #define KERN_MAXVNODES 5 /* int: max vnodes */ #define KERN_MAXPROC 6 /* int: max processes */ #define KERN_MAXFILES 7 /* int: max open files */ #define KERN_ARGMAX 8 /* int: max arguments to exec */ #define KERN_SECURELVL 9 /* int: system security level */ #define KERN_HOSTNAME 10 /* string: hostname */ #define KERN_HOSTID 11 /* int: host identifier */ #define KERN_CLOCKRATE 12 /* struct: struct clockrate */ #define KERN_VNODE 13 /* struct: vnode structures */ #define KERN_PROC 14 /* struct: process entries */ #define KERN_FILE 15 /* struct: file entries */ #define KERN_PROF 16 /* node: kernel profiling info */ #define KERN_POSIX1 17 /* int: POSIX.1 version */ #define KERN_NGROUPS 18 /* int: # of supplemental group ids */ #define KERN_JOB_CONTROL 19 /* int: is job control available */ #define KERN_SAVED_IDS 20 /* int: saved set-user/group-ID */ #define KERN_BOOTTIME 21 /* struct: time kernel was booted */ #define KERN_NISDOMAINNAME 22 /* string: YP domain name */ #define KERN_UPDATEINTERVAL 23 /* int: update process sleep time */ #define KERN_OSRELDATE 24 /* int: OS release date */ #define KERN_NTP_PLL 25 /* node: NTP PLL control */ #define KERN_BOOTFILE 26 /* string: name of booted kernel */ #define KERN_MAXFILESPERPROC 27 /* int: max open files per proc */ #define KERN_MAXPROCPERUID 28 /* int: max processes per uid */ #define KERN_DUMPDEV 29 /* dev_t: device to dump on */ #define KERN_IPC 30 /* node: anything related to IPC */ #define KERN_DUMMY 31 /* unused */ #define KERN_PS_STRINGS 32 /* int: address of PS_STRINGS */ #define KERN_USRSTACK 33 /* int: address of USRSTACK */ #define KERN_LOGSIGEXIT 34 /* int: do we log sigexit procs? */ #define KERN_MAXID 35 /* number of valid kern ids */ #define CTL_KERN_NAMES { \ { 0, 0 }, \ { "ostype", CTLTYPE_STRING }, \ { "osrelease", CTLTYPE_STRING }, \ { "osrevision", CTLTYPE_INT }, \ { "version", CTLTYPE_STRING }, \ { "maxvnodes", CTLTYPE_INT }, \ { "maxproc", CTLTYPE_INT }, \ { "maxfiles", CTLTYPE_INT }, \ { "argmax", CTLTYPE_INT }, \ { "securelevel", CTLTYPE_INT }, \ { "hostname", CTLTYPE_STRING }, \ { "hostid", CTLTYPE_INT }, \ { "clockrate", CTLTYPE_STRUCT }, \ { "vnode", CTLTYPE_STRUCT }, \ { "proc", CTLTYPE_STRUCT }, \ { "file", CTLTYPE_STRUCT }, \ { "profiling", CTLTYPE_NODE }, \ { "posix1version", CTLTYPE_INT }, \ { "ngroups", CTLTYPE_INT }, \ { "job_control", CTLTYPE_INT }, \ { "saved_ids", CTLTYPE_INT }, \ { "boottime", CTLTYPE_STRUCT }, \ { "nisdomainname", CTLTYPE_STRING }, \ { "update", CTLTYPE_INT }, \ { "osreldate", CTLTYPE_INT }, \ { "ntp_pll", CTLTYPE_NODE }, \ { "bootfile", CTLTYPE_STRING }, \ { "maxfilesperproc", CTLTYPE_INT }, \ { "maxprocperuid", CTLTYPE_INT }, \ { "dumpdev", CTLTYPE_STRUCT }, /* we lie; don't print as int */ \ { "ipc", CTLTYPE_NODE }, \ { "dummy", CTLTYPE_INT }, \ { "ps_strings", CTLTYPE_INT }, \ { "usrstack", CTLTYPE_INT }, \ { "logsigexit", CTLTYPE_INT }, \ } /* * CTL_VFS identifiers */ #define CTL_VFS_NAMES { \ { "vfsconf", CTLTYPE_STRUCT }, \ } /* * KERN_PROC subtypes */ #define KERN_PROC_ALL 0 /* everything */ #define KERN_PROC_PID 1 /* by process id */ #define KERN_PROC_PGRP 2 /* by process group id */ #define KERN_PROC_SESSION 3 /* by session of pid */ #define KERN_PROC_TTY 4 /* by controlling tty */ #define KERN_PROC_UID 5 /* by effective uid */ #define KERN_PROC_RUID 6 /* by real uid */ #define KERN_PROC_ARGS 7 /* get/set arguments/proctitle */ /* * KERN_IPC identifiers */ #define KIPC_MAXSOCKBUF 1 /* int: max size of a socket buffer */ #define KIPC_SOCKBUF_WASTE 2 /* int: wastage factor in sockbuf */ #define KIPC_SOMAXCONN 3 /* int: max length of connection q */ #define KIPC_MAX_LINKHDR 4 /* int: max length of link header */ #define KIPC_MAX_PROTOHDR 5 /* int: max length of network header */ #define KIPC_MAX_HDR 6 /* int: max total length of headers */ #define KIPC_MAX_DATALEN 7 /* int: max length of data? */ #define KIPC_MBSTAT 8 /* struct: mbuf usage statistics */ #define KIPC_NMBCLUSTERS 9 /* int: maximum mbuf clusters */ /* * CTL_HW identifiers */ #define HW_MACHINE 1 /* string: machine class */ #define HW_MODEL 2 /* string: specific machine model */ #define HW_NCPU 3 /* int: number of cpus */ #define HW_BYTEORDER 4 /* int: machine byte order */ #define HW_PHYSMEM 5 /* int: total memory */ #define HW_USERMEM 6 /* int: non-kernel memory */ #define HW_PAGESIZE 7 /* int: software page size */ #define HW_DISKNAMES 8 /* strings: disk drive names */ #define HW_DISKSTATS 9 /* struct: diskstats[] */ #define HW_FLOATINGPT 10 /* int: has HW floating point? */ #define HW_MACHINE_ARCH 11 /* string: machine architecture */ #define HW_MAXID 12 /* number of valid hw ids */ #define CTL_HW_NAMES { \ { 0, 0 }, \ { "machine", CTLTYPE_STRING }, \ { "model", CTLTYPE_STRING }, \ { "ncpu", CTLTYPE_INT }, \ { "byteorder", CTLTYPE_INT }, \ { "physmem", CTLTYPE_INT }, \ { "usermem", CTLTYPE_INT }, \ { "pagesize", CTLTYPE_INT }, \ { "disknames", CTLTYPE_STRUCT }, \ { "diskstats", CTLTYPE_STRUCT }, \ { "floatingpoint", CTLTYPE_INT }, \ } /* * CTL_USER definitions */ #define USER_CS_PATH 1 /* string: _CS_PATH */ #define USER_BC_BASE_MAX 2 /* int: BC_BASE_MAX */ #define USER_BC_DIM_MAX 3 /* int: BC_DIM_MAX */ #define USER_BC_SCALE_MAX 4 /* int: BC_SCALE_MAX */ #define USER_BC_STRING_MAX 5 /* int: BC_STRING_MAX */ #define USER_COLL_WEIGHTS_MAX 6 /* int: COLL_WEIGHTS_MAX */ #define USER_EXPR_NEST_MAX 7 /* int: EXPR_NEST_MAX */ #define USER_LINE_MAX 8 /* int: LINE_MAX */ #define USER_RE_DUP_MAX 9 /* int: RE_DUP_MAX */ #define USER_POSIX2_VERSION 10 /* int: POSIX2_VERSION */ #define USER_POSIX2_C_BIND 11 /* int: POSIX2_C_BIND */ #define USER_POSIX2_C_DEV 12 /* int: POSIX2_C_DEV */ #define USER_POSIX2_CHAR_TERM 13 /* int: POSIX2_CHAR_TERM */ #define USER_POSIX2_FORT_DEV 14 /* int: POSIX2_FORT_DEV */ #define USER_POSIX2_FORT_RUN 15 /* int: POSIX2_FORT_RUN */ #define USER_POSIX2_LOCALEDEF 16 /* int: POSIX2_LOCALEDEF */ #define USER_POSIX2_SW_DEV 17 /* int: POSIX2_SW_DEV */ #define USER_POSIX2_UPE 18 /* int: POSIX2_UPE */ #define USER_STREAM_MAX 19 /* int: POSIX2_STREAM_MAX */ #define USER_TZNAME_MAX 20 /* int: POSIX2_TZNAME_MAX */ #define USER_MAXID 21 /* number of valid user ids */ #define CTL_USER_NAMES { \ { 0, 0 }, \ { "cs_path", CTLTYPE_STRING }, \ { "bc_base_max", CTLTYPE_INT }, \ { "bc_dim_max", CTLTYPE_INT }, \ { "bc_scale_max", CTLTYPE_INT }, \ { "bc_string_max", CTLTYPE_INT }, \ { "coll_weights_max", CTLTYPE_INT }, \ { "expr_nest_max", CTLTYPE_INT }, \ { "line_max", CTLTYPE_INT }, \ { "re_dup_max", CTLTYPE_INT }, \ { "posix2_version", CTLTYPE_INT }, \ { "posix2_c_bind", CTLTYPE_INT }, \ { "posix2_c_dev", CTLTYPE_INT }, \ { "posix2_char_term", CTLTYPE_INT }, \ { "posix2_fort_dev", CTLTYPE_INT }, \ { "posix2_fort_run", CTLTYPE_INT }, \ { "posix2_localedef", CTLTYPE_INT }, \ { "posix2_sw_dev", CTLTYPE_INT }, \ { "posix2_upe", CTLTYPE_INT }, \ { "stream_max", CTLTYPE_INT }, \ { "tzname_max", CTLTYPE_INT }, \ } #define CTL_P1003_1B_ASYNCHRONOUS_IO 1 /* boolean */ #define CTL_P1003_1B_MAPPED_FILES 2 /* boolean */ #define CTL_P1003_1B_MEMLOCK 3 /* boolean */ #define CTL_P1003_1B_MEMLOCK_RANGE 4 /* boolean */ #define CTL_P1003_1B_MEMORY_PROTECTION 5 /* boolean */ #define CTL_P1003_1B_MESSAGE_PASSING 6 /* boolean */ #define CTL_P1003_1B_PRIORITIZED_IO 7 /* boolean */ #define CTL_P1003_1B_PRIORITY_SCHEDULING 8 /* boolean */ #define CTL_P1003_1B_REALTIME_SIGNALS 9 /* boolean */ #define CTL_P1003_1B_SEMAPHORES 10 /* boolean */ #define CTL_P1003_1B_FSYNC 11 /* boolean */ #define CTL_P1003_1B_SHARED_MEMORY_OBJECTS 12 /* boolean */ #define CTL_P1003_1B_SYNCHRONIZED_IO 13 /* boolean */ #define CTL_P1003_1B_TIMERS 14 /* boolean */ #define CTL_P1003_1B_AIO_LISTIO_MAX 15 /* int */ #define CTL_P1003_1B_AIO_MAX 16 /* int */ #define CTL_P1003_1B_AIO_PRIO_DELTA_MAX 17 /* int */ #define CTL_P1003_1B_DELAYTIMER_MAX 18 /* int */ #define CTL_P1003_1B_MQ_OPEN_MAX 19 /* int */ #define CTL_P1003_1B_PAGESIZE 20 /* int */ #define CTL_P1003_1B_RTSIG_MAX 21 /* int */ #define CTL_P1003_1B_SEM_NSEMS_MAX 22 /* int */ #define CTL_P1003_1B_SEM_VALUE_MAX 23 /* int */ #define CTL_P1003_1B_SIGQUEUE_MAX 24 /* int */ #define CTL_P1003_1B_TIMER_MAX 25 /* int */ #define CTL_P1003_1B_MAXID 26 #define CTL_P1003_1B_NAMES { \ { 0, 0 }, \ { "asynchronous_io", CTLTYPE_INT }, \ { "mapped_files", CTLTYPE_INT }, \ { "memlock", CTLTYPE_INT }, \ { "memlock_range", CTLTYPE_INT }, \ { "memory_protection", CTLTYPE_INT }, \ { "message_passing", CTLTYPE_INT }, \ { "prioritized_io", CTLTYPE_INT }, \ { "priority_scheduling", CTLTYPE_INT }, \ { "realtime_signals", CTLTYPE_INT }, \ { "semaphores", CTLTYPE_INT }, \ { "fsync", CTLTYPE_INT }, \ { "shared_memory_objects", CTLTYPE_INT }, \ { "synchronized_io", CTLTYPE_INT }, \ { "timers", CTLTYPE_INT }, \ { "aio_listio_max", CTLTYPE_INT }, \ { "aio_max", CTLTYPE_INT }, \ { "aio_prio_delta_max", CTLTYPE_INT }, \ { "delaytimer_max", CTLTYPE_INT }, \ { "mq_open_max", CTLTYPE_INT }, \ { "pagesize", CTLTYPE_INT }, \ { "rtsig_max", CTLTYPE_INT }, \ { "nsems_max", CTLTYPE_INT }, \ { "sem_value_max", CTLTYPE_INT }, \ { "sigqueue_max", CTLTYPE_INT }, \ { "timer_max", CTLTYPE_INT }, \ } #ifdef _KERNEL /* * Declare some common oids. */ extern struct sysctl_oid_list sysctl__children; SYSCTL_DECL(_kern); SYSCTL_DECL(_sysctl); SYSCTL_DECL(_vm); SYSCTL_DECL(_vfs); SYSCTL_DECL(_net); SYSCTL_DECL(_debug); SYSCTL_DECL(_hw); SYSCTL_DECL(_machdep); SYSCTL_DECL(_user); SYSCTL_DECL(_compat); extern char machine[]; extern char osrelease[]; extern char ostype[]; struct linker_set; void sysctl_register_set(struct linker_set *lsp); void sysctl_unregister_set(struct linker_set *lsp); int kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval); int userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval); int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req); #else /* !_KERNEL */ #include __BEGIN_DECLS int sysctl __P((int *, u_int, void *, size_t *, void *, size_t)); int sysctlbyname __P((const char *, void *, size_t *, void *, size_t)); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_SYSCTL_H_ */ Index: head/sys/vm/vm_meter.c =================================================================== --- head/sys/vm/vm_meter.c (revision 62572) +++ head/sys/vm/vm_meter.c (revision 62573) @@ -1,327 +1,327 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct loadavg averunnable = { {0, 0, 0}, FSCALE }; /* load average, of runnable procs */ struct vmmeter cnt; static int maxslp = MAXSLP; /* * Constants for averages over 1, 5, and 15 minutes * when sampling at 5 second intervals. */ static fixpt_t cexp[3] = { 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 0.9944598480048967 * FSCALE, /* exp(-1/180) */ }; /* * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. */ static void loadav(struct loadavg *avg) { register int i, nrun; register struct proc *p; for (nrun = 0, p = allproc.lh_first; p != 0; p = p->p_list.le_next) { switch (p->p_stat) { case SSLEEP: if (p->p_priority > PZERO || p->p_slptime != 0) continue; /* FALLTHROUGH */ case SRUN: case SIDL: nrun++; } } for (i = 0; i < 3; i++) avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; } void vmmeter() { if (time_second % 5 == 0) loadav(&averunnable); if (proc0.p_slptime > maxslp / 2) wakeup(&proc0); } SYSCTL_INT(_vm, VM_V_FREE_MIN, v_free_min, CTLFLAG_RW, &cnt.v_free_min, 0, ""); SYSCTL_INT(_vm, VM_V_FREE_TARGET, v_free_target, CTLFLAG_RW, &cnt.v_free_target, 0, ""); SYSCTL_INT(_vm, VM_V_FREE_RESERVED, v_free_reserved, CTLFLAG_RW, &cnt.v_free_reserved, 0, ""); SYSCTL_INT(_vm, VM_V_INACTIVE_TARGET, v_inactive_target, CTLFLAG_RW, &cnt.v_inactive_target, 0, ""); SYSCTL_INT(_vm, VM_V_CACHE_MIN, v_cache_min, CTLFLAG_RW, &cnt.v_cache_min, 0, ""); SYSCTL_INT(_vm, VM_V_CACHE_MAX, v_cache_max, CTLFLAG_RW, &cnt.v_cache_max, 0, ""); SYSCTL_INT(_vm, VM_V_PAGEOUT_FREE_MIN, v_pageout_free_min, CTLFLAG_RW, &cnt.v_pageout_free_min, 0, ""); SYSCTL_INT(_vm, OID_AUTO, v_free_severe, CTLFLAG_RW, &cnt.v_free_severe, 0, ""); SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD, &averunnable, loadavg, "Machine loadaverage history"); static int -vmtotal (SYSCTL_HANDLER_ARGS) +vmtotal(SYSCTL_HANDLER_ARGS) { struct proc *p; struct vmtotal total, *totalp; vm_map_entry_t entry; vm_object_t object; vm_map_t map; int paging; totalp = &total; bzero(totalp, sizeof *totalp); /* * Mark all objects as inactive. */ for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object,object_list)) vm_object_clear_flag(object, OBJ_ACTIVE); /* * Calculate process statistics. */ for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_flag & P_SYSTEM) continue; switch (p->p_stat) { case 0: continue; case SSLEEP: case SSTOP: if (p->p_flag & P_INMEM) { if (p->p_priority <= PZERO) totalp->t_dw++; else if (p->p_slptime < maxslp) totalp->t_sl++; } else if (p->p_slptime < maxslp) totalp->t_sw++; if (p->p_slptime >= maxslp) continue; break; case SRUN: case SIDL: if (p->p_flag & P_INMEM) totalp->t_rq++; else totalp->t_sw++; if (p->p_stat == SIDL) continue; break; } /* * Note active objects. */ paging = 0; for (map = &p->p_vmspace->vm_map, entry = map->header.next; entry != &map->header; entry = entry->next) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || entry->object.vm_object == NULL) continue; vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE); paging |= entry->object.vm_object->paging_in_progress; } if (paging) totalp->t_pw++; } /* * Calculate object memory usage statistics. */ for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object, object_list)) { /* * devices, like /dev/mem, will badly skew our totals */ if (object->type == OBJT_DEVICE) continue; totalp->t_vm += object->size; totalp->t_rm += object->resident_page_count; if (object->flags & OBJ_ACTIVE) { totalp->t_avm += object->size; totalp->t_arm += object->resident_page_count; } if (object->shadow_count > 1) { /* shared object */ totalp->t_vmshr += object->size; totalp->t_rmshr += object->resident_page_count; if (object->flags & OBJ_ACTIVE) { totalp->t_avmshr += object->size; totalp->t_armshr += object->resident_page_count; } } } totalp->t_free = cnt.v_free_count + cnt.v_cache_count; return (sysctl_handle_opaque(oidp, totalp, sizeof total, req)); } SYSCTL_PROC(_vm, VM_METER, vmmeter, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, sizeof(struct vmtotal), vmtotal, "S,vmtotal", "System virtual memory statistics"); SYSCTL_NODE(_vm, OID_AUTO, stats, CTLFLAG_RW, 0, "VM meter stats"); SYSCTL_NODE(_vm_stats, OID_AUTO, sys, CTLFLAG_RW, 0, "VM meter sys stats"); SYSCTL_NODE(_vm_stats, OID_AUTO, vm, CTLFLAG_RW, 0, "VM meter vm stats"); SYSCTL_NODE(_vm_stats, OID_AUTO, misc, CTLFLAG_RW, 0, "VM meter misc stats"); SYSCTL_INT(_vm_stats_sys, OID_AUTO, v_swtch, CTLFLAG_RD, &cnt.v_swtch, 0, "Context switches"); SYSCTL_INT(_vm_stats_sys, OID_AUTO, v_trap, CTLFLAG_RD, &cnt.v_trap, 0, "Traps"); SYSCTL_INT(_vm_stats_sys, OID_AUTO, v_syscall, CTLFLAG_RD, &cnt.v_syscall, 0, "Syscalls"); SYSCTL_INT(_vm_stats_sys, OID_AUTO, v_intr, CTLFLAG_RD, &cnt.v_intr, 0, "Hardware interrupts"); SYSCTL_INT(_vm_stats_sys, OID_AUTO, v_soft, CTLFLAG_RD, &cnt.v_soft, 0, "Software interrupts"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_vm_faults, CTLFLAG_RD, &cnt.v_vm_faults, 0, "VM faults"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_cow_faults, CTLFLAG_RD, &cnt.v_cow_faults, 0, "COW faults"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_cow_optim, CTLFLAG_RD, &cnt.v_cow_optim, 0, "Optimized COW faults"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_zfod, CTLFLAG_RD, &cnt.v_zfod, 0, "Zero fill"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_ozfod, CTLFLAG_RD, &cnt.v_ozfod, 0, "Optimized zero fill"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_swapin, CTLFLAG_RD, &cnt.v_swapin, 0, "Swapin operations"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_swapout, CTLFLAG_RD, &cnt.v_swapout, 0, "Swapout operations"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_swappgsin, CTLFLAG_RD, &cnt.v_swappgsin, 0, "Swapin pages"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_swappgsout, CTLFLAG_RD, &cnt.v_swappgsout, 0, "Swapout pages"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_vnodein, CTLFLAG_RD, &cnt.v_vnodein, 0, "Vnodein operations"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_vnodeout, CTLFLAG_RD, &cnt.v_vnodeout, 0, "Vnodeout operations"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_vnodepgsin, CTLFLAG_RD, &cnt.v_vnodepgsin, 0, "Vnodein pages"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_vnodepgsout, CTLFLAG_RD, &cnt.v_vnodepgsout, 0, "Vnodeout pages"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_intrans, CTLFLAG_RD, &cnt.v_intrans, 0, "In transit page blocking"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_reactivated, CTLFLAG_RD, &cnt.v_reactivated, 0, "Reactivated pages"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_pdwakeups, CTLFLAG_RD, &cnt.v_pdwakeups, 0, "Pagedaemon wakeups"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_pdpages, CTLFLAG_RD, &cnt.v_pdpages, 0, "Pagedaemon page scans"); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_dfree, CTLFLAG_RD, &cnt.v_dfree, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_pfree, CTLFLAG_RD, &cnt.v_pfree, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_tfree, CTLFLAG_RD, &cnt.v_tfree, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_page_size, CTLFLAG_RD, &cnt.v_page_size, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_page_count, CTLFLAG_RD, &cnt.v_page_count, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_free_reserved, CTLFLAG_RD, &cnt.v_free_reserved, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_free_target, CTLFLAG_RD, &cnt.v_free_target, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_free_min, CTLFLAG_RD, &cnt.v_free_min, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_free_count, CTLFLAG_RD, &cnt.v_free_count, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_wire_count, CTLFLAG_RD, &cnt.v_wire_count, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_active_count, CTLFLAG_RD, &cnt.v_active_count, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_inactive_target, CTLFLAG_RD, &cnt.v_inactive_target, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_inactive_count, CTLFLAG_RD, &cnt.v_inactive_count, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_cache_count, CTLFLAG_RD, &cnt.v_cache_count, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_cache_min, CTLFLAG_RD, &cnt.v_cache_min, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_cache_max, CTLFLAG_RD, &cnt.v_cache_max, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_pageout_free_min, CTLFLAG_RD, &cnt.v_pageout_free_min, 0, ""); SYSCTL_INT(_vm_stats_vm, OID_AUTO, v_interrupt_free_min, CTLFLAG_RD, &cnt.v_interrupt_free_min, 0, ""); SYSCTL_INT(_vm_stats_misc, OID_AUTO, zero_page_count, CTLFLAG_RD, &vm_page_zero_count, 0, ""); #if 0 SYSCTL_INT(_vm_stats_misc, OID_AUTO, page_mask, CTLFLAG_RD, &page_mask, 0, ""); SYSCTL_INT(_vm_stats_misc, OID_AUTO, page_shift, CTLFLAG_RD, &page_shift, 0, ""); SYSCTL_INT(_vm_stats_misc, OID_AUTO, first_page, CTLFLAG_RD, &first_page, 0, ""); SYSCTL_INT(_vm_stats_misc, OID_AUTO, last_page, CTLFLAG_RD, &last_page, 0, ""); SYSCTL_INT(_vm_stats_misc, OID_AUTO, vm_page_bucket_count, CTLFLAG_RD, &vm_page_bucket_count, 0, ""); SYSCTL_INT(_vm_stats_misc, OID_AUTO, vm_page_hash_mask, CTLFLAG_RD, &vm_page_hash_mask, 0, ""); #endif Index: head/sys/vm/vm_zone.c =================================================================== --- head/sys/vm/vm_zone.c (revision 62572) +++ head/sys/vm/vm_zone.c (revision 62573) @@ -1,476 +1,476 @@ /* * Copyright (c) 1997, 1998 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header"); /* * This file comprises a very simple zone allocator. This is used * in lieu of the malloc allocator, where needed or more optimal. * * Note that the initial implementation of this had coloring, and * absolutely no improvement (actually perf degradation) occurred. * * Note also that the zones are type stable. The only restriction is * that the first two longwords of a data structure can be changed * between allocations. Any data that must be stable between allocations * must reside in areas after the first two longwords. * * zinitna, zinit, zbootinit are the initialization routines. * zalloc, zfree, are the interrupt/lock unsafe allocation/free routines. * zalloci, zfreei, are the interrupt/lock safe allocation/free routines. */ static struct vm_zone *zlist; -static int sysctl_vm_zone (SYSCTL_HANDLER_ARGS); +static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); static int zone_kmem_pages, zone_kern_pages, zone_kmem_kvaspace; /* * Create a zone, but don't allocate the zone structure. If the * zone had been previously created by the zone boot code, initialize * various parts of the zone code. * * If waits are not allowed during allocation (e.g. during interrupt * code), a-priori allocate the kernel virtual space, and allocate * only pages when needed. * * Arguments: * z pointer to zone structure. * obj pointer to VM object (opt). * name name of zone. * size size of zone entries. * nentries number of zone entries allocated (only ZONE_INTERRUPT.) * flags ZONE_INTERRUPT -- items can be allocated at interrupt time. * zalloc number of pages allocated when memory is needed. * * Note that when using ZONE_INTERRUPT, the size of the zone is limited * by the nentries argument. The size of the memory allocatable is * unlimited if ZONE_INTERRUPT is not set. * */ int zinitna(vm_zone_t z, vm_object_t obj, char *name, int size, int nentries, int flags, int zalloc) { int totsize; if ((z->zflags & ZONE_BOOT) == 0) { z->zsize = (size + ZONE_ROUNDING - 1) & ~(ZONE_ROUNDING - 1); simple_lock_init(&z->zlock); z->zfreecnt = 0; z->ztotal = 0; z->zmax = 0; z->zname = name; z->znalloc = 0; z->zitems = NULL; if (zlist == 0) { zlist = z; } else { z->znext = zlist; zlist = z; } } z->zflags |= flags; /* * If we cannot wait, allocate KVA space up front, and we will fill * in pages as needed. */ if (z->zflags & ZONE_INTERRUPT) { totsize = round_page(z->zsize * nentries); zone_kmem_kvaspace += totsize; z->zkva = kmem_alloc_pageable(kernel_map, totsize); if (z->zkva == 0) return 0; z->zpagemax = totsize / PAGE_SIZE; if (obj == NULL) { z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax); } else { z->zobj = obj; _vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj); } z->zallocflag = VM_ALLOC_INTERRUPT; z->zmax += nentries; } else { z->zallocflag = VM_ALLOC_SYSTEM; z->zmax = 0; } if (z->zsize > PAGE_SIZE) z->zfreemin = 1; else z->zfreemin = PAGE_SIZE / z->zsize; z->zpagecount = 0; if (zalloc) z->zalloc = zalloc; else z->zalloc = 1; return 1; } /* * Subroutine same as zinitna, except zone data structure is allocated * automatically by malloc. This routine should normally be used, except * in certain tricky startup conditions in the VM system -- then * zbootinit and zinitna can be used. Zinit is the standard zone * initialization call. */ vm_zone_t zinit(char *name, int size, int nentries, int flags, int zalloc) { vm_zone_t z; z = (vm_zone_t) malloc(sizeof (struct vm_zone), M_ZONE, M_NOWAIT); if (z == NULL) return NULL; z->zflags = 0; if (zinitna(z, NULL, name, size, nentries, flags, zalloc) == 0) { free(z, M_ZONE); return NULL; } return z; } /* * Initialize a zone before the system is fully up. This routine should * only be called before full VM startup. */ void zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems) { int i; z->zname = name; z->zsize = size; z->zpagemax = 0; z->zobj = NULL; z->zflags = ZONE_BOOT; z->zfreemin = 0; z->zallocflag = 0; z->zpagecount = 0; z->zalloc = 0; z->znalloc = 0; simple_lock_init(&z->zlock); bzero(item, nitems * z->zsize); z->zitems = NULL; for (i = 0; i < nitems; i++) { ((void **) item)[0] = z->zitems; #ifdef INVARIANTS ((void **) item)[1] = (void *) ZENTRY_FREE; #endif z->zitems = item; (char *) item += z->zsize; } z->zfreecnt = nitems; z->zmax = nitems; z->ztotal = nitems; if (zlist == 0) { zlist = z; } else { z->znext = zlist; zlist = z; } } /* * Zone critical region locks. */ static __inline int zlock(vm_zone_t z) { int s; s = splhigh(); simple_lock(&z->zlock); return s; } static __inline void zunlock(vm_zone_t z, int s) { simple_unlock(&z->zlock); splx(s); } /* * void *zalloc(vm_zone_t zone) -- * Returns an item from a specified zone. * * void zfree(vm_zone_t zone, void *item) -- * Frees an item back to a specified zone. * * void *zalloci(vm_zone_t zone) -- * Returns an item from a specified zone, interrupt safe. * * void zfreei(vm_zone_t zone, void *item) -- * Frees an item back to a specified zone, interrupt safe. * */ /* * Zone allocator/deallocator. These are interrupt / (or potentially SMP) * safe. The raw zalloc/zfree routines are in the vm_zone header file, * and are not interrupt safe, but are fast. */ void * zalloci(vm_zone_t z) { int s; void *item; s = zlock(z); item = _zalloc(z); zunlock(z, s); return item; } void zfreei(vm_zone_t z, void *item) { int s; s = zlock(z); _zfree(z, item); zunlock(z, s); return; } /* * Internal zone routine. Not to be called from external (non vm_zone) code. */ void * _zget(vm_zone_t z) { int i; vm_page_t m; int nitems, nbytes; void *item; if (z == NULL) panic("zget: null zone"); if (z->zflags & ZONE_INTERRUPT) { item = (char *) z->zkva + z->zpagecount * PAGE_SIZE; for (i = 0; ((i < z->zalloc) && (z->zpagecount < z->zpagemax)); i++) { vm_offset_t zkva; m = vm_page_alloc(z->zobj, z->zpagecount, z->zallocflag); if (m == NULL) break; zkva = z->zkva + z->zpagecount * PAGE_SIZE; pmap_kenter(zkva, VM_PAGE_TO_PHYS(m)); bzero((caddr_t) zkva, PAGE_SIZE); z->zpagecount++; zone_kmem_pages++; cnt.v_wire_count++; } nitems = (i * PAGE_SIZE) / z->zsize; } else { nbytes = z->zalloc * PAGE_SIZE; /* * Check to see if the kernel map is already locked. We could allow * for recursive locks, but that eliminates a valuable debugging * mechanism, and opens up the kernel map for potential corruption * by inconsistent data structure manipulation. We could also use * the interrupt allocation mechanism, but that has size limitations. * Luckily, we have kmem_map that is a submap of kernel map available * for memory allocation, and manipulation of that map doesn't affect * the kernel map structures themselves. * * We can wait, so just do normal map allocation in the appropriate * map. */ if (lockstatus(&kernel_map->lock, NULL)) { int s; s = splvm(); #ifdef SMP simple_unlock(&z->zlock); #endif item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK); #ifdef SMP simple_lock(&z->zlock); #endif if (item != NULL) zone_kmem_pages += z->zalloc; splx(s); } else { #ifdef SMP simple_unlock(&z->zlock); #endif item = (void *) kmem_alloc(kernel_map, nbytes); #ifdef SMP simple_lock(&z->zlock); #endif if (item != NULL) zone_kern_pages += z->zalloc; } if (item != NULL) { bzero(item, nbytes); } else { nbytes = 0; } nitems = nbytes / z->zsize; } z->ztotal += nitems; /* * Save one for immediate allocation */ if (nitems != 0) { nitems -= 1; for (i = 0; i < nitems; i++) { ((void **) item)[0] = z->zitems; #ifdef INVARIANTS ((void **) item)[1] = (void *) ZENTRY_FREE; #endif z->zitems = item; (char *) item += z->zsize; } z->zfreecnt += nitems; z->znalloc++; } else if (z->zfreecnt > 0) { item = z->zitems; z->zitems = ((void **) item)[0]; #ifdef INVARIANTS if (((void **) item)[1] != (void *) ZENTRY_FREE) zerror(ZONE_ERROR_NOTFREE); ((void **) item)[1] = 0; #endif z->zfreecnt--; z->znalloc++; } else { item = NULL; } return item; } static int -sysctl_vm_zone (SYSCTL_HANDLER_ARGS) +sysctl_vm_zone(SYSCTL_HANDLER_ARGS) { int error=0; vm_zone_t curzone, nextzone; char tmpbuf[128]; char tmpname[14]; snprintf(tmpbuf, sizeof(tmpbuf), "\nITEM SIZE LIMIT USED FREE REQUESTS\n"); error = SYSCTL_OUT(req, tmpbuf, strlen(tmpbuf)); if (error) return (error); for (curzone = zlist; curzone; curzone = nextzone) { int i; int len; int offset; nextzone = curzone->znext; len = strlen(curzone->zname); if (len >= (sizeof(tmpname) - 1)) len = (sizeof(tmpname) - 1); for(i = 0; i < sizeof(tmpname) - 1; i++) tmpname[i] = ' '; tmpname[i] = 0; memcpy(tmpname, curzone->zname, len); tmpname[len] = ':'; offset = 0; if (curzone == zlist) { offset = 1; tmpbuf[0] = '\n'; } snprintf(tmpbuf + offset, sizeof(tmpbuf) - offset, "%s %6.6u, %8.8u, %6.6u, %6.6u, %8.8u\n", tmpname, curzone->zsize, curzone->zmax, (curzone->ztotal - curzone->zfreecnt), curzone->zfreecnt, curzone->znalloc); len = strlen((char *)tmpbuf); if (nextzone == NULL) tmpbuf[len - 1] = 0; error = SYSCTL_OUT(req, tmpbuf, len); if (error) return (error); } return (0); } #ifdef INVARIANT_SUPPORT void zerror(int error) { char *msg; switch (error) { case ZONE_ERROR_INVALID: msg = "zone: invalid zone"; break; case ZONE_ERROR_NOTFREE: msg = "zone: entry not free"; break; case ZONE_ERROR_ALREADYFREE: msg = "zone: freeing free entry"; break; default: msg = "zone: invalid error"; break; } panic(msg); } #endif SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, \ NULL, 0, sysctl_vm_zone, "A", "Zone Info"); SYSCTL_INT(_vm, OID_AUTO, zone_kmem_pages, CTLFLAG_RD, &zone_kmem_pages, 0, "Number of interrupt safe pages allocated by zone"); SYSCTL_INT(_vm, OID_AUTO, zone_kmem_kvaspace, CTLFLAG_RD, &zone_kmem_kvaspace, 0, "KVA space allocated by zone"); SYSCTL_INT(_vm, OID_AUTO, zone_kern_pages, CTLFLAG_RD, &zone_kern_pages, 0, "Number of non-interrupt safe pages allocated by zone");