Index: stable/11/sys/amd64/amd64/sys_machdep.c =================================================================== --- stable/11/sys/amd64/amd64/sys_machdep.c (revision 331642) +++ stable/11/sys/amd64/amd64/sys_machdep.c (revision 331643) @@ -1,759 +1,757 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for kernel_map */ #include #include #include #include #include #include #include #include #include static void user_ldt_deref(struct proc_ldt *pldt); static void user_ldt_derefl(struct proc_ldt *pldt); #define MAX_LD 8192 int max_ldt_segment = 512; SYSCTL_INT(_machdep, OID_AUTO, max_ldt_segment, CTLFLAG_RDTUN, &max_ldt_segment, 0, "Maximum number of allowed LDT segments in the single address space"); static void max_ldt_segment_init(void *arg __unused) { if (max_ldt_segment <= 0) max_ldt_segment = 1; if (max_ldt_segment > MAX_LD) max_ldt_segment = MAX_LD; } SYSINIT(maxldt, SI_SUB_VM_CONF, SI_ORDER_ANY, max_ldt_segment_init, NULL); #ifndef _SYS_SYSPROTO_H_ struct sysarch_args { int op; char *parms; }; #endif int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space) { struct i386_ldt_args *largs, la; struct user_segment_descriptor *lp; int error = 0; /* * XXXKIB check that the BSM generation code knows to encode * the op argument. 
*/ AUDIT_ARG_CMD(uap->op); if (uap_space == UIO_USERSPACE) { error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args)); if (error != 0) return (error); largs = &la; } else largs = (struct i386_ldt_args *)uap->parms; switch (uap->op) { case I386_GET_LDT: error = amd64_get_ldt(td, largs); break; case I386_SET_LDT: if (largs->descs != NULL && largs->num > max_ldt_segment) return (EINVAL); set_pcb_flags(td->td_pcb, PCB_FULL_IRET); if (largs->descs != NULL) { lp = malloc(largs->num * sizeof(struct user_segment_descriptor), M_TEMP, M_WAITOK); error = copyin(largs->descs, lp, largs->num * sizeof(struct user_segment_descriptor)); if (error == 0) error = amd64_set_ldt(td, largs, lp); free(lp, M_TEMP); } else { error = amd64_set_ldt(td, largs, NULL); } break; } return (error); } void update_gdt_gsbase(struct thread *td, uint32_t base) { struct user_segment_descriptor *sd; if (td != curthread) return; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); critical_enter(); sd = PCPU_GET(gs32p); sd->sd_lobase = base & 0xffffff; sd->sd_hibase = (base >> 24) & 0xff; critical_exit(); } void update_gdt_fsbase(struct thread *td, uint32_t base) { struct user_segment_descriptor *sd; if (td != curthread) return; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); critical_enter(); sd = PCPU_GET(fs32p); sd->sd_lobase = base & 0xffffff; sd->sd_hibase = (base >> 24) & 0xff; critical_exit(); } int -sysarch(td, uap) - struct thread *td; - register struct sysarch_args *uap; +sysarch(struct thread *td, struct sysarch_args *uap) { int error = 0; struct pcb *pcb = curthread->td_pcb; uint32_t i386base; uint64_t a64base; struct i386_ioperm_args iargs; struct i386_get_xfpustate i386xfpu; struct amd64_get_xfpustate a64xfpu; #ifdef CAPABILITY_MODE /* * When adding new operations, add a new case statement here to * explicitly indicate whether or not the operation is safe to * perform in capability mode. */ if (IN_CAPABILITY_MODE(td)) { switch (uap->op) { case I386_GET_LDT: case I386_SET_LDT: case I386_GET_IOPERM: case I386_GET_FSBASE: case I386_SET_FSBASE: case I386_GET_GSBASE: case I386_SET_GSBASE: case I386_GET_XFPUSTATE: case AMD64_GET_FSBASE: case AMD64_SET_FSBASE: case AMD64_GET_GSBASE: case AMD64_SET_GSBASE: case AMD64_GET_XFPUSTATE: break; case I386_SET_IOPERM: default: #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL); #endif return (ECAPMODE); } } #endif if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT) return (sysarch_ldt(td, uap, UIO_USERSPACE)); /* * XXXKIB check that the BSM generation code knows to encode * the op argument. 
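For context, here is a minimal userland sketch of the I386_SET_LDT path handled above. It assumes the i386_ldt_args field names (start, descs, num) used by this handler, the LDT_AUTO_ALLOC slot-allocation convention implemented in amd64_set_ldt() further down, and the standard <machine/sysarch.h> declarations; the helper name is hypothetical.

#include <machine/segments.h>
#include <machine/sysarch.h>

/*
 * Sketch: have the kernel pick a free LDT slot (LDT_AUTO_ALLOC) and
 * install one user segment descriptor there; on success sysarch()
 * returns the allocated slot index.
 */
static int
install_ldt_entry(struct user_segment_descriptor *sd)
{
        struct i386_ldt_args la;

        la.start = LDT_AUTO_ALLOC;      /* amd64_set_ldt() scans for a slot */
        la.descs = sd;
        la.num = 1;
        return (sysarch(I386_SET_LDT, &la));
}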
*/ AUDIT_ARG_CMD(uap->op); switch (uap->op) { case I386_GET_IOPERM: case I386_SET_IOPERM: if ((error = copyin(uap->parms, &iargs, sizeof(struct i386_ioperm_args))) != 0) return (error); break; case I386_GET_XFPUSTATE: if ((error = copyin(uap->parms, &i386xfpu, sizeof(struct i386_get_xfpustate))) != 0) return (error); a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr; a64xfpu.len = i386xfpu.len; break; case AMD64_GET_XFPUSTATE: if ((error = copyin(uap->parms, &a64xfpu, sizeof(struct amd64_get_xfpustate))) != 0) return (error); break; default: break; } switch (uap->op) { case I386_GET_IOPERM: error = amd64_get_ioperm(td, &iargs); if (error == 0) error = copyout(&iargs, uap->parms, sizeof(struct i386_ioperm_args)); break; case I386_SET_IOPERM: error = amd64_set_ioperm(td, &iargs); break; case I386_GET_FSBASE: update_pcb_bases(pcb); i386base = pcb->pcb_fsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_FSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = i386base; td->td_frame->tf_fs = _ufssel; update_gdt_fsbase(td, i386base); } break; case I386_GET_GSBASE: update_pcb_bases(pcb); i386base = pcb->pcb_gsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_GSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_gsbase = i386base; td->td_frame->tf_gs = _ugssel; update_gdt_gsbase(td, i386base); } break; case AMD64_GET_FSBASE: update_pcb_bases(pcb); error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase)); break; case AMD64_SET_FSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = a64base; td->td_frame->tf_fs = _ufssel; } else error = EINVAL; } break; case AMD64_GET_GSBASE: update_pcb_bases(pcb); error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase)); break; case AMD64_SET_GSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_gsbase = a64base; td->td_frame->tf_gs = _ugssel; } else error = EINVAL; } break; case I386_GET_XFPUSTATE: case AMD64_GET_XFPUSTATE: if (a64xfpu.len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); fpugetregs(td); error = copyout((char *)(get_pcb_user_save_td(td) + 1), a64xfpu.addr, a64xfpu.len); break; default: error = EINVAL; break; } return (error); } int amd64_set_ioperm(td, uap) struct thread *td; struct i386_ioperm_args *uap; { char *iomap; struct amd64tss *tssp; struct system_segment_descriptor *tss_sd; struct pcb *pcb; u_int i; int error; if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); if (uap->start > uap->start + uap->length || uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY) return (EINVAL); /* * XXX * While this is restricted to root, we should probably figure out * whether any other driver is using this i/o address, as so not to * cause confusion. This probably requires a global 'usage registry'. 
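The FSBASE/GSBASE operations above move a single base value through uap->parms, so a caller passes a pointer to it. A minimal usage sketch, assuming the standard <machine/sysarch.h> declarations (the helper name is illustrative):

#include <machine/sysarch.h>
#include <stdint.h>

/* Sketch: point %fs at a user-mode per-thread block. */
static int
set_fs_block(void *blk)
{
        uint64_t base = (uint64_t)(uintptr_t)blk;

        /* The handler above rejects base >= VM_MAXUSER_ADDRESS with EINVAL. */
        return (sysarch(AMD64_SET_FSBASE, &base));
}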
*/ pcb = td->td_pcb; if (pcb->pcb_tssp == NULL) { tssp = (struct amd64tss *)kmem_malloc(kernel_arena, ctob(IOPAGES + 1), M_WAITOK); pmap_pti_add_kva((vm_offset_t)tssp, (vm_offset_t)tssp + ctob(IOPAGES + 1), false); iomap = (char *)&tssp[1]; memset(iomap, 0xff, IOPERM_BITMAP_SIZE); critical_enter(); /* Takes care of tss_rsp0. */ memcpy(tssp, &common_tss[PCPU_GET(cpuid)], sizeof(struct amd64tss)); tssp->tss_iobase = sizeof(*tssp); pcb->pcb_tssp = tssp; tss_sd = PCPU_GET(tss); tss_sd->sd_lobase = (u_long)tssp & 0xffffff; tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful; tss_sd->sd_type = SDT_SYSTSS; ltr(GSEL(GPROC0_SEL, SEL_KPL)); PCPU_SET(tssp, tssp); critical_exit(); } else iomap = (char *)&pcb->pcb_tssp[1]; for (i = uap->start; i < uap->start + uap->length; i++) { if (uap->enable) iomap[i >> 3] &= ~(1 << (i & 7)); else iomap[i >> 3] |= (1 << (i & 7)); } return (error); } int amd64_get_ioperm(td, uap) struct thread *td; struct i386_ioperm_args *uap; { int i, state; char *iomap; if (uap->start >= IOPAGES * PAGE_SIZE * NBBY) return (EINVAL); if (td->td_pcb->pcb_tssp == NULL) { uap->length = 0; goto done; } iomap = (char *)&td->td_pcb->pcb_tssp[1]; i = uap->start; state = (iomap[i >> 3] >> (i & 7)) & 1; uap->enable = !state; uap->length = 1; for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) break; uap->length++; } done: return (0); } /* * Update the GDT entry pointing to the LDT to point to the LDT of the * current process. */ static void set_user_ldt(struct mdproc *mdp) { *PCPU_GET(ldt) = mdp->md_ldt_sd; lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); } static void set_user_ldt_rv(struct vmspace *vmsp) { struct thread *td; td = curthread; if (vmsp != td->td_proc->p_vmspace) return; set_user_ldt(&td->td_proc->p_md); } struct proc_ldt * user_ldt_alloc(struct proc *p, int force) { struct proc_ldt *pldt, *new_ldt; struct mdproc *mdp; struct soft_segment_descriptor sldt; vm_offset_t sva; vm_size_t sz; mtx_assert(&dt_lock, MA_OWNED); mdp = &p->p_md; if (!force && mdp->md_ldt != NULL) return (mdp->md_ldt); mtx_unlock(&dt_lock); new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK); sz = max_ldt_segment * sizeof(struct user_segment_descriptor); sva = kmem_malloc(kernel_arena, sz, M_WAITOK | M_ZERO); new_ldt->ldt_base = (caddr_t)sva; pmap_pti_add_kva(sva, sva + sz, false); new_ldt->ldt_refcnt = 1; sldt.ssd_base = sva; sldt.ssd_limit = sz - 1; sldt.ssd_type = SDT_SYSLDT; sldt.ssd_dpl = SEL_KPL; sldt.ssd_p = 1; sldt.ssd_long = 0; sldt.ssd_def32 = 0; sldt.ssd_gran = 0; mtx_lock(&dt_lock); pldt = mdp->md_ldt; if (pldt != NULL && !force) { pmap_pti_remove_kva(sva, sva + sz); kmem_free(kernel_arena, sva, sz); free(new_ldt, M_SUBPROC); return (pldt); } if (pldt != NULL) { bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment * sizeof(struct user_segment_descriptor)); user_ldt_derefl(pldt); } critical_enter(); ssdtosyssd(&sldt, &p->p_md.md_ldt_sd); atomic_thread_fence_rel(); mdp->md_ldt = new_ldt; critical_exit(); smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, NULL, p->p_vmspace); return (mdp->md_ldt); } void user_ldt_free(struct thread *td) { struct proc *p = td->td_proc; struct mdproc *mdp = &p->p_md; struct proc_ldt *pldt; mtx_assert(&dt_lock, MA_OWNED); if ((pldt = mdp->md_ldt) == NULL) { mtx_unlock(&dt_lock); return; } critical_enter(); mdp->md_ldt = NULL; atomic_thread_fence_rel(); bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd)); if (td == curthread) lldt(GSEL(GNULL_SEL, SEL_KPL)); critical_exit(); user_ldt_deref(pldt); } 
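The iomap written at the end of amd64_set_ioperm() above is the TSS I/O permission bitmap: one bit per port, and a clear bit grants access, which is why "enable" clears bits and "disable" sets them. The same bit arithmetic as a standalone sketch (helper names hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* One bit per I/O port; 0 = access allowed, 1 = access traps. */
static inline void
iomap_set_access(uint8_t *iomap, unsigned port, bool allow)
{
        if (allow)
                iomap[port >> 3] &= ~(1u << (port & 7));
        else
                iomap[port >> 3] |= (1u << (port & 7));
}

static inline bool
iomap_access_allowed(const uint8_t *iomap, unsigned port)
{
        return (((iomap[port >> 3] >> (port & 7)) & 1) == 0);
}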
static void user_ldt_derefl(struct proc_ldt *pldt) { vm_offset_t sva; vm_size_t sz; if (--pldt->ldt_refcnt == 0) { sva = (vm_offset_t)pldt->ldt_base; sz = max_ldt_segment * sizeof(struct user_segment_descriptor); pmap_pti_remove_kva(sva, sva + sz); kmem_free(kernel_arena, sva, sz); free(pldt, M_SUBPROC); } } static void user_ldt_deref(struct proc_ldt *pldt) { mtx_assert(&dt_lock, MA_OWNED); user_ldt_derefl(pldt); mtx_unlock(&dt_lock); } /* * Note for the authors of compat layers (linux, etc): copyout() in * the function below is not a problem since it presents data in * arch-specific format (i.e. i386-specific in this case), not in * the OS-specific one. */ int amd64_get_ldt(struct thread *td, struct i386_ldt_args *uap) { struct proc_ldt *pldt; struct user_segment_descriptor *lp; uint64_t *data; u_int i, num; int error; #ifdef DEBUG printf("amd64_get_ldt: start=%u num=%u descs=%p\n", uap->start, uap->num, (void *)uap->descs); #endif pldt = td->td_proc->p_md.md_ldt; if (pldt == NULL || uap->start >= max_ldt_segment || uap->num == 0) { td->td_retval[0] = 0; return (0); } num = min(uap->num, max_ldt_segment - uap->start); lp = &((struct user_segment_descriptor *)(pldt->ldt_base))[uap->start]; data = malloc(num * sizeof(struct user_segment_descriptor), M_TEMP, M_WAITOK); mtx_lock(&dt_lock); for (i = 0; i < num; i++) data[i] = ((volatile uint64_t *)lp)[i]; mtx_unlock(&dt_lock); error = copyout(data, uap->descs, num * sizeof(struct user_segment_descriptor)); free(data, M_TEMP); if (error == 0) td->td_retval[0] = num; return (error); } int amd64_set_ldt(struct thread *td, struct i386_ldt_args *uap, struct user_segment_descriptor *descs) { struct mdproc *mdp; struct proc_ldt *pldt; struct user_segment_descriptor *dp; struct proc *p; u_int largest_ld, i; int error; #ifdef DEBUG printf("amd64_set_ldt: start=%u num=%u descs=%p\n", uap->start, uap->num, (void *)uap->descs); #endif mdp = &td->td_proc->p_md; error = 0; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); p = td->td_proc; if (descs == NULL) { /* Free descriptors */ if (uap->start == 0 && uap->num == 0) uap->num = max_ldt_segment; if (uap->num == 0) return (EINVAL); if ((pldt = mdp->md_ldt) == NULL || uap->start >= max_ldt_segment) return (0); largest_ld = uap->start + uap->num; if (largest_ld > max_ldt_segment) largest_ld = max_ldt_segment; if (largest_ld < uap->start) return (EINVAL); mtx_lock(&dt_lock); for (i = uap->start; i < largest_ld; i++) ((volatile uint64_t *)(pldt->ldt_base))[i] = 0; mtx_unlock(&dt_lock); return (0); } if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { /* verify range of descriptors to modify */ largest_ld = uap->start + uap->num; if (uap->start >= max_ldt_segment || largest_ld > max_ldt_segment || largest_ld < uap->start) return (EINVAL); } /* Check descriptors for access violations */ for (i = 0; i < uap->num; i++) { dp = &descs[i]; switch (dp->sd_type) { case SDT_SYSNULL: /* system null */ dp->sd_p = 0; break; case SDT_SYS286TSS: case SDT_SYSLDT: case SDT_SYS286BSY: case SDT_SYS286CGT: case SDT_SYSTASKGT: case SDT_SYS286IGT: case SDT_SYS286TGT: case SDT_SYSNULL2: case SDT_SYSTSS: case SDT_SYSNULL3: case SDT_SYSBSY: case SDT_SYSCGT: case SDT_SYSNULL4: case SDT_SYSIGT: case SDT_SYSTGT: return (EACCES); /* memory segment types */ case SDT_MEMEC: /* memory execute only conforming */ case SDT_MEMEAC: /* memory execute only accessed conforming */ case SDT_MEMERC: /* memory execute read conforming */ case SDT_MEMERAC: /* memory execute read accessed conforming */ /* Must be "present" if executable and conforming. 
*/ if (dp->sd_p == 0) return (EACCES); break; case SDT_MEMRO: /* memory read only */ case SDT_MEMROA: /* memory read only accessed */ case SDT_MEMRW: /* memory read write */ case SDT_MEMRWA: /* memory read write accessed */ case SDT_MEMROD: /* memory read only expand dwn limit */ case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ case SDT_MEMRWD: /* memory read write expand dwn limit */ case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ case SDT_MEME: /* memory execute only */ case SDT_MEMEA: /* memory execute only accessed */ case SDT_MEMER: /* memory execute read */ case SDT_MEMERA: /* memory execute read accessed */ break; default: return(EINVAL); } /* Only user (ring-3) descriptors may be present. */ if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL)) return (EACCES); } if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) { /* Allocate a free slot */ mtx_lock(&dt_lock); pldt = user_ldt_alloc(p, 0); if (pldt == NULL) { mtx_unlock(&dt_lock); return (ENOMEM); } /* * start scanning a bit up to leave room for NVidia and * Wine, which still user the "Blat" method of allocation. */ i = 16; dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i]; for (; i < max_ldt_segment; ++i, ++dp) { if (dp->sd_type == SDT_SYSNULL) break; } if (i >= max_ldt_segment) { mtx_unlock(&dt_lock); return (ENOSPC); } uap->start = i; error = amd64_set_ldt_data(td, i, 1, descs); mtx_unlock(&dt_lock); } else { largest_ld = uap->start + uap->num; if (largest_ld > max_ldt_segment) return (EINVAL); mtx_lock(&dt_lock); if (user_ldt_alloc(p, 0) != NULL) { error = amd64_set_ldt_data(td, uap->start, uap->num, descs); } mtx_unlock(&dt_lock); } if (error == 0) td->td_retval[0] = uap->start; return (error); } int amd64_set_ldt_data(struct thread *td, int start, int num, struct user_segment_descriptor *descs) { struct mdproc *mdp; struct proc_ldt *pldt; volatile uint64_t *dst, *src; int i; mtx_assert(&dt_lock, MA_OWNED); mdp = &td->td_proc->p_md; pldt = mdp->md_ldt; dst = (volatile uint64_t *)(pldt->ldt_base); src = (volatile uint64_t *)descs; for (i = 0; i < num; i++) dst[start + i] = src[i]; return (0); } Index: stable/11/sys/amd64/amd64/vm_machdep.c =================================================================== --- stable/11/sys/amd64/amd64/vm_machdep.c (revision 331642) +++ stable/11/sys/amd64/amd64/vm_machdep.c (revision 331643) @@ -1,730 +1,726 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_cpu.h" #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif _Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread), "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread."); _Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb), "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb."); _Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf."); struct savefpu * get_pcb_user_save_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area")); return ((struct savefpu *)p); } struct savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { vm_offset_t p; p = (vm_offset_t)(pcb + 1); return ((struct savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) - sizeof(struct pcb); return ((struct pcb *)p); } void * alloc_fpusave(int flags) { void *res; struct savefpu_ymm *sf; res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } return (res); } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. 
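get_pcb_user_save_td() and get_pcb_td() above carve fixed areas off the top of the kernel stack, and cpu_thread_alloc() later places the trap frame directly below the pcb. The implied layout, as an annotated sketch:

/*
 * Kernel stack layout implied by the accessors above (addresses
 * decrease downward):
 *
 *      td_kstack + td_kstack_pages * PAGE_SIZE
 *      +--------------------------------------+
 *      | user FPU save area, XSAVE-aligned:   |
 *      | roundup2(cpu_max_ext_state_size,     |  <- get_pcb_user_save_td()
 *      |          XSAVE_AREA_ALIGN) bytes     |
 *      +--------------------------------------+
 *      | struct pcb                           |  <- get_pcb_td()
 *      +--------------------------------------+
 *      | struct trapframe                     |  <- td_frame
 *      +--------------------------------------+
 *      | remaining kernel stack (grows down)  |
 */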
*/ void -cpu_fork(td1, p2, td2, flags) - register struct thread *td1; - register struct proc *p2; - struct thread *td2; - int flags; +cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { - register struct proc *p1; + struct proc *p1; struct pcb *pcb2; struct mdproc *mdp1, *mdp2; struct proc_ldt *pldt; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ mdp1 = &p1->p_md; mtx_lock(&dt_lock); if ((pldt = mdp1->md_ldt) != NULL && pldt->ldt_refcnt > 1 && user_ldt_alloc(p1, 1) == NULL) panic("could not copy LDT"); mtx_unlock(&dt_lock); } return; } /* Ensure that td1's pcb is up to date. */ fpuexit(td1); update_pcb_bases(td1->td_pcb); /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. */ td2->td_frame = (struct trapframe *)td2->td_pcb - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_rax = 0; /* Child returns zero */ td2->td_frame->tf_rflags &= ~PSL_C; /* success */ td2->td_frame->tf_rdx = 1; /* * If the parent process has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame unless the debugger had set PF_FORK * on the parent. Otherwise, the child will receive a (likely * unexpected) SIGTRAP when it executes the first instruction after * returning to userland. */ if ((p1->p_pfsflags & PF_FORK) == 0) td2->td_frame->tf_rflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */ pcb2->pcb_rbp = 0; pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *); pcb2->pcb_rbx = (register_t)td2; /* fork_trampoline argument */ pcb2->pcb_rip = (register_t)fork_trampoline; /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_[fg]sbase: cloned above */ /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; td2->td_md.md_invl_gen.gen = 0; /* As an i386, do not copy io permission bitmap. */ pcb2->pcb_tssp = NULL; /* New segment registers. */ set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* Copy the LDT, if necessary. */ mdp1 = &td1->td_proc->p_md; mdp2 = &p2->p_md; mtx_lock(&dt_lock); if (mdp1->md_ldt != NULL) { if (flags & RFMEM) { mdp1->md_ldt->ldt_refcnt++; mdp2->md_ldt = mdp1->md_ldt; bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct system_segment_descriptor)); } else { mdp2->md_ldt = NULL; mdp2->md_ldt = user_ldt_alloc(p2, 0); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); amd64_set_ldt_data(td2, 0, max_ldt_segment, (struct user_segment_descriptor *) mdp1->md_ldt->ldt_base); } } else mdp2->md_ldt = NULL; mtx_unlock(&dt_lock); /* * Now, cpu_switch() can schedule the new process. 
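The pcb_r12/pcb_rbx/pcb_rip values set above arrange for the child's first cpu_switch() to "return" into fork_trampoline(). In outline, that trampoline amounts to the call below; this is a sketch of the control flow, not literal kernel code:

/* Sketch: what fork_trampoline() amounts to for the new thread. */
static void
fork_trampoline_outline(struct thread *td2)
{
        /* %r12 carried fork_return and %rbx carried td2 across cpu_switch(). */
        fork_exit((void (*)(void *, struct trapframe *))fork_return,
            td2, td2->td_frame);
        /* fork_exit() does not return; it exits to user mode via the frame. */
}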
* pcb_rsp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_r12 = (long) func; /* function */ td->td_pcb->pcb_rbx = (long) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. */ mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != 0) user_ldt_free(td); else mtx_unlock(&dt_lock); } void cpu_thread_exit(struct thread *td) { struct pcb *pcb; critical_enter(); if (td == PCPU_GET(fpcurthread)) fpudrop(); critical_exit(); pcb = td->td_pcb; /* Disable any hardware breakpoints. */ if (pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); clear_pcb_flags(pcb, PCB_DBREGS); } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; /* * Clean TSS/iomap */ if (pcb->pcb_tssp != NULL) { pmap_pti_remove_kva((vm_offset_t)pcb->pcb_tssp, (vm_offset_t)pcb->pcb_tssp + ctob(IOPAGES + 1)); kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_tssp, ctob(IOPAGES + 1)); pcb->pcb_tssp = NULL; } } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; struct xstate_hdr *xhdr; td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)pcb - 1; pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); } void cpu_set_syscall_retval(struct thread *td, int error) { switch (error) { case 0: td->td_frame->tf_rax = td->td_retval[0]; td->td_frame->tf_rdx = td->td_retval[1]; td->td_frame->tf_rflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, we know that 'syscall' is 2 bytes, * lcall $X,y is 7 bytes, int 0x80 is 2 bytes. * We saved this in tf_err. * %r10 (which was holding the value of %rcx) is restored * for the next iteration. * %r10 restore is only required for freebsd/amd64 processes, * but shall be innocent for any ia32 ABI. * * Require full context restore to get the arguments * in the registers reloaded at return to usermode. */ td->td_frame->tf_rip -= td->td_frame->tf_err; td->td_frame->tf_r10 = td->td_frame->tf_rcx; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); break; case EJUSTRETURN: break; default: td->td_frame->tf_rax = SV_ABI_ERRNO(td->td_proc, error); td->td_frame->tf_rflags |= PSL_C; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. 
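The ERESTART case above relies on the syscall entry path having stashed the length of the entering instruction in tf_err, so backing %rip up by that length re-executes it. Reduced to a sketch:

/* Sketch: rewind for syscall restart, per the comment above. */
static void
rewind_for_restart(struct trapframe *tf)
{
        tf->tf_rip -= tf->tf_err;  /* 2 for "syscall"/"int 0x80", 7 for lcall */
        tf->tf_r10 = tf->tf_rcx;   /* SYSCALL clobbers %rcx; arg 4 lives in %r10 */
}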
* Those not loaded individually below get their default * values here. */ update_pcb_bases(td0->td_pcb); bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE | PCB_KERNFPU); pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* * Create a new fresh stack for the new thread. */ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); /* If the current thread has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame. Otherwise, the new thread will * receive a (likely unexpected) SIGTRAP when it executes the first * instruction after returning to userland. */ td->td_frame->tf_rflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_r12 = (register_t)fork_return; /* trampoline arg */ pcb2->pcb_rbp = 0; pcb2->pcb_rsp = (register_t)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_rbx = (register_t)td; /* trampoline arg */ pcb2->pcb_rip = (register_t)fork_trampoline; /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_[fg]sbase: cloned above */ /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = PSL_KERNEL | PSL_I; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { /* * Set the trap frame to point at the beginning of the entry * function. */ td->td_frame->tf_rbp = 0; td->td_frame->tf_rsp = (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_rip = (uintptr_t)entry; /* Return address sentinel value to stop stack unwinding. */ suword32((void *)td->td_frame->tf_rsp, 0); /* Pass the argument to the entry point. */ suword32((void *)(td->td_frame->tf_rsp + sizeof(int32_t)), (uint32_t)(uintptr_t)arg); return; } #endif /* * Set the trap frame to point at the beginning of the uts * function. */ td->td_frame->tf_rbp = 0; td->td_frame->tf_rsp = ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f; td->td_frame->tf_rsp -= 8; td->td_frame->tf_rip = (register_t)entry; td->td_frame->tf_ds = _udatasel; td->td_frame->tf_es = _udatasel; td->td_frame->tf_fs = _ufssel; td->td_frame->tf_gs = _ugssel; td->td_frame->tf_flags = TF_HASSEGS; /* Return address sentinel value to stop stack unwinding. */ suword((void *)td->td_frame->tf_rsp, 0); /* Pass the argument to the entry point. 
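The 64-bit stack setup just above encodes the SysV amd64 ABI rule that %rsp % 16 == 8 at function entry, as if a call had pushed a return address. The arithmetic in isolation (a fragment; ss_sp/ss_size come from the stack_t):

uintptr_t sp;

sp = ((uintptr_t)ss_sp + ss_size) & ~(uintptr_t)0xf; /* 16-byte aligned top */
sp -= 8;                     /* entry alignment slot; holds the 0 sentinel */
/* suword() then stores 0 at sp so unwinders stop at this frame. */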
*/ td->td_frame->tf_rdi = (register_t)arg; } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; set_pcb_flags(pcb, PCB_FULL_IRET); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { pcb->pcb_gsbase = (register_t)tls_base; return (0); } #endif pcb->pcb_fsbase = (register_t)tls_base; return (0); } #ifdef SMP static void cpu_reset_proxy() { cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ia32_pause(); /* Wait for other cpu to see that we've started */ CPU_SETOF(cpu_reset_proxyid, &tcrp); stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); } #endif void cpu_reset() { #ifdef SMP cpuset_t map; u_int cnt; if (smp_started) { map = all_cpus; CPU_CLR(PCPU_GET(cpuid), &map); CPU_NAND(&map, &stopped_cpus); if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ CPU_SETOF(0, &started_cpus); wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) { ia32_pause(); cnt++; /* Wait for BSP to announce restart */ } if (cpu_reset_proxy_active == 0) printf("cpu_reset: Failed to restart BSP\n"); enable_intr(); cpu_reset_proxy_active = 2; while (1) ia32_pause(); /* NOTREACHED */ } DELAY(1000000); } #endif cpu_reset_real(); /* NOTREACHED */ } static void cpu_reset_real() { struct region_descriptor null_idt; int b; disable_intr(); /* * Attempt to do a CPU reset via the keyboard controller, * do not turn off GateA20, as any machine that fails * to do the reset here would then end up in no man's land. */ outb(IO_KBD + 4, 0xFE); DELAY(500000); /* wait 0.5 sec to see if that did it */ /* * Attempt to force a reset via the Reset Control register at * I/O port 0xcf9. Bit 2 forces a system reset when it * transitions from 0 to 1. Bit 1 selects the type of reset * to attempt: 0 selects a "soft" reset, and 1 selects a * "hard" reset. We try a "hard" reset. The first write sets * bit 1 to select a "hard" reset and clears bit 2. The * second write forces a 0 -> 1 transition in bit 2 to trigger * a reset. */ outb(0xcf9, 0x2); outb(0xcf9, 0x6); DELAY(500000); /* wait 0.5 sec to see if that did it */ /* * Attempt to force a reset via the Fast A20 and Init register * at I/O port 0x92. Bit 1 serves as an alternate A20 gate. * Bit 0 asserts INIT# when set to 1. We are careful to only * preserve bit 1 while setting bit 0. We also must clear bit * 0 before setting it if it isn't already clear. */ b = inb(0x92); if (b != 0xff) { if ((b & 0x1) != 0) outb(0x92, b & 0xfe); outb(0x92, b | 0x1); DELAY(500000); /* wait 0.5 sec to see if that did it */ } printf("No known reset method worked, attempting CPU shutdown\n"); DELAY(1000000); /* wait 1 sec for printf to complete */ /* Wipe the IDT. */ null_idt.rd_limit = 0; null_idt.rd_base = 0; lidt(&null_idt); /* "good night, sweet prince .... " */ breakpoint(); /* NOTREACHED */ while(1); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. 
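cpu_reset_real() above walks a ladder of progressively blunter reset mechanisms. The Reset Control register step in isolation, matching the commentary in the code (outb() as declared in <machine/cpufunc.h>):

/* Sketch: hard reset via the Reset Control register at I/O port 0xcf9. */
static void
reset_via_0xcf9(void)
{
        outb(0xcf9, 0x2);    /* select "hard" reset; trigger bit (bit 2) low */
        outb(0xcf9, 0x6);    /* 0 -> 1 transition of bit 2 forces the reset */
}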
* Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(vm_paddr_t addr) { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } Index: stable/11/sys/arm/arm/sys_machdep.c =================================================================== --- stable/11/sys/arm/arm/sys_machdep.c (revision 331642) +++ stable/11/sys/arm/arm/sys_machdep.c (revision 331643) @@ -1,246 +1,244 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef _SYS_SYSPROTO_H_ struct sysarch_args { int op; char *parms; }; #endif /* Prototypes */ static int arm32_sync_icache (struct thread *, void *); static int arm32_drain_writebuf(struct thread *, void *); #if __ARM_ARCH >= 6 static int sync_icache(uintptr_t addr, size_t len) { size_t size; vm_offset_t rv; /* * Align starting address to even number because value of "1" * is used as return value for success. */ len += addr & 1; addr &= ~1; /* Break whole range to pages. */ do { size = PAGE_SIZE - (addr & PAGE_MASK); size = min(size, len); rv = dcache_wb_pou_checked(addr, size); if (rv == 1) /* see dcache_wb_pou_checked() */ rv = icache_inv_pou_checked(addr, size); if (rv != 1) { if (!useracc((void *)addr, size, VM_PROT_READ)) { /* Invalid access */ return (rv); } /* Valid but unmapped page - skip it. */ } len -= size; addr += size; } while (len > 0); /* Invalidate branch predictor buffer. 
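sync_icache() above backs the ARM_SYNC_ICACHE sysarch operation, which code that writes instructions (a JIT, a trampoline generator) must issue before executing them. A minimal caller sketch, assuming the arm_sync_icache_args field names (addr, len) used by the handler that follows:

#include <machine/sysarch.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch: make code just written to [buf, buf + len) fetchable. */
static int
sync_new_code(void *buf, size_t len)
{
        struct arm_sync_icache_args a;

        a.addr = (uintptr_t)buf;
        a.len = len;
        return (sysarch(ARM_SYNC_ICACHE, &a));
}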
*/ bpb_inv_all(); return (1); } #endif static int arm32_sync_icache(struct thread *td, void *args) { struct arm_sync_icache_args ua; int error; ksiginfo_t ksi; #if __ARM_ARCH >= 6 vm_offset_t rv; #endif if ((error = copyin(args, &ua, sizeof(ua))) != 0) return (error); if (ua.len == 0) { td->td_retval[0] = 0; return (0); } /* * Validate arguments. Address and length are unsigned, * so we can use wrapped overflow check. */ if (((ua.addr + ua.len) < ua.addr) || ((ua.addr + ua.len) > VM_MAXUSER_ADDRESS)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGSEGV; ksi.ksi_code = SEGV_ACCERR; ksi.ksi_addr = (void *)max(ua.addr, VM_MAXUSER_ADDRESS); trapsignal(td, &ksi); return (EINVAL); } #if __ARM_ARCH >= 6 rv = sync_icache(ua.addr, ua.len); if (rv != 1) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGSEGV; ksi.ksi_code = SEGV_MAPERR; ksi.ksi_addr = (void *)rv; trapsignal(td, &ksi); return (EINVAL); } #else cpu_icache_sync_range(ua.addr, ua.len); #endif td->td_retval[0] = 0; return (0); } static int arm32_drain_writebuf(struct thread *td, void *args) { /* No args. */ #if __ARM_ARCH < 6 cpu_drain_writebuf(); #else dsb(); cpu_l2cache_drain_writebuf(); #endif td->td_retval[0] = 0; return (0); } static int arm32_set_tp(struct thread *td, void *args) { #if __ARM_ARCH >= 6 set_tls(args); #else td->td_md.md_tp = (register_t)args; *(register_t *)ARM_TP_ADDRESS = (register_t)args; #endif return (0); } static int arm32_get_tp(struct thread *td, void *args) { #if __ARM_ARCH >= 6 td->td_retval[0] = (register_t)get_tls(); #else td->td_retval[0] = *(register_t *)ARM_TP_ADDRESS; #endif return (0); } int -sysarch(td, uap) - struct thread *td; - register struct sysarch_args *uap; +sysarch(struct thread *td, struct sysarch_args *uap) { int error; #ifdef CAPABILITY_MODE /* * When adding new operations, add a new case statement here to * explicitly indicate whether or not the operation is safe to * perform in capability mode. */ if (IN_CAPABILITY_MODE(td)) { switch (uap->op) { case ARM_SYNC_ICACHE: case ARM_DRAIN_WRITEBUF: case ARM_SET_TP: case ARM_GET_TP: case ARM_GET_VFPSTATE: break; default: #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL); #endif return (ECAPMODE); } } #endif switch (uap->op) { case ARM_SYNC_ICACHE: error = arm32_sync_icache(td, uap->parms); break; case ARM_DRAIN_WRITEBUF: error = arm32_drain_writebuf(td, uap->parms); break; case ARM_SET_TP: error = arm32_set_tp(td, uap->parms); break; case ARM_GET_TP: error = arm32_get_tp(td, uap->parms); break; case ARM_GET_VFPSTATE: error = arm_get_vfpstate(td, uap->parms); break; default: error = EINVAL; break; } return (error); } Index: stable/11/sys/arm/arm/vm_machdep.c =================================================================== --- stable/11/sys/arm/arm/vm_machdep.c (revision 331642) +++ stable/11/sys/arm/arm/vm_machdep.c (revision 331643) @@ -1,363 +1,362 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary :forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include "opt_compat.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * struct switchframe and trapframe must both be a multiple of 8 * for correct stack alignment. */ _Static_assert((sizeof(struct switchframe) % 8) == 0, "Bad alignment"); _Static_assert((sizeof(struct trapframe) % 8) == 0, "Bad alignment"); uint32_t initial_fpscr = VFPSCR_DN | VFPSCR_FZ; /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void -cpu_fork(register struct thread *td1, register struct proc *p2, - struct thread *td2, int flags) +cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; struct mdproc *mdp2; if ((flags & RFPROC) == 0) return; /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *) (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; #ifdef __XSCALE__ #ifndef CPU_XSCALE_CORE3 pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE); #endif #endif #ifdef VFP /* Store actual state of VFP */ if (curthread == td1) { critical_enter(); vfp_store(&td1->td_pcb->pcb_vfpstate, false); critical_exit(); } #endif td2->td_pcb = pcb2; /* Clone td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point to mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2)); /* Point the frame to the stack in front of pcb and copy td1's frame */ td2->td_frame = (struct trapframe *)pcb2 - 1; *td2->td_frame = *td1->td_frame; /* * Create a new fresh stack for the new process. 
* Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. */ pmap_set_pcb_pagedir(vmspace_pmap(p2->p_vmspace), pcb2); pcb2->pcb_regs.sf_r4 = (register_t)fork_return; pcb2->pcb_regs.sf_r5 = (register_t)td2; pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline; pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame); #if __ARM_ARCH >= 6 pcb2->pcb_regs.sf_tpidrurw = (register_t)get_tls(); #endif pcb2->pcb_vfpcpu = -1; pcb2->pcb_vfpstate.fpscr = initial_fpscr; tf = td2->td_frame; tf->tf_spsr &= ~PSR_C; tf->tf_r0 = 0; tf->tf_r1 = 0; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_cspr = PSR_SVC32_MODE; #if __ARM_ARCH < 6 td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS; #endif } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; int fixup; #ifdef __ARMEB__ u_int call; #endif frame = td->td_frame; fixup = 0; #ifdef __ARMEB__ /* * __syscall returns an off_t while most other syscalls return an * int. As an off_t is 64-bits and an int is 32-bits we need to * place the returned data into r1. As the lseek and freebsd6_lseek * syscalls also return an off_t they do not need this fixup. */ call = frame->tf_r7; if (call == SYS___syscall) { register_t *ap = &frame->tf_r0; register_t code = ap[_QUAD_LOWWORD]; if (td->td_proc->p_sysent->sv_mask) code &= td->td_proc->p_sysent->sv_mask; fixup = (code != SYS_lseek); } #endif switch (error) { case 0: if (fixup) { frame->tf_r0 = 0; frame->tf_r1 = td->td_retval[0]; } else { frame->tf_r0 = td->td_retval[0]; frame->tf_r1 = td->td_retval[1]; } frame->tf_spsr &= ~PSR_C; /* carry bit */ break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ #if __ARM_ARCH >= 7 if ((frame->tf_spsr & PSR_T) != 0) frame->tf_pc -= THUMB_INSN_SIZE; else #endif frame->tf_pc -= INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_r0 = error; frame->tf_spsr |= PSR_C; /* carry bit */ break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_regs.sf_r4 = (register_t)fork_return; td->td_pcb->pcb_regs.sf_r5 = (register_t)td; td->td_pcb->pcb_regs.sf_lr = (register_t)fork_trampoline; td->td_pcb->pcb_regs.sf_sp = STACKALIGN(td->td_frame); td->td_frame->tf_spsr &= ~PSR_C; td->td_frame->tf_r0 = 0; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = PSR_SVC32_MODE; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. 
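The __ARMEB__ fixup above exists because __syscall() returns a 64-bit off_t: on big-endian ARM the high word of a 64-bit value travels in r0 and the low word in r1, so a 32-bit result must be placed in r1 with r0 zeroed. In outline (a fragment mirroring the fixup branch):

/* Sketch: a 32-bit result returned through a 64-bit (off_t) slot on ARMEB. */
frame->tf_r0 = 0;          /* high word of the 64-bit result */
frame->tf_r1 = retval32;   /* low word carries the actual value */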
*/ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; tf->tf_usr_sp = STACKALIGN((int)stack->ss_sp + stack->ss_size); tf->tf_pc = (int)entry; tf->tf_r0 = (int)arg; tf->tf_spsr = PSR_USR32_MODE; } int cpu_set_user_tls(struct thread *td, void *tls_base) { #if __ARM_ARCH >= 6 td->td_pcb->pcb_regs.sf_tpidrurw = (register_t)tls_base; if (td == curthread) set_tls(tls_base); #else td->td_md.md_tp = (register_t)tls_base; if (td == curthread) { critical_enter(); *(register_t *)ARM_TP_ADDRESS = (register_t)tls_base; critical_exit(); } #endif return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; /* * Ensure td_frame is aligned to an 8 byte boundary as it will be * placed into the stack pointer which must be 8 byte aligned in * the ARM EABI. */ td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1; #ifdef __XSCALE__ #ifndef CPU_XSCALE_CORE3 pmap_use_minicache(td->td_kstack, td->td_kstack_pages * PAGE_SIZE); #endif #endif } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_regs.sf_r4 = (register_t)func; /* function */ td->td_pcb->pcb_regs.sf_r5 = (register_t)arg; /* first arg */ } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending) busdma_swi(); } void cpu_exit(struct thread *td) { } Index: stable/11/sys/arm/include/atomic-v4.h =================================================================== --- stable/11/sys/arm/include/atomic-v4.h (revision 331642) +++ stable/11/sys/arm/include/atomic-v4.h (revision 331643) @@ -1,538 +1,538 @@ /* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */ /*- * Copyright (C) 2003-2004 Olivier Houchard * Copyright (C) 1994-1997 Mark Brinicombe * Copyright (C) 1994 Brini * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of Brini may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_V4_H_ #define _MACHINE_ATOMIC_V4_H_ #ifndef _MACHINE_ATOMIC_H_ #error Do not include this file directly, use #endif #if __ARM_ARCH <= 5 #define isb() __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory") #define dsb() __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory") #define dmb() dsb() #else #error Only use this file with ARMv5 and earlier #endif #define mb() dmb() #define wmb() dmb() #define rmb() dmb() #define __with_interrupts_disabled(expr) \ do { \ u_int cpsr_save, tmp; \ \ __asm __volatile( \ "mrs %0, cpsr;" \ "orr %1, %0, %2;" \ "msr cpsr_fsxc, %1;" \ : "=r" (cpsr_save), "=r" (tmp) \ : "I" (PSR_I | PSR_F) \ : "cc" ); \ (expr); \ __asm __volatile( \ "msr cpsr_fsxc, %0" \ : /* no output */ \ : "r" (cpsr_save) \ : "cc" ); \ } while(0) static __inline uint32_t __swp(uint32_t val, volatile uint32_t *ptr) { __asm __volatile("swp %0, %2, [%3]" : "=&r" (val), "=m" (*ptr) : "r" (val), "r" (ptr), "m" (*ptr) : "memory"); return (val); } #ifdef _KERNEL #define ARM_HAVE_ATOMIC64 static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t val) { __with_interrupts_disabled(*p += val); } static __inline void atomic_add_64(volatile u_int64_t *p, u_int64_t val) { __with_interrupts_disabled(*p += val); } static __inline void atomic_clear_32(volatile uint32_t *address, uint32_t clearmask) { __with_interrupts_disabled(*address &= ~clearmask); } static __inline void atomic_clear_64(volatile uint64_t *address, uint64_t clearmask) { __with_interrupts_disabled(*address &= ~clearmask); } static __inline int atomic_fcmpset_32(volatile u_int32_t *p, volatile u_int32_t *cmpval, volatile u_int32_t newval) { u_int32_t ret; __with_interrupts_disabled( { ret = *p; if (*p == *cmpval) { *p = newval; ret = 1; } else { *cmpval = *p; ret = 0; } }); return (ret); } static __inline int atomic_fcmpset_64(volatile u_int64_t *p, volatile u_int64_t *cmpval, volatile u_int64_t newval) { u_int64_t ret; __with_interrupts_disabled( { if (*p == *cmpval) { *p = newval; ret = 1; } else { *cmpval = *p; ret = 0; } }); return (ret); } static __inline u_int32_t atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { int ret; __with_interrupts_disabled( { if (*p == cmpval) { *p = newval; ret = 1; } else { ret = 0; } }); return (ret); } static __inline u_int64_t atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval) { int ret; __with_interrupts_disabled( { if (*p == cmpval) { *p = newval; ret = 1; } else { ret = 0; } }); return (ret); } static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t v) { uint32_t value; __with_interrupts_disabled( { value = *p; *p += v; }); return (value); } static __inline uint64_t atomic_fetchadd_64(volatile uint64_t *p, uint64_t v) { uint64_t value; __with_interrupts_disabled( { value = *p; *p += v; }); return (value); } static __inline uint64_t atomic_load_64(volatile uint64_t *p) { uint64_t value; __with_interrupts_disabled(value = *p); 
return (value); } static __inline void atomic_set_32(volatile uint32_t *address, uint32_t setmask) { __with_interrupts_disabled(*address |= setmask); } static __inline void atomic_set_64(volatile uint64_t *address, uint64_t setmask) { __with_interrupts_disabled(*address |= setmask); } static __inline void atomic_store_64(volatile uint64_t *p, uint64_t value) { __with_interrupts_disabled(*p = value); } static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t val) { __with_interrupts_disabled(*p -= val); } static __inline void atomic_subtract_64(volatile u_int64_t *p, u_int64_t val) { __with_interrupts_disabled(*p -= val); } #else /* !_KERNEL */ static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t val) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "add %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val) : : "memory"); } static __inline void atomic_clear_32(volatile uint32_t *address, uint32_t clearmask) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "bic %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask) : : "memory"); } static __inline u_int32_t atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { - register int done, ras_start = ARM_RAS_START; + int done, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "cmp %1, %3\n" "streq %4, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" "moveq %1, #1\n" "movne %1, #0\n" : "+r" (ras_start), "=r" (done) ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory"); return (done); } static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t v) { uint32_t start, tmp, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%3]\n" "mov %2, %1\n" "add %2, %2, %4\n" "str %2, [%3]\n" "2:\n" "mov %2, #0\n" "str %2, [%0]\n" "mov %2, #0xffffffff\n" "str %2, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v) : : "memory"); return (start); } static __inline void atomic_set_32(volatile uint32_t *address, uint32_t setmask) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "orr %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask) : : "memory"); } static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t val) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "sub %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val) : : "memory"); } #endif /* _KERNEL */ static __inline uint32_t atomic_readandclear_32(volatile u_int32_t *p) { return (__swp(0, p)); } static __inline 
uint32_t atomic_swap_32(volatile u_int32_t *p, u_int32_t v) { return (__swp(v, p)); } #define atomic_fcmpset_rel_32 atomic_fcmpset_32 #define atomic_fcmpset_acq_32 atomic_fcmpset_32 #define atomic_fcmpset_rel_64 atomic_fcmpset_64 #define atomic_fcmpset_acq_64 atomic_fcmpset_64 #define atomic_fcmpset_acq_long atomic_fcmpset_long #define atomic_fcmpset_rel_long atomic_fcmpset_long #define atomic_cmpset_rel_32 atomic_cmpset_32 #define atomic_cmpset_acq_32 atomic_cmpset_32 #define atomic_cmpset_rel_64 atomic_cmpset_64 #define atomic_cmpset_acq_64 atomic_cmpset_64 #define atomic_set_rel_32 atomic_set_32 #define atomic_set_acq_32 atomic_set_32 #define atomic_clear_rel_32 atomic_clear_32 #define atomic_clear_acq_32 atomic_clear_32 #define atomic_add_rel_32 atomic_add_32 #define atomic_add_acq_32 atomic_add_32 #define atomic_subtract_rel_32 atomic_subtract_32 #define atomic_subtract_acq_32 atomic_subtract_32 #define atomic_store_rel_32 atomic_store_32 #define atomic_store_rel_long atomic_store_long #define atomic_load_acq_32 atomic_load_32 #define atomic_load_acq_long atomic_load_long #define atomic_add_acq_long atomic_add_long #define atomic_add_rel_long atomic_add_long #define atomic_subtract_acq_long atomic_subtract_long #define atomic_subtract_rel_long atomic_subtract_long #define atomic_clear_acq_long atomic_clear_long #define atomic_clear_rel_long atomic_clear_long #define atomic_set_acq_long atomic_set_long #define atomic_set_rel_long atomic_set_long #define atomic_cmpset_acq_long atomic_cmpset_long #define atomic_cmpset_rel_long atomic_cmpset_long #define atomic_load_acq_long atomic_load_long #undef __with_interrupts_disabled static __inline void atomic_add_long(volatile u_long *p, u_long v) { atomic_add_32((volatile uint32_t *)p, v); } static __inline void atomic_clear_long(volatile u_long *p, u_long v) { atomic_clear_32((volatile uint32_t *)p, v); } static __inline int atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe) { return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe)); } #ifdef _KERNEL /* atomic_fcmpset_32 is only defined for the kernel */ static __inline u_long atomic_fcmpset_long(volatile u_long *dst, u_long *old, u_long newe) { return (atomic_fcmpset_32((volatile uint32_t *)dst, (uint32_t *)old, newe)); } #endif static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { return (atomic_fetchadd_32((volatile uint32_t *)p, v)); } static __inline void atomic_readandclear_long(volatile u_long *p) { atomic_readandclear_32((volatile uint32_t *)p); } static __inline void atomic_set_long(volatile u_long *p, u_long v) { atomic_set_32((volatile uint32_t *)p, v); } static __inline void atomic_subtract_long(volatile u_long *p, u_long v) { atomic_subtract_32((volatile uint32_t *)p, v); } /* * ARMv5 does not support SMP. For both kernel and user modes, only a * compiler barrier is needed for fences, since CPU is always * self-consistent. 
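* For the fence functions below this means __compiler_membar(), which is effectively an empty asm statement with a "memory" clobber: it keeps the compiler from reordering memory accesses across the fence but emits no barrier instruction at all. A consumer written as, e.g., * while (atomic_load_acq_32(&flag) == 0) continue; * v = shared_data; * is therefore ordered at compile time only, which is sufficient on a uniprocessor ARMv5 system. (flag, shared_data and v are illustrative names only.)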
*/ static __inline void atomic_thread_fence_acq(void) { __compiler_membar(); } static __inline void atomic_thread_fence_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_acq_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_seq_cst(void) { __compiler_membar(); } #endif /* _MACHINE_ATOMIC_H_ */ Index: stable/11/sys/crypto/des/des_enc.c =================================================================== --- stable/11/sys/crypto/des/des_enc.c (revision 331642) +++ stable/11/sys/crypto/des/des_enc.c (revision 331643) @@ -1,297 +1,297 @@ /* $KAME: des_enc.c,v 1.1 2001/09/10 04:03:58 itojun Exp $ */ /* crypto/des/des_enc.c */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * * This package is an SSL implementation written * by Eric Young (eay@cryptsoft.com). * The implementation was written so as to conform with Netscapes SSL. * * This library is free for commercial and non-commercial use as long as * the following conditions are aheared to. The following conditions * apply to all code found in this distribution, be it the RC4, RSA, * lhash, DES, etc., code; not just the SSL code. The SSL documentation * included with this distribution is covered by the same copyright terms * except that the holder is Tim Hudson (tjh@cryptsoft.com). * * Copyright remains Eric Young's, and as such any Copyright notices in * the code are not to be removed. * If this package is used in a product, Eric Young should be given attribution * as the author of the parts of the library used. * This can be in the form of a textual message at program startup or * in documentation (online or textual) provided with the package. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * "This product includes cryptographic software written by * Eric Young (eay@cryptsoft.com)" * The word 'cryptographic' can be left out if the rouines from the library * being used are not cryptographic related :-). * 4. If you include any Windows specific code (or a derivative thereof) from * the apps directory (application code) you must include an acknowledgement: * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" * * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * The licence and distribution terms for any publically available version or * derivative of this code cannot be changed. i.e. this code cannot simply be * copied and put under another distribution licence * [including the GNU Public Licence.] */ #include __FBSDID("$FreeBSD$"); #include #include extern const DES_LONG des_SPtrans[8][64]; void des_encrypt1(DES_LONG *data, des_key_schedule ks, int enc) { - register DES_LONG l,r,t,u; + DES_LONG l,r,t,u; #ifdef DES_PTR - register const unsigned char *des_SP=(const unsigned char *)des_SPtrans; + const unsigned char *des_SP=(const unsigned char *)des_SPtrans; #endif #ifndef DES_UNROLL - register int i; + int i; #endif - register DES_LONG *s; + DES_LONG *s; r=data[0]; l=data[1]; IP(r,l); /* Things have been modified so that the initial rotate is * done outside the loop. This required the * des_SPtrans values in sp.h to be rotated 1 bit to the right. * One perl script later and things have a 5% speed up on a sparc2. * Thanks to Richard Outerbridge <71755.204@CompuServe.COM> * for pointing this out. */ /* clear the top bits on machines with 8byte longs */ /* shift left by 2 */ r=ROTATE(r,29)&0xffffffffL; l=ROTATE(l,29)&0xffffffffL; s=ks->ks.deslong; /* I don't know if it is worth the effort of loop unrolling the * inner loop */ if (enc) { #ifdef DES_UNROLL D_ENCRYPT(l,r, 0); /* 1 */ D_ENCRYPT(r,l, 2); /* 2 */ D_ENCRYPT(l,r, 4); /* 3 */ D_ENCRYPT(r,l, 6); /* 4 */ D_ENCRYPT(l,r, 8); /* 5 */ D_ENCRYPT(r,l,10); /* 6 */ D_ENCRYPT(l,r,12); /* 7 */ D_ENCRYPT(r,l,14); /* 8 */ D_ENCRYPT(l,r,16); /* 9 */ D_ENCRYPT(r,l,18); /* 10 */ D_ENCRYPT(l,r,20); /* 11 */ D_ENCRYPT(r,l,22); /* 12 */ D_ENCRYPT(l,r,24); /* 13 */ D_ENCRYPT(r,l,26); /* 14 */ D_ENCRYPT(l,r,28); /* 15 */ D_ENCRYPT(r,l,30); /* 16 */ #else for (i=0; i<32; i+=8) { D_ENCRYPT(l,r,i+0); /* 1 */ D_ENCRYPT(r,l,i+2); /* 2 */ D_ENCRYPT(l,r,i+4); /* 3 */ D_ENCRYPT(r,l,i+6); /* 4 */ } #endif } else { #ifdef DES_UNROLL D_ENCRYPT(l,r,30); /* 16 */ D_ENCRYPT(r,l,28); /* 15 */ D_ENCRYPT(l,r,26); /* 14 */ D_ENCRYPT(r,l,24); /* 13 */ D_ENCRYPT(l,r,22); /* 12 */ D_ENCRYPT(r,l,20); /* 11 */ D_ENCRYPT(l,r,18); /* 10 */ D_ENCRYPT(r,l,16); /* 9 */ D_ENCRYPT(l,r,14); /* 8 */ D_ENCRYPT(r,l,12); /* 7 */ D_ENCRYPT(l,r,10); /* 6 */ D_ENCRYPT(r,l, 8); /* 5 */ D_ENCRYPT(l,r, 6); /* 4 */ D_ENCRYPT(r,l, 4); /* 3 */ D_ENCRYPT(l,r, 2); /* 2 */ D_ENCRYPT(r,l, 0); /* 1 */ #else for (i=30; i>0; i-=8) { D_ENCRYPT(l,r,i-0); /* 16 */ D_ENCRYPT(r,l,i-2); /* 15 */ D_ENCRYPT(l,r,i-4); /* 14 */ D_ENCRYPT(r,l,i-6); /* 13 */ } #endif } /* rotate and clear the top bits on machines with 8byte longs */ l=ROTATE(l,3)&0xffffffffL; r=ROTATE(r,3)&0xffffffffL; FP(r,l); data[0]=l; data[1]=r; l=r=t=u=0; } void des_encrypt2(DES_LONG *data, des_key_schedule ks, int enc) { - register DES_LONG l,r,t,u; + DES_LONG l,r,t,u; #ifdef DES_PTR - register const unsigned char *des_SP=(const unsigned char *)des_SPtrans; + const unsigned char *des_SP=(const unsigned char *)des_SPtrans; #endif #ifndef DES_UNROLL - register int i; + int i; #endif - register DES_LONG *s; + DES_LONG *s; r=data[0]; l=data[1]; /* Things have been modified so that the initial rotate is * done outside the loop. This required the * des_SPtrans values in sp.h to be rotated 1 bit to the right. * One perl script later and things have a 5% speed up on a sparc2. * Thanks to Richard Outerbridge <71755.204@CompuServe.COM> * for pointing this out. 
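* (ROTATE(x,n), as defined in des_locl.h, is a right-rotate by n bits on a 32-bit value, so the ROTATE(r,29)/ROTATE(l,29) below amount to left-rotates by 3, and the final ROTATE(l,3)/ROTATE(r,3) at the end of the function rotates the halves back into place.)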
*/ /* clear the top bits on machines with 8byte longs */ r=ROTATE(r,29)&0xffffffffL; l=ROTATE(l,29)&0xffffffffL; s=ks->ks.deslong; /* I don't know if it is worth the effort of loop unrolling the * inner loop */ if (enc) { #ifdef DES_UNROLL D_ENCRYPT(l,r, 0); /* 1 */ D_ENCRYPT(r,l, 2); /* 2 */ D_ENCRYPT(l,r, 4); /* 3 */ D_ENCRYPT(r,l, 6); /* 4 */ D_ENCRYPT(l,r, 8); /* 5 */ D_ENCRYPT(r,l,10); /* 6 */ D_ENCRYPT(l,r,12); /* 7 */ D_ENCRYPT(r,l,14); /* 8 */ D_ENCRYPT(l,r,16); /* 9 */ D_ENCRYPT(r,l,18); /* 10 */ D_ENCRYPT(l,r,20); /* 11 */ D_ENCRYPT(r,l,22); /* 12 */ D_ENCRYPT(l,r,24); /* 13 */ D_ENCRYPT(r,l,26); /* 14 */ D_ENCRYPT(l,r,28); /* 15 */ D_ENCRYPT(r,l,30); /* 16 */ #else for (i=0; i<32; i+=8) { D_ENCRYPT(l,r,i+0); /* 1 */ D_ENCRYPT(r,l,i+2); /* 2 */ D_ENCRYPT(l,r,i+4); /* 3 */ D_ENCRYPT(r,l,i+6); /* 4 */ } #endif } else { #ifdef DES_UNROLL D_ENCRYPT(l,r,30); /* 16 */ D_ENCRYPT(r,l,28); /* 15 */ D_ENCRYPT(l,r,26); /* 14 */ D_ENCRYPT(r,l,24); /* 13 */ D_ENCRYPT(l,r,22); /* 12 */ D_ENCRYPT(r,l,20); /* 11 */ D_ENCRYPT(l,r,18); /* 10 */ D_ENCRYPT(r,l,16); /* 9 */ D_ENCRYPT(l,r,14); /* 8 */ D_ENCRYPT(r,l,12); /* 7 */ D_ENCRYPT(l,r,10); /* 6 */ D_ENCRYPT(r,l, 8); /* 5 */ D_ENCRYPT(l,r, 6); /* 4 */ D_ENCRYPT(r,l, 4); /* 3 */ D_ENCRYPT(l,r, 2); /* 2 */ D_ENCRYPT(r,l, 0); /* 1 */ #else for (i=30; i>0; i-=8) { D_ENCRYPT(l,r,i-0); /* 16 */ D_ENCRYPT(r,l,i-2); /* 15 */ D_ENCRYPT(l,r,i-4); /* 14 */ D_ENCRYPT(r,l,i-6); /* 13 */ } #endif } /* rotate and clear the top bits on machines with 8byte longs */ data[0]=ROTATE(l,3)&0xffffffffL; data[1]=ROTATE(r,3)&0xffffffffL; l=r=t=u=0; } void des_encrypt3(DES_LONG *data, des_key_schedule ks1, des_key_schedule ks2, des_key_schedule ks3) { - register DES_LONG l,r; + DES_LONG l,r; l=data[0]; r=data[1]; IP(l,r); data[0]=l; data[1]=r; des_encrypt2((DES_LONG *)data,ks1,DES_ENCRYPT); des_encrypt2((DES_LONG *)data,ks2,DES_DECRYPT); des_encrypt2((DES_LONG *)data,ks3,DES_ENCRYPT); l=data[0]; r=data[1]; FP(r,l); data[0]=l; data[1]=r; } void des_decrypt3(DES_LONG *data, des_key_schedule ks1, des_key_schedule ks2, des_key_schedule ks3) { - register DES_LONG l,r; + DES_LONG l,r; l=data[0]; r=data[1]; IP(l,r); data[0]=l; data[1]=r; des_encrypt2((DES_LONG *)data,ks3,DES_DECRYPT); des_encrypt2((DES_LONG *)data,ks2,DES_ENCRYPT); des_encrypt2((DES_LONG *)data,ks1,DES_DECRYPT); l=data[0]; r=data[1]; FP(r,l); data[0]=l; data[1]=r; } Index: stable/11/sys/crypto/des/des_setkey.c =================================================================== --- stable/11/sys/crypto/des/des_setkey.c (revision 331642) +++ stable/11/sys/crypto/des/des_setkey.c (revision 331643) @@ -1,236 +1,236 @@ /* $KAME: des_setkey.c,v 1.7 2001/09/10 04:03:58 itojun Exp $ */ /* crypto/des/set_key.c */ /* Copyright (C) 1995-1996 Eric Young (eay@mincom.oz.au) * All rights reserved. * * This file is part of an SSL implementation written * by Eric Young (eay@mincom.oz.au). * The implementation was written so as to conform with Netscapes SSL * specification. This library and applications are * FREE FOR COMMERCIAL AND NON-COMMERCIAL USE * as long as the following conditions are aheared to. * * Copyright remains Eric Young's, and as such any Copyright notices in * the code are not to be removed. If this code is used in a product, * Eric Young should be given attribution as the author of the parts used. * This can be in the form of a textual message at program startup or * in documentation (online or textual) provided with the package. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Eric Young (eay@mincom.oz.au) * * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * The licence and distribution terms for any publically available version or * derivative of this code cannot be changed. i.e. this code cannot simply be * copied and put under another distribution licence * [including the GNU Public Licence.] */ /* set_key.c v 1.4 eay 24/9/91 * 1.4 Speed up by 400% :-) * 1.3 added register declarations. * 1.2 unrolled make_key_sched a bit more * 1.1 added norm_expand_bits * 1.0 First working version */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include int des_check_key=0; void des_set_odd_parity(des_cblock *key) { int i; for (i=0; i<DES_KEY_SZ; i++) (*key)[i]=odd_parity[(*key)[i]]; } int des_check_key_parity(des_cblock *key) { int i; for (i=0; i<DES_KEY_SZ; i++) { if ((*key)[i] != odd_parity[(*key)[i]]) return(0); } return(1); } /* Weak and semi-weak keys as taken from "Security for Computer Networks" * by D.W. Davies and W.L. Price, John Wiley & Sons, 1984. */ #define NUM_WEAK_KEY 16 static const des_cblock weak_keys[NUM_WEAK_KEY]={ /* weak keys */ {0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01}, {0xFE,0xFE,0xFE,0xFE,0xFE,0xFE,0xFE,0xFE}, {0x1F,0x1F,0x1F,0x1F,0x0E,0x0E,0x0E,0x0E}, {0xE0,0xE0,0xE0,0xE0,0xF1,0xF1,0xF1,0xF1}, /* semi-weak keys */ {0x01,0xFE,0x01,0xFE,0x01,0xFE,0x01,0xFE}, {0xFE,0x01,0xFE,0x01,0xFE,0x01,0xFE,0x01}, {0x1F,0xE0,0x1F,0xE0,0x0E,0xF1,0x0E,0xF1}, {0xE0,0x1F,0xE0,0x1F,0xF1,0x0E,0xF1,0x0E}, {0x01,0xE0,0x01,0xE0,0x01,0xF1,0x01,0xF1}, {0xE0,0x01,0xE0,0x01,0xF1,0x01,0xF1,0x01}, {0x1F,0xFE,0x1F,0xFE,0x0E,0xFE,0x0E,0xFE}, {0xFE,0x1F,0xFE,0x1F,0xFE,0x0E,0xFE,0x0E}, {0x01,0x1F,0x01,0x1F,0x01,0x0E,0x01,0x0E}, {0x1F,0x01,0x1F,0x01,0x0E,0x01,0x0E,0x01}, {0xE0,0xFE,0xE0,0xFE,0xF1,0xFE,0xF1,0xFE}, {0xFE,0xE0,0xFE,0xE0,0xFE,0xF1,0xFE,0xF1}}; int des_is_weak_key(des_cblock *key) { int i; for (i=0; i<NUM_WEAK_KEY; i++) if (memcmp(weak_keys[i],key,sizeof(des_cblock)) == 0) return(1); return(0); } /* PERM_OP is defined in des_locl.h: * #define PERM_OP(a,b,t,n,m) ((t)=((((a)>>(n))^(b))&(m)),\ * (b)^=(t),\ * (a)=((a)^((t)<<(n)))) */ #define HPERM_OP(a,t,n,m) ((t)=((((a)<<(16-(n)))^(a))&(m)),\ (a)=(a)^(t)^(t>>(16-(n)))) int des_set_key(des_cblock *key, des_key_schedule schedule) { if (des_check_key) { return des_set_key_checked(key, schedule); } else { des_set_key_unchecked(key, schedule); return 0; } } /* return 0 if key parity is odd (correct), * return -1 if key parity error, * return -2 if illegal weak key. */ int des_set_key_checked(des_cblock *key, des_key_schedule schedule) { if (!des_check_key_parity(key)) return(-1); if (des_is_weak_key(key)) return(-2); des_set_key_unchecked(key, schedule); return 0; } void des_set_key_unchecked(des_cblock *key, des_key_schedule schedule) { static int shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0}; - register DES_LONG c,d,t,s,t2; - register const unsigned char *in; - register DES_LONG *k; - register int i; + DES_LONG c,d,t,s,t2; + const unsigned char *in; + DES_LONG *k; + int i; k = &schedule->ks.deslong[0]; in = &(*key)[0]; c2l(in,c); c2l(in,d); /* do PC1 in 47 simple operations :-) * Thanks to John Fletcher (john_fletcher@lccmail.ocf.llnl.gov) * for the inspiration.
:-) */ PERM_OP (d,c,t,4,0x0f0f0f0fL); HPERM_OP(c,t,-2,0xcccc0000L); HPERM_OP(d,t,-2,0xcccc0000L); PERM_OP (d,c,t,1,0x55555555L); PERM_OP (c,d,t,8,0x00ff00ffL); PERM_OP (d,c,t,1,0x55555555L); d= (((d&0x000000ffL)<<16L)| (d&0x0000ff00L) | ((d&0x00ff0000L)>>16L)|((c&0xf0000000L)>>4L)); c&=0x0fffffffL; for (i=0; i<ITERATIONS; i++) { if (shifts2[i]) { c=((c>>2L)|(c<<26L)); d=((d>>2L)|(d<<26L)); } else { c=((c>>1L)|(c<<27L)); d=((d>>1L)|(d<<27L)); } c&=0x0fffffffL; d&=0x0fffffffL; /* could be a few less shifts but I am too lazy at this * point in time to investigate */ s= des_skb[0][ (c )&0x3f ]| des_skb[1][((c>> 6L)&0x03)|((c>> 7L)&0x3c)]| des_skb[2][((c>>13L)&0x0f)|((c>>14L)&0x30)]| des_skb[3][((c>>20L)&0x01)|((c>>21L)&0x06) | ((c>>22L)&0x38)]; t= des_skb[4][ (d )&0x3f ]| des_skb[5][((d>> 7L)&0x03)|((d>> 8L)&0x3c)]| des_skb[6][ (d>>15L)&0x3f ]| des_skb[7][((d>>21L)&0x0f)|((d>>22L)&0x30)]; /* table contained 0213 4657 */ t2=((t<<16L)|(s&0x0000ffffL))&0xffffffffL; *(k++)=ROTATE(t2,30)&0xffffffffL; t2=((s>>16L)|(t&0xffff0000L)); *(k++)=ROTATE(t2,26)&0xffffffffL; } } int des_key_sched(des_cblock *key, des_key_schedule schedule) { return(des_set_key(key,schedule)); } void des_fixup_key_parity(des_cblock *key) { des_set_odd_parity(key); } Index: stable/11/sys/ddb/db_access.c =================================================================== --- stable/11/sys/ddb/db_access.c (revision 331642) +++ stable/11/sys/ddb/db_access.c (revision 331643) @@ -1,107 +1,107 @@ /*- * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Author: David B. Golub, Carnegie Mellon University * Date: 7/90 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include /* * Access unaligned data items on aligned (longword) * boundaries.
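* Bytes are moved one at a time, so no alignment traps can occur, and the value is assembled in host byte order. Sign extension relies on the db_extend[] masks below: reading the bytes 0x00 0x80 as a signed 2-byte value, for instance, first yields 0x8000, which intersects db_extend[2] (0xFFFF8000U), so the result is widened to 0xFFFF8000.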
*/ static unsigned db_extend[] = { /* table for sign-extending */ 0, 0xFFFFFF80U, 0xFFFF8000U, 0xFF800000U }; #ifndef BYTE_MSF #define BYTE_MSF 0 #endif db_expr_t db_get_value(db_addr_t addr, int size, bool is_signed) { char data[sizeof(u_int64_t)]; - register db_expr_t value; - register int i; + db_expr_t value; + int i; if (db_read_bytes(addr, size, data) != 0) { db_printf("*** error reading from address %llx ***\n", (long long)addr); kdb_reenter(); } value = 0; #if BYTE_MSF for (i = 0; i < size; i++) #else /* BYTE_LSF */ for (i = size - 1; i >= 0; i--) #endif { value = (value << 8) + (data[i] & 0xFF); } if (size < 4) { if (is_signed && (value & db_extend[size]) != 0) value |= db_extend[size]; } return (value); } void db_put_value(db_addr_t addr, int size, db_expr_t value) { char data[sizeof(int)]; - register int i; + int i; #if BYTE_MSF for (i = size - 1; i >= 0; i--) #else /* BYTE_LSF */ for (i = 0; i < size; i++) #endif { data[i] = value & 0xFF; value >>= 8; } if (db_write_bytes(addr, size, data) != 0) { db_printf("*** error writing to address %llx ***\n", (long long)addr); kdb_reenter(); } } Index: stable/11/sys/ddb/db_output.c =================================================================== --- stable/11/sys/ddb/db_output.c (revision 331642) +++ stable/11/sys/ddb/db_output.c (revision 331643) @@ -1,394 +1,394 @@ /*- * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Author: David B. Golub, Carnegie Mellon University * Date: 7/90 */ /* * Printf and character output for debugger. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include struct dbputchar_arg { size_t da_nbufr; size_t da_remain; char *da_pbufr; char *da_pnext; }; /* * Character output - tracks position in line. * To do this correctly, we should know how wide * the output device is - then we could zero * the line position when the output device wraps * around to the start of the next line. * * Instead, we count the number of spaces printed * since the last printing character so that we * don't print trailing spaces. This avoids most * of the wraparounds. */ static int db_output_position = 0; /* output column */ static int db_last_non_space = 0; /* last non-space character */ db_expr_t db_tab_stop_width = 8; /* how wide are tab stops?
*/ #define NEXT_TAB(i) rounddown((i) + db_tab_stop_width, db_tab_stop_width) db_expr_t db_max_width = 79; /* output line width */ db_expr_t db_lines_per_page = 20; /* lines per page */ volatile int db_pager_quit; /* user requested quit */ static int db_newlines; /* # lines this page */ static int db_maxlines; /* max lines/page when paging */ static int ddb_use_printf = 0; SYSCTL_INT(_debug, OID_AUTO, ddb_use_printf, CTLFLAG_RW, &ddb_use_printf, 0, "use printf for all ddb output"); static void db_putc(int c); static void db_puts(const char *str); static void db_putchar(int c, void *arg); static void db_pager(void); /* * Force pending whitespace. */ void db_force_whitespace(void) { - register int last_print, next_tab; + int last_print, next_tab; last_print = db_last_non_space; while (last_print < db_output_position) { next_tab = NEXT_TAB(last_print); if (next_tab <= db_output_position) { while (last_print < next_tab) { /* DON'T send a tab!!! */ cnputc(' '); db_capture_writech(' '); last_print++; } } else { cnputc(' '); db_capture_writech(' '); last_print++; } } db_last_non_space = db_output_position; } /* * Output character. Buffer whitespace. */ static void db_putchar(int c, void *arg) { struct dbputchar_arg *dap = arg; if (dap->da_pbufr == NULL) { /* No buffered output is provided. */ db_putc(c); } else { *dap->da_pnext++ = c; dap->da_remain--; /* Always keep the buffer 0-terminated. */ *dap->da_pnext = '\0'; /* Check if the buffer needs to be flushed. */ if (dap->da_remain < 2 || c == '\n') { db_puts(dap->da_pbufr); dap->da_pnext = dap->da_pbufr; dap->da_remain = dap->da_nbufr; *dap->da_pnext = '\0'; } } } static void db_putc(int c) { /* * If not in the debugger or the user requests it, output data to * both the console and the message buffer. */ if (!kdb_active || ddb_use_printf) { printf("%c", c); if (!kdb_active) return; if (c == '\r' || c == '\n') db_check_interrupt(); if (c == '\n' && db_maxlines > 0) { db_newlines++; if (db_newlines >= db_maxlines) db_pager(); } return; } /* Otherwise, output data directly to the console. */ if (c > ' ' && c <= '~') { /* * Printing character. * If we have spaces to print, print them first. * Use tabs if possible. */ db_force_whitespace(); cnputc(c); db_capture_writech(c); db_output_position++; db_last_non_space = db_output_position; } else if (c == '\n') { /* Newline */ cnputc(c); db_capture_writech(c); db_output_position = 0; db_last_non_space = 0; db_check_interrupt(); if (db_maxlines > 0) { db_newlines++; if (db_newlines >= db_maxlines) db_pager(); } } else if (c == '\r') { /* Return */ cnputc(c); db_capture_writech(c); db_output_position = 0; db_last_non_space = 0; db_check_interrupt(); } else if (c == '\t') { /* assume tabs every 8 positions */ db_output_position = NEXT_TAB(db_output_position); } else if (c == ' ') { /* space */ db_output_position++; } else if (c == '\007') { /* bell */ cnputc(c); /* No need to beep in a log: db_capture_writech(c); */ } /* other characters are assumed non-printing */ } static void db_puts(const char *str) { int i; for (i = 0; str[i] != '\0'; i++) db_putc(str[i]); } /* * Turn on the pager. */ void db_enable_pager(void) { if (db_maxlines == 0) { db_maxlines = db_lines_per_page; db_newlines = 0; db_pager_quit = 0; } } /* * Turn off the pager. */ void db_disable_pager(void) { db_maxlines = 0; } /* * A simple paging callout function. It supports several simple more(1)-like * commands as well as a quit command that sets db_pager_quit which db * commands can poll to see if they should terminate early.
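* The keys follow more(1): 'e', 'j' or a newline advance a single line, 'd' advances half a page, 'f' or a space advances a full page, and 'q', 'Q', 'x' or 'X' stop the output and raise db_pager_quit.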
*/ void db_pager(void) { int c, done; db_capture_enterpager(); db_printf("--More--\r"); done = 0; while (!done) { c = cngetc(); switch (c) { case 'e': case 'j': case '\n': /* Just one more line. */ db_maxlines = 1; done++; break; case 'd': /* Half a page. */ db_maxlines = db_lines_per_page / 2; done++; break; case 'f': case ' ': /* Another page. */ db_maxlines = db_lines_per_page; done++; break; case 'q': case 'Q': case 'x': case 'X': /* Quit */ db_maxlines = 0; db_pager_quit = 1; done++; break; #if 0 /* FALLTHROUGH */ default: cnputc('\007'); #endif } } db_printf(" "); db_force_whitespace(); db_printf("\r"); db_newlines = 0; db_capture_exitpager(); } /* * Return output position */ int db_print_position(void) { return (db_output_position); } /* * Printing */ int db_printf(const char *fmt, ...) { #ifdef DDB_BUFR_SIZE char bufr[DDB_BUFR_SIZE]; #endif struct dbputchar_arg dca; va_list listp; int retval; #ifdef DDB_BUFR_SIZE dca.da_pbufr = bufr; dca.da_pnext = dca.da_pbufr; dca.da_nbufr = sizeof(bufr); dca.da_remain = sizeof(bufr); *dca.da_pnext = '\0'; #else dca.da_pbufr = NULL; #endif va_start(listp, fmt); retval = kvprintf (fmt, db_putchar, &dca, db_radix, listp); va_end(listp); #ifdef DDB_BUFR_SIZE if (*dca.da_pbufr != '\0') db_puts(dca.da_pbufr); #endif return (retval); } int db_indent; void db_iprintf(const char *fmt,...) { #ifdef DDB_BUFR_SIZE char bufr[DDB_BUFR_SIZE]; #endif struct dbputchar_arg dca; - register int i; + int i; va_list listp; for (i = db_indent; i >= 8; i -= 8) db_printf("\t"); while (--i >= 0) db_printf(" "); #ifdef DDB_BUFR_SIZE dca.da_pbufr = bufr; dca.da_pnext = dca.da_pbufr; dca.da_nbufr = sizeof(bufr); dca.da_remain = sizeof(bufr); *dca.da_pnext = '\0'; #else dca.da_pbufr = NULL; #endif va_start(listp, fmt); kvprintf (fmt, db_putchar, &dca, db_radix, listp); va_end(listp); #ifdef DDB_BUFR_SIZE if (*dca.da_pbufr != '\0') db_puts(dca.da_pbufr); #endif } /* * End line if too long. */ void db_end_line(int field_width) { if (db_output_position + field_width > db_max_width) db_printf("\n"); } Index: stable/11/sys/ddb/db_sym.c =================================================================== --- stable/11/sys/ddb/db_sym.c (revision 331642) +++ stable/11/sys/ddb/db_sym.c (revision 331643) @@ -1,474 +1,473 @@ /*- * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Author: David B.
Golub, Carnegie Mellon University * Date: 7/90 */ #include __FBSDID("$FreeBSD$"); #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include /* * Multiple symbol tables */ #ifndef MAXNOSYMTABS #define MAXNOSYMTABS 3 /* mach, ux, emulator */ #endif static db_symtab_t db_symtabs[MAXNOSYMTABS] = {{0,},}; static int db_nsymtab = 0; static db_symtab_t *db_last_symtab; /* where last symbol was found */ static c_db_sym_t db_lookup( const char *symstr); static char *db_qualify(c_db_sym_t sym, char *symtabname); static bool db_symbol_is_ambiguous(c_db_sym_t sym); static bool db_line_at_pc(c_db_sym_t, char **, int *, db_expr_t); static int db_cpu = -1; #ifdef VIMAGE static void *db_vnet = NULL; #endif /* * Validate the CPU number used to interpret per-CPU variables so we can * avoid later confusion if an invalid CPU is requested. */ int db_var_db_cpu(struct db_variable *vp, db_expr_t *valuep, int op) { switch (op) { case DB_VAR_GET: *valuep = db_cpu; return (1); case DB_VAR_SET: if (*(int *)valuep < -1 || *(int *)valuep > mp_maxid) { db_printf("Invalid value: %d\n", *(int*)valuep); return (0); } db_cpu = *(int *)valuep; return (1); default: db_printf("db_var_db_cpu: unknown operation\n"); return (0); } } /* * Read-only variable reporting the current CPU, which is what we use when * db_cpu is set to -1. */ int db_var_curcpu(struct db_variable *vp, db_expr_t *valuep, int op) { switch (op) { case DB_VAR_GET: *valuep = curcpu; return (1); case DB_VAR_SET: db_printf("Read-only variable.\n"); return (0); default: db_printf("db_var_curcpu: unknown operation\n"); return (0); } } #ifdef VIMAGE /* * Validate the virtual network pointer used to interpret per-vnet global * variable expansion. Right now we don't do much here; really we should * walk the global vnet list to check that it's an OK pointer. */ int db_var_db_vnet(struct db_variable *vp, db_expr_t *valuep, int op) { switch (op) { case DB_VAR_GET: *valuep = (db_expr_t)db_vnet; return (1); case DB_VAR_SET: db_vnet = *(void **)valuep; return (1); default: db_printf("db_var_db_vnet: unknown operation\n"); return (0); } } /* * Read-only variable reporting the current vnet, which is what we use when * db_vnet is set to NULL. */ int db_var_curvnet(struct db_variable *vp, db_expr_t *valuep, int op) { switch (op) { case DB_VAR_GET: *valuep = (db_expr_t)curvnet; return (1); case DB_VAR_SET: db_printf("Read-only variable.\n"); return (0); default: db_printf("db_var_curvnet: unknown operation\n"); return (0); } } #endif /* * Add symbol table, with given name, to list of symbol tables. */ void db_add_symbol_table(char *start, char *end, char *name, char *ref) { if (db_nsymtab >= MAXNOSYMTABS) { printf ("No slots left for %s symbol table", name); panic ("db_sym.c: db_add_symbol_table"); } db_symtabs[db_nsymtab].start = start; db_symtabs[db_nsymtab].end = end; db_symtabs[db_nsymtab].name = name; db_symtabs[db_nsymtab].private = ref; db_nsymtab++; } /* * db_qualify("vm_map", "ux") returns "unix:vm_map". * * Note: return value points to static data whose content is * overwritten by each call... but in practice this seems okay.
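* That is, the result must be consumed or copied before db_qualify() is called again; passing two db_qualify() results to a single db_printf(), for example, would leave both arguments pointing at the same "table:name" string.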
*/ static char * db_qualify(c_db_sym_t sym, char *symtabname) { const char *symname; static char tmp[256]; db_symbol_values(sym, &symname, 0); snprintf(tmp, sizeof(tmp), "%s:%s", symtabname, symname); return tmp; } bool db_eqname(const char *src, const char *dst, int c) { if (!strcmp(src, dst)) return (true); if (src[0] == c) return (!strcmp(src+1,dst)); return (false); } bool db_value_of_name(const char *name, db_expr_t *valuep) { c_db_sym_t sym; sym = db_lookup(name); if (sym == C_DB_SYM_NULL) return (false); db_symbol_values(sym, &name, valuep); return (true); } bool db_value_of_name_pcpu(const char *name, db_expr_t *valuep) { static char tmp[256]; db_expr_t value; c_db_sym_t sym; int cpu; if (db_cpu != -1) cpu = db_cpu; else cpu = curcpu; snprintf(tmp, sizeof(tmp), "pcpu_entry_%s", name); sym = db_lookup(tmp); if (sym == C_DB_SYM_NULL) return (false); db_symbol_values(sym, &name, &value); if (value < DPCPU_START || value >= DPCPU_STOP) return (false); *valuep = (db_expr_t)((uintptr_t)value + dpcpu_off[cpu]); return (true); } bool db_value_of_name_vnet(const char *name, db_expr_t *valuep) { #ifdef VIMAGE static char tmp[256]; db_expr_t value; c_db_sym_t sym; struct vnet *vnet; if (db_vnet != NULL) vnet = db_vnet; else vnet = curvnet; snprintf(tmp, sizeof(tmp), "vnet_entry_%s", name); sym = db_lookup(tmp); if (sym == C_DB_SYM_NULL) return (false); db_symbol_values(sym, &name, &value); if (value < VNET_START || value >= VNET_STOP) return (false); *valuep = (db_expr_t)((uintptr_t)value + vnet->vnet_data_base); return (true); #else return (false); #endif } /* * Lookup a symbol. * If the symbol has a qualifier (e.g., ux:vm_map), * then only the specified symbol table will be searched; * otherwise, all symbol tables will be searched. */ static c_db_sym_t db_lookup(const char *symstr) { c_db_sym_t sp; - register int i; + int i; int symtab_start = 0; int symtab_end = db_nsymtab; - register const char *cp; + const char *cp; /* * Look for, remove, and remember any symbol table specifier. */ for (cp = symstr; *cp; cp++) { if (*cp == ':') { for (i = 0; i < db_nsymtab; i++) { int n = strlen(db_symtabs[i].name); if ( n == (cp - symstr) && strncmp(symstr, db_symtabs[i].name, n) == 0 ) { symtab_start = i; symtab_end = i + 1; break; } } if (i == db_nsymtab) { db_error("invalid symbol table name"); } symstr = cp+1; } } /* * Look in the specified set of symbol tables. * Return on first match. */ for (i = symtab_start; i < symtab_end; i++) { sp = X_db_lookup(&db_symtabs[i], symstr); if (sp) { db_last_symtab = &db_symtabs[i]; return sp; } } return 0; } /* * If true, check across symbol tables for multiple occurrences * of a name. Might slow things down quite a bit. */ static volatile bool db_qualify_ambiguous_names = false; /* * Does this symbol name appear in more than one symbol table? * Used by db_symbol_values to decide whether to qualify a symbol. */ static bool db_symbol_is_ambiguous(c_db_sym_t sym) { const char *sym_name; - register int i; - register bool found_once = false; + int i; + bool found_once = false; if (!db_qualify_ambiguous_names) return (false); db_symbol_values(sym, &sym_name, 0); for (i = 0; i < db_nsymtab; i++) { if (X_db_lookup(&db_symtabs[i], sym_name)) { if (found_once) return (true); found_once = true; } } return (false); } /* * Find the closest symbol to val, and return its name * and the difference between val and the symbol found. 
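* Every registered symbol table is searched and the candidate with the smallest offset wins, so a caller such as db_printsym() can render an address in the familiar name+offset form from the returned symbol and *offp.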
*/ c_db_sym_t db_search_symbol(db_addr_t val, db_strategy_t strategy, db_expr_t *offp) { - register unsigned int diff; + unsigned int diff; size_t newdiff; - register int i; + int i; c_db_sym_t ret = C_DB_SYM_NULL, sym; newdiff = diff = ~0; for (i = 0; i < db_nsymtab; i++) { sym = X_db_search_symbol(&db_symtabs[i], val, strategy, &newdiff); if (newdiff < diff) { db_last_symtab = &db_symtabs[i]; diff = newdiff; ret = sym; } } *offp = diff; return ret; } /* * Return name and value of a symbol */ void db_symbol_values(c_db_sym_t sym, const char **namep, db_expr_t *valuep) { db_expr_t value; if (sym == DB_SYM_NULL) { *namep = NULL; return; } X_db_symbol_values(db_last_symtab, sym, namep, &value); if (db_symbol_is_ambiguous(sym)) *namep = db_qualify(sym, db_last_symtab->name); if (valuep) *valuep = value; } /* * Print the closest symbol to value * * After matching the symbol according to the given strategy * we print it in the name+offset format, provided the symbol's * value is close enough (e.g. smaller than db_maxoff). * We also attempt to print [filename:linenum] when applicable * (e.g. for procedure names). * * If we could not find a reasonable name+offset representation, * then we just print the value in hex. Small values might get * bogus symbol associations, e.g. 3 might get some absolute * value like _INCLUDE_VERSION or something, therefore we do * not accept symbols whose value is "small" (and use plain hex). */ db_expr_t db_maxoff = 0x10000; void db_printsym(db_expr_t off, db_strategy_t strategy) { db_expr_t d; char *filename; const char *name; db_expr_t value; int linenum; c_db_sym_t cursym; cursym = db_search_symbol(off, strategy, &d); db_symbol_values(cursym, &name, &value); if (name == NULL) value = off; if (value >= DB_SMALL_VALUE_MIN && value <= DB_SMALL_VALUE_MAX) { db_printf("%+#lr", (long)off); return; } if (name == NULL || d >= (unsigned long)db_maxoff) { db_printf("%#lr", (unsigned long)off); return; } #ifdef DDB_NUMSYM db_printf("%#lr = %s", (unsigned long)off, name); #else db_printf("%s", name); #endif if (d) db_printf("+%+#lr", (long)d); if (strategy == DB_STGY_PROC) { if (db_line_at_pc(cursym, &filename, &linenum, off)) db_printf(" [%s:%d]", filename, linenum); } } static bool db_line_at_pc(c_db_sym_t sym, char **filename, int *linenum, db_expr_t pc) { return (X_db_line_at_pc(db_last_symtab, sym, filename, linenum, pc)); } bool db_sym_numargs(c_db_sym_t sym, int *nargp, char **argnames) { return (X_db_sym_numargs(db_last_symtab, sym, nargp, argnames)); } Index: stable/11/sys/dev/cs/if_cs.c =================================================================== --- stable/11/sys/dev/cs/if_cs.c (revision 331642) +++ stable/11/sys/dev/cs/if_cs.c (revision 331643) @@ -1,1227 +1,1227 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997,1998 Maxim Bolotin and Oleg Sharoiko. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * * Device driver for Crystal Semiconductor CS8920 based ethernet * adapters. By Maxim Bolotin and Oleg Sharoiko, 27-April-1997 */ /* #define CS_DEBUG */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CS_USE_64K_DMA #define CS_DMA_BUFFER_SIZE 65536 #else #define CS_DMA_BUFFER_SIZE 16384 #endif static void cs_init(void *); static void cs_init_locked(struct cs_softc *); static int cs_ioctl(struct ifnet *, u_long, caddr_t); static void cs_start(struct ifnet *); static void cs_start_locked(struct ifnet *); static void cs_stop(struct cs_softc *); static void cs_reset(struct cs_softc *); static void cs_watchdog(void *); static int cs_mediachange(struct ifnet *); static void cs_mediastatus(struct ifnet *, struct ifmediareq *); static int cs_mediaset(struct cs_softc *, int); static void cs_write_mbufs(struct cs_softc*, struct mbuf*); static void cs_xmit_buf(struct cs_softc*); static int cs_get_packet(struct cs_softc*); static void cs_setmode(struct cs_softc*); static int get_eeprom_data(struct cs_softc *sc, int, int, uint16_t *); static int get_eeprom_cksum(int, int, uint16_t *); static int wait_eeprom_ready( struct cs_softc *); static void control_dc_dc( struct cs_softc *, int ); static int enable_tp(struct cs_softc *); static int enable_aui(struct cs_softc *); static int enable_bnc(struct cs_softc *); static int cs_duplex_auto(struct cs_softc *); devclass_t cs_devclass; driver_intr_t csintr; /* sysctl vars */ static SYSCTL_NODE(_hw, OID_AUTO, cs, CTLFLAG_RD, 0, "cs device parameters"); int cs_ignore_cksum_failure = 0; SYSCTL_INT(_hw_cs, OID_AUTO, ignore_checksum_failure, CTLFLAG_RWTUN, &cs_ignore_cksum_failure, 0, "ignore checksum errors in cs card EEPROM"); static int cs_recv_delay = 570; SYSCTL_INT(_hw_cs, OID_AUTO, recv_delay, CTLFLAG_RWTUN, &cs_recv_delay, 570, ""); static int cs8900_eeint2irq[16] = { 10, 11, 12, 5, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }; static int cs8900_irq2eeint[16] = { 255, 255, 255, 255, 255, 3, 255, 255, 255, 0, 1, 2, 255, 255, 255, 255 }; static int get_eeprom_data(struct cs_softc *sc, int off, int len, uint16_t *buffer) { int i; #ifdef CS_DEBUG device_printf(sc->dev, "EEPROM data from %x for %x:\n", off, len); #endif for (i=0; i < len; i++) { if (wait_eeprom_ready(sc) < 0) return (-1); /* Send command to EEPROM to read */ cs_writereg(sc, PP_EECMD, (off + i) | EEPROM_READ_CMD); if (wait_eeprom_ready(sc) < 0) return (-1); buffer[i] = cs_readreg(sc, PP_EEData); #ifdef CS_DEBUG printf("%04x ",buffer[i]); #endif } #ifdef CS_DEBUG printf("\n"); #endif return (0); } static int 
get_eeprom_cksum(int off, int len, uint16_t *buffer) { int i; uint16_t cksum=0; for (i = 0; i < len; i++) cksum += buffer[i]; cksum &= 0xffff; if (cksum == 0 || cs_ignore_cksum_failure) return (0); return (-1); } static int wait_eeprom_ready(struct cs_softc *sc) { int i; /* * From the CS8900A datasheet, section 3.5.2: * "Before issuing any command to the EEPROM, the host must wait * for the SIBUSY bit (Register 16, SelfST, bit 8) to clear. After * each command has been issued, the host must wait again for SIBUSY * to clear." * * Before we issue the command, we should be !busy, so that will * be fast. The datasheet suggests that clock out from the part * per word will be on the order of 25us, which is consistent with * the 1MHz serial clock and 16bits... We should never hit 100, * let alone 15,000 here. The original code did an unconditional * 30ms DELAY here. Bad karma. cs_readreg takes ~2us. */ for (i = 0; i < 15000; i++) /* 30ms max */ if (!(cs_readreg(sc, PP_SelfST) & SI_BUSY)) return (0); return (1); } static void control_dc_dc(struct cs_softc *sc, int on_not_off) { unsigned int self_control = HCB1_ENBL; if (((sc->adapter_cnf & A_CNF_DC_DC_POLARITY)!=0) ^ on_not_off) self_control |= HCB1; else self_control &= ~HCB1; cs_writereg(sc, PP_SelfCTL, self_control); DELAY(500000); /* Bad! */ } static int cs_duplex_auto(struct cs_softc *sc) { int i, error=0; cs_writereg(sc, PP_AutoNegCTL, RE_NEG_NOW | ALLOW_FDX | AUTO_NEG_ENABLE); for (i=0; cs_readreg(sc, PP_AutoNegST) & AUTO_NEG_BUSY; i++) { if (i > 4000) { device_printf(sc->dev, "full/half duplex auto negotiation timeout\n"); error = ETIMEDOUT; break; } DELAY(1000); } return (error); } static int enable_tp(struct cs_softc *sc) { cs_writereg(sc, PP_LineCTL, sc->line_ctl & ~AUI_ONLY); control_dc_dc(sc, 0); return (0); } static int enable_aui(struct cs_softc *sc) { cs_writereg(sc, PP_LineCTL, (sc->line_ctl & ~AUTO_AUI_10BASET) | AUI_ONLY); control_dc_dc(sc, 0); return (0); } static int enable_bnc(struct cs_softc *sc) { cs_writereg(sc, PP_LineCTL, (sc->line_ctl & ~AUTO_AUI_10BASET) | AUI_ONLY); control_dc_dc(sc, 1); return (0); } int cs_cs89x0_probe(device_t dev) { int i; int error; rman_res_t irq, junk; struct cs_softc *sc = device_get_softc(dev); unsigned rev_type = 0; uint16_t id; char chip_revision; uint16_t eeprom_buff[CHKSUM_LEN]; int chip_type, pp_isaint; sc->dev = dev; error = cs_alloc_port(dev, 0, CS_89x0_IO_PORTS); if (error) return (error); if ((cs_inw(sc, ADD_PORT) & ADD_MASK) != ADD_SIG) { /* Chip not detected. Let's try to reset it */ if (bootverbose) device_printf(dev, "trying to reset the chip.\n"); cs_outw(sc, ADD_PORT, PP_SelfCTL); i = cs_inw(sc, DATA_PORT); cs_outw(sc, ADD_PORT, PP_SelfCTL); cs_outw(sc, DATA_PORT, i | POWER_ON_RESET); if ((cs_inw(sc, ADD_PORT) & ADD_MASK) != ADD_SIG) return (ENXIO); } for (i = 0; i < 10000; i++) { id = cs_readreg(sc, PP_ChipID); if (id == CHIP_EISA_ID_SIG) break; } if (i == 10000) return (ENXIO); rev_type = cs_readreg(sc, PRODUCT_ID_ADD); chip_type = rev_type & ~REVISON_BITS; chip_revision = ((rev_type & REVISON_BITS) >> 8) + 'A'; sc->chip_type = chip_type; if (chip_type == CS8900) { pp_isaint = PP_CS8900_ISAINT; sc->send_cmd = TX_CS8900_AFTER_ALL; } else { pp_isaint = PP_CS8920_ISAINT; sc->send_cmd = TX_CS8920_AFTER_ALL; } /* * Clear some fields so that a failed EEPROM read will leave * them clean */ sc->auto_neg_cnf = 0; sc->adapter_cnf = 0; sc->isa_config = 0; /* * If no interrupt specified, use what the board tells us.
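* Note that on the CS8900 the EEPROM holds an interrupt select code rather than a raw IRQ number; it is translated through cs8900_eeint2irq[] above, so code 0 selects IRQ 10 and code 3 selects IRQ 5, while the CS8920 variants store the IRQ number directly.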
*/ error = bus_get_resource(dev, SYS_RES_IRQ, 0, &irq, &junk); /* * Get data from EEPROM */ if((cs_readreg(sc, PP_SelfST) & EEPROM_PRESENT) == 0) { device_printf(dev, "No EEPROM, assuming defaults.\n"); } else if (get_eeprom_data(sc,START_EEPROM_DATA,CHKSUM_LEN, eeprom_buff)<0) { device_printf(dev, "EEPROM read failed, assuming defaults.\n"); } else if (get_eeprom_cksum(START_EEPROM_DATA,CHKSUM_LEN, eeprom_buff)<0) { device_printf(dev, "EEPROM checksum bad, assuming defaults.\n"); } else { sc->auto_neg_cnf = eeprom_buff[AUTO_NEG_CNF_OFFSET]; sc->adapter_cnf = eeprom_buff[ADAPTER_CNF_OFFSET]; sc->isa_config = eeprom_buff[ISA_CNF_OFFSET]; for (i=0; i<ETHER_ADDR_LEN/2; i++) { sc->enaddr[i*2] = eeprom_buff[i]; sc->enaddr[i*2+1] = eeprom_buff[i] >> 8; } /* * If no interrupt specified, use what the * board tells us. */ if (error) { irq = sc->isa_config & INT_NO_MASK; error = 0; if (chip_type == CS8900) { irq = cs8900_eeint2irq[irq]; } else { if (irq > CS8920_NO_INTS) irq = 255; } if (irq == 255) { device_printf(dev, "invalid irq in EEPROM.\n"); error = EINVAL; } if (!error) bus_set_resource(dev, SYS_RES_IRQ, 0, irq, 1); } } if (!error && !(sc->flags & CS_NO_IRQ)) { if (chip_type == CS8900) { if (irq < 16) irq = cs8900_irq2eeint[irq]; else irq = 255; } else { if (irq > CS8920_NO_INTS) irq = 255; } if (irq == 255) error = EINVAL; } if (error) { device_printf(dev, "Unknown or invalid irq\n"); return (error); } if (!(sc->flags & CS_NO_IRQ)) cs_writereg(sc, pp_isaint, irq); if (bootverbose) device_printf(dev, "CS89%c0%s rev %c media%s%s%s\n", chip_type == CS8900 ? '0' : '2', chip_type == CS8920M ? "M" : "", chip_revision, (sc->adapter_cnf & A_CNF_10B_T) ? " TP" : "", (sc->adapter_cnf & A_CNF_AUI) ? " AUI" : "", (sc->adapter_cnf & A_CNF_10B_2) ? " BNC" : ""); if ((sc->adapter_cnf & A_CNF_EXTND_10B_2) && (sc->adapter_cnf & A_CNF_LOW_RX_SQUELCH)) sc->line_ctl = LOW_RX_SQUELCH; else sc->line_ctl = 0; return (0); } /* * Allocate a port resource with the given resource id. */ int cs_alloc_port(device_t dev, int rid, int size) { struct cs_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &rid, size, RF_ACTIVE); if (res == NULL) return (ENOENT); sc->port_rid = rid; sc->port_res = res; return (0); } /* * Allocate an irq resource with the given resource id.
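* The resource is only recorded in the softc here; the bus front end is expected to hook the handler afterwards, the way cs_attach() passes sc->irq_res to bus_setup_intr() with csintr() as the interrupt routine.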
*/ int cs_alloc_irq(device_t dev, int rid) { struct cs_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (res == NULL) return (ENOENT); sc->irq_rid = rid; sc->irq_res = res; return (0); } /* * Release all resources */ void cs_release_resources(device_t dev) { struct cs_softc *sc = device_get_softc(dev); if (sc->port_res) { bus_release_resource(dev, SYS_RES_IOPORT, sc->port_rid, sc->port_res); sc->port_res = 0; } if (sc->irq_res) { bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = 0; } } /* * Install the interface into kernel networking data structures */ int cs_attach(device_t dev) { int error, media=0; struct cs_softc *sc = device_get_softc(dev); struct ifnet *ifp; sc->dev = dev; ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); cs_release_resources(dev); return (ENOMEM); } mtx_init(&sc->lock, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->timer, &sc->lock, 0); CS_LOCK(sc); cs_stop(sc); CS_UNLOCK(sc); ifp->if_softc=sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_start=cs_start; ifp->if_ioctl=cs_ioctl; ifp->if_init=cs_init; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_flags=(IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); /* * this code still in progress (DMA support) * sc->recv_ring=malloc(CS_DMA_BUFFER_SIZE<<1, M_DEVBUF, M_NOWAIT); if (sc->recv_ring == NULL) { log(LOG_ERR, "%s: Couldn't allocate memory for NIC\n", ifp->if_xname); return(0); } if ((sc->recv_ring-(sc->recv_ring & 0x1FFFF)) < (128*1024-CS_DMA_BUFFER_SIZE)) sc->recv_ring+=16*1024; */ sc->buffer=malloc(ETHER_MAX_LEN-ETHER_CRC_LEN,M_DEVBUF,M_NOWAIT); if (sc->buffer == NULL) { device_printf(sc->dev, "Couldn't allocate memory for NIC\n"); if_free(ifp); mtx_destroy(&sc->lock); cs_release_resources(dev); return(ENOMEM); } /* * Initialize the media structures. 
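* The list of selectable media is derived from the adapter configuration recovered from the EEPROM: A_CNF_10B_T, A_CNF_10B_2 and A_CNF_AUI each contribute an ifmedia entry, and the A_CNF_MEDIA_TYPE field picks the default that is handed to ifmedia_set() and cs_mediaset().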
*/ ifmedia_init(&sc->media, 0, cs_mediachange, cs_mediastatus); if (sc->adapter_cnf & A_CNF_10B_T) { ifmedia_add(&sc->media, IFM_ETHER|IFM_10_T, 0, NULL); if (sc->chip_type != CS8900) { ifmedia_add(&sc->media, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->media, IFM_ETHER|IFM_10_T|IFM_HDX, 0, NULL); } } if (sc->adapter_cnf & A_CNF_10B_2) ifmedia_add(&sc->media, IFM_ETHER|IFM_10_2, 0, NULL); if (sc->adapter_cnf & A_CNF_AUI) ifmedia_add(&sc->media, IFM_ETHER|IFM_10_5, 0, NULL); if (sc->adapter_cnf & A_CNF_MEDIA) ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL); /* Set default media from EEPROM */ switch (sc->adapter_cnf & A_CNF_MEDIA_TYPE) { case A_CNF_MEDIA_AUTO: media = IFM_ETHER|IFM_AUTO; break; case A_CNF_MEDIA_10B_T: media = IFM_ETHER|IFM_10_T; break; case A_CNF_MEDIA_10B_2: media = IFM_ETHER|IFM_10_2; break; case A_CNF_MEDIA_AUI: media = IFM_ETHER|IFM_10_5; break; default: device_printf(sc->dev, "no media, assuming 10baseT\n"); sc->adapter_cnf |= A_CNF_10B_T; ifmedia_add(&sc->media, IFM_ETHER|IFM_10_T, 0, NULL); if (sc->chip_type != CS8900) { ifmedia_add(&sc->media, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->media, IFM_ETHER|IFM_10_T|IFM_HDX, 0, NULL); } media = IFM_ETHER | IFM_10_T; break; } ifmedia_set(&sc->media, media); cs_mediaset(sc, media); ether_ifattach(ifp, sc->enaddr); error = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE, NULL, csintr, sc, &sc->irq_handle); if (error) { ether_ifdetach(ifp); free(sc->buffer, M_DEVBUF); if_free(ifp); mtx_destroy(&sc->lock); cs_release_resources(dev); return (error); } return (0); } int cs_detach(device_t dev) { struct cs_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; CS_LOCK(sc); cs_stop(sc); CS_UNLOCK(sc); callout_drain(&sc->timer); ether_ifdetach(ifp); bus_teardown_intr(dev, sc->irq_res, sc->irq_handle); cs_release_resources(dev); free(sc->buffer, M_DEVBUF); if_free(ifp); mtx_destroy(&sc->lock); return (0); } /* * Initialize the board */ static void cs_init(void *xsc) { struct cs_softc *sc=(struct cs_softc *)xsc; CS_LOCK(sc); cs_init_locked(sc); CS_UNLOCK(sc); } static void cs_init_locked(struct cs_softc *sc) { struct ifnet *ifp = sc->ifp; int i, rx_cfg; /* * reset watchdog timer */ sc->tx_timeout = 0; sc->buf_len = 0; /* * Hardware initialization of cs */ /* Enable receiver and transmitter */ cs_writereg(sc, PP_LineCTL, cs_readreg(sc, PP_LineCTL) | SERIAL_RX_ON | SERIAL_TX_ON); /* Configure the receiver mode */ cs_setmode(sc); /* * This defines what type of frames will cause interrupts * Bad frames should generate interrupts so that the driver * could track statistics of discarded packets */ rx_cfg = RX_OK_ENBL | RX_CRC_ERROR_ENBL | RX_RUNT_ENBL | RX_EXTRA_DATA_ENBL; if (sc->isa_config & STREAM_TRANSFER) rx_cfg |= RX_STREAM_ENBL; cs_writereg(sc, PP_RxCFG, rx_cfg); cs_writereg(sc, PP_TxCFG, TX_LOST_CRS_ENBL | TX_SQE_ERROR_ENBL | TX_OK_ENBL | TX_LATE_COL_ENBL | TX_JBR_ENBL | TX_ANY_COL_ENBL | TX_16_COL_ENBL); cs_writereg(sc, PP_BufCFG, READY_FOR_TX_ENBL | RX_MISS_COUNT_OVRFLOW_ENBL | TX_COL_COUNT_OVRFLOW_ENBL | TX_UNDERRUN_ENBL /*| RX_DMA_ENBL*/); /* Write MAC address into IA filter */ for (i=0; i<ETHER_ADDR_LEN/2; i++) cs_writereg(sc, PP_IA + i * 2, sc->enaddr[i * 2] | (sc->enaddr[i * 2 + 1] << 8) ); /* * Now enable everything */ /* #ifdef CS_USE_64K_DMA cs_writereg(sc, PP_BusCTL, ENABLE_IRQ | RX_DMA_SIZE_64K); #else cs_writereg(sc, PP_BusCTL, ENABLE_IRQ); #endif */ cs_writereg(sc, PP_BusCTL, ENABLE_IRQ); /* * Set running and clear output active flags */ sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &=
~IFF_DRV_OACTIVE; callout_reset(&sc->timer, hz, cs_watchdog, sc); /* * Start sending process */ cs_start_locked(ifp); } /* * Get the packet from the board and send it to the upper layer. */ static int cs_get_packet(struct cs_softc *sc) { struct ifnet *ifp = sc->ifp; int status, length; struct mbuf *m; #ifdef CS_DEBUG int i; #endif status = cs_inw(sc, RX_FRAME_PORT); length = cs_inw(sc, RX_FRAME_PORT); #ifdef CS_DEBUG device_printf(sc->dev, "rcvd: stat %x, len %d\n", status, length); #endif if (!(status & RX_OK)) { #ifdef CS_DEBUG device_printf(sc->dev, "bad pkt stat %x\n", status); #endif if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (-1); } MGETHDR(m, M_NOWAIT, MT_DATA); if (m==NULL) return (-1); if (length > MHLEN) { if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); return (-1); } } /* Initialize packet's header info */ m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = length; m->m_len = length; /* Get the data */ bus_read_multi_2(sc->port_res, RX_FRAME_PORT, mtod(m, uint16_t *), (length + 1) >> 1); #ifdef CS_DEBUG for (i=0;i<length;i++) printf(" %02x", (unsigned char)*((char *)(m->m_data+i))); printf( "\n" ); #endif if (status & (RX_IA | RX_BROADCAST) || (ifp->if_flags & IFF_MULTICAST && status & RX_HASHED)) { /* Feed the packet to the upper layer */ (*ifp->if_input)(ifp, m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if (length == ETHER_MAX_LEN-ETHER_CRC_LEN) DELAY(cs_recv_delay); } else { m_freem(m); } return (0); } /* * Handle interrupts */ void csintr(void *arg) { struct cs_softc *sc = (struct cs_softc*) arg; struct ifnet *ifp = sc->ifp; int status; #ifdef CS_DEBUG device_printf(sc->dev, "Interrupt.\n"); #endif CS_LOCK(sc); while ((status=cs_inw(sc, ISQ_PORT))) { #ifdef CS_DEBUG device_printf(sc->dev, "from ISQ: %04x\n", status); #endif switch (status & ISQ_EVENT_MASK) { case ISQ_RECEIVER_EVENT: cs_get_packet(sc); break; case ISQ_TRANSMITTER_EVENT: if (status & TX_OK) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->tx_timeout = 0; break; case ISQ_BUFFER_EVENT: if (status & READY_FOR_TX) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->tx_timeout = 0; } if (status & TX_UNDERRUN) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->tx_timeout = 0; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } break; case ISQ_RX_MISS_EVENT: if_inc_counter(ifp, IFCOUNTER_IERRORS, status >> 6); break; case ISQ_TX_COL_EVENT: if_inc_counter(ifp, IFCOUNTER_COLLISIONS, status >> 6); break; } } if (!(ifp->if_drv_flags & IFF_DRV_OACTIVE)) { cs_start_locked(ifp); } CS_UNLOCK(sc); } /* * Save the data in the buffer */ static void cs_write_mbufs( struct cs_softc *sc, struct mbuf *m ) { int len; struct mbuf *mp; unsigned char *data, *buf; for (mp=m, buf=sc->buffer, sc->buf_len=0; mp != NULL; mp=mp->m_next) { len = mp->m_len; /* * Ignore empty parts */ if (!len) continue; /* * Find actual data address */ data = mtod(mp, caddr_t); bcopy((caddr_t) data, (caddr_t) buf, len); buf += len; sc->buf_len += len; } } static void cs_xmit_buf( struct cs_softc *sc ) { bus_write_multi_2(sc->port_res, TX_FRAME_PORT, (uint16_t *)sc->buffer, (sc->buf_len + 1) >> 1); sc->buf_len = 0; } static void cs_start(struct ifnet *ifp) { struct cs_softc *sc = ifp->if_softc; CS_LOCK(sc); cs_start_locked(ifp); CS_UNLOCK(sc); } static void cs_start_locked(struct ifnet *ifp) { int length; struct mbuf *m, *mp; struct cs_softc *sc = ifp->if_softc; for (;;) { if (sc->buf_len) length = sc->buf_len; else { IF_DEQUEUE( &ifp->if_snd, m ); if (m==NULL) { return; } for (length=0, mp=m; mp != NULL; mp=mp->m_next) length += mp->m_len; /* Skip zero-length
packets */ if (length == 0) { m_freem(m); continue; } cs_write_mbufs(sc, m); BPF_MTAP(ifp, m); m_freem(m); } /* * Issue a SEND command */ cs_outw(sc, TX_CMD_PORT, sc->send_cmd); cs_outw(sc, TX_LEN_PORT, length ); /* * If there's no free space in the buffer then leave * this packet for the next time: indicate output active * and return. */ if (!(cs_readreg(sc, PP_BusST) & READY_FOR_TX_NOW)) { sc->tx_timeout = sc->buf_len; ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } cs_xmit_buf(sc); /* * Set the watchdog timer in case we never hear * from board again. (I don't know about correct * value for this timeout) */ sc->tx_timeout = length; ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } } /* * Stop everything on the interface */ static void cs_stop(struct cs_softc *sc) { CS_ASSERT_LOCKED(sc); cs_writereg(sc, PP_RxCFG, 0); cs_writereg(sc, PP_TxCFG, 0); cs_writereg(sc, PP_BufCFG, 0); cs_writereg(sc, PP_BusCTL, 0); sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->tx_timeout = 0; callout_stop(&sc->timer); } /* * Reset the interface */ static void cs_reset(struct cs_softc *sc) { CS_ASSERT_LOCKED(sc); cs_stop(sc); cs_init_locked(sc); } static uint16_t cs_hash_index(struct sockaddr_dl *addr) { uint32_t crc; uint16_t idx; caddr_t lla; lla = LLADDR(addr); crc = ether_crc32_le(lla, ETHER_ADDR_LEN); idx = crc >> 26; return (idx); } static void cs_setmode(struct cs_softc *sc) { int rx_ctl; uint16_t af[4]; uint16_t port, mask, index; struct ifnet *ifp = sc->ifp; struct ifmultiaddr *ifma; /* Stop the receiver while changing filters */ cs_writereg(sc, PP_LineCTL, cs_readreg(sc, PP_LineCTL) & ~SERIAL_RX_ON); if (ifp->if_flags & IFF_PROMISC) { /* Turn on promiscuous mode. */ rx_ctl = RX_OK_ACCEPT | RX_PROM_ACCEPT; } else if (ifp->if_flags & IFF_MULTICAST) { /* Allow receiving frames with multicast addresses */ rx_ctl = RX_IA_ACCEPT | RX_BROADCAST_ACCEPT | RX_OK_ACCEPT | RX_MULTCAST_ACCEPT; /* Start with an empty filter */ af[0] = af[1] = af[2] = af[3] = 0x0000; if (ifp->if_flags & IFF_ALLMULTI) { /* Accept all multicast frames */ af[0] = af[1] = af[2] = af[3] = 0xffff; } else { /* * Set up the filter to only accept multicast * frames we're interested in. */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { struct sockaddr_dl *dl = (struct sockaddr_dl *)ifma->ifma_addr; index = cs_hash_index(dl); port = (u_int16_t) (index >> 4); mask = (u_int16_t) (1 << (index & 0xf)); af[port] |= mask; } if_maddr_runlock(ifp); } cs_writereg(sc, PP_LAF + 0, af[0]); cs_writereg(sc, PP_LAF + 2, af[1]); cs_writereg(sc, PP_LAF + 4, af[2]); cs_writereg(sc, PP_LAF + 6, af[3]); } else { /* * Receive only good frames addressed for us and * good broadcasts. */ rx_ctl = RX_IA_ACCEPT | RX_BROADCAST_ACCEPT | RX_OK_ACCEPT; } /* Set up the filter */ cs_writereg(sc, PP_RxCTL, RX_DEF_ACCEPT | rx_ctl); /* Turn on receiver */ cs_writereg(sc, PP_LineCTL, cs_readreg(sc, PP_LineCTL) | SERIAL_RX_ON); } static int -cs_ioctl(register struct ifnet *ifp, u_long command, caddr_t data) +cs_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct cs_softc *sc=ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int error=0; #ifdef CS_DEBUG if_printf(ifp, "%s command=%lx\n", __func__, command); #endif switch (command) { case SIOCSIFFLAGS: /* * Switch interface state between "running" and * "stopped", reflecting the UP flag. 
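	 * For reference, IFF_UP in if_flags is the administrative state
	 * set from userland, while IFF_DRV_RUNNING in if_drv_flags is the
	 * driver's own view of whether the hardware is initialized.  A
	 * minimal sketch of the pattern implemented just below:
	 *
	 *	if (ifp->if_flags & IFF_UP) {
	 *		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
	 *			cs_init_locked(sc);
	 *	} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
	 *		cs_stop(sc);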
*/ CS_LOCK(sc); if (sc->ifp->if_flags & IFF_UP) { if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING)==0) { cs_init_locked(sc); } } else { if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING)!=0) { cs_stop(sc); } } /* * Promiscuous and/or multicast flags may have changed, * so reprogram the multicast filter and/or receive mode. * * See note about multicasts in cs_setmode */ cs_setmode(sc); CS_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * Multicast list has changed; set the hardware filter * accordingly. * * See note about multicasts in cs_setmode */ CS_LOCK(sc); cs_setmode(sc); CS_UNLOCK(sc); error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* * Device timeout/watchdog routine. Entered if the device neglects to * generate an interrupt after a transmit has been started on it. */ static void cs_watchdog(void *arg) { struct cs_softc *sc = arg; struct ifnet *ifp = sc->ifp; CS_ASSERT_LOCKED(sc); if (sc->tx_timeout && --sc->tx_timeout == 0) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); log(LOG_ERR, "%s: device timeout\n", ifp->if_xname); /* Reset the interface */ if (ifp->if_flags & IFF_UP) cs_reset(sc); else cs_stop(sc); } callout_reset(&sc->timer, hz, cs_watchdog, sc); } static int cs_mediachange(struct ifnet *ifp) { struct cs_softc *sc = ifp->if_softc; struct ifmedia *ifm = &sc->media; int error; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); CS_LOCK(sc); error = cs_mediaset(sc, ifm->ifm_media); CS_UNLOCK(sc); return (error); } static void cs_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { int line_status; struct cs_softc *sc = ifp->if_softc; CS_LOCK(sc); ifmr->ifm_active = IFM_ETHER; line_status = cs_readreg(sc, PP_LineST); if (line_status & TENBASET_ON) { ifmr->ifm_active |= IFM_10_T; if (sc->chip_type != CS8900) { if (cs_readreg(sc, PP_AutoNegST) & FDX_ACTIVE) ifmr->ifm_active |= IFM_FDX; if (cs_readreg(sc, PP_AutoNegST) & HDX_ACTIVE) ifmr->ifm_active |= IFM_HDX; } ifmr->ifm_status = IFM_AVALID; if (line_status & LINK_OK) ifmr->ifm_status |= IFM_ACTIVE; } else { if (line_status & AUI_ON) { cs_writereg(sc, PP_SelfCTL, cs_readreg(sc, PP_SelfCTL) | HCB1_ENBL); if (((sc->adapter_cnf & A_CNF_DC_DC_POLARITY)!=0)^ (cs_readreg(sc, PP_SelfCTL) & HCB1)) ifmr->ifm_active |= IFM_10_2; else ifmr->ifm_active |= IFM_10_5; } } CS_UNLOCK(sc); } static int cs_mediaset(struct cs_softc *sc, int media) { int error = 0; /* Stop the receiver & transmitter */ cs_writereg(sc, PP_LineCTL, cs_readreg(sc, PP_LineCTL) & ~(SERIAL_RX_ON | SERIAL_TX_ON)); #ifdef CS_DEBUG device_printf(sc->dev, "%s media=%x\n", __func__, media); #endif switch (IFM_SUBTYPE(media)) { default: case IFM_AUTO: /* * This chip makes it a little hard to support this, so treat * it as IFM_10_T, auto duplex. 
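	 * For reference, an ifmedia word carries the type, subtype and
	 * option bits OR'ed into one integer, so a full-duplex 10baseT
	 * request reaches this function as, roughly:
	 *
	 *	media = IFM_ETHER | IFM_10_T | IFM_FDX;
	 *
	 * which is why the IFM_10_T case below simply tests the IFM_FDX
	 * and IFM_HDX option bits of the same word.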
*/ enable_tp(sc); cs_duplex_auto(sc); break; case IFM_10_T: enable_tp(sc); if (media & IFM_FDX) cs_duplex_full(sc); else if (media & IFM_HDX) cs_duplex_half(sc); else error = cs_duplex_auto(sc); break; case IFM_10_2: enable_bnc(sc); break; case IFM_10_5: enable_aui(sc); break; } /* * Turn the transmitter & receiver back on */ cs_writereg(sc, PP_LineCTL, cs_readreg(sc, PP_LineCTL) | SERIAL_RX_ON | SERIAL_TX_ON); return (error); } Index: stable/11/sys/dev/ixgb/if_ixgb.c =================================================================== --- stable/11/sys/dev/ixgb/if_ixgb.c (revision 331642) +++ stable/11/sys/dev/ixgb/if_ixgb.c (revision 331643) @@ -1,2538 +1,2538 @@ /******************************************************************************* SPDX-License-Identifier: BSD-3-Clause Copyright (c) 2001-2004, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
***************************************************************************/

/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <dev/ixgb/if_ixgb.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgb_display_debug_stats = 0;

/*********************************************************************
 *  Linked list of board private structures for all NICs found
 *********************************************************************/
struct adapter *ixgb_adapter_list = NULL;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char            ixgb_driver_version[] = "1.0.6";
char            ixgb_copyright[] = "Copyright (c) 2001-2004 Intel Corporation.";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgb_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
static ixgb_vendor_info_t ixgb_vendor_info_array[] =
{
	/* Intel(R) PRO/10000 Network Connection */
	{IXGB_VENDOR_ID, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0},
	{IXGB_VENDOR_ID, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/
static char    *ixgb_strings[] = {
	"Intel(R) PRO/10GbE Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgb_probe(device_t);
static int      ixgb_attach(device_t);
static int      ixgb_detach(device_t);
static int      ixgb_shutdown(device_t);
static void     ixgb_intr(void *);
static void     ixgb_start(struct ifnet *);
static void     ixgb_start_locked(struct ifnet *);
static int      ixgb_ioctl(struct ifnet *, IOCTL_CMD_TYPE, caddr_t);
static uint64_t ixgb_get_counter(struct ifnet *, ift_counter);
static void     ixgb_watchdog(struct adapter *);
static void     ixgb_init(void *);
static void     ixgb_init_locked(struct adapter *);
static void     ixgb_stop(void *);
static void     ixgb_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgb_media_change(struct ifnet *);
static void     ixgb_identify_hardware(struct adapter *);
static int      ixgb_allocate_pci_resources(struct adapter *);
static void     ixgb_free_pci_resources(struct adapter *);
static void     ixgb_local_timer(void *);
static int      ixgb_hardware_init(struct adapter *);
static int      ixgb_setup_interface(device_t, struct adapter *);
static int      ixgb_setup_transmit_structures(struct adapter *);
static void     ixgb_initialize_transmit_unit(struct adapter *);
static int      ixgb_setup_receive_structures(struct adapter *);
static void     ixgb_initialize_receive_unit(struct adapter *);
static void     ixgb_enable_intr(struct adapter *);
static void     ixgb_disable_intr(struct adapter *);
static void     ixgb_free_transmit_structures(struct adapter *);
static void     ixgb_free_receive_structures(struct adapter *);
static void     ixgb_update_stats_counters(struct adapter *);
static void     ixgb_clean_transmit_interrupts(struct adapter *);
static int      ixgb_allocate_receive_structures(struct adapter *);
static int      ixgb_allocate_transmit_structures(struct adapter *);
static int      ixgb_process_receive_interrupts(struct adapter *, int);
static void     ixgb_receive_checksum(struct adapter *,
				      struct ixgb_rx_desc * rx_desc,
				      struct mbuf *);
static void     ixgb_transmit_checksum_setup(struct adapter *,
					     struct mbuf *,
					     u_int8_t *);
static void     ixgb_set_promisc(struct adapter *);
static void     ixgb_disable_promisc(struct adapter *);
static void     ixgb_set_multi(struct adapter *);
static void     ixgb_print_hw_stats(struct adapter *);
static void     ixgb_print_link_status(struct adapter *);
static int      ixgb_get_buf(int i, struct adapter *, struct mbuf *);
static void     ixgb_enable_vlans(struct adapter * adapter);
static int      ixgb_encap(struct adapter * adapter, struct mbuf * m_head);
static int      ixgb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int      ixgb_dma_malloc(struct adapter *, bus_size_t,
				struct ixgb_dma_alloc *, int);
static void     ixgb_dma_free(struct adapter *, struct ixgb_dma_alloc *);

#ifdef DEVICE_POLLING
static poll_handler_t ixgb_poll;
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/
static device_method_t ixgb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ixgb_probe),
	DEVMETHOD(device_attach, ixgb_attach),
	DEVMETHOD(device_detach, ixgb_detach),
	DEVMETHOD(device_shutdown, ixgb_shutdown),

	DEVMETHOD_END
};

static driver_t ixgb_driver = {
	"ixgb", ixgb_methods, sizeof(struct adapter),
};

static devclass_t ixgb_devclass;
DRIVER_MODULE(ixgb, pci, ixgb_driver, ixgb_devclass, 0, 0);

MODULE_DEPEND(ixgb, pci, 1, 1, 1);
MODULE_DEPEND(ixgb, ether, 1, 1, 1);

/* some defines for controlling descriptor fetches in h/w */
#define RXDCTL_PTHRESH_DEFAULT 128	/* chip considers prefetch below this */
#define RXDCTL_HTHRESH_DEFAULT 16	/* chip will only prefetch if tail is
					 * pushed this many descriptors from
					 * head */
#define RXDCTL_WTHRESH_DEFAULT 0	/* chip writes back at this many or RXT0 */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgb_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgb_probe(device_t dev)
{
	ixgb_vendor_info_t *ent;

	u_int16_t       pci_vendor_id = 0;
	u_int16_t       pci_device_id = 0;
	u_int16_t       pci_subvendor_id = 0;
	u_int16_t       pci_subdevice_id = 0;
	char            adapter_name[60];

	INIT_DEBUGOUT("ixgb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IXGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = ixgb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		     (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		     (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s, Version - %s",
				ixgb_strings[ent->index],
				ixgb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
* This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int ixgb_attach(device_t dev) { struct adapter *adapter; int tsize, rsize; int error = 0; device_printf(dev, "%s\n", ixgb_copyright); INIT_DEBUGOUT("ixgb_attach: begin"); /* Allocate, clear, and link in our adapter structure */ if (!(adapter = device_get_softc(dev))) { device_printf(dev, "adapter structure allocation failed\n"); return (ENOMEM); } bzero(adapter, sizeof(struct adapter)); adapter->dev = dev; adapter->osdep.dev = dev; IXGB_LOCK_INIT(adapter, device_get_nameunit(dev)); if (ixgb_adapter_list != NULL) ixgb_adapter_list->prev = adapter; adapter->next = ixgb_adapter_list; ixgb_adapter_list = adapter; /* SYSCTL APIs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW, (void *)adapter, 0, ixgb_sysctl_stats, "I", "Statistics"); callout_init_mtx(&adapter->timer, &adapter->mtx, 0); /* Determine hardware revision */ ixgb_identify_hardware(adapter); /* Parameters (to be read from user) */ adapter->num_tx_desc = IXGB_MAX_TXD; adapter->num_rx_desc = IXGB_MAX_RXD; adapter->tx_int_delay = TIDV; adapter->rx_int_delay = RDTR; adapter->rx_buffer_len = IXGB_RXBUFFER_2048; adapter->hw.fc.high_water = FCRTH; adapter->hw.fc.low_water = FCRTL; adapter->hw.fc.pause_time = FCPAUSE; adapter->hw.fc.send_xon = TRUE; adapter->hw.fc.type = FLOW_CONTROL; /* Set the max frame size assuming standard ethernet sized frames */ adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN; if (ixgb_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } tsize = IXGB_ROUNDUP(adapter->num_tx_desc * sizeof(struct ixgb_tx_desc), 4096); /* Allocate Transmit Descriptor ring */ if (ixgb_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TxDescriptor memory\n"); error = ENOMEM; goto err_tx_desc; } adapter->tx_desc_base = (struct ixgb_tx_desc *) adapter->txdma.dma_vaddr; rsize = IXGB_ROUNDUP(adapter->num_rx_desc * sizeof(struct ixgb_rx_desc), 4096); /* Allocate Receive Descriptor ring */ if (ixgb_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate rx_desc memory\n"); error = ENOMEM; goto err_rx_desc; } adapter->rx_desc_base = (struct ixgb_rx_desc *) adapter->rxdma.dma_vaddr; /* Allocate multicast array memory. 
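	 * The table holds one IXGB_ETH_LENGTH_OF_ADDRESS-byte slot (one
	 * Ethernet address) per list entry, so the size requested below
	 * works out to, in effect:
	 *
	 *	sizeof(u_int8_t) * IXGB_ETH_LENGTH_OF_ADDRESS *
	 *	    MAX_NUM_MULTICAST_ADDRESSES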
*/ adapter->mta = malloc(sizeof(u_int8_t) * IXGB_ETH_LENGTH_OF_ADDRESS * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_hw_init; } /* Initialize the hardware */ if (ixgb_hardware_init(adapter)) { device_printf(dev, "Unable to initialize the hardware\n"); error = EIO; goto err_hw_init; } /* Setup OS specific network interface */ if (ixgb_setup_interface(dev, adapter) != 0) goto err_hw_init; /* Initialize statistics */ ixgb_clear_hw_cntrs(&adapter->hw); ixgb_update_stats_counters(adapter); INIT_DEBUGOUT("ixgb_attach: end"); return (0); err_hw_init: ixgb_dma_free(adapter, &adapter->rxdma); err_rx_desc: ixgb_dma_free(adapter, &adapter->txdma); err_tx_desc: err_pci: if (adapter->ifp != NULL) if_free(adapter->ifp); ixgb_free_pci_resources(adapter); sysctl_ctx_free(&adapter->sysctl_ctx); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int ixgb_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; INIT_DEBUGOUT("ixgb_detach: begin"); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif IXGB_LOCK(adapter); adapter->in_detach = 1; ixgb_stop(adapter); IXGB_UNLOCK(adapter); #if __FreeBSD_version < 500000 ether_ifdetach(ifp, ETHER_BPF_SUPPORTED); #else ether_ifdetach(ifp); #endif callout_drain(&adapter->timer); ixgb_free_pci_resources(adapter); #if __FreeBSD_version >= 500000 if_free(ifp); #endif /* Free Transmit Descriptor ring */ if (adapter->tx_desc_base) { ixgb_dma_free(adapter, &adapter->txdma); adapter->tx_desc_base = NULL; } /* Free Receive Descriptor ring */ if (adapter->rx_desc_base) { ixgb_dma_free(adapter, &adapter->rxdma); adapter->rx_desc_base = NULL; } /* Remove from the adapter list */ if (ixgb_adapter_list == adapter) ixgb_adapter_list = adapter->next; if (adapter->next != NULL) adapter->next->prev = adapter->prev; if (adapter->prev != NULL) adapter->prev->next = adapter->next; free(adapter->mta, M_DEVBUF); IXGB_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixgb_shutdown(device_t dev) { struct adapter *adapter = device_get_softc(dev); IXGB_LOCK(adapter); ixgb_stop(adapter); IXGB_UNLOCK(adapter); return (0); } /********************************************************************* * Transmit entry point * * ixgb_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * In case resources are not available stack is notified and * the packet is requeued. 
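 *  The loop below follows the classic ifqueue shape; a sketch:
 *
 *	while (ifp->if_snd.ifq_head != NULL) {
 *		IF_DEQUEUE(&ifp->if_snd, m_head);
 *		if (ixgb_encap(adapter, m_head)) {
 *			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 *			IF_PREPEND(&ifp->if_snd, m_head);
 *			break;
 *		}
 *	}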
**********************************************************************/ static void ixgb_start_locked(struct ifnet * ifp) { struct mbuf *m_head; struct adapter *adapter = ifp->if_softc; IXGB_LOCK_ASSERT(adapter); if (!adapter->link_active) return; while (ifp->if_snd.ifq_head != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (ixgb_encap(adapter, m_head)) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; IF_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ #if __FreeBSD_version < 500000 if (ifp->if_bpf) bpf_mtap(ifp, m_head); #else ETHER_BPF_MTAP(ifp, m_head); #endif /* Set timeout in case hardware has problems transmitting */ adapter->tx_timer = IXGB_TX_TIMEOUT; } return; } static void ixgb_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; IXGB_LOCK(adapter); ixgb_start_locked(ifp); IXGB_UNLOCK(adapter); return; } /********************************************************************* * Ioctl entry point * * ixgb_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixgb_ioctl(struct ifnet * ifp, IOCTL_CMD_TYPE command, caddr_t data) { int mask, error = 0; struct ifreq *ifr = (struct ifreq *) data; struct adapter *adapter = ifp->if_softc; if (adapter->in_detach) goto out; switch (command) { case SIOCSIFADDR: case SIOCGIFADDR: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFADDR (Get/Set Interface Addr)"); ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXGB_MAX_JUMBO_FRAME_SIZE - ETHER_HDR_LEN) { error = EINVAL; } else { IXGB_LOCK(adapter); ifp->if_mtu = ifr->ifr_mtu; adapter->hw.max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgb_init_locked(adapter); IXGB_UNLOCK(adapter); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)"); IXGB_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ixgb_init_locked(adapter); } ixgb_disable_promisc(adapter); ixgb_set_promisc(adapter); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixgb_stop(adapter); } } IXGB_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGB_LOCK(adapter); ixgb_disable_intr(adapter); ixgb_set_multi(adapter); ixgb_enable_intr(adapter); IXGB_UNLOCK(adapter); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(ixgb_poll, ifp); if (error) return(error); IXGB_LOCK(adapter); ixgb_disable_intr(adapter); ifp->if_capenable |= IFCAP_POLLING; IXGB_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ IXGB_LOCK(adapter); ixgb_enable_intr(adapter); ifp->if_capenable &= ~IFCAP_POLLING; IXGB_UNLOCK(adapter); } } #endif /* DEVICE_POLLING */ if (mask & IFCAP_HWCSUM) { if (IFCAP_HWCSUM & ifp->if_capenable) ifp->if_capenable &= ~IFCAP_HWCSUM; else ifp->if_capenable |= IFCAP_HWCSUM; if (ifp->if_drv_flags & 
IFF_DRV_RUNNING) ixgb_init(adapter); } break; default: IOCTL_DEBUGOUT1("ioctl received: UNKNOWN (0x%X)\n", (int)command); error = EINVAL; } out: return (error); } /********************************************************************* * Watchdog entry point * * This routine is called whenever hardware quits transmitting. * **********************************************************************/ static void ixgb_watchdog(struct adapter *adapter) { struct ifnet *ifp; ifp = adapter->ifp; /* * If we are in this routine because of pause frames, then don't * reset the hardware. */ if (IXGB_READ_REG(&adapter->hw, STATUS) & IXGB_STATUS_TXOFF) { adapter->tx_timer = IXGB_TX_TIMEOUT; return; } if_printf(ifp, "watchdog timeout -- resetting\n"); ixgb_stop(adapter); ixgb_init_locked(adapter); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return; } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void ixgb_init_locked(struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("ixgb_init: begin"); IXGB_LOCK_ASSERT(adapter); ixgb_stop(adapter); ifp = adapter->ifp; /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(ifp), adapter->hw.curr_mac_addr, IXGB_ETH_LENGTH_OF_ADDRESS); /* Initialize the hardware */ if (ixgb_hardware_init(adapter)) { if_printf(ifp, "Unable to initialize the hardware\n"); return; } ixgb_enable_vlans(adapter); /* Prepare transmit descriptors and buffers */ if (ixgb_setup_transmit_structures(adapter)) { if_printf(ifp, "Could not setup transmit structures\n"); ixgb_stop(adapter); return; } ixgb_initialize_transmit_unit(adapter); /* Setup Multicast table */ ixgb_set_multi(adapter); /* Prepare receive descriptors and buffers */ if (ixgb_setup_receive_structures(adapter)) { if_printf(ifp, "Could not setup receive structures\n"); ixgb_stop(adapter); return; } ixgb_initialize_receive_unit(adapter); /* Don't lose promiscuous settings */ ixgb_set_promisc(adapter); ifp = adapter->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = IXGB_CHECKSUM_FEATURES; else ifp->if_hwassist = 0; /* Enable jumbo frames */ if (ifp->if_mtu > ETHERMTU) { uint32_t temp_reg; IXGB_WRITE_REG(&adapter->hw, MFS, adapter->hw.max_frame_size << IXGB_MFS_SHIFT); temp_reg = IXGB_READ_REG(&adapter->hw, CTRL0); temp_reg |= IXGB_CTRL0_JFE; IXGB_WRITE_REG(&adapter->hw, CTRL0, temp_reg); } callout_reset(&adapter->timer, hz, ixgb_local_timer, adapter); ixgb_clear_hw_cntrs(&adapter->hw); #ifdef DEVICE_POLLING /* * Only disable interrupts if we are polling, make sure they are on * otherwise. 
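	 * Polling is toggled at runtime through SIOCSIFCAP (handled in
	 * ixgb_ioctl() above), e.g. from userland, roughly:
	 *
	 *	ifconfig ixgb0 polling
	 *
	 * so this test has to be repeated on every reinitialization.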
 */
	if (ifp->if_capenable & IFCAP_POLLING)
		ixgb_disable_intr(adapter);
	else
#endif
		ixgb_enable_intr(adapter);

	return;
}

static void
ixgb_init(void *arg)
{
	struct adapter *adapter = arg;

	IXGB_LOCK(adapter);
	ixgb_init_locked(adapter);
	IXGB_UNLOCK(adapter);
	return;
}

#ifdef DEVICE_POLLING
static int
ixgb_poll_locked(struct ifnet * ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	u_int32_t       reg_icr;
	int             rx_npkts;

	IXGB_LOCK_ASSERT(adapter);

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = IXGB_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC)) {
			ixgb_check_for_link(&adapter->hw);
			ixgb_print_link_status(adapter);
		}
	}
	rx_npkts = ixgb_process_receive_interrupts(adapter, count);
	ixgb_clean_transmit_interrupts(adapter);

	if (ifp->if_snd.ifq_head != NULL)
		ixgb_start_locked(ifp);
	return (rx_npkts);
}

static int
ixgb_poll(struct ifnet * ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	int             rx_npkts = 0;

	IXGB_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		rx_npkts = ixgb_poll_locked(ifp, cmd, count);
	IXGB_UNLOCK(adapter);
	return (rx_npkts);
}
#endif				/* DEVICE_POLLING */

/*********************************************************************
 *
 *  Interrupt Service routine
 *
 **********************************************************************/

static void
ixgb_intr(void *arg)
{
	u_int32_t       loop_cnt = IXGB_MAX_INTR;
	u_int32_t       reg_icr;
	struct ifnet   *ifp;
	struct adapter *adapter = arg;
	boolean_t       rxdmt0 = FALSE;

	IXGB_LOCK(adapter);

	ifp = adapter->ifp;

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING) {
		IXGB_UNLOCK(adapter);
		return;
	}
#endif

	reg_icr = IXGB_READ_REG(&adapter->hw, ICR);
	if (reg_icr == 0) {
		IXGB_UNLOCK(adapter);
		return;
	}

	if (reg_icr & IXGB_INT_RXDMT0)
		rxdmt0 = TRUE;

#ifdef _SV_
	if (reg_icr & IXGB_INT_RXDMT0)
		adapter->sv_stats.icr_rxdmt0++;
	if (reg_icr & IXGB_INT_RXO)
		adapter->sv_stats.icr_rxo++;
	if (reg_icr & IXGB_INT_RXT0)
		adapter->sv_stats.icr_rxt0++;
	if (reg_icr & IXGB_INT_TXDW)
		adapter->sv_stats.icr_TXDW++;
#endif				/* _SV_ */

	/* Link status change */
	if (reg_icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC)) {
		ixgb_check_for_link(&adapter->hw);
		ixgb_print_link_status(adapter);
	}
	while (loop_cnt > 0) {
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			ixgb_process_receive_interrupts(adapter, -1);
			ixgb_clean_transmit_interrupts(adapter);
		}
		loop_cnt--;
	}

	if (rxdmt0 && adapter->raidc) {
		IXGB_WRITE_REG(&adapter->hw, IMC, IXGB_INT_RXDMT0);
		IXGB_WRITE_REG(&adapter->hw, IMS, IXGB_INT_RXDMT0);
	}
	if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_snd.ifq_head != NULL)
		ixgb_start_locked(ifp);

	IXGB_UNLOCK(adapter);
	return;
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
ixgb_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("ixgb_media_status: begin");

	ixgb_check_for_link(&adapter->hw);
	ixgb_print_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->hw.link_up)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;

	return;
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
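 *
 *  The 82597EX runs 10 Gb full duplex only, so there is no speed or
 *  duplex to change; the handler below just sanity-checks that an
 *  Ethernet media word was passed in, i.e. in essence:
 *
 *	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
 *		return (EINVAL);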
 *
 **********************************************************************/
static int
ixgb_media_change(struct ifnet * ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia *ifm = &adapter->media;

	INIT_DEBUGOUT("ixgb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgb_encap(struct adapter * adapter, struct mbuf * m_head)
{
	u_int8_t        txd_popts;
	int             i, j, error, nsegs;

#if __FreeBSD_version < 500000
	struct ifvlan  *ifv = NULL;
#endif
	bus_dma_segment_t segs[IXGB_MAX_SCATTER];
	bus_dmamap_t    map;
	struct ixgb_buffer *tx_buffer = NULL;
	struct ixgb_tx_desc *current_tx_desc = NULL;
	struct ifnet   *ifp = adapter->ifp;

	/*
	 * Force a cleanup if number of TX descriptors available hits the
	 * threshold
	 */
	if (adapter->num_tx_desc_avail <= IXGB_TX_CLEANUP_THRESHOLD) {
		ixgb_clean_transmit_interrupts(adapter);
	}
	if (adapter->num_tx_desc_avail <= IXGB_TX_CLEANUP_THRESHOLD) {
		adapter->no_tx_desc_avail1++;
		return (ENOBUFS);
	}
	/*
	 * Map the packet for DMA.
	 */
	if (bus_dmamap_create(adapter->txtag, BUS_DMA_NOWAIT, &map)) {
		adapter->no_tx_map_avail++;
		return (ENOMEM);
	}
	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, m_head, segs,
	    &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		adapter->no_tx_dma_setup++;
		if_printf(ifp, "ixgb_encap: bus_dmamap_load_mbuf failed; "
		    "error %u\n", error);
		bus_dmamap_destroy(adapter->txtag, map);
		return (error);
	}
	KASSERT(nsegs != 0, ("ixgb_encap: empty packet"));

	if (nsegs > adapter->num_tx_desc_avail) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_destroy(adapter->txtag, map);
		return (ENOBUFS);
	}
	if (ifp->if_hwassist > 0) {
		ixgb_transmit_checksum_setup(adapter, m_head,
		    &txd_popts);
	} else
		txd_popts = 0;

	/* Find out if we are in vlan mode */
#if __FreeBSD_version < 500000
	if ((m_head->m_flags & (M_PROTO1 | M_PKTHDR)) == (M_PROTO1 | M_PKTHDR) &&
	    m_head->m_pkthdr.rcvif != NULL &&
	    m_head->m_pkthdr.rcvif->if_type == IFT_L2VLAN)
		ifv = m_head->m_pkthdr.rcvif->if_softc;
#elif __FreeBSD_version < 700000
	mtag = VLAN_OUTPUT_TAG(ifp, m_head);
#endif
	i = adapter->next_avail_tx_desc;
	for (j = 0; j < nsegs; j++) {
		tx_buffer = &adapter->tx_buffer_area[i];
		current_tx_desc = &adapter->tx_desc_base[i];

		current_tx_desc->buff_addr = htole64(segs[j].ds_addr);
		current_tx_desc->cmd_type_len = (adapter->txd_cmd | segs[j].ds_len);
		current_tx_desc->popts = txd_popts;
		if (++i == adapter->num_tx_desc)
			i = 0;

		tx_buffer->m_head = NULL;
	}

	adapter->num_tx_desc_avail -= nsegs;
	adapter->next_avail_tx_desc = i;

#if __FreeBSD_version < 500000
	if (ifv != NULL) {
		/* Set the vlan id */
		current_tx_desc->vlan = ifv->ifv_tag;
#elif __FreeBSD_version < 700000
	if (mtag != NULL) {
		/* Set the vlan id */
		current_tx_desc->vlan = VLAN_TAG_VALUE(mtag);
#else
	if (m_head->m_flags & M_VLANTAG) {
		current_tx_desc->vlan = m_head->m_pkthdr.ether_vtag;
#endif
		/* Tell hardware to add tag */
		current_tx_desc->cmd_type_len |= IXGB_TX_DESC_CMD_VLE;
	}
	tx_buffer->m_head = m_head;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP)
	 */
	current_tx_desc->cmd_type_len |= (IXGB_TX_DESC_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
	 * that this frame is available to transmit.
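	 * The tail write acts as the doorbell: once bus_dmamap_sync()
	 * has flushed the descriptors, storing the next free index in
	 * TDT hands everything before it to the device.  (The E1000
	 * wording above appears to be inherited from the em(4) driver;
	 * the register written here is the 82597's TDT.)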
*/ IXGB_WRITE_REG(&adapter->hw, TDT, i); return (0); } static void ixgb_set_promisc(struct adapter * adapter) { u_int32_t reg_rctl; struct ifnet *ifp = adapter->ifp; reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (IXGB_RCTL_UPE | IXGB_RCTL_MPE); IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= IXGB_RCTL_MPE; reg_rctl &= ~IXGB_RCTL_UPE; IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); } return; } static void ixgb_disable_promisc(struct adapter * adapter) { u_int32_t reg_rctl; reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); reg_rctl &= (~IXGB_RCTL_UPE); reg_rctl &= (~IXGB_RCTL_MPE); IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); return; } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void ixgb_set_multi(struct adapter * adapter) { u_int32_t reg_rctl = 0; u_int8_t *mta; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = adapter->ifp; IOCTL_DEBUGOUT("ixgb_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u_int8_t) * IXGB_ETH_LENGTH_OF_ADDRESS * MAX_NUM_MULTICAST_ADDRESSES); if_maddr_rlock(ifp); #if __FreeBSD_version < 500000 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { #else TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { #endif if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * IXGB_ETH_LENGTH_OF_ADDRESS], IXGB_ETH_LENGTH_OF_ADDRESS); mcnt++; } if_maddr_runlock(ifp); if (mcnt > MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); reg_rctl |= IXGB_RCTL_MPE; IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); } else ixgb_mc_addr_list_update(&adapter->hw, mta, mcnt, 0); return; } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. * **********************************************************************/ static void ixgb_local_timer(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; ifp = adapter->ifp; IXGB_LOCK_ASSERT(adapter); ixgb_check_for_link(&adapter->hw); ixgb_print_link_status(adapter); ixgb_update_stats_counters(adapter); if (ixgb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING) { ixgb_print_hw_stats(adapter); } if (adapter->tx_timer != 0 && --adapter->tx_timer == 0) ixgb_watchdog(adapter); callout_reset(&adapter->timer, hz, ixgb_local_timer, adapter); } static void ixgb_print_link_status(struct adapter * adapter) { if (adapter->hw.link_up) { if (!adapter->link_active) { if_printf(adapter->ifp, "Link is up %d Mbps %s \n", 10000, "Full Duplex"); adapter->link_active = 1; } } else { if (adapter->link_active) { if_printf(adapter->ifp, "Link is Down \n"); adapter->link_active = 0; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. 
* **********************************************************************/ static void ixgb_stop(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; ifp = adapter->ifp; IXGB_LOCK_ASSERT(adapter); INIT_DEBUGOUT("ixgb_stop: begin\n"); ixgb_disable_intr(adapter); adapter->hw.adapter_stopped = FALSE; ixgb_adapter_stop(&adapter->hw); callout_stop(&adapter->timer); ixgb_free_transmit_structures(adapter); ixgb_free_receive_structures(adapter); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); adapter->tx_timer = 0; return; } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void ixgb_identify_hardware(struct adapter * adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Set MacType, etc. based on this PCI info */ switch (adapter->hw.device_id) { case IXGB_DEVICE_ID_82597EX: case IXGB_DEVICE_ID_82597EX_SR: adapter->hw.mac_type = ixgb_82597; break; default: INIT_DEBUGOUT1("Unknown device if 0x%x", adapter->hw.device_id); device_printf(dev, "unsupported device id 0x%x\n", adapter->hw.device_id); } return; } static int ixgb_allocate_pci_resources(struct adapter * adapter) { int rid; device_t dev = adapter->dev; rid = IXGB_MMBA; adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->res_memory)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->res_memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory); adapter->hw.hw_addr = (uint8_t *) & adapter->osdep.mem_bus_space_handle; rid = 0x0; adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!(adapter->res_interrupt)) { device_printf(dev, "Unable to allocate bus resource: interrupt\n"); return (ENXIO); } if (bus_setup_intr(dev, adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, NULL, (void (*) (void *))ixgb_intr, adapter, &adapter->int_handler_tag)) { device_printf(dev, "Error registering interrupt handler!\n"); return (ENXIO); } adapter->hw.back = &adapter->osdep; return (0); } static void ixgb_free_pci_resources(struct adapter * adapter) { device_t dev = adapter->dev; if (adapter->res_interrupt != NULL) { bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag); bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt); } if (adapter->res_memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, IXGB_MMBA, adapter->res_memory); } if (adapter->res_ioport != NULL) { bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid, adapter->res_ioport); } return; } /********************************************************************* * * Initialize the hardware to a configuration as specified by the * adapter structure. 
The controller is reset, the EEPROM is * verified, the MAC address is set, then the shared initialization * routines are called. * **********************************************************************/ static int ixgb_hardware_init(struct adapter * adapter) { /* Issue a global reset */ adapter->hw.adapter_stopped = FALSE; ixgb_adapter_stop(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (!ixgb_validate_eeprom_checksum(&adapter->hw)) { device_printf(adapter->dev, "The EEPROM Checksum Is Not Valid\n"); return (EIO); } if (!ixgb_init_hw(&adapter->hw)) { device_printf(adapter->dev, "Hardware Initialization Failed"); return (EIO); } return (0); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int ixgb_setup_interface(device_t dev, struct adapter * adapter) { struct ifnet *ifp; INIT_DEBUGOUT("ixgb_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } #if __FreeBSD_version >= 502000 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); #else ifp->if_unit = device_get_unit(dev); ifp->if_name = "ixgb"; #endif ifp->if_baudrate = 1000000000; ifp->if_init = ixgb_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixgb_ioctl; ifp->if_start = ixgb_start; ifp->if_get_counter = ixgb_get_counter; ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 1; #if __FreeBSD_version < 500000 ether_ifattach(ifp, ETHER_BPF_SUPPORTED); #else ether_ifattach(ifp, adapter->hw.curr_mac_addr); #endif ifp->if_capabilities = IFCAP_HWCSUM; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); #if __FreeBSD_version >= 500000 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; #endif ifp->if_capenable = ifp->if_capabilities; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixgb_media_change, ixgb_media_status); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /******************************************************************** * Manage DMA'able memory. 
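 *  Typical usage, as in ixgb_attach() (a sketch, error handling
 *  elided; tsize is the ring size in bytes):
 *
 *	struct ixgb_dma_alloc dma;
 *
 *	if (ixgb_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... dma.dma_vaddr is the KVA of the ring,
 *		... dma.dma_paddr the bus address for the device;
 *		ixgb_dma_free(adapter, &dma);
 *	}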
*******************************************************************/ static void ixgb_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs->ds_addr; return; } static int ixgb_dma_malloc(struct adapter * adapter, bus_size_t size, struct ixgb_dma_alloc * dma, int mapflags) { device_t dev; int r; dev = adapter->dev; r = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ #if __FreeBSD_version >= 502000 NULL, /* lockfunc */ NULL, /* lockfuncarg */ #endif &dma->dma_tag); if (r != 0) { device_printf(dev, "ixgb_dma_malloc: bus_dma_tag_create failed; " "error %u\n", r); goto fail_0; } r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) { device_printf(dev, "ixgb_dma_malloc: bus_dmamem_alloc failed; " "error %u\n", r); goto fail_1; } r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, ixgb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) { device_printf(dev, "ixgb_dma_malloc: bus_dmamap_load failed; " "error %u\n", r); goto fail_2; } dma->dma_size = size; return (0); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); fail_1: bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (r); } static void ixgb_dma_free(struct adapter * adapter, struct ixgb_dma_alloc * dma) { bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. * **********************************************************************/ static int ixgb_allocate_transmit_structures(struct adapter * adapter) { if (!(adapter->tx_buffer_area = (struct ixgb_buffer *) malloc(sizeof(struct ixgb_buffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n"); return ENOMEM; } bzero(adapter->tx_buffer_area, sizeof(struct ixgb_buffer) * adapter->num_tx_desc); return 0; } /********************************************************************* * * Allocate and initialize transmit structures. * **********************************************************************/ static int ixgb_setup_transmit_structures(struct adapter * adapter) { /* * Setup DMA descriptor areas. 
*/ if (bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES * IXGB_MAX_SCATTER, /* maxsize */ IXGB_MAX_SCATTER, /* nsegments */ MCLBYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ #if __FreeBSD_version >= 502000 NULL, /* lockfunc */ NULL, /* lockfuncarg */ #endif &adapter->txtag)) { device_printf(adapter->dev, "Unable to allocate TX DMA tag\n"); return (ENOMEM); } if (ixgb_allocate_transmit_structures(adapter)) return ENOMEM; bzero((void *)adapter->tx_desc_base, (sizeof(struct ixgb_tx_desc)) * adapter->num_tx_desc); adapter->next_avail_tx_desc = 0; adapter->oldest_used_tx_desc = 0; /* Set number of descriptors available */ adapter->num_tx_desc_avail = adapter->num_tx_desc; /* Set checksum context */ adapter->active_checksum_context = OFFLOAD_NONE; return 0; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void ixgb_initialize_transmit_unit(struct adapter * adapter) { u_int32_t reg_tctl; u_int64_t tdba = adapter->txdma.dma_paddr; /* Setup the Base and Length of the Tx Descriptor Ring */ IXGB_WRITE_REG(&adapter->hw, TDBAL, (tdba & 0x00000000ffffffffULL)); IXGB_WRITE_REG(&adapter->hw, TDBAH, (tdba >> 32)); IXGB_WRITE_REG(&adapter->hw, TDLEN, adapter->num_tx_desc * sizeof(struct ixgb_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGB_WRITE_REG(&adapter->hw, TDH, 0); IXGB_WRITE_REG(&adapter->hw, TDT, 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", IXGB_READ_REG(&adapter->hw, TDBAL), IXGB_READ_REG(&adapter->hw, TDLEN)); IXGB_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay); /* Program the Transmit Control Register */ reg_tctl = IXGB_READ_REG(&adapter->hw, TCTL); reg_tctl = IXGB_TCTL_TCE | IXGB_TCTL_TXEN | IXGB_TCTL_TPDE; IXGB_WRITE_REG(&adapter->hw, TCTL, reg_tctl); /* Setup Transmit Descriptor Settings for this adapter */ adapter->txd_cmd = IXGB_TX_DESC_TYPE | IXGB_TX_DESC_CMD_RS; if (adapter->tx_int_delay > 0) adapter->txd_cmd |= IXGB_TX_DESC_CMD_IDE; return; } /********************************************************************* * * Free all transmit related data structures. * **********************************************************************/ static void ixgb_free_transmit_structures(struct adapter * adapter) { struct ixgb_buffer *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_structures: begin"); if (adapter->tx_buffer_area != NULL) { tx_buffer = adapter->tx_buffer_area; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_unload(adapter->txtag, tx_buffer->map); bus_dmamap_destroy(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); } tx_buffer->m_head = NULL; } } if (adapter->tx_buffer_area != NULL) { free(adapter->tx_buffer_area, M_DEVBUF); adapter->tx_buffer_area = NULL; } if (adapter->txtag != NULL) { bus_dma_tag_destroy(adapter->txtag); adapter->txtag = NULL; } return; } /********************************************************************* * * The offload context needs to be set when we transfer the first * packet of a particular protocol (TCP/UDP). We change the * context only if the protocol type changes. 
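 *  The decision is driven by the csum_flags the stack sets on each
 *  packet header mbuf; in essence:
 *
 *	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
 *		use (or load) the OFFLOAD_TCP_IP context;
 *	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
 *		use (or load) the OFFLOAD_UDP_IP context;
 *	else
 *		request no offload for this packet;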
* **********************************************************************/ static void ixgb_transmit_checksum_setup(struct adapter * adapter, struct mbuf * mp, u_int8_t * txd_popts) { struct ixgb_context_desc *TXD; struct ixgb_buffer *tx_buffer; int curr_txd; if (mp->m_pkthdr.csum_flags) { if (mp->m_pkthdr.csum_flags & CSUM_TCP) { *txd_popts = IXGB_TX_DESC_POPTS_TXSM; if (adapter->active_checksum_context == OFFLOAD_TCP_IP) return; else adapter->active_checksum_context = OFFLOAD_TCP_IP; } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { *txd_popts = IXGB_TX_DESC_POPTS_TXSM; if (adapter->active_checksum_context == OFFLOAD_UDP_IP) return; else adapter->active_checksum_context = OFFLOAD_UDP_IP; } else { *txd_popts = 0; return; } } else { *txd_popts = 0; return; } /* * If we reach this point, the checksum offload context needs to be * reset. */ curr_txd = adapter->next_avail_tx_desc; tx_buffer = &adapter->tx_buffer_area[curr_txd]; TXD = (struct ixgb_context_desc *) & adapter->tx_desc_base[curr_txd]; TXD->tucss = ENET_HEADER_SIZE + sizeof(struct ip); TXD->tucse = 0; TXD->mss = 0; if (adapter->active_checksum_context == OFFLOAD_TCP_IP) { TXD->tucso = ENET_HEADER_SIZE + sizeof(struct ip) + offsetof(struct tcphdr, th_sum); } else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) { TXD->tucso = ENET_HEADER_SIZE + sizeof(struct ip) + offsetof(struct udphdr, uh_sum); } TXD->cmd_type_len = IXGB_CONTEXT_DESC_CMD_TCP | IXGB_TX_DESC_CMD_RS | IXGB_CONTEXT_DESC_CMD_IDE; tx_buffer->m_head = NULL; if (++curr_txd == adapter->num_tx_desc) curr_txd = 0; adapter->num_tx_desc_avail--; adapter->next_avail_tx_desc = curr_txd; return; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void ixgb_clean_transmit_interrupts(struct adapter * adapter) { int i, num_avail; struct ixgb_buffer *tx_buffer; struct ixgb_tx_desc *tx_desc; IXGB_LOCK_ASSERT(adapter); if (adapter->num_tx_desc_avail == adapter->num_tx_desc) return; #ifdef _SV_ adapter->clean_tx_interrupts++; #endif num_avail = adapter->num_tx_desc_avail; i = adapter->oldest_used_tx_desc; tx_buffer = &adapter->tx_buffer_area[i]; tx_desc = &adapter->tx_desc_base[i]; while (tx_desc->status & IXGB_TX_DESC_STATUS_DD) { tx_desc->status = 0; num_avail++; if (tx_buffer->m_head) { bus_dmamap_sync(adapter->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->txtag, tx_buffer->map); bus_dmamap_destroy(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } if (++i == adapter->num_tx_desc) i = 0; tx_buffer = &adapter->tx_buffer_area[i]; tx_desc = &adapter->tx_desc_base[i]; } adapter->oldest_used_tx_desc = i; /* * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack that * it is OK to send packets. If there are no pending descriptors, * clear the timeout. Otherwise, if some descriptors have been freed, * restart the timeout. 
*/ if (num_avail > IXGB_TX_CLEANUP_THRESHOLD) { struct ifnet *ifp = adapter->ifp; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (num_avail == adapter->num_tx_desc) adapter->tx_timer = 0; else if (num_avail == adapter->num_tx_desc_avail) adapter->tx_timer = IXGB_TX_TIMEOUT; } adapter->num_tx_desc_avail = num_avail; return; } /********************************************************************* * * Get a buffer from system mbuf buffer pool. * **********************************************************************/ static int ixgb_get_buf(int i, struct adapter * adapter, struct mbuf * nmp) { - register struct mbuf *mp = nmp; + struct mbuf *mp = nmp; struct ixgb_buffer *rx_buffer; struct ifnet *ifp; bus_addr_t paddr; int error; ifp = adapter->ifp; if (mp == NULL) { mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mp == NULL) { adapter->mbuf_alloc_failed++; return (ENOBUFS); } mp->m_len = mp->m_pkthdr.len = MCLBYTES; } else { mp->m_len = mp->m_pkthdr.len = MCLBYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } if (ifp->if_mtu <= ETHERMTU) { m_adj(mp, ETHER_ALIGN); } rx_buffer = &adapter->rx_buffer_area[i]; /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ error = bus_dmamap_load(adapter->rxtag, rx_buffer->map, mtod(mp, void *), mp->m_len, ixgb_dmamap_cb, &paddr, 0); if (error) { m_free(mp); return (error); } rx_buffer->m_head = mp; adapter->rx_desc_base[i].buff_addr = htole64(paddr); bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. 
* **********************************************************************/ static int ixgb_allocate_receive_structures(struct adapter * adapter) { int i, error; struct ixgb_buffer *rx_buffer; if (!(adapter->rx_buffer_area = (struct ixgb_buffer *) malloc(sizeof(struct ixgb_buffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(adapter->dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } bzero(adapter->rx_buffer_area, sizeof(struct ixgb_buffer) * adapter->num_rx_desc); error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),/* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ #if __FreeBSD_version >= 502000 NULL, /* lockfunc */ NULL, /* lockfuncarg */ #endif &adapter->rxtag); if (error != 0) { device_printf(adapter->dev, "ixgb_allocate_receive_structures: " "bus_dma_tag_create failed; error %u\n", error); goto fail_0; } rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT, &rx_buffer->map); if (error != 0) { device_printf(adapter->dev, "ixgb_allocate_receive_structures: " "bus_dmamap_create failed; error %u\n", error); goto fail_1; } } for (i = 0; i < adapter->num_rx_desc; i++) { if (ixgb_get_buf(i, adapter, NULL) == ENOBUFS) { adapter->rx_buffer_area[i].m_head = NULL; adapter->rx_desc_base[i].buff_addr = 0; return (ENOBUFS); } } return (0); fail_1: bus_dma_tag_destroy(adapter->rxtag); fail_0: adapter->rxtag = NULL; free(adapter->rx_buffer_area, M_DEVBUF); adapter->rx_buffer_area = NULL; return (error); } /********************************************************************* * * Allocate and initialize receive structures. * **********************************************************************/ static int ixgb_setup_receive_structures(struct adapter * adapter) { bzero((void *)adapter->rx_desc_base, (sizeof(struct ixgb_rx_desc)) * adapter->num_rx_desc); if (ixgb_allocate_receive_structures(adapter)) return ENOMEM; /* Setup our descriptor pointers */ adapter->next_rx_desc_to_check = 0; adapter->next_rx_desc_to_use = 0; return (0); } /********************************************************************* * * Enable receive unit. 
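 *  On this family the hardware may consume descriptors from RDH up
 *  to, but not including, RDT; initializing RDT to num_rx_desc - 1
 *  below hands it all but one slot of the ring, keeping head and
 *  tail distinct, e.g.:
 *
 *	IXGB_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);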
* **********************************************************************/ static void ixgb_initialize_receive_unit(struct adapter * adapter) { u_int32_t reg_rctl; u_int32_t reg_rxcsum; u_int32_t reg_rxdctl; struct ifnet *ifp; u_int64_t rdba = adapter->rxdma.dma_paddr; ifp = adapter->ifp; /* * Make sure receives are disabled while setting up the descriptor * ring */ reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl & ~IXGB_RCTL_RXEN); /* Set the Receive Delay Timer Register */ IXGB_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay); /* Setup the Base and Length of the Rx Descriptor Ring */ IXGB_WRITE_REG(&adapter->hw, RDBAL, (rdba & 0x00000000ffffffffULL)); IXGB_WRITE_REG(&adapter->hw, RDBAH, (rdba >> 32)); IXGB_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc * sizeof(struct ixgb_rx_desc)); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGB_WRITE_REG(&adapter->hw, RDH, 0); IXGB_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1); reg_rxdctl = RXDCTL_WTHRESH_DEFAULT << IXGB_RXDCTL_WTHRESH_SHIFT | RXDCTL_HTHRESH_DEFAULT << IXGB_RXDCTL_HTHRESH_SHIFT | RXDCTL_PTHRESH_DEFAULT << IXGB_RXDCTL_PTHRESH_SHIFT; IXGB_WRITE_REG(&adapter->hw, RXDCTL, reg_rxdctl); adapter->raidc = 1; if (adapter->raidc) { uint32_t raidc; uint8_t poll_threshold; #define IXGB_RAIDC_POLL_DEFAULT 120 poll_threshold = ((adapter->num_rx_desc - 1) >> 3); poll_threshold >>= 1; poll_threshold &= 0x3F; raidc = IXGB_RAIDC_EN | IXGB_RAIDC_RXT_GATE | (IXGB_RAIDC_POLL_DEFAULT << IXGB_RAIDC_POLL_SHIFT) | (adapter->rx_int_delay << IXGB_RAIDC_DELAY_SHIFT) | poll_threshold; IXGB_WRITE_REG(&adapter->hw, RAIDC, raidc); } /* Enable Receive Checksum Offload for TCP and UDP ? */ if (ifp->if_capenable & IFCAP_RXCSUM) { reg_rxcsum = IXGB_READ_REG(&adapter->hw, RXCSUM); reg_rxcsum |= IXGB_RXCSUM_TUOFL; IXGB_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum); } /* Setup the Receive Control Register */ reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); reg_rctl &= ~(3 << IXGB_RCTL_MO_SHIFT); reg_rctl |= IXGB_RCTL_BAM | IXGB_RCTL_RDMTS_1_2 | IXGB_RCTL_SECRC | IXGB_RCTL_CFF | (adapter->hw.mc_filter_type << IXGB_RCTL_MO_SHIFT); switch (adapter->rx_buffer_len) { default: case IXGB_RXBUFFER_2048: reg_rctl |= IXGB_RCTL_BSIZE_2048; break; case IXGB_RXBUFFER_4096: reg_rctl |= IXGB_RCTL_BSIZE_4096; break; case IXGB_RXBUFFER_8192: reg_rctl |= IXGB_RCTL_BSIZE_8192; break; case IXGB_RXBUFFER_16384: reg_rctl |= IXGB_RCTL_BSIZE_16384; break; } reg_rctl |= IXGB_RCTL_RXEN; /* Enable Receives */ IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); return; } /********************************************************************* * * Free receive related data structures. 
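 *
 * Teardown mirrors setup in reverse; per buffer the ordering below is
 * what the routine relies on (unload the DMA map before destroying it,
 * then release the mbuf):
 *
 *	bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
 *	bus_dmamap_destroy(adapter->rxtag, rx_buffer->map);
 *	m_freem(rx_buffer->m_head);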
* **********************************************************************/ static void ixgb_free_receive_structures(struct adapter * adapter) { struct ixgb_buffer *rx_buffer; int i; INIT_DEBUGOUT("free_receive_structures: begin"); if (adapter->rx_buffer_area != NULL) { rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { if (rx_buffer->map != NULL) { bus_dmamap_unload(adapter->rxtag, rx_buffer->map); bus_dmamap_destroy(adapter->rxtag, rx_buffer->map); } if (rx_buffer->m_head != NULL) m_freem(rx_buffer->m_head); rx_buffer->m_head = NULL; } } if (adapter->rx_buffer_area != NULL) { free(adapter->rx_buffer_area, M_DEVBUF); adapter->rx_buffer_area = NULL; } if (adapter->rxtag != NULL) { bus_dma_tag_destroy(adapter->rxtag); adapter->rxtag = NULL; } return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * *********************************************************************/ static int ixgb_process_receive_interrupts(struct adapter * adapter, int count) { struct ifnet *ifp; struct mbuf *mp; #if __FreeBSD_version < 500000 struct ether_header *eh; #endif int eop = 0; int len; u_int8_t accept_frame = 0; int i; int next_to_use = 0; int eop_desc; int rx_npkts = 0; /* Pointer to the receive descriptor being examined. */ struct ixgb_rx_desc *current_desc; IXGB_LOCK_ASSERT(adapter); ifp = adapter->ifp; i = adapter->next_rx_desc_to_check; next_to_use = adapter->next_rx_desc_to_use; eop_desc = adapter->next_rx_desc_to_check; current_desc = &adapter->rx_desc_base[i]; if (!((current_desc->status) & IXGB_RX_DESC_STATUS_DD)) { #ifdef _SV_ adapter->no_pkts_avail++; #endif return (rx_npkts); } while ((current_desc->status & IXGB_RX_DESC_STATUS_DD) && (count != 0)) { mp = adapter->rx_buffer_area[i].m_head; bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map, BUS_DMASYNC_POSTREAD); accept_frame = 1; if (current_desc->status & IXGB_RX_DESC_STATUS_EOP) { count--; eop = 1; } else { eop = 0; } len = current_desc->length; if (current_desc->errors & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE | IXGB_RX_DESC_ERRORS_P | IXGB_RX_DESC_ERRORS_RXE)) { accept_frame = 0; } if (accept_frame) { /* Assign correct length to the current fragment */ mp->m_len = len; if (adapter->fmp == NULL) { mp->m_pkthdr.len = len; adapter->fmp = mp; /* Store the first mbuf */ adapter->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; adapter->lmp->m_next = mp; adapter->lmp = adapter->lmp->m_next; adapter->fmp->m_pkthdr.len += len; } if (eop) { eop_desc = i; adapter->fmp->m_pkthdr.rcvif = ifp; #if __FreeBSD_version < 500000 eh = mtod(adapter->fmp, struct ether_header *); /* Remove ethernet header from mbuf */ m_adj(adapter->fmp, sizeof(struct ether_header)); ixgb_receive_checksum(adapter, current_desc, adapter->fmp); if (current_desc->status & IXGB_RX_DESC_STATUS_VP) VLAN_INPUT_TAG(eh, adapter->fmp, current_desc->special); else ether_input(ifp, eh, adapter->fmp); #else ixgb_receive_checksum(adapter, current_desc, adapter->fmp); #if __FreeBSD_version < 700000 if (current_desc->status & IXGB_RX_DESC_STATUS_VP) VLAN_INPUT_TAG(ifp, adapter->fmp, current_desc->special); #else if (current_desc->status & IXGB_RX_DESC_STATUS_VP) { adapter->fmp->m_pkthdr.ether_vtag = current_desc->special; adapter->fmp->m_flags |= 
M_VLANTAG; } #endif if (adapter->fmp != NULL) { IXGB_UNLOCK(adapter); (*ifp->if_input) (ifp, adapter->fmp); IXGB_LOCK(adapter); rx_npkts++; } #endif adapter->fmp = NULL; adapter->lmp = NULL; } adapter->rx_buffer_area[i].m_head = NULL; } else { adapter->dropped_pkts++; if (adapter->fmp != NULL) m_freem(adapter->fmp); adapter->fmp = NULL; adapter->lmp = NULL; } /* Zero out the receive descriptor's status */ current_desc->status = 0; /* Advance our pointers to the next descriptor */ if (++i == adapter->num_rx_desc) { i = 0; current_desc = adapter->rx_desc_base; } else current_desc++; } adapter->next_rx_desc_to_check = i; if (--i < 0) i = (adapter->num_rx_desc - 1); /* * 82597EX: Workaround for redundant write back in receive descriptor ring (causes * memory corruption). Avoid using and re-submitting the most recently received RX * descriptor back to hardware. * * if(Last written back descriptor == EOP bit set descriptor) * then avoid re-submitting the most recently received RX descriptor * back to hardware. * if(Last written back descriptor != EOP bit set descriptor) * then avoid re-submitting the most recently received RX descriptors * till last EOP bit set descriptor. */ if (eop_desc != i) { if (++eop_desc == adapter->num_rx_desc) eop_desc = 0; i = eop_desc; } /* Replenish the descriptors with new mbufs till last EOP bit set descriptor */ while (next_to_use != i) { current_desc = &adapter->rx_desc_base[next_to_use]; if ((current_desc->errors & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE | IXGB_RX_DESC_ERRORS_P | IXGB_RX_DESC_ERRORS_RXE))) { mp = adapter->rx_buffer_area[next_to_use].m_head; ixgb_get_buf(next_to_use, adapter, mp); } else { if (ixgb_get_buf(next_to_use, adapter, NULL) == ENOBUFS) break; } /* Advance our pointers to the next descriptor */ if (++next_to_use == adapter->num_rx_desc) { next_to_use = 0; current_desc = adapter->rx_desc_base; } else current_desc++; } adapter->next_rx_desc_to_use = next_to_use; if (--next_to_use < 0) next_to_use = (adapter->num_rx_desc - 1); /* Advance the IXGB's Receive Queue #0 "Tail Pointer" */ IXGB_WRITE_REG(&adapter->hw, RDT, next_to_use); return (rx_npkts); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of the checksum so that the * stack doesn't spend time verifying it. * *********************************************************************/ static void ixgb_receive_checksum(struct adapter * adapter, struct ixgb_rx_desc * rx_desc, struct mbuf * mp) { if (rx_desc->status & IXGB_RX_DESC_STATUS_IXSM) { mp->m_pkthdr.csum_flags = 0; return; } if (rx_desc->status & IXGB_RX_DESC_STATUS_IPCS) { /* Did it pass? */ if (!(rx_desc->errors & IXGB_RX_DESC_ERRORS_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else { mp->m_pkthdr.csum_flags = 0; } } if (rx_desc->status & IXGB_RX_DESC_STATUS_TCPCS) { /* Did it pass?
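 * If so, the mbuf is marked so the stack can skip software
 * verification; CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with
 * csum_data = 0xffff mean "payload and pseudo-header checksums
 * already verified in hardware":
 *
 *	mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 *	mp->m_pkthdr.csum_data = htons(0xffff);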
*/ if (!(rx_desc->errors & IXGB_RX_DESC_ERRORS_TCPE)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } return; } static void ixgb_enable_vlans(struct adapter * adapter) { uint32_t ctrl; ctrl = IXGB_READ_REG(&adapter->hw, CTRL0); ctrl |= IXGB_CTRL0_VME; IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl); return; } static void ixgb_enable_intr(struct adapter * adapter) { IXGB_WRITE_REG(&adapter->hw, IMS, (IXGB_INT_RXT0 | IXGB_INT_TXDW | IXGB_INT_RXDMT0 | IXGB_INT_LSC | IXGB_INT_RXO)); return; } static void ixgb_disable_intr(struct adapter * adapter) { IXGB_WRITE_REG(&adapter->hw, IMC, ~0); return; } void ixgb_write_pci_cfg(struct ixgb_hw * hw, uint32_t reg, uint16_t * value) { pci_write_config(((struct ixgb_osdep *) hw->back)->dev, reg, *value, 2); } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void ixgb_update_stats_counters(struct adapter * adapter) { adapter->stats.crcerrs += IXGB_READ_REG(&adapter->hw, CRCERRS); adapter->stats.gprcl += IXGB_READ_REG(&adapter->hw, GPRCL); adapter->stats.gprch += IXGB_READ_REG(&adapter->hw, GPRCH); adapter->stats.gorcl += IXGB_READ_REG(&adapter->hw, GORCL); adapter->stats.gorch += IXGB_READ_REG(&adapter->hw, GORCH); adapter->stats.bprcl += IXGB_READ_REG(&adapter->hw, BPRCL); adapter->stats.bprch += IXGB_READ_REG(&adapter->hw, BPRCH); adapter->stats.mprcl += IXGB_READ_REG(&adapter->hw, MPRCL); adapter->stats.mprch += IXGB_READ_REG(&adapter->hw, MPRCH); adapter->stats.roc += IXGB_READ_REG(&adapter->hw, ROC); adapter->stats.mpc += IXGB_READ_REG(&adapter->hw, MPC); adapter->stats.dc += IXGB_READ_REG(&adapter->hw, DC); adapter->stats.rlec += IXGB_READ_REG(&adapter->hw, RLEC); adapter->stats.xonrxc += IXGB_READ_REG(&adapter->hw, XONRXC); adapter->stats.xontxc += IXGB_READ_REG(&adapter->hw, XONTXC); adapter->stats.xoffrxc += IXGB_READ_REG(&adapter->hw, XOFFRXC); adapter->stats.xofftxc += IXGB_READ_REG(&adapter->hw, XOFFTXC); adapter->stats.gptcl += IXGB_READ_REG(&adapter->hw, GPTCL); adapter->stats.gptch += IXGB_READ_REG(&adapter->hw, GPTCH); adapter->stats.gotcl += IXGB_READ_REG(&adapter->hw, GOTCL); adapter->stats.gotch += IXGB_READ_REG(&adapter->hw, GOTCH); adapter->stats.ruc += IXGB_READ_REG(&adapter->hw, RUC); adapter->stats.rfc += IXGB_READ_REG(&adapter->hw, RFC); adapter->stats.rjc += IXGB_READ_REG(&adapter->hw, RJC); adapter->stats.torl += IXGB_READ_REG(&adapter->hw, TORL); adapter->stats.torh += IXGB_READ_REG(&adapter->hw, TORH); adapter->stats.totl += IXGB_READ_REG(&adapter->hw, TOTL); adapter->stats.toth += IXGB_READ_REG(&adapter->hw, TOTH); adapter->stats.tprl += IXGB_READ_REG(&adapter->hw, TPRL); adapter->stats.tprh += IXGB_READ_REG(&adapter->hw, TPRH); adapter->stats.tptl += IXGB_READ_REG(&adapter->hw, TPTL); adapter->stats.tpth += IXGB_READ_REG(&adapter->hw, TPTH); adapter->stats.plt64c += IXGB_READ_REG(&adapter->hw, PLT64C); adapter->stats.mptcl += IXGB_READ_REG(&adapter->hw, MPTCL); adapter->stats.mptch += IXGB_READ_REG(&adapter->hw, MPTCH); adapter->stats.bptcl += IXGB_READ_REG(&adapter->hw, BPTCL); adapter->stats.bptch += IXGB_READ_REG(&adapter->hw, BPTCH); adapter->stats.uprcl += IXGB_READ_REG(&adapter->hw, UPRCL); adapter->stats.uprch += IXGB_READ_REG(&adapter->hw, UPRCH); adapter->stats.vprcl += IXGB_READ_REG(&adapter->hw, VPRCL); adapter->stats.vprch += IXGB_READ_REG(&adapter->hw, VPRCH); adapter->stats.jprcl += IXGB_READ_REG(&adapter->hw, 
JPRCL); adapter->stats.jprch += IXGB_READ_REG(&adapter->hw, JPRCH); adapter->stats.rnbc += IXGB_READ_REG(&adapter->hw, RNBC); adapter->stats.icbc += IXGB_READ_REG(&adapter->hw, ICBC); adapter->stats.ecbc += IXGB_READ_REG(&adapter->hw, ECBC); adapter->stats.uptcl += IXGB_READ_REG(&adapter->hw, UPTCL); adapter->stats.uptch += IXGB_READ_REG(&adapter->hw, UPTCH); adapter->stats.vptcl += IXGB_READ_REG(&adapter->hw, VPTCL); adapter->stats.vptch += IXGB_READ_REG(&adapter->hw, VPTCH); adapter->stats.jptcl += IXGB_READ_REG(&adapter->hw, JPTCL); adapter->stats.jptch += IXGB_READ_REG(&adapter->hw, JPTCH); adapter->stats.tsctc += IXGB_READ_REG(&adapter->hw, TSCTC); adapter->stats.tsctfc += IXGB_READ_REG(&adapter->hw, TSCTFC); adapter->stats.ibic += IXGB_READ_REG(&adapter->hw, IBIC); adapter->stats.lfc += IXGB_READ_REG(&adapter->hw, LFC); adapter->stats.pfrc += IXGB_READ_REG(&adapter->hw, PFRC); adapter->stats.pftc += IXGB_READ_REG(&adapter->hw, PFTC); adapter->stats.mcfrc += IXGB_READ_REG(&adapter->hw, MCFRC); } static uint64_t ixgb_get_counter(struct ifnet *ifp, ift_counter cnt) { struct adapter *adapter; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_IPACKETS: return (adapter->stats.gprcl); case IFCOUNTER_OPACKETS: return ( adapter->stats.gptcl); case IFCOUNTER_IBYTES: return (adapter->stats.gorcl); case IFCOUNTER_OBYTES: return (adapter->stats.gotcl); case IFCOUNTER_IMCASTS: return ( adapter->stats.mprcl); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.crcerrs + adapter->stats.rnbc + adapter->stats.mpc + adapter->stats.rlec); default: return (if_get_counter_default(ifp, cnt)); } } /********************************************************************** * * This routine is called only when ixgb_display_debug_stats is enabled. * This routine provides a way to take a look at important statistics * maintained by the driver and hardware. * **********************************************************************/ static void ixgb_print_hw_stats(struct adapter * adapter) { char buf_speed[100], buf_type[100]; ixgb_bus_speed bus_speed; ixgb_bus_type bus_type; device_t dev; dev = adapter->dev; #ifdef _SV_ device_printf(dev, "Packets not Avail = %ld\n", adapter->no_pkts_avail); device_printf(dev, "CleanTxInterrupts = %ld\n", adapter->clean_tx_interrupts); device_printf(dev, "ICR RXDMT0 = %lld\n", (long long)adapter->sv_stats.icr_rxdmt0); device_printf(dev, "ICR RXO = %lld\n", (long long)adapter->sv_stats.icr_rxo); device_printf(dev, "ICR RXT0 = %lld\n", (long long)adapter->sv_stats.icr_rxt0); device_printf(dev, "ICR TXDW = %lld\n", (long long)adapter->sv_stats.icr_TXDW); #endif /* _SV_ */ bus_speed = adapter->hw.bus.speed; bus_type = adapter->hw.bus.type; sprintf(buf_speed, bus_speed == ixgb_bus_speed_33 ? "33MHz" : bus_speed == ixgb_bus_speed_66 ? "66MHz" : bus_speed == ixgb_bus_speed_100 ? "100MHz" : bus_speed == ixgb_bus_speed_133 ? "133MHz" : "UNKNOWN"); device_printf(dev, "PCI_Bus_Speed = %s\n", buf_speed); sprintf(buf_type, bus_type == ixgb_bus_type_pci ? "PCI" : bus_type == ixgb_bus_type_pcix ? 
"PCI-X" : "UNKNOWN"); device_printf(dev, "PCI_Bus_Type = %s\n", buf_type); device_printf(dev, "Tx Descriptors not Avail1 = %ld\n", adapter->no_tx_desc_avail1); device_printf(dev, "Tx Descriptors not Avail2 = %ld\n", adapter->no_tx_desc_avail2); device_printf(dev, "Std Mbuf Failed = %ld\n", adapter->mbuf_alloc_failed); device_printf(dev, "Std Cluster Failed = %ld\n", adapter->mbuf_cluster_failed); device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc); device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc); device_printf(dev, "Receive No Buffers = %lld\n", (long long)adapter->stats.rnbc); device_printf(dev, "Receive length errors = %lld\n", (long long)adapter->stats.rlec); device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs); device_printf(dev, "Driver dropped packets = %ld\n", adapter->dropped_pkts); device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc); device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc); device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc); device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc); device_printf(dev, "Good Packets Rcvd = %lld\n", (long long)adapter->stats.gprcl); device_printf(dev, "Good Packets Xmtd = %lld\n", (long long)adapter->stats.gptcl); device_printf(dev, "Jumbo frames recvd = %lld\n", (long long)adapter->stats.jprcl); device_printf(dev, "Jumbo frames Xmtd = %lld\n", (long long)adapter->stats.jptcl); return; } static int ixgb_sysctl_stats(SYSCTL_HANDLER_ARGS) { int error; int result; struct adapter *adapter; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *) arg1; ixgb_print_hw_stats(adapter); } return error; } Index: stable/11/sys/dev/lge/if_lge.c =================================================================== --- stable/11/sys/dev/lge/if_lge.c (revision 331642) +++ stable/11/sys/dev/lge/if_lge.c (revision 331643) @@ -1,1590 +1,1590 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2001 Wind River Systems * Copyright (c) 1997, 1998, 1999, 2000, 2001 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Level 1 LXT1001 gigabit ethernet driver for FreeBSD. Public * documentation not available, but ask me nicely. * * The Level 1 chip is used on some D-Link, SMC and Addtron NICs. * It's a 64-bit PCI part that supports TCP/IP checksum offload, * VLAN tagging/insertion, GMII and TBI (1000baseX) ports. There * are three supported methods for data transfer between host and * NIC: programmed I/O, traditional scatter/gather DMA and Packet * Propulsion Technology (tm) DMA. The latter mechanism is a form * of double buffer DMA where the packet data is copied to a * pre-allocated DMA buffer whose physical address has been loaded * into a table at device initialization time. The rationale is that * the virtual to physical address translation needed for normal * scatter/gather DMA is more expensive than the data copy needed * for double buffering. This may be true in Windows NT and the like, * but it isn't true for us, at least on the x86 arch. This driver * uses the scatter/gather I/O method for both TX and RX. * * The LXT1001 only supports TCP/IP checksum offload on receive. * Also, the VLAN tagging is done using a 16-entry table which allows * the chip to perform hardware filtering based on VLAN tags. Sadly, * our vlan support doesn't currently play well with this kind of * hardware support. * * Special thanks to: * - Jeff James at Intel, for arranging to have the LXT1001 manual * released (at long last) * - Beny Chen at D-Link, for actually sending it to me * - Brad Short and Keith Alexis at SMC, for sending me sample * SMC9462SX and SMC9462TX adapters for testing * - Paul Saab at Y!, for not killing me (though it remains to be seen * if in fact he did me much of a favor) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for vtophys */ #include /* for vtophys */ #include #include #include #include #include #include #include #include #define LGE_USEIOSPACE #include /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* * Various supported device vendors/types and their names.
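 *
 * The table is sentinel-terminated (lge_name == NULL ends it); the
 * probe routine walks it comparing PCI vendor/device IDs, roughly:
 *
 *	for (t = lge_devs; t->lge_name != NULL; t++)
 *		if (pci_get_vendor(dev) == t->lge_vid &&
 *		    pci_get_device(dev) == t->lge_did)
 *			return (match);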
*/ static const struct lge_type lge_devs[] = { { LGE_VENDORID, LGE_DEVICEID, "Level 1 Gigabit Ethernet" }, { 0, 0, NULL } }; static int lge_probe(device_t); static int lge_attach(device_t); static int lge_detach(device_t); static int lge_alloc_jumbo_mem(struct lge_softc *); static void lge_free_jumbo_mem(struct lge_softc *); static void *lge_jalloc(struct lge_softc *); static void lge_jfree(struct mbuf *, void *, void *); static int lge_newbuf(struct lge_softc *, struct lge_rx_desc *, struct mbuf *); static int lge_encap(struct lge_softc *, struct mbuf *, u_int32_t *); static void lge_rxeof(struct lge_softc *, int); static void lge_rxeoc(struct lge_softc *); static void lge_txeof(struct lge_softc *); static void lge_intr(void *); static void lge_tick(void *); static void lge_start(struct ifnet *); static void lge_start_locked(struct ifnet *); static int lge_ioctl(struct ifnet *, u_long, caddr_t); static void lge_init(void *); static void lge_init_locked(struct lge_softc *); static void lge_stop(struct lge_softc *); static void lge_watchdog(struct lge_softc *); static int lge_shutdown(device_t); static int lge_ifmedia_upd(struct ifnet *); static void lge_ifmedia_upd_locked(struct ifnet *); static void lge_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void lge_eeprom_getword(struct lge_softc *, int, u_int16_t *); static void lge_read_eeprom(struct lge_softc *, caddr_t, int, int, int); static int lge_miibus_readreg(device_t, int, int); static int lge_miibus_writereg(device_t, int, int, int); static void lge_miibus_statchg(device_t); static void lge_setmulti(struct lge_softc *); static void lge_reset(struct lge_softc *); static int lge_list_rx_init(struct lge_softc *); static int lge_list_tx_init(struct lge_softc *); #ifdef LGE_USEIOSPACE #define LGE_RES SYS_RES_IOPORT #define LGE_RID LGE_PCI_LOIO #else #define LGE_RES SYS_RES_MEMORY #define LGE_RID LGE_PCI_LOMEM #endif static device_method_t lge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, lge_probe), DEVMETHOD(device_attach, lge_attach), DEVMETHOD(device_detach, lge_detach), DEVMETHOD(device_shutdown, lge_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, lge_miibus_readreg), DEVMETHOD(miibus_writereg, lge_miibus_writereg), DEVMETHOD(miibus_statchg, lge_miibus_statchg), DEVMETHOD_END }; static driver_t lge_driver = { "lge", lge_methods, sizeof(struct lge_softc) }; static devclass_t lge_devclass; DRIVER_MODULE(lge, pci, lge_driver, lge_devclass, 0, 0); DRIVER_MODULE(miibus, lge, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(lge, pci, 1, 1, 1); MODULE_DEPEND(lge, ether, 1, 1, 1); MODULE_DEPEND(lge, miibus, 1, 1, 1); #define LGE_SETBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, \ CSR_READ_4(sc, reg) | (x)) #define LGE_CLRBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, \ CSR_READ_4(sc, reg) & ~(x)) #define SIO_SET(x) \ CSR_WRITE_4(sc, LGE_MEAR, CSR_READ_4(sc, LGE_MEAR) | x) #define SIO_CLR(x) \ CSR_WRITE_4(sc, LGE_MEAR, CSR_READ_4(sc, LGE_MEAR) & ~x) /* * Read a word of data stored in the EEPROM at address 'addr.' 
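 *
 * Each EEPROM access returns 32 bits, i.e. two 16-bit words; the low
 * bit of 'addr' selects the half and the remaining bits select the
 * 32-bit cell, which is how the command and extraction below work:
 *
 *	cmd  |= (addr >> 1) << 8;	(32-bit cell index)
 *	word  = (addr & 1) ? (val >> 16) & 0xFFFF : val & 0xFFFF;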
*/ static void lge_eeprom_getword(sc, addr, dest) struct lge_softc *sc; int addr; u_int16_t *dest; { - register int i; + int i; u_int32_t val; CSR_WRITE_4(sc, LGE_EECTL, LGE_EECTL_CMD_READ| LGE_EECTL_SINGLEACCESS|((addr >> 1) << 8)); for (i = 0; i < LGE_TIMEOUT; i++) if (!(CSR_READ_4(sc, LGE_EECTL) & LGE_EECTL_CMD_READ)) break; if (i == LGE_TIMEOUT) { device_printf(sc->lge_dev, "EEPROM read timed out\n"); return; } val = CSR_READ_4(sc, LGE_EEDATA); if (addr & 1) *dest = (val >> 16) & 0xFFFF; else *dest = val & 0xFFFF; return; } /* * Read a sequence of words from the EEPROM. */ static void lge_read_eeprom(sc, dest, off, cnt, swap) struct lge_softc *sc; caddr_t dest; int off; int cnt; int swap; { int i; u_int16_t word = 0, *ptr; for (i = 0; i < cnt; i++) { lge_eeprom_getword(sc, off + i, &word); ptr = (u_int16_t *)(dest + (i * 2)); if (swap) *ptr = ntohs(word); else *ptr = word; } return; } static int lge_miibus_readreg(dev, phy, reg) device_t dev; int phy, reg; { struct lge_softc *sc; int i; sc = device_get_softc(dev); /* * If we have a non-PCS PHY, pretend that the internal * autoneg stuff at PHY address 0 isn't there so that * the miibus code will find only the GMII PHY. */ if (sc->lge_pcs == 0 && phy == 0) return(0); CSR_WRITE_4(sc, LGE_GMIICTL, (phy << 8) | reg | LGE_GMIICMD_READ); for (i = 0; i < LGE_TIMEOUT; i++) if (!(CSR_READ_4(sc, LGE_GMIICTL) & LGE_GMIICTL_CMDBUSY)) break; if (i == LGE_TIMEOUT) { device_printf(sc->lge_dev, "PHY read timed out\n"); return(0); } return(CSR_READ_4(sc, LGE_GMIICTL) >> 16); } static int lge_miibus_writereg(dev, phy, reg, data) device_t dev; int phy, reg, data; { struct lge_softc *sc; int i; sc = device_get_softc(dev); CSR_WRITE_4(sc, LGE_GMIICTL, (data << 16) | (phy << 8) | reg | LGE_GMIICMD_WRITE); for (i = 0; i < LGE_TIMEOUT; i++) if (!(CSR_READ_4(sc, LGE_GMIICTL) & LGE_GMIICTL_CMDBUSY)) break; if (i == LGE_TIMEOUT) { device_printf(sc->lge_dev, "PHY write timed out\n"); return(0); } return(0); } static void lge_miibus_statchg(dev) device_t dev; { struct lge_softc *sc; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->lge_miibus); LGE_CLRBIT(sc, LGE_GMIIMODE, LGE_GMIIMODE_SPEED); switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_1000_T: case IFM_1000_SX: LGE_SETBIT(sc, LGE_GMIIMODE, LGE_SPEED_1000); break; case IFM_100_TX: LGE_SETBIT(sc, LGE_GMIIMODE, LGE_SPEED_100); break; case IFM_10_T: LGE_SETBIT(sc, LGE_GMIIMODE, LGE_SPEED_10); break; default: /* * Choose something, even if it's wrong. Clearing * all the bits will hose autoneg on the internal * PHY. */ LGE_SETBIT(sc, LGE_GMIIMODE, LGE_SPEED_1000); break; } if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) { LGE_SETBIT(sc, LGE_GMIIMODE, LGE_GMIIMODE_FDX); } else { LGE_CLRBIT(sc, LGE_GMIIMODE, LGE_GMIIMODE_FDX); } return; } static void lge_setmulti(sc) struct lge_softc *sc; { struct ifnet *ifp; struct ifmultiaddr *ifma; u_int32_t h = 0, hashes[2] = { 0, 0 }; ifp = sc->lge_ifp; LGE_LOCK_ASSERT(sc); /* Make sure multicast hash table is enabled. 
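 * The filter itself is a 64-bit hash table split across the MAR0/MAR1
 * registers; each multicast address selects one bit via the top 6 bits
 * of a big-endian CRC32, as the loop below computes:
 *
 *	h = ether_crc32_be(lladdr, ETHER_ADDR_LEN) >> 26;
 *	if (h < 32)
 *		hashes[0] |= (1 << h);
 *	else
 *		hashes[1] |= (1 << (h - 32));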
*/ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_MCAST); if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { CSR_WRITE_4(sc, LGE_MAR0, 0xFFFFFFFF); CSR_WRITE_4(sc, LGE_MAR1, 0xFFFFFFFF); return; } /* first, zot all the existing hash bits */ CSR_WRITE_4(sc, LGE_MAR0, 0); CSR_WRITE_4(sc, LGE_MAR1, 0); /* now program new ones */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } if_maddr_runlock(ifp); CSR_WRITE_4(sc, LGE_MAR0, hashes[0]); CSR_WRITE_4(sc, LGE_MAR1, hashes[1]); return; } static void lge_reset(sc) struct lge_softc *sc; { - register int i; + int i; LGE_SETBIT(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL0|LGE_MODE1_SOFTRST); for (i = 0; i < LGE_TIMEOUT; i++) { if (!(CSR_READ_4(sc, LGE_MODE1) & LGE_MODE1_SOFTRST)) break; } if (i == LGE_TIMEOUT) device_printf(sc->lge_dev, "reset never completed\n"); /* Wait a little while for the chip to get its brains in order. */ DELAY(1000); return; } /* * Probe for a Level 1 chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int lge_probe(dev) device_t dev; { const struct lge_type *t; t = lge_devs; while(t->lge_name != NULL) { if ((pci_get_vendor(dev) == t->lge_vid) && (pci_get_device(dev) == t->lge_did)) { device_set_desc(dev, t->lge_name); return(BUS_PROBE_DEFAULT); } t++; } return(ENXIO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int lge_attach(dev) device_t dev; { u_char eaddr[ETHER_ADDR_LEN]; struct lge_softc *sc; struct ifnet *ifp = NULL; int error = 0, rid; sc = device_get_softc(dev); sc->lge_dev = dev; mtx_init(&sc->lge_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->lge_stat_callout, &sc->lge_mtx, 0); /* * Map control/status registers. */ pci_enable_busmaster(dev); rid = LGE_RID; sc->lge_res = bus_alloc_resource_any(dev, LGE_RES, &rid, RF_ACTIVE); if (sc->lge_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } sc->lge_btag = rman_get_bustag(sc->lge_res); sc->lge_bhandle = rman_get_bushandle(sc->lge_res); /* Allocate interrupt */ rid = 0; sc->lge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->lge_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Reset the adapter. */ lge_reset(sc); /* * Get station address from the EEPROM. */ lge_read_eeprom(sc, (caddr_t)&eaddr[0], LGE_EE_NODEADDR_0, 1, 0); lge_read_eeprom(sc, (caddr_t)&eaddr[2], LGE_EE_NODEADDR_1, 1, 0); lge_read_eeprom(sc, (caddr_t)&eaddr[4], LGE_EE_NODEADDR_2, 1, 0); sc->lge_ldata = contigmalloc(sizeof(struct lge_list_data), M_DEVBUF, M_NOWAIT | M_ZERO, 0, 0xffffffff, PAGE_SIZE, 0); if (sc->lge_ldata == NULL) { device_printf(dev, "no memory for list buffers!\n"); error = ENXIO; goto fail; } /* Try to allocate memory for jumbo buffers. 
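 * The pool is one contiguous region carved into fixed-size 9K slots
 * (see lge_alloc_jumbo_mem below), so slot i simply lives at:
 *
 *	lge_jslots[i] = lge_jumbo_buf + i * LGE_JLEN;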
*/ if (lge_alloc_jumbo_mem(sc)) { device_printf(dev, "jumbo buffer allocation failed\n"); error = ENXIO; goto fail; } ifp = sc->lge_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = lge_ioctl; ifp->if_start = lge_start; ifp->if_init = lge_init; ifp->if_snd.ifq_maxlen = LGE_TX_LIST_CNT - 1; ifp->if_capabilities = IFCAP_RXCSUM; ifp->if_capenable = ifp->if_capabilities; if (CSR_READ_4(sc, LGE_GMIIMODE) & LGE_GMIIMODE_PCSENH) sc->lge_pcs = 1; else sc->lge_pcs = 0; /* * Do MII setup. */ error = mii_attach(dev, &sc->lge_miibus, ifp, lge_ifmedia_upd, lge_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); error = bus_setup_intr(dev, sc->lge_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, lge_intr, sc, &sc->lge_intrhand); if (error) { ether_ifdetach(ifp); device_printf(dev, "couldn't set up irq\n"); goto fail; } return (0); fail: lge_free_jumbo_mem(sc); if (sc->lge_ldata) contigfree(sc->lge_ldata, sizeof(struct lge_list_data), M_DEVBUF); if (ifp) if_free(ifp); if (sc->lge_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->lge_irq); if (sc->lge_res) bus_release_resource(dev, LGE_RES, LGE_RID, sc->lge_res); mtx_destroy(&sc->lge_mtx); return(error); } static int lge_detach(dev) device_t dev; { struct lge_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->lge_ifp; LGE_LOCK(sc); lge_reset(sc); lge_stop(sc); LGE_UNLOCK(sc); callout_drain(&sc->lge_stat_callout); ether_ifdetach(ifp); bus_generic_detach(dev); device_delete_child(dev, sc->lge_miibus); bus_teardown_intr(dev, sc->lge_irq, sc->lge_intrhand); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->lge_irq); bus_release_resource(dev, LGE_RES, LGE_RID, sc->lge_res); contigfree(sc->lge_ldata, sizeof(struct lge_list_data), M_DEVBUF); if_free(ifp); lge_free_jumbo_mem(sc); mtx_destroy(&sc->lge_mtx); return(0); } /* * Initialize the transmit descriptors. */ static int lge_list_tx_init(sc) struct lge_softc *sc; { struct lge_list_data *ld; struct lge_ring_data *cd; int i; cd = &sc->lge_cdata; ld = sc->lge_ldata; for (i = 0; i < LGE_TX_LIST_CNT; i++) { ld->lge_tx_list[i].lge_mbuf = NULL; ld->lge_tx_list[i].lge_ctl = 0; } cd->lge_tx_prod = cd->lge_tx_cons = 0; return(0); } /* * Initialize the RX descriptors and allocate mbufs for them. Note that * we arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. */ static int lge_list_rx_init(sc) struct lge_softc *sc; { struct lge_list_data *ld; struct lge_ring_data *cd; int i; ld = sc->lge_ldata; cd = &sc->lge_cdata; cd->lge_rx_prod = cd->lge_rx_cons = 0; CSR_WRITE_4(sc, LGE_RXDESC_ADDR_HI, 0); for (i = 0; i < LGE_RX_LIST_CNT; i++) { if (CSR_READ_1(sc, LGE_RXCMDFREE_8BIT) == 0) break; if (lge_newbuf(sc, &ld->lge_rx_list[i], NULL) == ENOBUFS) return(ENOBUFS); } /* Clear possible 'rx command queue empty' interrupt. */ CSR_READ_4(sc, LGE_ISR); return(0); } /* * Initialize an RX descriptor and attach an MBUF cluster.
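 *
 * Jumbo storage is attached as external mbuf storage with a private
 * free routine, so a cluster returns to the driver's pool rather than
 * to the system when its last reference is dropped:
 *
 *	MEXTADD(m_new, buf, LGE_JUMBO_FRAMELEN, lge_jfree, buf, sc, 0,
 *	    EXT_NET_DRV);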
*/ static int lge_newbuf(sc, c, m) struct lge_softc *sc; struct lge_rx_desc *c; struct mbuf *m; { struct mbuf *m_new = NULL; caddr_t *buf = NULL; if (m == NULL) { MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { device_printf(sc->lge_dev, "no memory for rx list " "-- packet dropped!\n"); return(ENOBUFS); } /* Allocate the jumbo buffer */ buf = lge_jalloc(sc); if (buf == NULL) { #ifdef LGE_VERBOSE device_printf(sc->lge_dev, "jumbo allocation failed " "-- packet dropped!\n"); #endif m_freem(m_new); return(ENOBUFS); } /* Attach the buffer to the mbuf */ m_new->m_data = (void *)buf; m_new->m_len = m_new->m_pkthdr.len = LGE_JUMBO_FRAMELEN; MEXTADD(m_new, buf, LGE_JUMBO_FRAMELEN, lge_jfree, buf, (struct lge_softc *)sc, 0, EXT_NET_DRV); } else { m_new = m; m_new->m_len = m_new->m_pkthdr.len = LGE_JUMBO_FRAMELEN; m_new->m_data = m_new->m_ext.ext_buf; } /* * Adjust alignment so packet payload begins on a * longword boundary. Mandatory for Alpha, useful on * x86 too. */ m_adj(m_new, ETHER_ALIGN); c->lge_mbuf = m_new; c->lge_fragptr_hi = 0; c->lge_fragptr_lo = vtophys(mtod(m_new, caddr_t)); c->lge_fraglen = m_new->m_len; c->lge_ctl = m_new->m_len | LGE_RXCTL_WANTINTR | LGE_FRAGCNT(1); c->lge_sts = 0; /* * Put this buffer in the RX command FIFO. To do this, * we just write the physical address of the descriptor * into the RX descriptor address registers. Note that * there are two registers, one high DWORD and one low * DWORD, which lets us specify a 64-bit address if * desired. We only use a 32-bit address for now. * Writing to the low DWORD register is what actually * causes the command to be issued, so we do that * last. */ CSR_WRITE_4(sc, LGE_RXDESC_ADDR_LO, vtophys(c)); LGE_INC(sc->lge_cdata.lge_rx_prod, LGE_RX_LIST_CNT); return(0); } static int lge_alloc_jumbo_mem(sc) struct lge_softc *sc; { caddr_t ptr; - register int i; + int i; struct lge_jpool_entry *entry; /* Grab a big chunk o' storage. */ sc->lge_cdata.lge_jumbo_buf = contigmalloc(LGE_JMEM, M_DEVBUF, M_NOWAIT, 0, 0xffffffff, PAGE_SIZE, 0); if (sc->lge_cdata.lge_jumbo_buf == NULL) { device_printf(sc->lge_dev, "no memory for jumbo buffers!\n"); return(ENOBUFS); } SLIST_INIT(&sc->lge_jfree_listhead); SLIST_INIT(&sc->lge_jinuse_listhead); /* * Now divide it up into 9K pieces and save the addresses * in an array. */ ptr = sc->lge_cdata.lge_jumbo_buf; for (i = 0; i < LGE_JSLOTS; i++) { sc->lge_cdata.lge_jslots[i] = ptr; ptr += LGE_JLEN; entry = malloc(sizeof(struct lge_jpool_entry), M_DEVBUF, M_NOWAIT); if (entry == NULL) { device_printf(sc->lge_dev, "no memory for jumbo " "buffer queue!\n"); return(ENOBUFS); } entry->slot = i; SLIST_INSERT_HEAD(&sc->lge_jfree_listhead, entry, jpool_entries); } return(0); } static void lge_free_jumbo_mem(sc) struct lge_softc *sc; { struct lge_jpool_entry *entry; if (sc->lge_cdata.lge_jumbo_buf == NULL) return; while ((entry = SLIST_FIRST(&sc->lge_jinuse_listhead))) { device_printf(sc->lge_dev, "asked to free buffer that is in use!\n"); SLIST_REMOVE_HEAD(&sc->lge_jinuse_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc->lge_jfree_listhead, entry, jpool_entries); } while (!SLIST_EMPTY(&sc->lge_jfree_listhead)) { entry = SLIST_FIRST(&sc->lge_jfree_listhead); SLIST_REMOVE_HEAD(&sc->lge_jfree_listhead, jpool_entries); free(entry, M_DEVBUF); } contigfree(sc->lge_cdata.lge_jumbo_buf, LGE_JMEM, M_DEVBUF); return; } /* * Allocate a jumbo buffer. 
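 *
 * Allocation is a free-list pop paired with an in-use-list push, which
 * is what later lets lge_jfree() verify that a returned buffer was
 * actually outstanding:
 *
 *	entry = SLIST_FIRST(&sc->lge_jfree_listhead);
 *	SLIST_REMOVE_HEAD(&sc->lge_jfree_listhead, jpool_entries);
 *	SLIST_INSERT_HEAD(&sc->lge_jinuse_listhead, entry, jpool_entries);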
*/ static void * lge_jalloc(sc) struct lge_softc *sc; { struct lge_jpool_entry *entry; entry = SLIST_FIRST(&sc->lge_jfree_listhead); if (entry == NULL) { #ifdef LGE_VERBOSE device_printf(sc->lge_dev, "no free jumbo buffers\n"); #endif return(NULL); } SLIST_REMOVE_HEAD(&sc->lge_jfree_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc->lge_jinuse_listhead, entry, jpool_entries); return(sc->lge_cdata.lge_jslots[entry->slot]); } /* * Release a jumbo buffer. */ static void lge_jfree(struct mbuf *m, void *buf, void *args) { struct lge_softc *sc; int i; struct lge_jpool_entry *entry; /* Extract the softc struct pointer. */ sc = args; if (sc == NULL) panic("lge_jfree: can't find softc pointer!"); /* calculate the slot this buffer belongs to */ i = ((vm_offset_t)buf - (vm_offset_t)sc->lge_cdata.lge_jumbo_buf) / LGE_JLEN; if ((i < 0) || (i >= LGE_JSLOTS)) panic("lge_jfree: asked to free buffer that we don't manage!"); entry = SLIST_FIRST(&sc->lge_jinuse_listhead); if (entry == NULL) panic("lge_jfree: buffer not in use!"); entry->slot = i; SLIST_REMOVE_HEAD(&sc->lge_jinuse_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc->lge_jfree_listhead, entry, jpool_entries); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. */ static void lge_rxeof(sc, cnt) struct lge_softc *sc; int cnt; { struct mbuf *m; struct ifnet *ifp; struct lge_rx_desc *cur_rx; int c, i, total_len = 0; u_int32_t rxsts, rxctl; ifp = sc->lge_ifp; /* Find out how many frames were processed. */ c = cnt; i = sc->lge_cdata.lge_rx_cons; /* Suck them in. */ while(c) { struct mbuf *m0 = NULL; cur_rx = &sc->lge_ldata->lge_rx_list[i]; rxctl = cur_rx->lge_ctl; rxsts = cur_rx->lge_sts; m = cur_rx->lge_mbuf; cur_rx->lge_mbuf = NULL; total_len = LGE_RXBYTES(cur_rx); LGE_INC(i, LGE_RX_LIST_CNT); c--; /* * If an error occurs, update stats, clear the * status word and leave the mbuf cluster in place: * it should simply get re-used next time this descriptor * comes up in the ring. */ if (rxctl & LGE_RXCTL_ERRMASK) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); lge_newbuf(sc, &LGE_RXTAIL(sc), m); continue; } if (lge_newbuf(sc, &LGE_RXTAIL(sc), NULL) == ENOBUFS) { m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp, NULL); lge_newbuf(sc, &LGE_RXTAIL(sc), m); if (m0 == NULL) { device_printf(sc->lge_dev, "no receive buffers " "available -- packet dropped!\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } m = m0; } else { m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = total_len; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* Do IP checksum checking. */ if (rxsts & LGE_RXSTS_ISIP) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxsts & LGE_RXSTS_IPCSUMERR)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if ((rxsts & LGE_RXSTS_ISTCP && !(rxsts & LGE_RXSTS_TCPCSUMERR)) || (rxsts & LGE_RXSTS_ISUDP && !(rxsts & LGE_RXSTS_UDPCSUMERR))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } LGE_UNLOCK(sc); (*ifp->if_input)(ifp, m); LGE_LOCK(sc); } sc->lge_cdata.lge_rx_cons = i; return; } static void lge_rxeoc(sc) struct lge_softc *sc; { struct ifnet *ifp; ifp = sc->lge_ifp; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lge_init_locked(sc); return; } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void lge_txeof(sc) struct lge_softc *sc; { struct lge_tx_desc *cur_tx = NULL; struct ifnet *ifp; u_int32_t idx, txdone; ifp = sc->lge_ifp; /* Clear the timeout timer. 
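 * The chip reports (via LGE_TXDMADONE_8BIT) how many descriptors have
 * completed since the last read; the reclaim loop below advances the
 * consumer index that many slots, freeing one mbuf per slot:
 *
 *	while (idx != sc->lge_cdata.lge_tx_prod && txdone != 0) {
 *		m_freem(sc->lge_ldata->lge_tx_list[idx].lge_mbuf);
 *		txdone--;
 *		LGE_INC(idx, LGE_TX_LIST_CNT);
 *	}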
*/ sc->lge_timer = 0; /* * Go through our tx list and free mbufs for those * frames that have been transmitted. */ idx = sc->lge_cdata.lge_tx_cons; txdone = CSR_READ_1(sc, LGE_TXDMADONE_8BIT); while (idx != sc->lge_cdata.lge_tx_prod && txdone) { cur_tx = &sc->lge_ldata->lge_tx_list[idx]; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (cur_tx->lge_mbuf != NULL) { m_freem(cur_tx->lge_mbuf); cur_tx->lge_mbuf = NULL; } cur_tx->lge_ctl = 0; txdone--; LGE_INC(idx, LGE_TX_LIST_CNT); sc->lge_timer = 0; } sc->lge_cdata.lge_tx_cons = idx; if (cur_tx != NULL) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } static void lge_tick(xsc) void *xsc; { struct lge_softc *sc; struct mii_data *mii; struct ifnet *ifp; sc = xsc; ifp = sc->lge_ifp; LGE_LOCK_ASSERT(sc); CSR_WRITE_4(sc, LGE_STATSIDX, LGE_STATS_SINGLE_COLL_PKTS); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, CSR_READ_4(sc, LGE_STATSVAL)); CSR_WRITE_4(sc, LGE_STATSIDX, LGE_STATS_MULTI_COLL_PKTS); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, CSR_READ_4(sc, LGE_STATSVAL)); if (!sc->lge_link) { mii = device_get_softc(sc->lge_miibus); mii_tick(mii); if (mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->lge_link++; if (bootverbose && (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX|| IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T)) device_printf(sc->lge_dev, "gigabit link up\n"); if (ifp->if_snd.ifq_head != NULL) lge_start_locked(ifp); } } if (sc->lge_timer != 0 && --sc->lge_timer == 0) lge_watchdog(sc); callout_reset(&sc->lge_stat_callout, hz, lge_tick, sc); return; } static void lge_intr(arg) void *arg; { struct lge_softc *sc; struct ifnet *ifp; u_int32_t status; sc = arg; ifp = sc->lge_ifp; LGE_LOCK(sc); /* Suppress unwanted interrupts */ if (!(ifp->if_flags & IFF_UP)) { lge_stop(sc); LGE_UNLOCK(sc); return; } for (;;) { /* * Reading the ISR register clears all interrupts, and * clears the 'interrupts enabled' bit in the IMR * register. */ status = CSR_READ_4(sc, LGE_ISR); if ((status & LGE_INTRS) == 0) break; if ((status & (LGE_ISR_TXCMDFIFO_EMPTY|LGE_ISR_TXDMA_DONE))) lge_txeof(sc); if (status & LGE_ISR_RXDMA_DONE) lge_rxeof(sc, LGE_RX_DMACNT(status)); if (status & LGE_ISR_RXCMDFIFO_EMPTY) lge_rxeoc(sc); if (status & LGE_ISR_PHY_INTR) { sc->lge_link = 0; callout_stop(&sc->lge_stat_callout); lge_tick(sc); } } /* Re-enable interrupts. */ CSR_WRITE_4(sc, LGE_IMR, LGE_IMR_SETRST_CTL0|LGE_IMR_INTR_ENB); if (ifp->if_snd.ifq_head != NULL) lge_start_locked(ifp); LGE_UNLOCK(sc); return; } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int lge_encap(sc, m_head, txidx) struct lge_softc *sc; struct mbuf *m_head; u_int32_t *txidx; { struct lge_frag *f = NULL; struct lge_tx_desc *cur_tx; struct mbuf *m; int frag = 0, tot_len = 0; /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; cur_tx = &sc->lge_ldata->lge_tx_list[*txidx]; frag = 0; for (m = m_head; m != NULL; m = m->m_next) { if (m->m_len != 0) { tot_len += m->m_len; f = &cur_tx->lge_frags[frag]; f->lge_fraglen = m->m_len; f->lge_fragptr_lo = vtophys(mtod(m, vm_offset_t)); f->lge_fragptr_hi = 0; frag++; } } if (m != NULL) return(ENOBUFS); cur_tx->lge_mbuf = m_head; cur_tx->lge_ctl = LGE_TXCTL_WANTINTR|LGE_FRAGCNT(frag)|tot_len; LGE_INC((*txidx), LGE_TX_LIST_CNT); /* Queue for transmit */ CSR_WRITE_4(sc, LGE_TXDESC_ADDR_LO, vtophys(cur_tx)); return(0); } /* * Main transmit routine. 
To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. */ static void lge_start(ifp) struct ifnet *ifp; { struct lge_softc *sc; sc = ifp->if_softc; LGE_LOCK(sc); lge_start_locked(ifp); LGE_UNLOCK(sc); } static void lge_start_locked(ifp) struct ifnet *ifp; { struct lge_softc *sc; struct mbuf *m_head = NULL; u_int32_t idx; sc = ifp->if_softc; if (!sc->lge_link) return; idx = sc->lge_cdata.lge_tx_prod; if (ifp->if_drv_flags & IFF_DRV_OACTIVE) return; while(sc->lge_ldata->lge_tx_list[idx].lge_mbuf == NULL) { if (CSR_READ_1(sc, LGE_TXCMDFREE_8BIT) == 0) break; IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (lge_encap(sc, m_head, &idx)) { IF_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* * If there's a BPF listener, bounce a copy of this frame * to him. */ BPF_MTAP(ifp, m_head); } sc->lge_cdata.lge_tx_prod = idx; /* * Set a timeout in case the chip goes out to lunch. */ sc->lge_timer = 5; return; } static void lge_init(xsc) void *xsc; { struct lge_softc *sc = xsc; LGE_LOCK(sc); lge_init_locked(sc); LGE_UNLOCK(sc); } static void lge_init_locked(sc) struct lge_softc *sc; { struct ifnet *ifp = sc->lge_ifp; LGE_LOCK_ASSERT(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* * Cancel pending I/O and free all RX/TX buffers. */ lge_stop(sc); lge_reset(sc); /* Set MAC address */ CSR_WRITE_4(sc, LGE_PAR0, *(u_int32_t *)(&IF_LLADDR(sc->lge_ifp)[0])); CSR_WRITE_4(sc, LGE_PAR1, *(u_int32_t *)(&IF_LLADDR(sc->lge_ifp)[4])); /* Init circular RX list. */ if (lge_list_rx_init(sc) == ENOBUFS) { device_printf(sc->lge_dev, "initialization failed: no " "memory for rx buffers\n"); lge_stop(sc); return; } /* * Init tx descriptors. */ lge_list_tx_init(sc); /* Set initial value for MODE1 register. */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_UCAST| LGE_MODE1_TX_CRC|LGE_MODE1_TXPAD| LGE_MODE1_RX_FLOWCTL|LGE_MODE1_SETRST_CTL0| LGE_MODE1_SETRST_CTL1|LGE_MODE1_SETRST_CTL2); /* If we want promiscuous mode, set the allframes bit. */ if (ifp->if_flags & IFF_PROMISC) { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_PROMISC); } else { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_PROMISC); } /* * Set the capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_BCAST); } else { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_BCAST); } /* Packet padding workaround? */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RMVPAD); /* No error frames */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_ERRPKTS); /* Receive large frames */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_GIANTS); /* Workaround: disable RX/TX flow control */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_TX_FLOWCTL); CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_FLOWCTL); /* Make sure to strip CRC from received frames */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_CRC); /* Turn off magic packet mode */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_MPACK_ENB); /* Turn off all VLAN stuff */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_VLAN_RX|LGE_MODE1_VLAN_TX| LGE_MODE1_VLAN_STRIP|LGE_MODE1_VLAN_INSERT); /* Workaround: FIFO overflow */ CSR_WRITE_2(sc, LGE_RXFIFO_HIWAT, 0x3FFF); CSR_WRITE_4(sc, LGE_IMR, LGE_IMR_SETRST_CTL1|LGE_IMR_RXFIFO_WAT); /* * Load the multicast filter.
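 *
 * (Note the MODE1 set/clear convention used throughout this routine,
 * inferred from the code above rather than from the LXT1001 manual:
 * writing a bit together with its LGE_MODE1_SETRST_CTLx companion sets
 * it, while writing the bit alone clears it:
 *
 *	CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1 | bit);	set
 *	CSR_WRITE_4(sc, LGE_MODE1, bit);				clear
 * )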
*/ lge_setmulti(sc); /* * Enable hardware checksum validation for all received IPv4 * packets, do not reject packets with bad checksums. */ CSR_WRITE_4(sc, LGE_MODE2, LGE_MODE2_RX_IPCSUM| LGE_MODE2_RX_TCPCSUM|LGE_MODE2_RX_UDPCSUM| LGE_MODE2_RX_ERRCSUM); /* * Enable the delivery of PHY interrupts based on * link/speed/duplex status changes. */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL0|LGE_MODE1_GMIIPOLL); /* Enable receiver and transmitter. */ CSR_WRITE_4(sc, LGE_RXDESC_ADDR_HI, 0); CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_RX_ENB); CSR_WRITE_4(sc, LGE_TXDESC_ADDR_HI, 0); CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1|LGE_MODE1_TX_ENB); /* * Enable interrupts. */ CSR_WRITE_4(sc, LGE_IMR, LGE_IMR_SETRST_CTL0| LGE_IMR_SETRST_CTL1|LGE_IMR_INTR_ENB|LGE_INTRS); lge_ifmedia_upd_locked(ifp); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->lge_stat_callout, hz, lge_tick, sc); return; } /* * Set media options. */ static int lge_ifmedia_upd(ifp) struct ifnet *ifp; { struct lge_softc *sc; sc = ifp->if_softc; LGE_LOCK(sc); lge_ifmedia_upd_locked(ifp); LGE_UNLOCK(sc); return(0); } static void lge_ifmedia_upd_locked(ifp) struct ifnet *ifp; { struct lge_softc *sc; struct mii_data *mii; struct mii_softc *miisc; sc = ifp->if_softc; LGE_LOCK_ASSERT(sc); mii = device_get_softc(sc->lge_miibus); sc->lge_link = 0; LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); mii_mediachg(mii); } /* * Report current media status. */ static void lge_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct lge_softc *sc; struct mii_data *mii; sc = ifp->if_softc; LGE_LOCK(sc); mii = device_get_softc(sc->lge_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; LGE_UNLOCK(sc); return; } static int lge_ioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct lge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int error = 0; switch(command) { case SIOCSIFMTU: LGE_LOCK(sc); if (ifr->ifr_mtu > LGE_JUMBO_MTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; LGE_UNLOCK(sc); break; case SIOCSIFFLAGS: LGE_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->lge_if_flags & IFF_PROMISC)) { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_SETRST_CTL1| LGE_MODE1_RX_PROMISC); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->lge_if_flags & IFF_PROMISC) { CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_PROMISC); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lge_init_locked(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) lge_stop(sc); } sc->lge_if_flags = ifp->if_flags; LGE_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: LGE_LOCK(sc); lge_setmulti(sc); LGE_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->lge_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; default: error = ether_ioctl(ifp, command, data); break; } return(error); } static void lge_watchdog(sc) struct lge_softc *sc; { struct ifnet *ifp; LGE_LOCK_ASSERT(sc); ifp = sc->lge_ifp; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if_printf(ifp, "watchdog timeout\n"); lge_stop(sc); lge_reset(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lge_init_locked(sc); if (ifp->if_snd.ifq_head != NULL) lge_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX
and TX lists. */ static void lge_stop(sc) struct lge_softc *sc; { - register int i; + int i; struct ifnet *ifp; LGE_LOCK_ASSERT(sc); ifp = sc->lge_ifp; sc->lge_timer = 0; callout_stop(&sc->lge_stat_callout); CSR_WRITE_4(sc, LGE_IMR, LGE_IMR_INTR_ENB); /* Disable receiver and transmitter. */ CSR_WRITE_4(sc, LGE_MODE1, LGE_MODE1_RX_ENB|LGE_MODE1_TX_ENB); sc->lge_link = 0; /* * Free data in the RX lists. */ for (i = 0; i < LGE_RX_LIST_CNT; i++) { if (sc->lge_ldata->lge_rx_list[i].lge_mbuf != NULL) { m_freem(sc->lge_ldata->lge_rx_list[i].lge_mbuf); sc->lge_ldata->lge_rx_list[i].lge_mbuf = NULL; } } bzero((char *)&sc->lge_ldata->lge_rx_list, sizeof(sc->lge_ldata->lge_rx_list)); /* * Free the TX list buffers. */ for (i = 0; i < LGE_TX_LIST_CNT; i++) { if (sc->lge_ldata->lge_tx_list[i].lge_mbuf != NULL) { m_freem(sc->lge_ldata->lge_tx_list[i].lge_mbuf); sc->lge_ldata->lge_tx_list[i].lge_mbuf = NULL; } } bzero((char *)&sc->lge_ldata->lge_tx_list, sizeof(sc->lge_ldata->lge_tx_list)); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); return; } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int lge_shutdown(dev) device_t dev; { struct lge_softc *sc; sc = device_get_softc(dev); LGE_LOCK(sc); lge_reset(sc); lge_stop(sc); LGE_UNLOCK(sc); return (0); } Index: stable/11/sys/dev/mse/mse_isa.c =================================================================== --- stable/11/sys/dev/mse/mse_isa.c (revision 331642) +++ stable/11/sys/dev/mse/mse_isa.c (revision 331643) @@ -1,392 +1,392 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 M. Warner Losh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /*- * Copyright 1992 by the University of Guelph * * Permission to use, copy and modify this * software and its documentation for any purpose and without * fee is hereby granted, provided that the above copyright * notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting * documentation. * University of Guelph makes no representations about the suitability of * this software for any purpose. It is provided "as is" * without express or implied warranty. 
*/ /* * Driver for the Logitech and ATI Inport Bus mice for use with 386bsd and * the X386 port, courtesy of * Rick Macklem, rick@snowhite.cis.uoguelph.ca * Caveats: The driver currently uses spltty(), but doesn't use any * generic tty code. It could use splmse() (that only masks off the * bus mouse interrupt, but that would require hacking in i386/isa/icu.s. * (This may be worth the effort, since the Logitech generates 30/60 * interrupts/sec continuously while it is open.) * NB: The ATI has NOT been tested yet! */ /* * Modification history: * Sep 6, 1994 -- Lars Fredriksen(fredriks@mcs.com) * improved probe based on input from Logitech. * * Oct 19, 1992 -- E. Stark (stark@cs.sunysb.edu) * fixes to make it work with Microsoft InPort busmouse * * Jan, 1993 -- E. Stark (stark@cs.sunysb.edu) * added patches for new "select" interface * * May 4, 1993 -- E. Stark (stark@cs.sunysb.edu) * changed position of some spl()'s in mseread * * October 8, 1993 -- E. Stark (stark@cs.sunysb.edu) * limit maximum negative x/y value to -127 to work around XFree problem * that causes spurious button pushes. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mse_isa_probe(device_t dev); static int mse_isa_attach(device_t dev); static device_method_t mse_methods[] = { DEVMETHOD(device_probe, mse_isa_probe), DEVMETHOD(device_attach, mse_isa_attach), DEVMETHOD(device_detach, mse_detach), { 0, 0 } }; static driver_t mse_driver = { "mse", mse_methods, sizeof(mse_softc_t), }; DRIVER_MODULE(mse, isa, mse_driver, mse_devclass, 0, 0); static struct isa_pnp_id mse_ids[] = { { 0x000fd041, "Bus mouse" }, /* PNP0F00 */ { 0x020fd041, "InPort mouse" }, /* PNP0F02 */ { 0x0d0fd041, "InPort mouse compatible" }, /* PNP0F0D */ { 0x110fd041, "Bus mouse compatible" }, /* PNP0F11 */ { 0x150fd041, "Logitech bus mouse" }, /* PNP0F15 */ { 0x180fd041, "Logitech bus mouse compatible" },/* PNP0F18 */ { 0 } }; /* * Logitech bus mouse definitions */ #define MSE_SETUP 0x91 /* What does this mean? */ /* The definition for the control port */ /* is as follows: */ /* D7 = Mode set flag (1 = active) */ /* D6,D5 = Mode selection (port A) */ /* 00 = Mode 0 = Basic I/O */ /* 01 = Mode 1 = Strobed I/O */ /* 10 = Mode 2 = Bi-dir bus */ /* D4 = Port A direction (1 = input)*/ /* D3 = Port C (upper 4 bits) */ /* direction. (1 = input) */ /* D2 = Mode selection (port B & C) */ /* 0 = Mode 0 = Basic I/O */ /* 1 = Mode 1 = Strobed I/O */ /* D1 = Port B direction (1 = input)*/ /* D0 = Port C (lower 4 bits) */ /* direction. (1 = input) */ /* So 91 means Basic I/O on all 3 ports,*/ /* Port A is an input port, B is an */ /* output port, C is split with upper */ /* 4 bits being an output port and lower*/ /* 4 bits an input port, and enable the */ /* sucker. */ /* Courtesy Intel 8255 databook. 
Lars */ #define MSE_HOLD 0x80 #define MSE_RXLOW 0x00 #define MSE_RXHIGH 0x20 #define MSE_RYLOW 0x40 #define MSE_RYHIGH 0x60 #define MSE_DISINTR 0x10 #define MSE_INTREN 0x00 static int mse_probelogi(device_t dev, mse_softc_t *sc); static void mse_disablelogi(struct resource *port); static void mse_getlogi(struct resource *port, int *dx, int *dy, int *but); static void mse_enablelogi(struct resource *port); /* * ATI Inport mouse definitions */ #define MSE_INPORT_RESET 0x80 #define MSE_INPORT_STATUS 0x00 #define MSE_INPORT_DX 0x01 #define MSE_INPORT_DY 0x02 #define MSE_INPORT_MODE 0x07 #define MSE_INPORT_HOLD 0x20 #define MSE_INPORT_INTREN 0x09 static int mse_probeati(device_t dev, mse_softc_t *sc); static void mse_enableati(struct resource *port); static void mse_disableati(struct resource *port); static void mse_getati(struct resource *port, int *dx, int *dy, int *but); static struct mse_types mse_types[] = { { MSE_ATIINPORT, mse_probeati, mse_enableati, mse_disableati, mse_getati, { 2, MOUSE_IF_INPORT, MOUSE_MOUSE, MOUSE_MODEL_GENERIC, 0, }, { MOUSE_PROTO_INPORT, -1, -1, 0, 0, MOUSE_MSC_PACKETSIZE, { MOUSE_MSC_SYNCMASK, MOUSE_MSC_SYNC, }, }, }, { MSE_LOGITECH, mse_probelogi, mse_enablelogi, mse_disablelogi, mse_getlogi, { 2, MOUSE_IF_BUS, MOUSE_MOUSE, MOUSE_MODEL_GENERIC, 0, }, { MOUSE_PROTO_BUS, -1, -1, 0, 0, MOUSE_MSC_PACKETSIZE, { MOUSE_MSC_SYNCMASK, MOUSE_MSC_SYNC, }, }, }, { 0, }, }; static int mse_isa_probe(device_t dev) { mse_softc_t *sc; int error; int rid; int i; /* check PnP IDs */ error = ISA_PNP_PROBE(device_get_parent(dev), dev, mse_ids); if (error == ENXIO) return error; sc = device_get_softc(dev); rid = 0; sc->sc_port = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &rid, MSE_IOSIZE, RF_ACTIVE); if (sc->sc_port == NULL) return ENXIO; /* * Check for each mouse type in the table. */ i = 0; while (mse_types[i].m_type) { if ((*mse_types[i].m_probe)(dev, sc)) { sc->sc_mousetype = mse_types[i].m_type; sc->sc_enablemouse = mse_types[i].m_enable; sc->sc_disablemouse = mse_types[i].m_disable; sc->sc_getmouse = mse_types[i].m_get; sc->hw = mse_types[i].m_hw; sc->mode = mse_types[i].m_mode; bus_release_resource(dev, SYS_RES_IOPORT, rid, sc->sc_port); device_set_desc(dev, "Bus/InPort Mouse"); return 0; } i++; } bus_release_resource(dev, SYS_RES_IOPORT, rid, sc->sc_port); return ENXIO; } static int mse_isa_attach(device_t dev) { mse_softc_t *sc; int rid; sc = device_get_softc(dev); rid = 0; sc->sc_port = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &rid, MSE_IOSIZE, RF_ACTIVE); if (sc->sc_port == NULL) return ENXIO; return (mse_common_attach(dev)); } /* * Routines for the Logitech mouse. */ /* * Test for a Logitech bus mouse and return 1 if it is. * (until I know how to use the signature port properly, just disable * interrupts and return 1) */ static int mse_probelogi(device_t dev, mse_softc_t *sc) { int sig; bus_write_1(sc->sc_port, MSE_PORTD, MSE_SETUP); /* set the signature port */ bus_write_1(sc->sc_port, MSE_PORTB, MSE_LOGI_SIG); DELAY(30000); /* 30 ms delay */ sig = bus_read_1(sc->sc_port, MSE_PORTB) & 0xFF; if (sig == MSE_LOGI_SIG) { bus_write_1(sc->sc_port, MSE_PORTC, MSE_DISINTR); return(1); } else { if (bootverbose) device_printf(dev, "wrong signature %x\n", sig); return(0); } } /* * Initialize Logitech mouse and enable interrupts. */ static void mse_enablelogi(struct resource *port) { int dx, dy, but; bus_write_1(port, MSE_PORTD, MSE_SETUP); mse_getlogi(port, &dx, &dy, &but); } /* * Disable interrupts for Logitech mouse. 
*/ static void mse_disablelogi(struct resource *port) { bus_write_1(port, MSE_PORTC, MSE_DISINTR); } /* * Get the current dx, dy and button up/down state. */ static void mse_getlogi(struct resource *port, int *dx, int *dy, int *but) { - register char x, y; + char x, y; bus_write_1(port, MSE_PORTC, MSE_HOLD | MSE_RXLOW); x = bus_read_1(port, MSE_PORTA); *but = (x >> 5) & MOUSE_MSC_BUTTONS; x &= 0xf; bus_write_1(port, MSE_PORTC, MSE_HOLD | MSE_RXHIGH); x |= (bus_read_1(port, MSE_PORTA) << 4); bus_write_1(port, MSE_PORTC, MSE_HOLD | MSE_RYLOW); y = (bus_read_1(port, MSE_PORTA) & 0xf); bus_write_1(port, MSE_PORTC, MSE_HOLD | MSE_RYHIGH); y |= (bus_read_1(port, MSE_PORTA) << 4); *dx = x; *dy = y; bus_write_1(port, MSE_PORTC, MSE_INTREN); } /* * Routines for the ATI Inport bus mouse. */ /* * Test for an ATI Inport bus mouse and return 1 if it is. * (do not enable interrupts) */ static int mse_probeati(device_t dev, mse_softc_t *sc) { int i; for (i = 0; i < 2; i++) if (bus_read_1(sc->sc_port, MSE_PORTC) == 0xde) return (1); return (0); } /* * Initialize ATI Inport mouse and enable interrupts. */ static void mse_enableati(struct resource *port) { bus_write_1(port, MSE_PORTA, MSE_INPORT_RESET); bus_write_1(port, MSE_PORTA, MSE_INPORT_MODE); bus_write_1(port, MSE_PORTB, MSE_INPORT_INTREN); } /* * Disable interrupts for ATI Inport mouse. */ static void mse_disableati(struct resource *port) { bus_write_1(port, MSE_PORTA, MSE_INPORT_MODE); bus_write_1(port, MSE_PORTB, 0); } /* * Get current dx, dy and up/down button state. */ static void mse_getati(struct resource *port, int *dx, int *dy, int *but) { char byte; bus_write_1(port, MSE_PORTA, MSE_INPORT_MODE); bus_write_1(port, MSE_PORTB, MSE_INPORT_HOLD); bus_write_1(port, MSE_PORTA, MSE_INPORT_STATUS); *but = ~bus_read_1(port, MSE_PORTB) & MOUSE_MSC_BUTTONS; bus_write_1(port, MSE_PORTA, MSE_INPORT_DX); byte = bus_read_1(port, MSE_PORTB); *dx = byte; bus_write_1(port, MSE_PORTA, MSE_INPORT_DY); byte = bus_read_1(port, MSE_PORTB); *dy = byte; bus_write_1(port, MSE_PORTA, MSE_INPORT_MODE); bus_write_1(port, MSE_PORTB, MSE_INPORT_INTREN); } Index: stable/11/sys/dev/my/if_my.c =================================================================== --- stable/11/sys/dev/my/if_my.c (revision 331642) +++ stable/11/sys/dev/my/if_my.c (revision 331643) @@ -1,1776 +1,1776 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Written by: yen_cw@myson.com.tw * Copyright (c) 2002 Myson Technology Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Myson fast ethernet PCI NIC driver, available at: http://www.myson.com.tw/ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #define NBPFILTER 1 #include #include #include #include #include #include #include #include #include /* for vtophys */ #include /* for vtophys */ #include #include #include #include #include #include /* * #define MY_USEIOSPACE */ static int MY_USEIOSPACE = 1; #ifdef MY_USEIOSPACE #define MY_RES SYS_RES_IOPORT #define MY_RID MY_PCI_LOIO #else #define MY_RES SYS_RES_MEMORY #define MY_RID MY_PCI_LOMEM #endif #include /* * Various supported device vendors/types and their names. */ struct my_type *my_info_tmp; static struct my_type my_devs[] = { {MYSONVENDORID, MTD800ID, "Myson MTD80X Based Fast Ethernet Card"}, {MYSONVENDORID, MTD803ID, "Myson MTD80X Based Fast Ethernet Card"}, {MYSONVENDORID, MTD891ID, "Myson MTD89X Based Giga Ethernet Card"}, {0, 0, NULL} }; /* * Various supported PHY vendors/types and their names. Note that this driver * will work with pretty much any MII-compliant PHY, so failure to positively * identify the chip is not a fatal error. */ static struct my_type my_phys[] = { {MysonPHYID0, MysonPHYID0, ""}, {SeeqPHYID0, SeeqPHYID0, ""}, {AhdocPHYID0, AhdocPHYID0, ""}, {MarvellPHYID0, MarvellPHYID0, ""}, {LevelOnePHYID0, LevelOnePHYID0, ""}, {0, 0, ""} }; static int my_probe(device_t); static int my_attach(device_t); static int my_detach(device_t); static int my_newbuf(struct my_softc *, struct my_chain_onefrag *); static int my_encap(struct my_softc *, struct my_chain *, struct mbuf *); static void my_rxeof(struct my_softc *); static void my_txeof(struct my_softc *); static void my_txeoc(struct my_softc *); static void my_intr(void *); static void my_start(struct ifnet *); static void my_start_locked(struct ifnet *); static int my_ioctl(struct ifnet *, u_long, caddr_t); static void my_init(void *); static void my_init_locked(struct my_softc *); static void my_stop(struct my_softc *); static void my_autoneg_timeout(void *); static void my_watchdog(void *); static int my_shutdown(device_t); static int my_ifmedia_upd(struct ifnet *); static void my_ifmedia_sts(struct ifnet *, struct ifmediareq *); static u_int16_t my_phy_readreg(struct my_softc *, int); static void my_phy_writereg(struct my_softc *, int, int); static void my_autoneg_xmit(struct my_softc *); static void my_autoneg_mii(struct my_softc *, int, int); static void my_setmode_mii(struct my_softc *, int); static void my_getmode_mii(struct my_softc *); static void my_setcfg(struct my_softc *, int); static void my_setmulti(struct my_softc *); static void my_reset(struct my_softc *); static int my_list_rx_init(struct my_softc *); static int my_list_tx_init(struct my_softc *); static long my_send_cmd_to_phy(struct my_softc *, int, int); #define MY_SETBIT(sc, reg, x) CSR_WRITE_4(sc, reg, CSR_READ_4(sc, reg) | (x)) #define MY_CLRBIT(sc, reg, x) CSR_WRITE_4(sc, reg, CSR_READ_4(sc, reg) & ~(x)) static 
device_method_t my_methods[] = { /* Device interface */ DEVMETHOD(device_probe, my_probe), DEVMETHOD(device_attach, my_attach), DEVMETHOD(device_detach, my_detach), DEVMETHOD(device_shutdown, my_shutdown), DEVMETHOD_END }; static driver_t my_driver = { "my", my_methods, sizeof(struct my_softc) }; static devclass_t my_devclass; DRIVER_MODULE(my, pci, my_driver, my_devclass, 0, 0); MODULE_DEPEND(my, pci, 1, 1, 1); MODULE_DEPEND(my, ether, 1, 1, 1); static long my_send_cmd_to_phy(struct my_softc * sc, int opcode, int regad) { long miir; int i; int mask, data; MY_LOCK_ASSERT(sc); /* enable MII output */ miir = CSR_READ_4(sc, MY_MANAGEMENT); miir &= 0xfffffff0; miir |= MY_MASK_MIIR_MII_WRITE + MY_MASK_MIIR_MII_MDO; /* send 32 1's preamble */ for (i = 0; i < 32; i++) { /* low MDC; MDO is already high (miir) */ miir &= ~MY_MASK_MIIR_MII_MDC; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); /* high MDC */ miir |= MY_MASK_MIIR_MII_MDC; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); } /* calculate ST+OP+PHYAD+REGAD+TA */ data = opcode | (sc->my_phy_addr << 7) | (regad << 2); /* sent out */ mask = 0x8000; while (mask) { /* low MDC, prepare MDO */ miir &= ~(MY_MASK_MIIR_MII_MDC + MY_MASK_MIIR_MII_MDO); if (mask & data) miir |= MY_MASK_MIIR_MII_MDO; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); /* high MDC */ miir |= MY_MASK_MIIR_MII_MDC; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); DELAY(30); /* next */ mask >>= 1; if (mask == 0x2 && opcode == MY_OP_READ) miir &= ~MY_MASK_MIIR_MII_WRITE; } return miir; } static u_int16_t my_phy_readreg(struct my_softc * sc, int reg) { long miir; int mask, data; MY_LOCK_ASSERT(sc); if (sc->my_info->my_did == MTD803ID) data = CSR_READ_2(sc, MY_PHYBASE + reg * 2); else { miir = my_send_cmd_to_phy(sc, MY_OP_READ, reg); /* read data */ mask = 0x8000; data = 0; while (mask) { /* low MDC */ miir &= ~MY_MASK_MIIR_MII_MDC; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); /* read MDI */ miir = CSR_READ_4(sc, MY_MANAGEMENT); if (miir & MY_MASK_MIIR_MII_MDI) data |= mask; /* high MDC, and wait */ miir |= MY_MASK_MIIR_MII_MDC; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); DELAY(30); /* next */ mask >>= 1; } /* low MDC */ miir &= ~MY_MASK_MIIR_MII_MDC; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); } return (u_int16_t) data; } static void my_phy_writereg(struct my_softc * sc, int reg, int data) { long miir; int mask; MY_LOCK_ASSERT(sc); if (sc->my_info->my_did == MTD803ID) CSR_WRITE_2(sc, MY_PHYBASE + reg * 2, data); else { miir = my_send_cmd_to_phy(sc, MY_OP_WRITE, reg); /* write data */ mask = 0x8000; while (mask) { /* low MDC, prepare MDO */ miir &= ~(MY_MASK_MIIR_MII_MDC + MY_MASK_MIIR_MII_MDO); if (mask & data) miir |= MY_MASK_MIIR_MII_MDO; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); DELAY(1); /* high MDC */ miir |= MY_MASK_MIIR_MII_MDC; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); DELAY(1); /* next */ mask >>= 1; } /* low MDC */ miir &= ~MY_MASK_MIIR_MII_MDC; CSR_WRITE_4(sc, MY_MANAGEMENT, miir); } return; } /* * Program the 64-bit multicast hash filter. 
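* * In outline: each multicast address is hashed with ether_crc32_be(); the top 6 bits of the inverted CRC select one of 64 filter bits: h = ~ether_crc32_be(lladdr, ETHER_ADDR_LEN) >> 26, then h < 32 sets bit h in MY_MAR0 (hashes[0]) and h >= 32 sets bit (h - 32) in MY_MAR1 (hashes[1]). (A sketch of the computation performed below.)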
*/ static void my_setmulti(struct my_softc * sc) { struct ifnet *ifp; int h = 0; u_int32_t hashes[2] = {0, 0}; struct ifmultiaddr *ifma; u_int32_t rxfilt; int mcnt = 0; MY_LOCK_ASSERT(sc); ifp = sc->my_ifp; rxfilt = CSR_READ_4(sc, MY_TCRRCR); if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { rxfilt |= MY_AM; CSR_WRITE_4(sc, MY_TCRRCR, rxfilt); CSR_WRITE_4(sc, MY_MAR0, 0xFFFFFFFF); CSR_WRITE_4(sc, MY_MAR1, 0xFFFFFFFF); return; } /* first, zot all the existing hash bits */ CSR_WRITE_4(sc, MY_MAR0, 0); CSR_WRITE_4(sc, MY_MAR1, 0); /* now program new ones */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ~ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); mcnt++; } if_maddr_runlock(ifp); if (mcnt) rxfilt |= MY_AM; else rxfilt &= ~MY_AM; CSR_WRITE_4(sc, MY_MAR0, hashes[0]); CSR_WRITE_4(sc, MY_MAR1, hashes[1]); CSR_WRITE_4(sc, MY_TCRRCR, rxfilt); return; } /* * Initiate an autonegotiation session. */ static void my_autoneg_xmit(struct my_softc * sc) { u_int16_t phy_sts = 0; MY_LOCK_ASSERT(sc); my_phy_writereg(sc, PHY_BMCR, PHY_BMCR_RESET); DELAY(500); while (my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_RESET); phy_sts = my_phy_readreg(sc, PHY_BMCR); phy_sts |= PHY_BMCR_AUTONEGENBL | PHY_BMCR_AUTONEGRSTR; my_phy_writereg(sc, PHY_BMCR, phy_sts); return; } static void my_autoneg_timeout(void *arg) { struct my_softc *sc; sc = arg; MY_LOCK_ASSERT(sc); my_autoneg_mii(sc, MY_FLAG_DELAYTIMEO, 1); } /* * Invoke autonegotiation on a PHY. */ static void my_autoneg_mii(struct my_softc * sc, int flag, int verbose) { u_int16_t phy_sts = 0, media, advert, ability; u_int16_t ability2 = 0; struct ifnet *ifp; struct ifmedia *ifm; MY_LOCK_ASSERT(sc); ifm = &sc->ifmedia; ifp = sc->my_ifp; ifm->ifm_media = IFM_ETHER | IFM_AUTO; #ifndef FORCE_AUTONEG_TFOUR /* * First, see if autoneg is supported. If not, there's no point in * continuing. */ phy_sts = my_phy_readreg(sc, PHY_BMSR); if (!(phy_sts & PHY_BMSR_CANAUTONEG)) { if (verbose) device_printf(sc->my_dev, "autonegotiation not supported\n"); ifm->ifm_media = IFM_ETHER | IFM_10_T | IFM_HDX; return; } #endif switch (flag) { case MY_FLAG_FORCEDELAY: /* * XXX Never use this option anywhere but in the probe * routine: making the kernel stop dead in its tracks for * three whole seconds after we've gone multi-user is really * bad manners. */ my_autoneg_xmit(sc); DELAY(5000000); break; case MY_FLAG_SCHEDDELAY: /* * Wait for the transmitter to go idle before starting an * autoneg session, otherwise my_start() may clobber our * timeout, and we don't want to allow transmission during an * autoneg session since that can screw it up. */ if (sc->my_cdata.my_tx_head != NULL) { sc->my_want_auto = 1; MY_UNLOCK(sc); return; } my_autoneg_xmit(sc); callout_reset(&sc->my_autoneg_timer, hz * 5, my_autoneg_timeout, sc); sc->my_autoneg = 1; sc->my_want_auto = 0; return; case MY_FLAG_DELAYTIMEO: callout_stop(&sc->my_autoneg_timer); sc->my_autoneg = 0; break; default: device_printf(sc->my_dev, "invalid autoneg flag: %d\n", flag); return; } if (my_phy_readreg(sc, PHY_BMSR) & PHY_BMSR_AUTONEGCOMP) { if (verbose) device_printf(sc->my_dev, "autoneg complete, "); phy_sts = my_phy_readreg(sc, PHY_BMSR); } else { if (verbose) device_printf(sc->my_dev, "autoneg not complete, "); } media = my_phy_readreg(sc, PHY_BMCR); /* Link is good. Report modes and set duplex mode. 
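* The negotiated mode is resolved by intersecting our advertised abilities (PHY_ANAR) with the link partner's (PHY_LPAR) and taking the best common mode, checked in the order 100baseT4, 100baseTX-FDX, 100baseTX-HDX, 10baseT-FDX, then falling back to 10baseT-HDX (after a Marvell/Level One 1000Mbps special case).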
*/ if (my_phy_readreg(sc, PHY_BMSR) & PHY_BMSR_LINKSTAT) { if (verbose) device_printf(sc->my_dev, "link status good. "); advert = my_phy_readreg(sc, PHY_ANAR); ability = my_phy_readreg(sc, PHY_LPAR); if ((sc->my_pinfo->my_vid == MarvellPHYID0) || (sc->my_pinfo->my_vid == LevelOnePHYID0)) { ability2 = my_phy_readreg(sc, PHY_1000SR); if (ability2 & PHY_1000SR_1000BTXFULL) { advert = 0; ability = 0; /* * this version did not support 1000M, * ifm->ifm_media = * IFM_ETHER|IFM_1000_T|IFM_FDX; */ ifm->ifm_media = IFM_ETHER | IFM_100_TX | IFM_FDX; media &= ~PHY_BMCR_SPEEDSEL; media |= PHY_BMCR_1000; media |= PHY_BMCR_DUPLEX; printf("(full-duplex, 1000Mbps)\n"); } else if (ability2 & PHY_1000SR_1000BTXHALF) { advert = 0; ability = 0; /* * this version did not support 1000M, * ifm->ifm_media = IFM_ETHER|IFM_1000_T; */ ifm->ifm_media = IFM_ETHER | IFM_100_TX; media &= ~PHY_BMCR_SPEEDSEL; media &= ~PHY_BMCR_DUPLEX; media |= PHY_BMCR_1000; printf("(half-duplex, 1000Mbps)\n"); } } if (advert & PHY_ANAR_100BT4 && ability & PHY_ANAR_100BT4) { ifm->ifm_media = IFM_ETHER | IFM_100_T4; media |= PHY_BMCR_SPEEDSEL; media &= ~PHY_BMCR_DUPLEX; printf("(100baseT4)\n"); } else if (advert & PHY_ANAR_100BTXFULL && ability & PHY_ANAR_100BTXFULL) { ifm->ifm_media = IFM_ETHER | IFM_100_TX | IFM_FDX; media |= PHY_BMCR_SPEEDSEL; media |= PHY_BMCR_DUPLEX; printf("(full-duplex, 100Mbps)\n"); } else if (advert & PHY_ANAR_100BTXHALF && ability & PHY_ANAR_100BTXHALF) { ifm->ifm_media = IFM_ETHER | IFM_100_TX | IFM_HDX; media |= PHY_BMCR_SPEEDSEL; media &= ~PHY_BMCR_DUPLEX; printf("(half-duplex, 100Mbps)\n"); } else if (advert & PHY_ANAR_10BTFULL && ability & PHY_ANAR_10BTFULL) { ifm->ifm_media = IFM_ETHER | IFM_10_T | IFM_FDX; media &= ~PHY_BMCR_SPEEDSEL; media |= PHY_BMCR_DUPLEX; printf("(full-duplex, 10Mbps)\n"); } else if (advert) { ifm->ifm_media = IFM_ETHER | IFM_10_T | IFM_HDX; media &= ~PHY_BMCR_SPEEDSEL; media &= ~PHY_BMCR_DUPLEX; printf("(half-duplex, 10Mbps)\n"); } media &= ~PHY_BMCR_AUTONEGENBL; /* Set ASIC's duplex mode to match the PHY. */ my_phy_writereg(sc, PHY_BMCR, media); my_setcfg(sc, media); } else { if (verbose) device_printf(sc->my_dev, "no carrier\n"); } my_init_locked(sc); if (sc->my_tx_pend) { sc->my_autoneg = 0; sc->my_tx_pend = 0; my_start_locked(ifp); } return; } /* * To get PHY ability. 
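* Each capability bit found in the PHY status register (BMSR) maps to one or more ifmedia_add() entries; sc->ifmedia.ifm_media is overwritten as better modes are found, so the fastest supported mode (or autonegotiation, when available) becomes the default.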
*/ static void my_getmode_mii(struct my_softc * sc) { u_int16_t bmsr; struct ifnet *ifp; MY_LOCK_ASSERT(sc); ifp = sc->my_ifp; bmsr = my_phy_readreg(sc, PHY_BMSR); if (bootverbose) device_printf(sc->my_dev, "PHY status word: %x\n", bmsr); /* fallback */ sc->ifmedia.ifm_media = IFM_ETHER | IFM_10_T | IFM_HDX; if (bmsr & PHY_BMSR_10BTHALF) { if (bootverbose) device_printf(sc->my_dev, "10Mbps half-duplex mode supported\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_10_T | IFM_HDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_10_T, 0, NULL); } if (bmsr & PHY_BMSR_10BTFULL) { if (bootverbose) device_printf(sc->my_dev, "10Mbps full-duplex mode supported\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); sc->ifmedia.ifm_media = IFM_ETHER | IFM_10_T | IFM_FDX; } if (bmsr & PHY_BMSR_100BTXHALF) { if (bootverbose) device_printf(sc->my_dev, "100Mbps half-duplex mode supported\n"); ifp->if_baudrate = 100000000; ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_100_TX | IFM_HDX, 0, NULL); sc->ifmedia.ifm_media = IFM_ETHER | IFM_100_TX | IFM_HDX; } if (bmsr & PHY_BMSR_100BTXFULL) { if (bootverbose) device_printf(sc->my_dev, "100Mbps full-duplex mode supported\n"); ifp->if_baudrate = 100000000; ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); sc->ifmedia.ifm_media = IFM_ETHER | IFM_100_TX | IFM_FDX; } /* Some also support 100BaseT4. */ if (bmsr & PHY_BMSR_100BT4) { if (bootverbose) device_printf(sc->my_dev, "100baseT4 mode supported\n"); ifp->if_baudrate = 100000000; ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_100_T4, 0, NULL); sc->ifmedia.ifm_media = IFM_ETHER | IFM_100_T4; #ifdef FORCE_AUTONEG_TFOUR if (bootverbose) device_printf(sc->my_dev, "forcing on autoneg support for BT4\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL); sc->ifmedia.ifm_media = IFM_ETHER | IFM_AUTO; #endif } #if 0 /* this version did not support 1000M, */ if (sc->my_pinfo->my_vid == MarvellPHYID0) { if (bootverbose) device_printf(sc->my_dev, "1000Mbps half-duplex mode supported\n"); ifp->if_baudrate = 1000000000; ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_1000_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_1000_T | IFM_HDX, 0, NULL); if (bootverbose) device_printf(sc->my_dev, "1000Mbps full-duplex mode supported\n"); ifp->if_baudrate = 1000000000; ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); sc->ifmedia.ifm_media = IFM_ETHER | IFM_1000_T | IFM_FDX; } #endif if (bmsr & PHY_BMSR_CANAUTONEG) { if (bootverbose) device_printf(sc->my_dev, "autoneg supported\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL); sc->ifmedia.ifm_media = IFM_ETHER | IFM_AUTO; } return; } /* * Set speed and duplex mode. */ static void my_setmode_mii(struct my_softc * sc, int media) { u_int16_t bmcr; MY_LOCK_ASSERT(sc); /* * If an autoneg session is in progress, stop it.
*/ if (sc->my_autoneg) { device_printf(sc->my_dev, "canceling autoneg session\n"); callout_stop(&sc->my_autoneg_timer); sc->my_autoneg = sc->my_want_auto = 0; bmcr = my_phy_readreg(sc, PHY_BMCR); bmcr &= ~PHY_BMCR_AUTONEGENBL; my_phy_writereg(sc, PHY_BMCR, bmcr); } device_printf(sc->my_dev, "selecting MII, "); bmcr = my_phy_readreg(sc, PHY_BMCR); bmcr &= ~(PHY_BMCR_AUTONEGENBL | PHY_BMCR_SPEEDSEL | PHY_BMCR_1000 | PHY_BMCR_DUPLEX | PHY_BMCR_LOOPBK); #if 0 /* this version did not support 1000M, */ if (IFM_SUBTYPE(media) == IFM_1000_T) { printf("1000Mbps/T4, half-duplex\n"); bmcr &= ~PHY_BMCR_SPEEDSEL; bmcr &= ~PHY_BMCR_DUPLEX; bmcr |= PHY_BMCR_1000; } #endif if (IFM_SUBTYPE(media) == IFM_100_T4) { printf("100Mbps/T4, half-duplex\n"); bmcr |= PHY_BMCR_SPEEDSEL; bmcr &= ~PHY_BMCR_DUPLEX; } if (IFM_SUBTYPE(media) == IFM_100_TX) { printf("100Mbps, "); bmcr |= PHY_BMCR_SPEEDSEL; } if (IFM_SUBTYPE(media) == IFM_10_T) { printf("10Mbps, "); bmcr &= ~PHY_BMCR_SPEEDSEL; } if ((media & IFM_GMASK) == IFM_FDX) { printf("full duplex\n"); bmcr |= PHY_BMCR_DUPLEX; } else { printf("half duplex\n"); bmcr &= ~PHY_BMCR_DUPLEX; } my_phy_writereg(sc, PHY_BMCR, bmcr); my_setcfg(sc, bmcr); return; } /* * The Myson manual states that in order to fiddle with the 'full-duplex' and * '100Mbps' bits in the netconfig register, we first have to put the * transmit and/or receive logic in the idle state. */ static void my_setcfg(struct my_softc * sc, int bmcr) { int i, restart = 0; MY_LOCK_ASSERT(sc); if (CSR_READ_4(sc, MY_TCRRCR) & (MY_TE | MY_RE)) { restart = 1; MY_CLRBIT(sc, MY_TCRRCR, (MY_TE | MY_RE)); for (i = 0; i < MY_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, MY_TCRRCR) & (MY_TXRUN | MY_RXRUN))) break; } if (i == MY_TIMEOUT) device_printf(sc->my_dev, "failed to force tx and rx to idle \n"); } MY_CLRBIT(sc, MY_TCRRCR, MY_PS1000); MY_CLRBIT(sc, MY_TCRRCR, MY_PS10); if (bmcr & PHY_BMCR_1000) MY_SETBIT(sc, MY_TCRRCR, MY_PS1000); else if (!(bmcr & PHY_BMCR_SPEEDSEL)) MY_SETBIT(sc, MY_TCRRCR, MY_PS10); if (bmcr & PHY_BMCR_DUPLEX) MY_SETBIT(sc, MY_TCRRCR, MY_FD); else MY_CLRBIT(sc, MY_TCRRCR, MY_FD); if (restart) MY_SETBIT(sc, MY_TCRRCR, MY_TE | MY_RE); return; } static void my_reset(struct my_softc * sc) { - register int i; + int i; MY_LOCK_ASSERT(sc); MY_SETBIT(sc, MY_BCR, MY_SWR); for (i = 0; i < MY_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, MY_BCR) & MY_SWR)) break; } if (i == MY_TIMEOUT) device_printf(sc->my_dev, "reset never completed!\n"); /* Wait a little while for the chip to get its brains in order. */ DELAY(1000); return; } /* * Probe for a Myson chip. Check the PCI vendor and device IDs against our * list and return a device name if we find a match. */ static int my_probe(device_t dev) { struct my_type *t; t = my_devs; while (t->my_name != NULL) { if ((pci_get_vendor(dev) == t->my_vid) && (pci_get_device(dev) == t->my_did)) { device_set_desc(dev, t->my_name); my_info_tmp = t; return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } /* * Attach the interface. Allocate softc structures, do ifmedia setup and * ethernet/BPF attach. 
*/ static int my_attach(device_t dev) { int i; u_char eaddr[ETHER_ADDR_LEN]; u_int32_t iobase; struct my_softc *sc; struct ifnet *ifp; int media = IFM_ETHER | IFM_100_TX | IFM_FDX; unsigned int round; caddr_t roundptr; struct my_type *p; u_int16_t phy_vid, phy_did, phy_sts = 0; int rid, error = 0; sc = device_get_softc(dev); sc->my_dev = dev; mtx_init(&sc->my_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->my_autoneg_timer, &sc->my_mtx, 0); callout_init_mtx(&sc->my_watchdog, &sc->my_mtx, 0); /* * Map control/status registers. */ pci_enable_busmaster(dev); if (my_info_tmp->my_did == MTD800ID) { iobase = pci_read_config(dev, MY_PCI_LOIO, 4); if (iobase & 0x300) MY_USEIOSPACE = 0; } rid = MY_RID; sc->my_res = bus_alloc_resource_any(dev, MY_RES, &rid, RF_ACTIVE); if (sc->my_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto destroy_mutex; } sc->my_btag = rman_get_bustag(sc->my_res); sc->my_bhandle = rman_get_bushandle(sc->my_res); rid = 0; sc->my_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->my_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto release_io; } sc->my_info = my_info_tmp; /* Reset the adapter. */ MY_LOCK(sc); my_reset(sc); MY_UNLOCK(sc); /* * Get station address */ for (i = 0; i < ETHER_ADDR_LEN; ++i) eaddr[i] = CSR_READ_1(sc, MY_PAR0 + i); sc->my_ldata_ptr = malloc(sizeof(struct my_list_data) + 8, M_DEVBUF, M_NOWAIT); if (sc->my_ldata_ptr == NULL) { device_printf(dev, "no memory for list buffers!\n"); error = ENXIO; goto release_irq; } sc->my_ldata = (struct my_list_data *) sc->my_ldata_ptr; round = (uintptr_t)sc->my_ldata_ptr & 0xF; roundptr = sc->my_ldata_ptr; for (i = 0; i < 8; i++) { if (round % 8) { round++; roundptr++; } else break; } sc->my_ldata = (struct my_list_data *) roundptr; bzero(sc->my_ldata, sizeof(struct my_list_data)); ifp = sc->my_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto free_ldata; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = my_ioctl; ifp->if_start = my_start; ifp->if_init = my_init; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); if (sc->my_info->my_did == MTD803ID) sc->my_pinfo = my_phys; else { if (bootverbose) device_printf(dev, "probing for a PHY\n"); MY_LOCK(sc); for (i = MY_PHYADDR_MIN; i < MY_PHYADDR_MAX + 1; i++) { if (bootverbose) device_printf(dev, "checking address: %d\n", i); sc->my_phy_addr = i; phy_sts = my_phy_readreg(sc, PHY_BMSR); if ((phy_sts != 0) && (phy_sts != 0xffff)) break; else phy_sts = 0; } if (phy_sts) { phy_vid = my_phy_readreg(sc, PHY_VENID); phy_did = my_phy_readreg(sc, PHY_DEVID); if (bootverbose) { device_printf(dev, "found PHY at address %d, ", sc->my_phy_addr); printf("vendor id: %x device id: %x\n", phy_vid, phy_did); } p = my_phys; while (p->my_vid) { if (phy_vid == p->my_vid) { sc->my_pinfo = p; break; } p++; } if (sc->my_pinfo == NULL) sc->my_pinfo = &my_phys[PHY_UNKNOWN]; if (bootverbose) device_printf(dev, "PHY type: %s\n", sc->my_pinfo->my_name); } else { MY_UNLOCK(sc); device_printf(dev, "MII without any phy!\n"); error = ENXIO; goto free_if; } MY_UNLOCK(sc); } /* Do ifmedia setup. 
*/ ifmedia_init(&sc->ifmedia, 0, my_ifmedia_upd, my_ifmedia_sts); MY_LOCK(sc); my_getmode_mii(sc); my_autoneg_mii(sc, MY_FLAG_FORCEDELAY, 1); media = sc->ifmedia.ifm_media; my_stop(sc); MY_UNLOCK(sc); ifmedia_set(&sc->ifmedia, media); ether_ifattach(ifp, eaddr); error = bus_setup_intr(dev, sc->my_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, my_intr, sc, &sc->my_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); goto detach_if; } return (0); detach_if: ether_ifdetach(ifp); free_if: if_free(ifp); free_ldata: free(sc->my_ldata_ptr, M_DEVBUF); release_irq: bus_release_resource(dev, SYS_RES_IRQ, 0, sc->my_irq); release_io: bus_release_resource(dev, MY_RES, MY_RID, sc->my_res); destroy_mutex: mtx_destroy(&sc->my_mtx); return (error); } static int my_detach(device_t dev) { struct my_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->my_ifp; ether_ifdetach(ifp); MY_LOCK(sc); my_stop(sc); MY_UNLOCK(sc); bus_teardown_intr(dev, sc->my_irq, sc->my_intrhand); callout_drain(&sc->my_watchdog); callout_drain(&sc->my_autoneg_timer); if_free(ifp); free(sc->my_ldata_ptr, M_DEVBUF); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->my_irq); bus_release_resource(dev, MY_RES, MY_RID, sc->my_res); mtx_destroy(&sc->my_mtx); return (0); } /* * Initialize the transmit descriptors. */ static int my_list_tx_init(struct my_softc * sc) { struct my_chain_data *cd; struct my_list_data *ld; int i; MY_LOCK_ASSERT(sc); cd = &sc->my_cdata; ld = sc->my_ldata; for (i = 0; i < MY_TX_LIST_CNT; i++) { cd->my_tx_chain[i].my_ptr = &ld->my_tx_list[i]; if (i == (MY_TX_LIST_CNT - 1)) cd->my_tx_chain[i].my_nextdesc = &cd->my_tx_chain[0]; else cd->my_tx_chain[i].my_nextdesc = &cd->my_tx_chain[i + 1]; } cd->my_tx_free = &cd->my_tx_chain[0]; cd->my_tx_tail = cd->my_tx_head = NULL; return (0); } /* * Initialize the RX descriptors and allocate mbufs for them. Note that we * arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. */ static int my_list_rx_init(struct my_softc * sc) { struct my_chain_data *cd; struct my_list_data *ld; int i; MY_LOCK_ASSERT(sc); cd = &sc->my_cdata; ld = sc->my_ldata; for (i = 0; i < MY_RX_LIST_CNT; i++) { cd->my_rx_chain[i].my_ptr = (struct my_desc *) & ld->my_rx_list[i]; if (my_newbuf(sc, &cd->my_rx_chain[i]) == ENOBUFS) { MY_UNLOCK(sc); return (ENOBUFS); } if (i == (MY_RX_LIST_CNT - 1)) { cd->my_rx_chain[i].my_nextdesc = &cd->my_rx_chain[0]; ld->my_rx_list[i].my_next = vtophys(&ld->my_rx_list[0]); } else { cd->my_rx_chain[i].my_nextdesc = &cd->my_rx_chain[i + 1]; ld->my_rx_list[i].my_next = vtophys(&ld->my_rx_list[i + 1]); } } cd->my_rx_head = &cd->my_rx_chain[0]; return (0); } /* * Initialize an RX descriptor and attach an MBUF cluster. */ static int my_newbuf(struct my_softc * sc, struct my_chain_onefrag * c) { struct mbuf *m_new = NULL; MY_LOCK_ASSERT(sc); MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { device_printf(sc->my_dev, "no memory for rx list -- packet dropped!\n"); return (ENOBUFS); } if (!(MCLGET(m_new, M_NOWAIT))) { device_printf(sc->my_dev, "no memory for rx list -- packet dropped!\n"); m_freem(m_new); return (ENOBUFS); } c->my_mbuf = m_new; c->my_ptr->my_data = vtophys(mtod(m_new, caddr_t)); c->my_ptr->my_ctl = (MCLBYTES - 1) << MY_RBSShift; c->my_ptr->my_status = MY_OWNByNIC; return (0); } /* * A frame has been uploaded: pass the resulting mbuf chain up to the higher * level protocols. 
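* The ring is walked until a descriptor still owned by the NIC (MY_OWNByNIC) is reached. Frames shorter than MINCLSIZE are copied out with m_devget(); larger frames hand their mbuf up directly and my_newbuf() attaches a fresh cluster to the recycled descriptor.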
*/ static void my_rxeof(struct my_softc * sc) { struct ether_header *eh; struct mbuf *m; struct ifnet *ifp; struct my_chain_onefrag *cur_rx; int total_len = 0; u_int32_t rxstat; MY_LOCK_ASSERT(sc); ifp = sc->my_ifp; while (!((rxstat = sc->my_cdata.my_rx_head->my_ptr->my_status) & MY_OWNByNIC)) { cur_rx = sc->my_cdata.my_rx_head; sc->my_cdata.my_rx_head = cur_rx->my_nextdesc; if (rxstat & MY_ES) { /* error summary: give up this rx pkt */ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->my_ptr->my_status = MY_OWNByNIC; continue; } /* No errors; receive the packet. */ total_len = (rxstat & MY_FLNGMASK) >> MY_FLNGShift; total_len -= ETHER_CRC_LEN; if (total_len < MINCLSIZE) { m = m_devget(mtod(cur_rx->my_mbuf, char *), total_len, 0, ifp, NULL); cur_rx->my_ptr->my_status = MY_OWNByNIC; if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } } else { m = cur_rx->my_mbuf; /* * Try to conjure up a new mbuf cluster. If that * fails, it means we have an out of memory condition * and should leave the buffer in place and continue. * This will result in a lost packet, but there's * little else we can do in this situation. */ if (my_newbuf(sc, cur_rx) == ENOBUFS) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->my_ptr->my_status = MY_OWNByNIC; continue; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = total_len; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); eh = mtod(m, struct ether_header *); #if NBPFILTER > 0 /* * Handle BPF listeners. Let the BPF user see the packet, but * don't pass it up to the ether_input() layer unless it's a * broadcast packet, multicast packet, matches our ethernet * address or the interface is in promiscuous mode. */ if (bpf_peers_present(ifp->if_bpf)) { bpf_mtap(ifp->if_bpf, m); if (ifp->if_flags & IFF_PROMISC && (bcmp(eh->ether_dhost, IF_LLADDR(sc->my_ifp), ETHER_ADDR_LEN) && (eh->ether_dhost[0] & 1) == 0)) { m_freem(m); continue; } } #endif MY_UNLOCK(sc); (*ifp->if_input)(ifp, m); MY_LOCK(sc); } return; } /* * A frame was downloaded to the chip. It's safe for us to clean up the list * buffers. */ static void my_txeof(struct my_softc * sc) { struct my_chain *cur_tx; struct ifnet *ifp; MY_LOCK_ASSERT(sc); ifp = sc->my_ifp; /* Clear the timeout timer. */ sc->my_timer = 0; if (sc->my_cdata.my_tx_head == NULL) { return; } /* * Go through our tx list and free mbufs for those frames that have * been transmitted. */ while (sc->my_cdata.my_tx_head->my_mbuf != NULL) { u_int32_t txstat; cur_tx = sc->my_cdata.my_tx_head; txstat = MY_TXSTATUS(cur_tx); if ((txstat & MY_OWNByNIC) || txstat == MY_UNSENT) break; if (!(CSR_READ_4(sc, MY_TCRRCR) & MY_Enhanced)) { if (txstat & MY_TXERR) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (txstat & MY_EC) /* excessive collision */ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); if (txstat & MY_LC) /* late collision */ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); } if_inc_counter(ifp, IFCOUNTER_COLLISIONS, (txstat & MY_NCRMASK) >> MY_NCRShift); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); m_freem(cur_tx->my_mbuf); cur_tx->my_mbuf = NULL; if (sc->my_cdata.my_tx_head == sc->my_cdata.my_tx_tail) { sc->my_cdata.my_tx_head = NULL; sc->my_cdata.my_tx_tail = NULL; break; } sc->my_cdata.my_tx_head = cur_tx->my_nextdesc; } if (CSR_READ_4(sc, MY_TCRRCR) & MY_Enhanced) { if_inc_counter(ifp, IFCOUNTER_COLLISIONS, (CSR_READ_4(sc, MY_TSR) & MY_NCRMask)); } return; } /* * TX 'end of channel' interrupt handler. 
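* If descriptors are still queued but were never handed to the chip (MY_TXOWN == MY_UNSENT), ownership of the head descriptor is flipped to MY_OWNByNIC and a transmit poll demand is written to MY_TXPDR to restart the channel.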
*/ static void my_txeoc(struct my_softc * sc) { struct ifnet *ifp; MY_LOCK_ASSERT(sc); ifp = sc->my_ifp; sc->my_timer = 0; if (sc->my_cdata.my_tx_head == NULL) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->my_cdata.my_tx_tail = NULL; if (sc->my_want_auto) my_autoneg_mii(sc, MY_FLAG_SCHEDDELAY, 1); } else { if (MY_TXOWN(sc->my_cdata.my_tx_head) == MY_UNSENT) { MY_TXOWN(sc->my_cdata.my_tx_head) = MY_OWNByNIC; sc->my_timer = 5; CSR_WRITE_4(sc, MY_TXPDR, 0xFFFFFFFF); } } return; } static void my_intr(void *arg) { struct my_softc *sc; struct ifnet *ifp; u_int32_t status; sc = arg; MY_LOCK(sc); ifp = sc->my_ifp; if (!(ifp->if_flags & IFF_UP)) { MY_UNLOCK(sc); return; } /* Disable interrupts. */ CSR_WRITE_4(sc, MY_IMR, 0x00000000); for (;;) { status = CSR_READ_4(sc, MY_ISR); status &= MY_INTRS; if (status) CSR_WRITE_4(sc, MY_ISR, status); else break; if (status & MY_RI) /* receive interrupt */ my_rxeof(sc); if ((status & MY_RBU) || (status & MY_RxErr)) { /* rx buffer unavailable or rx error */ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); #ifdef foo my_stop(sc); my_reset(sc); my_init_locked(sc); #endif } if (status & MY_TI) /* tx interrupt */ my_txeof(sc); if (status & MY_ETI) /* tx early interrupt */ my_txeof(sc); if (status & MY_TBU) /* tx buffer unavailable */ my_txeoc(sc); #if 0 /* 90/1/18 delete */ if (status & MY_FBE) { my_reset(sc); my_init_locked(sc); } #endif } /* Re-enable interrupts. */ CSR_WRITE_4(sc, MY_IMR, MY_INTRS); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) my_start_locked(ifp); MY_UNLOCK(sc); return; } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int my_encap(struct my_softc * sc, struct my_chain * c, struct mbuf * m_head) { struct my_desc *f = NULL; int total_len; struct mbuf *m, *m_new = NULL; MY_LOCK_ASSERT(sc); /* calculate the total tx pkt length */ total_len = 0; for (m = m_head; m != NULL; m = m->m_next) total_len += m->m_len; /* * Start packing the mbufs in this chain into the fragment pointers. * Stop when we run out of fragments or hit the end of the mbuf * chain. */ m = m_head; MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { device_printf(sc->my_dev, "no memory for tx list"); return (1); } if (m_head->m_pkthdr.len > MHLEN) { if (!(MCLGET(m_new, M_NOWAIT))) { m_freem(m_new); device_printf(sc->my_dev, "no memory for tx list"); return (1); } } m_copydata(m_head, 0, m_head->m_pkthdr.len, mtod(m_new, caddr_t)); m_new->m_pkthdr.len = m_new->m_len = m_head->m_pkthdr.len; m_freem(m_head); m_head = m_new; f = &c->my_ptr->my_frag[0]; f->my_status = 0; f->my_data = vtophys(mtod(m_new, caddr_t)); total_len = m_new->m_len; f->my_ctl = MY_TXFD | MY_TXLD | MY_CRCEnable | MY_PADEnable; f->my_ctl |= total_len << MY_PKTShift; /* pkt size */ f->my_ctl |= total_len; /* buffer size */ /* 89/12/29 add, for mtd891 *//* [ 89? ] */ if (sc->my_info->my_did == MTD891ID) f->my_ctl |= MY_ETIControl | MY_RetryTxLC; c->my_mbuf = m_head; c->my_lastdesc = 0; MY_TXNEXT(c) = vtophys(&c->my_nextdesc->my_ptr->my_frag[0]); return (0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. 
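* Note that my_encap() above coalesces each outgoing chain into a single fresh mbuf (cluster) with m_copydata() and points the lone fragment descriptor at vtophys() of that copy, so exactly one fragment is used per packet.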
*/ static void my_start(struct ifnet * ifp) { struct my_softc *sc; sc = ifp->if_softc; MY_LOCK(sc); my_start_locked(ifp); MY_UNLOCK(sc); } static void my_start_locked(struct ifnet * ifp) { struct my_softc *sc; struct mbuf *m_head = NULL; struct my_chain *cur_tx = NULL, *start_tx; sc = ifp->if_softc; MY_LOCK_ASSERT(sc); if (sc->my_autoneg) { sc->my_tx_pend = 1; return; } /* * Check for an available queue slot. If there are none, punt. */ if (sc->my_cdata.my_tx_free->my_mbuf != NULL) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } start_tx = sc->my_cdata.my_tx_free; while (sc->my_cdata.my_tx_free->my_mbuf == NULL) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* Pick a descriptor off the free list. */ cur_tx = sc->my_cdata.my_tx_free; sc->my_cdata.my_tx_free = cur_tx->my_nextdesc; /* Pack the data into the descriptor. */ my_encap(sc, cur_tx, m_head); if (cur_tx != start_tx) MY_TXOWN(cur_tx) = MY_OWNByNIC; #if NBPFILTER > 0 /* * If there's a BPF listener, bounce a copy of this frame to * him. */ BPF_MTAP(ifp, cur_tx->my_mbuf); #endif } /* * If there are no packets queued, bail. */ if (cur_tx == NULL) { return; } /* * Place the request for the upload interrupt in the last descriptor * in the chain. This way, if we're chaining several packets at once, * we'll only get an interrupt once for the whole chain rather than * once for each packet. */ MY_TXCTL(cur_tx) |= MY_TXIC; cur_tx->my_ptr->my_frag[0].my_ctl |= MY_TXIC; sc->my_cdata.my_tx_tail = cur_tx; if (sc->my_cdata.my_tx_head == NULL) sc->my_cdata.my_tx_head = start_tx; MY_TXOWN(start_tx) = MY_OWNByNIC; CSR_WRITE_4(sc, MY_TXPDR, 0xFFFFFFFF); /* tx polling demand */ /* * Set a timeout in case the chip goes out to lunch. */ sc->my_timer = 5; return; } static void my_init(void *xsc) { struct my_softc *sc = xsc; MY_LOCK(sc); my_init_locked(sc); MY_UNLOCK(sc); } static void my_init_locked(struct my_softc *sc) { struct ifnet *ifp = sc->my_ifp; u_int16_t phy_bmcr = 0; MY_LOCK_ASSERT(sc); if (sc->my_autoneg) { return; } if (sc->my_pinfo != NULL) phy_bmcr = my_phy_readreg(sc, PHY_BMCR); /* * Cancel pending I/O and free all RX/TX buffers. */ my_stop(sc); my_reset(sc); /* * Set cache alignment and burst length. */ #if 0 /* 89/9/1 modify, */ CSR_WRITE_4(sc, MY_BCR, MY_RPBLE512); CSR_WRITE_4(sc, MY_TCRRCR, MY_TFTSF); #endif CSR_WRITE_4(sc, MY_BCR, MY_PBL8); CSR_WRITE_4(sc, MY_TCRRCR, MY_TFTSF | MY_RBLEN | MY_RPBLE512); /* * 89/12/29 add, for mtd891, */ if (sc->my_info->my_did == MTD891ID) { MY_SETBIT(sc, MY_BCR, MY_PROG); MY_SETBIT(sc, MY_TCRRCR, MY_Enhanced); } my_setcfg(sc, phy_bmcr); /* Init circular RX list. */ if (my_list_rx_init(sc) == ENOBUFS) { device_printf(sc->my_dev, "init failed: no memory for rx buffers\n"); my_stop(sc); return; } /* Init TX descriptors. */ my_list_tx_init(sc); /* If we want promiscuous mode, set the allframes bit. */ if (ifp->if_flags & IFF_PROMISC) MY_SETBIT(sc, MY_TCRRCR, MY_PROM); else MY_CLRBIT(sc, MY_TCRRCR, MY_PROM); /* * Set capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) MY_SETBIT(sc, MY_TCRRCR, MY_AB); else MY_CLRBIT(sc, MY_TCRRCR, MY_AB); /* * Program the multicast filter, if necessary. */ my_setmulti(sc); /* * Load the address of the RX list. */ MY_CLRBIT(sc, MY_TCRRCR, MY_RE); CSR_WRITE_4(sc, MY_RXLBA, vtophys(&sc->my_ldata->my_rx_list[0])); /* * Enable interrupts. */ CSR_WRITE_4(sc, MY_IMR, MY_INTRS); CSR_WRITE_4(sc, MY_ISR, 0xFFFFFFFF); /* Enable receiver and transmitter. 
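* TE is kept clear until the TX list base address (MY_TXLBA) has been programmed, and only then raised.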
*/ MY_SETBIT(sc, MY_TCRRCR, MY_RE); MY_CLRBIT(sc, MY_TCRRCR, MY_TE); CSR_WRITE_4(sc, MY_TXLBA, vtophys(&sc->my_ldata->my_tx_list[0])); MY_SETBIT(sc, MY_TCRRCR, MY_TE); /* Restore state of BMCR */ if (sc->my_pinfo != NULL) my_phy_writereg(sc, PHY_BMCR, phy_bmcr); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->my_watchdog, hz, my_watchdog, sc); return; } /* * Set media options. */ static int my_ifmedia_upd(struct ifnet * ifp) { struct my_softc *sc; struct ifmedia *ifm; sc = ifp->if_softc; MY_LOCK(sc); ifm = &sc->ifmedia; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) { MY_UNLOCK(sc); return (EINVAL); } if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) my_autoneg_mii(sc, MY_FLAG_SCHEDDELAY, 1); else my_setmode_mii(sc, ifm->ifm_media); MY_UNLOCK(sc); return (0); } /* * Report current media status. */ static void my_ifmedia_sts(struct ifnet * ifp, struct ifmediareq * ifmr) { struct my_softc *sc; u_int16_t advert = 0, ability = 0; sc = ifp->if_softc; MY_LOCK(sc); ifmr->ifm_active = IFM_ETHER; if (!(my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_AUTONEGENBL)) { #if 0 /* this version did not support 1000M, */ if (my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_1000) ifmr->ifm_active = IFM_ETHER | IFM_1000TX; #endif if (my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_SPEEDSEL) ifmr->ifm_active = IFM_ETHER | IFM_100_TX; else ifmr->ifm_active = IFM_ETHER | IFM_10_T; if (my_phy_readreg(sc, PHY_BMCR) & PHY_BMCR_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; MY_UNLOCK(sc); return; } ability = my_phy_readreg(sc, PHY_LPAR); advert = my_phy_readreg(sc, PHY_ANAR); #if 0 /* this version did not support 1000M, */ if (sc->my_pinfo->my_vid == MarvellPHYID0) { ability2 = my_phy_readreg(sc, PHY_1000SR); if (ability2 & PHY_1000SR_1000BTXFULL) { advert = 0; ability = 0; ifmr->ifm_active = IFM_ETHER|IFM_1000_T|IFM_FDX; } else if (ability2 & PHY_1000SR_1000BTXHALF) { advert = 0; ability = 0; ifmr->ifm_active = IFM_ETHER|IFM_1000_T|IFM_HDX; } } #endif if (advert & PHY_ANAR_100BT4 && ability & PHY_ANAR_100BT4) ifmr->ifm_active = IFM_ETHER | IFM_100_T4; else if (advert & PHY_ANAR_100BTXFULL && ability & PHY_ANAR_100BTXFULL) ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX; else if (advert & PHY_ANAR_100BTXHALF && ability & PHY_ANAR_100BTXHALF) ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_HDX; else if (advert & PHY_ANAR_10BTFULL && ability & PHY_ANAR_10BTFULL) ifmr->ifm_active = IFM_ETHER | IFM_10_T | IFM_FDX; else if (advert & PHY_ANAR_10BTHALF && ability & PHY_ANAR_10BTHALF) ifmr->ifm_active = IFM_ETHER | IFM_10_T | IFM_HDX; MY_UNLOCK(sc); return; } static int my_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct my_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int error; switch (command) { case SIOCSIFFLAGS: MY_LOCK(sc); if (ifp->if_flags & IFF_UP) my_init_locked(sc); else if (ifp->if_drv_flags & IFF_DRV_RUNNING) my_stop(sc); MY_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: MY_LOCK(sc); my_setmulti(sc); MY_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void my_watchdog(void *arg) { struct my_softc *sc; struct ifnet *ifp; sc = arg; MY_LOCK_ASSERT(sc); callout_reset(&sc->my_watchdog, hz, my_watchdog, sc); if (sc->my_timer == 0 || --sc->my_timer > 0) return; ifp = sc->my_ifp; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if_printf(ifp, "watchdog
timeout\n"); if (!(my_phy_readreg(sc, PHY_BMSR) & PHY_BMSR_LINKSTAT)) if_printf(ifp, "no carrier - transceiver cable problem?\n"); my_stop(sc); my_reset(sc); my_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) my_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the RX and TX lists. */ static void my_stop(struct my_softc * sc) { - register int i; + int i; struct ifnet *ifp; MY_LOCK_ASSERT(sc); ifp = sc->my_ifp; callout_stop(&sc->my_autoneg_timer); callout_stop(&sc->my_watchdog); MY_CLRBIT(sc, MY_TCRRCR, (MY_RE | MY_TE)); CSR_WRITE_4(sc, MY_IMR, 0x00000000); CSR_WRITE_4(sc, MY_TXLBA, 0x00000000); CSR_WRITE_4(sc, MY_RXLBA, 0x00000000); /* * Free data in the RX lists. */ for (i = 0; i < MY_RX_LIST_CNT; i++) { if (sc->my_cdata.my_rx_chain[i].my_mbuf != NULL) { m_freem(sc->my_cdata.my_rx_chain[i].my_mbuf); sc->my_cdata.my_rx_chain[i].my_mbuf = NULL; } } bzero((char *)&sc->my_ldata->my_rx_list, sizeof(sc->my_ldata->my_rx_list)); /* * Free the TX list buffers. */ for (i = 0; i < MY_TX_LIST_CNT; i++) { if (sc->my_cdata.my_tx_chain[i].my_mbuf != NULL) { m_freem(sc->my_cdata.my_tx_chain[i].my_mbuf); sc->my_cdata.my_tx_chain[i].my_mbuf = NULL; } } bzero((char *)&sc->my_ldata->my_tx_list, sizeof(sc->my_ldata->my_tx_list)); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); return; } /* * Stop all chip I/O so that the kernel's probe routines don't get confused * by errant DMAs when rebooting. */ static int my_shutdown(device_t dev) { struct my_softc *sc; sc = device_get_softc(dev); MY_LOCK(sc); my_stop(sc); MY_UNLOCK(sc); return 0; } Index: stable/11/sys/dev/pcn/if_pcn.c =================================================================== --- stable/11/sys/dev/pcn/if_pcn.c (revision 331642) +++ stable/11/sys/dev/pcn/if_pcn.c (revision 331643) @@ -1,1520 +1,1520 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2000 Berkeley Software Design, Inc. * Copyright (c) 1997, 1998, 1999, 2000 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * AMD Am79c972 fast ethernet PCI NIC driver. Datasheets are available * from http://www.amd.com. * * The AMD PCnet/PCI controllers are more advanced and functional * versions of the venerable 7990 LANCE. The PCnet/PCI chips retain * backwards compatibility with the LANCE and thus can be made * to work with older LANCE drivers. This is in fact how the * PCnet/PCI chips were supported in FreeBSD originally. The trouble * is that the PCnet/PCI devices offer several performance enhancements * which can't be exploited in LANCE compatibility mode. Chief among * these enhancements is the ability to perform PCI DMA operations * using 32-bit addressing (which eliminates the need for ISA * bounce-buffering), and special receive buffer alignment (which * allows the receive handler to pass packets to the upper protocol * layers without copying on both the x86 and alpha platforms). */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for vtophys */ #include /* for vtophys */ #include #include #include #include #include #include #include #include #define PCN_USEIOSPACE #include MODULE_DEPEND(pcn, pci, 1, 1, 1); MODULE_DEPEND(pcn, ether, 1, 1, 1); MODULE_DEPEND(pcn, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* * Various supported device vendors/types and their names. 
*/ static const struct pcn_type pcn_devs[] = { { PCN_VENDORID, PCN_DEVICEID_PCNET, "AMD PCnet/PCI 10/100BaseTX" }, { PCN_VENDORID, PCN_DEVICEID_HOME, "AMD PCnet/Home HomePNA" }, { 0, 0, NULL } }; static const struct pcn_chipid { u_int32_t id; const char *name; } pcn_chipid[] = { { Am79C971, "Am79C971" }, { Am79C972, "Am79C972" }, { Am79C973, "Am79C973" }, { Am79C978, "Am79C978" }, { Am79C975, "Am79C975" }, { Am79C976, "Am79C976" }, { 0, NULL }, }; static const char *pcn_chipid_name(u_int32_t); static u_int32_t pcn_chip_id(device_t); static const struct pcn_type *pcn_match(u_int16_t, u_int16_t); static u_int32_t pcn_csr_read(struct pcn_softc *, int); static u_int16_t pcn_csr_read16(struct pcn_softc *, int); static u_int16_t pcn_bcr_read16(struct pcn_softc *, int); static void pcn_csr_write(struct pcn_softc *, int, int); static u_int32_t pcn_bcr_read(struct pcn_softc *, int); static void pcn_bcr_write(struct pcn_softc *, int, int); static int pcn_probe(device_t); static int pcn_attach(device_t); static int pcn_detach(device_t); static int pcn_newbuf(struct pcn_softc *, int, struct mbuf *); static int pcn_encap(struct pcn_softc *, struct mbuf *, u_int32_t *); static void pcn_rxeof(struct pcn_softc *); static void pcn_txeof(struct pcn_softc *); static void pcn_intr(void *); static void pcn_tick(void *); static void pcn_start(struct ifnet *); static void pcn_start_locked(struct ifnet *); static int pcn_ioctl(struct ifnet *, u_long, caddr_t); static void pcn_init(void *); static void pcn_init_locked(struct pcn_softc *); static void pcn_stop(struct pcn_softc *); static void pcn_watchdog(struct pcn_softc *); static int pcn_shutdown(device_t); static int pcn_ifmedia_upd(struct ifnet *); static void pcn_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int pcn_miibus_readreg(device_t, int, int); static int pcn_miibus_writereg(device_t, int, int, int); static void pcn_miibus_statchg(device_t); static void pcn_setfilt(struct ifnet *); static void pcn_setmulti(struct pcn_softc *); static void pcn_reset(struct pcn_softc *); static int pcn_list_rx_init(struct pcn_softc *); static int pcn_list_tx_init(struct pcn_softc *); #ifdef PCN_USEIOSPACE #define PCN_RES SYS_RES_IOPORT #define PCN_RID PCN_PCI_LOIO #else #define PCN_RES SYS_RES_MEMORY #define PCN_RID PCN_PCI_LOMEM #endif static device_method_t pcn_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcn_probe), DEVMETHOD(device_attach, pcn_attach), DEVMETHOD(device_detach, pcn_detach), DEVMETHOD(device_shutdown, pcn_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, pcn_miibus_readreg), DEVMETHOD(miibus_writereg, pcn_miibus_writereg), DEVMETHOD(miibus_statchg, pcn_miibus_statchg), DEVMETHOD_END }; static driver_t pcn_driver = { "pcn", pcn_methods, sizeof(struct pcn_softc) }; static devclass_t pcn_devclass; DRIVER_MODULE(pcn, pci, pcn_driver, pcn_devclass, 0, 0); DRIVER_MODULE(miibus, pcn, miibus_driver, miibus_devclass, 0, 0); #define PCN_CSR_SETBIT(sc, reg, x) \ pcn_csr_write(sc, reg, pcn_csr_read(sc, reg) | (x)) #define PCN_CSR_CLRBIT(sc, reg, x) \ pcn_csr_write(sc, reg, pcn_csr_read(sc, reg) & ~(x)) #define PCN_BCR_SETBIT(sc, reg, x) \ pcn_bcr_write(sc, reg, pcn_bcr_read(sc, reg) | (x)) #define PCN_BCR_CLRBIT(sc, reg, x) \ pcn_bcr_write(sc, reg, pcn_bcr_read(sc, reg) & ~(x)) static u_int32_t pcn_csr_read(sc, reg) struct pcn_softc *sc; int reg; { CSR_WRITE_4(sc, PCN_IO32_RAP, reg); return(CSR_READ_4(sc, PCN_IO32_RDP)); } static u_int16_t pcn_csr_read16(sc, reg) struct pcn_softc *sc; int reg; { CSR_WRITE_2(sc, PCN_IO16_RAP, reg); 
return(CSR_READ_2(sc, PCN_IO16_RDP)); } static void pcn_csr_write(sc, reg, val) struct pcn_softc *sc; int reg; int val; { CSR_WRITE_4(sc, PCN_IO32_RAP, reg); CSR_WRITE_4(sc, PCN_IO32_RDP, val); return; } static u_int32_t pcn_bcr_read(sc, reg) struct pcn_softc *sc; int reg; { CSR_WRITE_4(sc, PCN_IO32_RAP, reg); return(CSR_READ_4(sc, PCN_IO32_BDP)); } static u_int16_t pcn_bcr_read16(sc, reg) struct pcn_softc *sc; int reg; { CSR_WRITE_2(sc, PCN_IO16_RAP, reg); return(CSR_READ_2(sc, PCN_IO16_BDP)); } static void pcn_bcr_write(sc, reg, val) struct pcn_softc *sc; int reg; int val; { CSR_WRITE_4(sc, PCN_IO32_RAP, reg); CSR_WRITE_4(sc, PCN_IO32_BDP, val); return; } static int pcn_miibus_readreg(dev, phy, reg) device_t dev; int phy, reg; { struct pcn_softc *sc; int val; sc = device_get_softc(dev); /* * At least the Am79C971 with DP83840A wedges when isolating the * external PHY, so we can't allow multiple external PHYs. * There are cards that use Am79C971 with both the internal * and an external PHY though. * For internal PHYs it doesn't really matter whether we can * isolate the remaining internal and the external ones in * the PHY drivers as the internal PHYs have to be enabled * individually in PCN_BCR_PHYSEL, PCN_CSR_MODE, etc. * With Am79C97{3,5,8} we don't support switching between * the internal and external PHYs, yet, so we can't allow * multiple PHYs with these either. * Am79C97{2,6} actually only support external PHYs (non- * connectable internal ones respond at the usual addresses, * which doesn't hurt if we let them show up on the bus) and * isolating them works. */ if (((sc->pcn_type == Am79C971 && phy != PCN_PHYAD_10BT) || sc->pcn_type == Am79C973 || sc->pcn_type == Am79C975 || sc->pcn_type == Am79C978) && sc->pcn_extphyaddr != -1 && phy != sc->pcn_extphyaddr) return(0); pcn_bcr_write(sc, PCN_BCR_MIIADDR, reg | (phy << 5)); val = pcn_bcr_read(sc, PCN_BCR_MIIDATA) & 0xFFFF; if (val == 0xFFFF) return(0); if (((sc->pcn_type == Am79C971 && phy != PCN_PHYAD_10BT) || sc->pcn_type == Am79C973 || sc->pcn_type == Am79C975 || sc->pcn_type == Am79C978) && sc->pcn_extphyaddr == -1) sc->pcn_extphyaddr = phy; return(val); } static int pcn_miibus_writereg(dev, phy, reg, data) device_t dev; int phy, reg, data; { struct pcn_softc *sc; sc = device_get_softc(dev); pcn_bcr_write(sc, PCN_BCR_MIIADDR, reg | (phy << 5)); pcn_bcr_write(sc, PCN_BCR_MIIDATA, data); return(0); } static void pcn_miibus_statchg(dev) device_t dev; { struct pcn_softc *sc; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->pcn_miibus); if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) { PCN_BCR_SETBIT(sc, PCN_BCR_DUPLEX, PCN_DUPLEX_FDEN); } else { PCN_BCR_CLRBIT(sc, PCN_BCR_DUPLEX, PCN_DUPLEX_FDEN); } return; } static void pcn_setmulti(sc) struct pcn_softc *sc; { struct ifnet *ifp; struct ifmultiaddr *ifma; u_int32_t h, i; u_int16_t hashes[4] = { 0, 0, 0, 0 }; ifp = sc->pcn_ifp; PCN_CSR_SETBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { for (i = 0; i < 4; i++) pcn_csr_write(sc, PCN_CSR_MAR0 + i, 0xFFFF); PCN_CSR_CLRBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); return; } /* first, zot all the existing hash bits */ for (i = 0; i < 4; i++) pcn_csr_write(sc, PCN_CSR_MAR0 + i, 0); /* now program new ones */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ether_crc32_le(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; hashes[h >> 4] |= 1 << (h & 0xF); }
if_maddr_runlock(ifp); for (i = 0; i < 4; i++) pcn_csr_write(sc, PCN_CSR_MAR0 + i, hashes[i]); PCN_CSR_CLRBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); return; } static void pcn_reset(sc) struct pcn_softc *sc; { /* * Issue a reset by reading from the RESET register. * Note that we don't know if the chip is operating in * 16-bit or 32-bit mode at this point, so we attempt * to reset the chip both ways. If one fails, the other * will succeed. */ CSR_READ_2(sc, PCN_IO16_RESET); CSR_READ_4(sc, PCN_IO32_RESET); /* Wait a little while for the chip to get its brains in order. */ DELAY(1000); /* Select 32-bit (DWIO) mode */ CSR_WRITE_4(sc, PCN_IO32_RDP, 0); /* Select software style 3. */ pcn_bcr_write(sc, PCN_BCR_SSTYLE, PCN_SWSTYLE_PCNETPCI_BURST); return; } static const char * pcn_chipid_name(u_int32_t id) { const struct pcn_chipid *p; p = pcn_chipid; while (p->name) { if (id == p->id) return (p->name); p++; } return ("Unknown"); } static u_int32_t pcn_chip_id(device_t dev) { struct pcn_softc *sc; u_int32_t chip_id; sc = device_get_softc(dev); /* * Note: we can *NOT* put the chip into * 32-bit mode yet. The le(4) driver will only * work in 16-bit mode, and once the chip * goes into 32-bit mode, the only way to * get it out again is with a hardware reset. * So if pcn_probe() is called before the * le(4) driver's probe routine, the chip will * be locked into 32-bit operation and the * le(4) driver will be unable to attach to it. * Note II: if the chip happens to already * be in 32-bit mode, we still need to check * the chip ID, but first we have to detect * 32-bit mode using only 16-bit operations. * The safest way to do this is to read the * PCI subsystem ID from BCR23/24 and compare * that with the value read from PCI config * space. */ chip_id = pcn_bcr_read16(sc, PCN_BCR_PCISUBSYSID); chip_id <<= 16; chip_id |= pcn_bcr_read16(sc, PCN_BCR_PCISUBVENID); /* * Note III: the test for 0x10001000 is a hack to * pacify VMware, whose pseudo-PCnet interface is * broken. Reading the subsystem register from PCI * config space yields 0x00000000 while reading the * same value from I/O space yields 0x10001000. It's * not supposed to be that way. */ if (chip_id == pci_read_config(dev, PCIR_SUBVEND_0, 4) || chip_id == 0x10001000) { /* We're in 16-bit mode. */ chip_id = pcn_csr_read16(sc, PCN_CSR_CHIPID1); chip_id <<= 16; chip_id |= pcn_csr_read16(sc, PCN_CSR_CHIPID0); } else { /* We're in 32-bit mode. */ chip_id = pcn_csr_read(sc, PCN_CSR_CHIPID1); chip_id <<= 16; chip_id |= pcn_csr_read(sc, PCN_CSR_CHIPID0); } return (chip_id); } static const struct pcn_type * pcn_match(u_int16_t vid, u_int16_t did) { const struct pcn_type *t; t = pcn_devs; while (t->pcn_name != NULL) { if ((vid == t->pcn_vid) && (did == t->pcn_did)) return (t); t++; } return (NULL); } /* * Probe for an AMD chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int pcn_probe(dev) device_t dev; { const struct pcn_type *t; struct pcn_softc *sc; int rid; u_int32_t chip_id; t = pcn_match(pci_get_vendor(dev), pci_get_device(dev)); if (t == NULL) return (ENXIO); sc = device_get_softc(dev); /* * Temporarily map the I/O space so we can read the chip ID register.
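 * * Editorial sketch (not part of this change): pcn_chip_id() above detects * the current access width without ever switching modes, roughly: * *	id = PCI subsystem ID words read from BCR24/BCR23 with 16-bit cycles; *	if (id == PCI config subsystem ID || id == 0x10001000) *		16-bit mode: read PCN_CSR_CHIPID1/0 via pcn_csr_read16() *	else *		32-bit mode: read PCN_CSR_CHIPID1/0 via pcn_csr_read() *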
*/ rid = PCN_RID; sc->pcn_res = bus_alloc_resource_any(dev, PCN_RES, &rid, RF_ACTIVE); if (sc->pcn_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); return(ENXIO); } sc->pcn_btag = rman_get_bustag(sc->pcn_res); sc->pcn_bhandle = rman_get_bushandle(sc->pcn_res); chip_id = pcn_chip_id(dev); bus_release_resource(dev, PCN_RES, PCN_RID, sc->pcn_res); switch((chip_id >> 12) & PART_MASK) { case Am79C971: case Am79C972: case Am79C973: case Am79C975: case Am79C976: case Am79C978: break; default: return(ENXIO); } device_set_desc(dev, t->pcn_name); return(BUS_PROBE_DEFAULT); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int pcn_attach(dev) device_t dev; { u_int32_t eaddr[2]; struct pcn_softc *sc; struct mii_data *mii; struct mii_softc *miisc; struct ifnet *ifp; int error = 0, rid; sc = device_get_softc(dev); /* Initialize our mutex. */ mtx_init(&sc->pcn_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); /* * Map control/status registers. */ pci_enable_busmaster(dev); /* Retrieve the chip ID */ sc->pcn_type = (pcn_chip_id(dev) >> 12) & PART_MASK; device_printf(dev, "Chip ID %04x (%s)\n", sc->pcn_type, pcn_chipid_name(sc->pcn_type)); rid = PCN_RID; sc->pcn_res = bus_alloc_resource_any(dev, PCN_RES, &rid, RF_ACTIVE); if (sc->pcn_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } sc->pcn_btag = rman_get_bustag(sc->pcn_res); sc->pcn_bhandle = rman_get_bushandle(sc->pcn_res); /* Allocate interrupt */ rid = 0; sc->pcn_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->pcn_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Reset the adapter. */ pcn_reset(sc); /* * Get station address from the EEPROM. */ eaddr[0] = CSR_READ_4(sc, PCN_IO32_APROM00); eaddr[1] = CSR_READ_4(sc, PCN_IO32_APROM01); callout_init_mtx(&sc->pcn_stat_callout, &sc->pcn_mtx, 0); sc->pcn_ldata = contigmalloc(sizeof(struct pcn_list_data), M_DEVBUF, M_NOWAIT, 0, 0xffffffff, PAGE_SIZE, 0); if (sc->pcn_ldata == NULL) { device_printf(dev, "no memory for list buffers!\n"); error = ENXIO; goto fail; } bzero(sc->pcn_ldata, sizeof(struct pcn_list_data)); ifp = sc->pcn_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = pcn_ioctl; ifp->if_start = pcn_start; ifp->if_init = pcn_init; ifp->if_snd.ifq_maxlen = PCN_TX_LIST_CNT - 1; /* * Do MII setup. * See the comment in pcn_miibus_readreg() for why we can't * universally pass MIIF_NOISOLATE here. */ sc->pcn_extphyaddr = -1; error = mii_attach(dev, &sc->pcn_miibus, ifp, pcn_ifmedia_upd, pcn_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Record the media instances of internal PHYs, which map the * built-in interfaces to the MII, so we can set the active * PHY/port based on the currently selected media. */ sc->pcn_inst_10bt = -1; mii = device_get_softc(sc->pcn_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) { switch (miisc->mii_phy) { case PCN_PHYAD_10BT: sc->pcn_inst_10bt = miisc->mii_inst; break; /* * XXX deal with the Am79C97{3,5} internal 100baseT * and the Am79C978 internal HomePNA PHYs. */ } } /* * Call MI attach routine. 
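 * * Editorial note (sketch, not part of this change): the descriptor lists * allocated above must be physically contiguous and reachable by the chip's * 32-bit DMA engine, which is why pcn_attach() uses * *	contigmalloc(sizeof(struct pcn_list_data), M_DEVBUF, M_NOWAIT, *	    0, 0xffffffff, PAGE_SIZE, 0); * * (low/high address bounds, alignment, boundary) and later hands the chip * vtophys() translations of these structures. *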
*/ ether_ifattach(ifp, (u_int8_t *) eaddr); /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(dev, sc->pcn_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, pcn_intr, sc, &sc->pcn_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); goto fail; } fail: if (error) pcn_detach(dev); return(error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int pcn_detach(dev) device_t dev; { struct pcn_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->pcn_ifp; KASSERT(mtx_initialized(&sc->pcn_mtx), ("pcn mutex not initialized")); /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { PCN_LOCK(sc); pcn_reset(sc); pcn_stop(sc); PCN_UNLOCK(sc); callout_drain(&sc->pcn_stat_callout); ether_ifdetach(ifp); } if (sc->pcn_miibus) device_delete_child(dev, sc->pcn_miibus); bus_generic_detach(dev); if (sc->pcn_intrhand) bus_teardown_intr(dev, sc->pcn_irq, sc->pcn_intrhand); if (sc->pcn_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->pcn_irq); if (sc->pcn_res) bus_release_resource(dev, PCN_RES, PCN_RID, sc->pcn_res); if (ifp) if_free(ifp); if (sc->pcn_ldata) { contigfree(sc->pcn_ldata, sizeof(struct pcn_list_data), M_DEVBUF); } mtx_destroy(&sc->pcn_mtx); return(0); } /* * Initialize the transmit descriptors. */ static int pcn_list_tx_init(sc) struct pcn_softc *sc; { struct pcn_list_data *ld; struct pcn_ring_data *cd; int i; cd = &sc->pcn_cdata; ld = sc->pcn_ldata; for (i = 0; i < PCN_TX_LIST_CNT; i++) { cd->pcn_tx_chain[i] = NULL; ld->pcn_tx_list[i].pcn_tbaddr = 0; ld->pcn_tx_list[i].pcn_txctl = 0; ld->pcn_tx_list[i].pcn_txstat = 0; } cd->pcn_tx_prod = cd->pcn_tx_cons = cd->pcn_tx_cnt = 0; return(0); } /* * Initialize the RX descriptors and allocate mbufs for them. */ static int pcn_list_rx_init(sc) struct pcn_softc *sc; { struct pcn_ring_data *cd; int i; cd = &sc->pcn_cdata; for (i = 0; i < PCN_RX_LIST_CNT; i++) { if (pcn_newbuf(sc, i, NULL) == ENOBUFS) return(ENOBUFS); } cd->pcn_rx_prod = 0; return(0); } /* * Initialize an RX descriptor and attach an MBUF cluster. */ static int pcn_newbuf(sc, idx, m) struct pcn_softc *sc; int idx; struct mbuf *m; { struct mbuf *m_new = NULL; struct pcn_rx_desc *c; c = &sc->pcn_ldata->pcn_rx_list[idx]; if (m == NULL) { MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) return(ENOBUFS); if (!(MCLGET(m_new, M_NOWAIT))) { m_freem(m_new); return(ENOBUFS); } m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; } else { m_new = m; m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; m_new->m_data = m_new->m_ext.ext_buf; } m_adj(m_new, ETHER_ALIGN); sc->pcn_cdata.pcn_rx_chain[idx] = m_new; c->pcn_rbaddr = vtophys(mtod(m_new, caddr_t)); c->pcn_bufsz = (~(PCN_RXLEN) + 1) & PCN_RXLEN_BUFSZ; c->pcn_bufsz |= PCN_RXLEN_MBO; c->pcn_rxstat = PCN_RXSTAT_STP|PCN_RXSTAT_ENP|PCN_RXSTAT_OWN; return(0); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. 
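 * * Editorial note (sketch, not part of this change): pcn_newbuf() above * encodes the buffer size the way the PCnet expects it in an RX descriptor, * as a two's-complement (negative) byte count plus the must-be-one bits: * *	c->pcn_bufsz = (~(PCN_RXLEN) + 1) & PCN_RXLEN_BUFSZ; *	c->pcn_bufsz |= PCN_RXLEN_MBO; *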
*/ static void pcn_rxeof(sc) struct pcn_softc *sc; { struct mbuf *m; struct ifnet *ifp; struct pcn_rx_desc *cur_rx; int i; PCN_LOCK_ASSERT(sc); ifp = sc->pcn_ifp; i = sc->pcn_cdata.pcn_rx_prod; while(PCN_OWN_RXDESC(&sc->pcn_ldata->pcn_rx_list[i])) { cur_rx = &sc->pcn_ldata->pcn_rx_list[i]; m = sc->pcn_cdata.pcn_rx_chain[i]; sc->pcn_cdata.pcn_rx_chain[i] = NULL; /* * If an error occurs, update stats, clear the * status word and leave the mbuf cluster in place: * it should simply get re-used next time this descriptor * comes up in the ring. */ if (cur_rx->pcn_rxstat & PCN_RXSTAT_ERR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); pcn_newbuf(sc, i, m); PCN_INC(i, PCN_RX_LIST_CNT); continue; } if (pcn_newbuf(sc, i, NULL)) { /* Ran out of mbufs; recycle this one. */ pcn_newbuf(sc, i, m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); PCN_INC(i, PCN_RX_LIST_CNT); continue; } PCN_INC(i, PCN_RX_LIST_CNT); /* No errors; receive the packet. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_len = m->m_pkthdr.len = cur_rx->pcn_rxlen - ETHER_CRC_LEN; m->m_pkthdr.rcvif = ifp; PCN_UNLOCK(sc); (*ifp->if_input)(ifp, m); PCN_LOCK(sc); } sc->pcn_cdata.pcn_rx_prod = i; return; } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void pcn_txeof(sc) struct pcn_softc *sc; { struct pcn_tx_desc *cur_tx = NULL; struct ifnet *ifp; u_int32_t idx; ifp = sc->pcn_ifp; /* * Go through our tx list and free mbufs for those * frames that have been transmitted. */ idx = sc->pcn_cdata.pcn_tx_cons; while (idx != sc->pcn_cdata.pcn_tx_prod) { cur_tx = &sc->pcn_ldata->pcn_tx_list[idx]; if (!PCN_OWN_TXDESC(cur_tx)) break; if (!(cur_tx->pcn_txctl & PCN_TXCTL_ENP)) { sc->pcn_cdata.pcn_tx_cnt--; PCN_INC(idx, PCN_TX_LIST_CNT); continue; } if (cur_tx->pcn_txctl & PCN_TXCTL_ERR) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (cur_tx->pcn_txstat & PCN_TXSTAT_EXDEF) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); if (cur_tx->pcn_txstat & PCN_TXSTAT_RTRY) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); } if_inc_counter(ifp, IFCOUNTER_COLLISIONS, cur_tx->pcn_txstat & PCN_TXSTAT_TRC); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (sc->pcn_cdata.pcn_tx_chain[idx] != NULL) { m_freem(sc->pcn_cdata.pcn_tx_chain[idx]); sc->pcn_cdata.pcn_tx_chain[idx] = NULL; } sc->pcn_cdata.pcn_tx_cnt--; PCN_INC(idx, PCN_TX_LIST_CNT); } if (idx != sc->pcn_cdata.pcn_tx_cons) { /* Some buffers have been freed. */ sc->pcn_cdata.pcn_tx_cons = idx; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->pcn_timer = (sc->pcn_cdata.pcn_tx_cnt == 0) ? 
0 : 5; return; } static void pcn_tick(xsc) void *xsc; { struct pcn_softc *sc; struct mii_data *mii; struct ifnet *ifp; sc = xsc; ifp = sc->pcn_ifp; PCN_LOCK_ASSERT(sc); mii = device_get_softc(sc->pcn_miibus); mii_tick(mii); /* link just died */ if (sc->pcn_link && !(mii->mii_media_status & IFM_ACTIVE)) sc->pcn_link = 0; /* link just came up, restart */ if (!sc->pcn_link && mii->mii_media_status & IFM_ACTIVE && IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) { sc->pcn_link++; if (ifp->if_snd.ifq_head != NULL) pcn_start_locked(ifp); } if (sc->pcn_timer > 0 && --sc->pcn_timer == 0) pcn_watchdog(sc); callout_reset(&sc->pcn_stat_callout, hz, pcn_tick, sc); return; } static void pcn_intr(arg) void *arg; { struct pcn_softc *sc; struct ifnet *ifp; u_int32_t status; sc = arg; ifp = sc->pcn_ifp; PCN_LOCK(sc); /* Suppress unwanted interrupts */ if (!(ifp->if_flags & IFF_UP)) { pcn_stop(sc); PCN_UNLOCK(sc); return; } CSR_WRITE_4(sc, PCN_IO32_RAP, PCN_CSR_CSR); while ((status = CSR_READ_4(sc, PCN_IO32_RDP)) & PCN_CSR_INTR) { CSR_WRITE_4(sc, PCN_IO32_RDP, status); if (status & PCN_CSR_RINT) pcn_rxeof(sc); if (status & PCN_CSR_TINT) pcn_txeof(sc); if (status & PCN_CSR_ERR) { pcn_init_locked(sc); break; } } if (ifp->if_snd.ifq_head != NULL) pcn_start_locked(ifp); PCN_UNLOCK(sc); return; } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int pcn_encap(sc, m_head, txidx) struct pcn_softc *sc; struct mbuf *m_head; u_int32_t *txidx; { struct pcn_tx_desc *f = NULL; struct mbuf *m; int frag, cur, cnt = 0; /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; cur = frag = *txidx; for (m = m_head; m != NULL; m = m->m_next) { if (m->m_len == 0) continue; if ((PCN_TX_LIST_CNT - (sc->pcn_cdata.pcn_tx_cnt + cnt)) < 2) return(ENOBUFS); f = &sc->pcn_ldata->pcn_tx_list[frag]; f->pcn_txctl = (~(m->m_len) + 1) & PCN_TXCTL_BUFSZ; f->pcn_txctl |= PCN_TXCTL_MBO; f->pcn_tbaddr = vtophys(mtod(m, vm_offset_t)); if (cnt == 0) f->pcn_txctl |= PCN_TXCTL_STP; else f->pcn_txctl |= PCN_TXCTL_OWN; cur = frag; PCN_INC(frag, PCN_TX_LIST_CNT); cnt++; } if (m != NULL) return(ENOBUFS); sc->pcn_cdata.pcn_tx_chain[cur] = m_head; sc->pcn_ldata->pcn_tx_list[cur].pcn_txctl |= PCN_TXCTL_ENP|PCN_TXCTL_ADD_FCS|PCN_TXCTL_MORE_LTINT; sc->pcn_ldata->pcn_tx_list[*txidx].pcn_txctl |= PCN_TXCTL_OWN; sc->pcn_cdata.pcn_tx_cnt += cnt; *txidx = frag; return(0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. */ static void pcn_start(ifp) struct ifnet *ifp; { struct pcn_softc *sc; sc = ifp->if_softc; PCN_LOCK(sc); pcn_start_locked(ifp); PCN_UNLOCK(sc); } static void pcn_start_locked(ifp) struct ifnet *ifp; { struct pcn_softc *sc; struct mbuf *m_head = NULL; u_int32_t idx; sc = ifp->if_softc; PCN_LOCK_ASSERT(sc); if (!sc->pcn_link) return; idx = sc->pcn_cdata.pcn_tx_prod; if (ifp->if_drv_flags & IFF_DRV_OACTIVE) return; while(sc->pcn_cdata.pcn_tx_chain[idx] == NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (pcn_encap(sc, m_head, &idx)) { IF_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* * If there's a BPF listener, bounce a copy of this frame * to him. 
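 * * Editorial note (sketch, not part of this change): pcn_encap() above sets * PCN_TXCTL_STP on the first descriptor of a chain but withholds its * PCN_TXCTL_OWN bit until every other fragment has been marked, flipping * it last: * *	sc->pcn_ldata->pcn_tx_list[*txidx].pcn_txctl |= PCN_TXCTL_OWN; * * so the chip can never start fetching a partially built chain. *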
*/ BPF_MTAP(ifp, m_head); } /* Transmit */ sc->pcn_cdata.pcn_tx_prod = idx; pcn_csr_write(sc, PCN_CSR_CSR, PCN_CSR_TX|PCN_CSR_INTEN); /* * Set a timeout in case the chip goes out to lunch. */ sc->pcn_timer = 5; return; } static void pcn_setfilt(ifp) struct ifnet *ifp; { struct pcn_softc *sc; sc = ifp->if_softc; /* If we want promiscuous mode, set the allframes bit. */ if (ifp->if_flags & IFF_PROMISC) { PCN_CSR_SETBIT(sc, PCN_CSR_MODE, PCN_MODE_PROMISC); } else { PCN_CSR_CLRBIT(sc, PCN_CSR_MODE, PCN_MODE_PROMISC); } /* Set the capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) { PCN_CSR_CLRBIT(sc, PCN_CSR_MODE, PCN_MODE_RXNOBROAD); } else { PCN_CSR_SETBIT(sc, PCN_CSR_MODE, PCN_MODE_RXNOBROAD); } return; } static void pcn_init(xsc) void *xsc; { struct pcn_softc *sc = xsc; PCN_LOCK(sc); pcn_init_locked(sc); PCN_UNLOCK(sc); } static void pcn_init_locked(sc) struct pcn_softc *sc; { struct ifnet *ifp = sc->pcn_ifp; struct mii_data *mii = NULL; struct ifmedia_entry *ife; PCN_LOCK_ASSERT(sc); /* * Cancel pending I/O and free all RX/TX buffers. */ pcn_stop(sc); pcn_reset(sc); mii = device_get_softc(sc->pcn_miibus); ife = mii->mii_media.ifm_cur; /* Set MAC address */ pcn_csr_write(sc, PCN_CSR_PAR0, ((u_int16_t *)IF_LLADDR(sc->pcn_ifp))[0]); pcn_csr_write(sc, PCN_CSR_PAR1, ((u_int16_t *)IF_LLADDR(sc->pcn_ifp))[1]); pcn_csr_write(sc, PCN_CSR_PAR2, ((u_int16_t *)IF_LLADDR(sc->pcn_ifp))[2]); /* Init circular RX list. */ if (pcn_list_rx_init(sc) == ENOBUFS) { if_printf(ifp, "initialization failed: no " "memory for rx buffers\n"); pcn_stop(sc); return; } /* * Init tx descriptors. */ pcn_list_tx_init(sc); /* Clear PCN_MISC_ASEL so we can set the port via PCN_CSR_MODE. */ PCN_BCR_CLRBIT(sc, PCN_BCR_MISCCFG, PCN_MISC_ASEL); /* * Set up the port based on the currently selected media. * For Am79C978 we've to unconditionally set PCN_PORT_MII and * set the PHY in PCN_BCR_PHYSEL instead. */ if (sc->pcn_type != Am79C978 && IFM_INST(ife->ifm_media) == sc->pcn_inst_10bt) pcn_csr_write(sc, PCN_CSR_MODE, PCN_PORT_10BASET); else pcn_csr_write(sc, PCN_CSR_MODE, PCN_PORT_MII); /* Set up RX filter. */ pcn_setfilt(ifp); /* * Load the multicast filter. */ pcn_setmulti(sc); /* * Load the addresses of the RX and TX lists. */ pcn_csr_write(sc, PCN_CSR_RXADDR0, vtophys(&sc->pcn_ldata->pcn_rx_list[0]) & 0xFFFF); pcn_csr_write(sc, PCN_CSR_RXADDR1, (vtophys(&sc->pcn_ldata->pcn_rx_list[0]) >> 16) & 0xFFFF); pcn_csr_write(sc, PCN_CSR_TXADDR0, vtophys(&sc->pcn_ldata->pcn_tx_list[0]) & 0xFFFF); pcn_csr_write(sc, PCN_CSR_TXADDR1, (vtophys(&sc->pcn_ldata->pcn_tx_list[0]) >> 16) & 0xFFFF); /* Set the RX and TX ring sizes. */ pcn_csr_write(sc, PCN_CSR_RXRINGLEN, (~PCN_RX_LIST_CNT) + 1); pcn_csr_write(sc, PCN_CSR_TXRINGLEN, (~PCN_TX_LIST_CNT) + 1); /* We're not using the initialization block. */ pcn_csr_write(sc, PCN_CSR_IAB1, 0); /* Enable fast suspend mode. */ PCN_CSR_SETBIT(sc, PCN_CSR_EXTCTL2, PCN_EXTCTL2_FASTSPNDE); /* * Enable burst read and write. Also set the no underflow * bit. This will avoid transmit underruns in certain * conditions while still providing decent performance. */ PCN_BCR_SETBIT(sc, PCN_BCR_BUSCTL, PCN_BUSCTL_NOUFLOW| PCN_BUSCTL_BREAD|PCN_BUSCTL_BWRITE); /* Enable graceful recovery from underflow. */ PCN_CSR_SETBIT(sc, PCN_CSR_IMR, PCN_IMR_DXSUFLO); /* Enable auto-padding of short TX frames. */ PCN_CSR_SETBIT(sc, PCN_CSR_TFEAT, PCN_TFEAT_PAD_TX); /* Disable MII autoneg (we handle this ourselves). 
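 * * Editorial note (sketch, not part of this change): the init code above * hands the chip the ring layout in its native format -- each physical base * address split into two 16-bit CSR writes and each ring size expressed as * a two's complement (paddr is shorthand for the vtophys() result): * *	pcn_csr_write(sc, PCN_CSR_RXADDR0, paddr & 0xFFFF); *	pcn_csr_write(sc, PCN_CSR_RXADDR1, (paddr >> 16) & 0xFFFF); *	pcn_csr_write(sc, PCN_CSR_RXRINGLEN, (~PCN_RX_LIST_CNT) + 1); *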
*/ PCN_BCR_SETBIT(sc, PCN_BCR_MIICTL, PCN_MIICTL_DANAS); if (sc->pcn_type == Am79C978) /* XXX support other PHYs? */ pcn_bcr_write(sc, PCN_BCR_PHYSEL, PCN_PHYSEL_PCNET|PCN_PHY_HOMEPNA); /* Enable interrupts and start the controller running. */ pcn_csr_write(sc, PCN_CSR_CSR, PCN_CSR_INTEN|PCN_CSR_START); mii_mediachg(mii); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->pcn_stat_callout, hz, pcn_tick, sc); return; } /* * Set media options. */ static int pcn_ifmedia_upd(ifp) struct ifnet *ifp; { struct pcn_softc *sc; sc = ifp->if_softc; PCN_LOCK(sc); /* * At least Am79C971 with DP83840A can wedge when switching * from the internal 10baseT PHY to the external PHY without * issuing pcn_reset(). For setting the port in PCN_CSR_MODE * the PCnet chip has to be powered down or stopped anyway * and although documented otherwise it doesn't take effect * until the next initialization. */ sc->pcn_link = 0; pcn_stop(sc); pcn_reset(sc); pcn_init_locked(sc); if (ifp->if_snd.ifq_head != NULL) pcn_start_locked(ifp); PCN_UNLOCK(sc); return(0); } /* * Report current media status. */ static void pcn_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct pcn_softc *sc; struct mii_data *mii; sc = ifp->if_softc; mii = device_get_softc(sc->pcn_miibus); PCN_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; PCN_UNLOCK(sc); return; } static int pcn_ioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct pcn_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii = NULL; int error = 0; switch(command) { case SIOCSIFFLAGS: PCN_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->pcn_if_flags & IFF_PROMISC)) { PCN_CSR_SETBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); pcn_setfilt(ifp); PCN_CSR_CLRBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); pcn_csr_write(sc, PCN_CSR_CSR, PCN_CSR_INTEN|PCN_CSR_START); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->pcn_if_flags & IFF_PROMISC) { PCN_CSR_SETBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); pcn_setfilt(ifp); PCN_CSR_CLRBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); pcn_csr_write(sc, PCN_CSR_CSR, PCN_CSR_INTEN|PCN_CSR_START); } else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) pcn_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) pcn_stop(sc); } sc->pcn_if_flags = ifp->if_flags; PCN_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: PCN_LOCK(sc); pcn_setmulti(sc); PCN_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->pcn_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; default: error = ether_ioctl(ifp, command, data); break; } return(error); } static void pcn_watchdog(struct pcn_softc *sc) { struct ifnet *ifp; PCN_LOCK_ASSERT(sc); ifp = sc->pcn_ifp; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if_printf(ifp, "watchdog timeout\n"); pcn_stop(sc); pcn_reset(sc); pcn_init_locked(sc); if (ifp->if_snd.ifq_head != NULL) pcn_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. 
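 * * Editorial note (sketch, not part of this change): pcn_ioctl() above * toggles promiscuous mode without a full reinit by wrapping the filter * update in the chip's suspend handshake: * *	PCN_CSR_SETBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); *	pcn_setfilt(ifp); *	PCN_CSR_CLRBIT(sc, PCN_CSR_EXTCTL1, PCN_EXTCTL1_SPND); *	pcn_csr_write(sc, PCN_CSR_CSR, PCN_CSR_INTEN|PCN_CSR_START); *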
*/ static void pcn_stop(struct pcn_softc *sc) { - register int i; + int i; struct ifnet *ifp; PCN_LOCK_ASSERT(sc); ifp = sc->pcn_ifp; sc->pcn_timer = 0; callout_stop(&sc->pcn_stat_callout); /* Turn off interrupts */ PCN_CSR_CLRBIT(sc, PCN_CSR_CSR, PCN_CSR_INTEN); /* Stop adapter */ PCN_CSR_SETBIT(sc, PCN_CSR_CSR, PCN_CSR_STOP); sc->pcn_link = 0; /* * Free data in the RX lists. */ for (i = 0; i < PCN_RX_LIST_CNT; i++) { if (sc->pcn_cdata.pcn_rx_chain[i] != NULL) { m_freem(sc->pcn_cdata.pcn_rx_chain[i]); sc->pcn_cdata.pcn_rx_chain[i] = NULL; } } bzero((char *)&sc->pcn_ldata->pcn_rx_list, sizeof(sc->pcn_ldata->pcn_rx_list)); /* * Free the TX list buffers. */ for (i = 0; i < PCN_TX_LIST_CNT; i++) { if (sc->pcn_cdata.pcn_tx_chain[i] != NULL) { m_freem(sc->pcn_cdata.pcn_tx_chain[i]); sc->pcn_cdata.pcn_tx_chain[i] = NULL; } } bzero((char *)&sc->pcn_ldata->pcn_tx_list, sizeof(sc->pcn_ldata->pcn_tx_list)); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); return; } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int pcn_shutdown(device_t dev) { struct pcn_softc *sc; sc = device_get_softc(dev); PCN_LOCK(sc); pcn_reset(sc); pcn_stop(sc); PCN_UNLOCK(sc); return 0; } Index: stable/11/sys/dev/ppbus/immio.c =================================================================== --- stable/11/sys/dev/ppbus/immio.c (revision 331642) +++ stable/11/sys/dev/ppbus/immio.c (revision 331643) @@ -1,818 +1,818 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1998, 1999 Nicolas Souchu * Copyright (c) 2001 Alcove - Nicolas Souchu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * */ #include __FBSDID("$FreeBSD$"); /* * Iomega ZIP+ Matchmaker Parallel Port Interface driver * * Thanks to David Campbell's work on the Linux driver and the Iomega specs * Thanks to Thiebault Moeglin for the drive */ #ifdef _KERNEL #include #include #include #include #include #endif /* _KERNEL */ #include "opt_vpo.h" #include #include #include #include #include #include "ppbus_if.h" #define VP0_SELTMO 5000 /* select timeout */ #define VP0_FAST_SPINTMO 500000 /* wait status timeout */ #define VP0_LOW_SPINTMO 5000000 /* wait status timeout */ #define VP0_SECTOR_SIZE 512 /* * Microcode to execute very fast I/O sequences at the lowest bus level. */ #define WAIT_RET MS_PARAM(7, 2, MS_TYP_PTR) #define WAIT_TMO MS_PARAM(1, 0, MS_TYP_INT) #define DECLARE_WAIT_MICROSEQUENCE \ struct ppb_microseq wait_microseq[] = { \ MS_CASS(0x0c), \ MS_SET(MS_UNKNOWN), \ /* loop */ \ MS_BRSET(nBUSY, 4 /* ready */), \ MS_DBRA(-2 /* loop */), \ MS_CASS(0x04), \ MS_RET(1), /* timed out */ \ /* ready */ \ MS_CASS(0x04), \ MS_RFETCH(MS_REG_STR, 0xb8, MS_UNKNOWN ), \ MS_RET(0) /* no error */ \ } #define SELECT_TARGET MS_PARAM(6, 1, MS_TYP_CHA) #define DECLARE_SELECT_MICROSEQUENCE \ struct ppb_microseq select_microseq[] = { \ MS_CASS(0xc), \ /* first, check there is nothing holding onto the bus */ \ MS_SET(VP0_SELTMO), \ /* _loop: */ \ MS_BRCLEAR(0x8, 2 /* _ready */), \ MS_DBRA(-2 /* _loop */), \ MS_RET(2), /* bus busy */ \ /* _ready: */ \ MS_CASS(0x4), \ MS_DASS(MS_UNKNOWN /* 0x80 | 1 << target */), \ MS_DELAY(1), \ MS_CASS(0xc), \ MS_CASS(0xd), \ /* now, wait until the drive is ready */ \ MS_SET(VP0_SELTMO), \ /* loop: */ \ MS_BRSET(0x8, 3 /* ready */), \ MS_DBRA(-2 /* loop */), \ /* error: */ \ MS_CASS(0xc), \ MS_RET(VP0_ESELECT_TIMEOUT), \ /* ready: */ \ MS_CASS(0xc), \ MS_RET(0) \ } static struct ppb_microseq transfer_epilog[] = { MS_CASS(0x4), MS_CASS(0xc), MS_CASS(0xe), MS_CASS(0x4), MS_RET(0) }; #define CPP_S1 MS_PARAM(10, 2, MS_TYP_PTR) #define CPP_S2 MS_PARAM(13, 2, MS_TYP_PTR) #define CPP_S3 MS_PARAM(16, 2, MS_TYP_PTR) #define CPP_PARAM MS_PARAM(17, 1, MS_TYP_CHA) #define DECLARE_CPP_MICROSEQ \ struct ppb_microseq cpp_microseq[] = { \ MS_CASS(0x0c), MS_DELAY(2), \ MS_DASS(0xaa), MS_DELAY(10), \ MS_DASS(0x55), MS_DELAY(10), \ MS_DASS(0x00), MS_DELAY(10), \ MS_DASS(0xff), MS_DELAY(10), \ MS_RFETCH(MS_REG_STR, 0xb8, MS_UNKNOWN /* &s1 */), \ MS_DASS(0x87), MS_DELAY(10), \ MS_RFETCH(MS_REG_STR, 0xb8, MS_UNKNOWN /* &s2 */), \ MS_DASS(0x78), MS_DELAY(10), \ MS_RFETCH(MS_REG_STR, 0x38, MS_UNKNOWN /* &s3 */), \ MS_DASS(MS_UNKNOWN /* param */), \ MS_DELAY(2), \ MS_CASS(0x0c), MS_DELAY(10), \ MS_CASS(0x0d), MS_DELAY(2), \ MS_CASS(0x0c), MS_DELAY(10), \ MS_DASS(0xff), MS_DELAY(10), \ MS_RET(0) \ } #define NEGOCIATED_MODE MS_PARAM(2, 1, MS_TYP_CHA) #define DECLARE_NEGOCIATE_MICROSEQ \ struct ppb_microseq negociate_microseq[] = { \ MS_CASS(0x4), \ MS_DELAY(5), \ MS_DASS(MS_UNKNOWN /* mode */), \ MS_DELAY(100), \ MS_CASS(0x6), \ MS_DELAY(5), \ MS_BRSET(0x20, 5 /* continue */), \ MS_DELAY(5), \ MS_CASS(0x7), \ MS_DELAY(5), \ MS_CASS(0x6), \ MS_RET(VP0_ENEGOCIATE), \ /* continue: */ \ MS_DELAY(5), \ MS_CASS(0x7), \ MS_DELAY(5), \ MS_CASS(0x6), \ MS_RET(0) \ } #define INB_NIBBLE_L MS_PARAM(3, 2, MS_TYP_PTR) #define INB_NIBBLE_H MS_PARAM(6, 2, MS_TYP_PTR) #define INB_NIBBLE_F MS_PARAM(9, 0, MS_TYP_FUN) #define INB_NIBBLE_P MS_PARAM(9, 1, MS_TYP_PTR) /* * This is the sub-microsequence for MS_GET in NIBBLE mode * Retrieve the two nibbles and call the C function to generate the character * and store it in the buffer (see
nibble_inbyte_hook()) */ #define DECLARE_NIBBLE_INBYTE_SUBMICROSEQ \ struct ppb_microseq nibble_inbyte_submicroseq[] = { \ MS_CASS(0x4), \ /* loop: */ \ MS_CASS(0x6), \ MS_DELAY(1), \ MS_RFETCH(MS_REG_STR, MS_FETCH_ALL, MS_UNKNOWN /* low nibble */),\ MS_CASS(0x5), \ MS_DELAY(1), \ MS_RFETCH(MS_REG_STR, MS_FETCH_ALL, MS_UNKNOWN /* high nibble */),\ MS_CASS(0x4), \ MS_DELAY(1), \ /* do a C call to format the received nibbles */ \ MS_C_CALL(MS_UNKNOWN /* C hook */, MS_UNKNOWN /* param */), \ MS_DBRA(-7 /* loop */), \ MS_RET(0) \ } static struct ppb_microseq reset_microseq[] = { MS_CASS(0x04), MS_DASS(0x40), MS_DELAY(1), MS_CASS(0x0c), MS_CASS(0x0d), MS_DELAY(50), MS_CASS(0x0c), MS_CASS(0x04), MS_RET(0) }; /* * nibble_inbyte_hook() * * Formats high and low nibble into a character */ static int nibble_inbyte_hook (void *p, char *ptr) { struct vpo_nibble *s = (struct vpo_nibble *)p; /* assemble the byte from the two stored nibbles */ *ptr = ((s->l >> 4) & 0x0f) + (s->h & 0xf0); return (0); } /* * This is the sub-microsequence for MS_GET in PS2 mode */ static struct ppb_microseq ps2_inbyte_submicroseq[] = { MS_CASS(0x4), /* loop: */ MS_CASS(PCD | 0x6), MS_RFETCH_P(1, MS_REG_DTR, MS_FETCH_ALL), MS_CASS(PCD | 0x5), MS_DBRA(-4 /* loop */), MS_RET(0) }; /* * This is the sub-microsequence for MS_PUT in both NIBBLE and PS2 modes */ static struct ppb_microseq spp_outbyte_submicroseq[] = { MS_CASS(0x4), /* loop: */ MS_RASSERT_P(1, MS_REG_DTR), MS_CASS(0x5), MS_DBRA(0), /* decrement counter */ MS_RASSERT_P(1, MS_REG_DTR), MS_CASS(0x0), MS_DBRA(-6 /* loop */), /* return from the put call */ MS_CASS(0x4), MS_RET(0) }; /* EPP 1.7 microsequences, ptr and len set at runtime */ static struct ppb_microseq epp17_outstr[] = { MS_CASS(0x4), MS_RASSERT_P(MS_ACCUM, MS_REG_EPP_D), MS_CASS(0xc), MS_RET(0), }; static struct ppb_microseq epp17_instr[] = { MS_CASS(PCD | 0x4), MS_RFETCH_P(MS_ACCUM, MS_REG_EPP_D, MS_FETCH_ALL), MS_CASS(PCD | 0xc), MS_RET(0), }; static int imm_disconnect(struct vpoio_data *vpo, int *connected, int release_bus) { DECLARE_CPP_MICROSEQ; device_t ppbus = device_get_parent(vpo->vpo_dev); char s1, s2, s3; int ret; /* all should be ok */ if (connected) *connected = 0; ppb_MS_init_msq(cpp_microseq, 4, CPP_S1, (void *)&s1, CPP_S2, (void *)&s2, CPP_S3, (void *)&s3, CPP_PARAM, 0x30); ppb_MS_microseq(ppbus, vpo->vpo_dev, cpp_microseq, &ret); if ((s1 != (char)0xb8 || s2 != (char)0x18 || s3 != (char)0x38)) { if (bootverbose) device_printf(vpo->vpo_dev, "(disconnect) s1=0x%x s2=0x%x, s3=0x%x\n", s1 & 0xff, s2 & 0xff, s3 & 0xff); if (connected) *connected = VP0_ECONNECT; } if (release_bus) return (ppb_release_bus(ppbus, vpo->vpo_dev)); else return (0); } /* * how : PPB_WAIT or PPB_DONTWAIT */ static int imm_connect(struct vpoio_data *vpo, int how, int *disconnected, int request_bus) { DECLARE_CPP_MICROSEQ; device_t ppbus = device_get_parent(vpo->vpo_dev); char s1, s2, s3; int error; int ret; /* all should be ok */ if (disconnected) *disconnected = 0; if (request_bus) if ((error = ppb_request_bus(ppbus, vpo->vpo_dev, how))) return (error); ppb_MS_init_msq(cpp_microseq, 3, CPP_S1, (void *)&s1, CPP_S2, (void *)&s2, CPP_S3, (void *)&s3); /* select device 0 in compatible mode */ ppb_MS_init_msq(cpp_microseq, 1, CPP_PARAM, 0xe0); ppb_MS_microseq(ppbus, vpo->vpo_dev, cpp_microseq, &ret); /* disconnect all devices */ ppb_MS_init_msq(cpp_microseq, 1, CPP_PARAM, 0x30); ppb_MS_microseq(ppbus, vpo->vpo_dev, cpp_microseq, &ret); if (PPB_IN_EPP_MODE(ppbus)) ppb_MS_init_msq(cpp_microseq, 1, CPP_PARAM, 0x28); else
ppb_MS_init_msq(cpp_microseq, 1, CPP_PARAM, 0xe0); ppb_MS_microseq(ppbus, vpo->vpo_dev, cpp_microseq, &ret); if ((s1 != (char)0xb8 || s2 != (char)0x18 || s3 != (char)0x30)) { if (bootverbose) device_printf(vpo->vpo_dev, "(connect) s1=0x%x s2=0x%x, s3=0x%x\n", s1 & 0xff, s2 & 0xff, s3 & 0xff); if (disconnected) *disconnected = VP0_ECONNECT; } return (0); } /* * imm_detect() * * Detect and initialise the VP0 adapter. */ static int imm_detect(struct vpoio_data *vpo) { device_t ppbus = device_get_parent(vpo->vpo_dev); int error; if ((error = ppb_request_bus(ppbus, vpo->vpo_dev, PPB_DONTWAIT))) return (error); /* disconnect the drive, keep the bus */ imm_disconnect(vpo, NULL, 0); vpo->vpo_mode_found = VP0_MODE_UNDEFINED; error = 1; /* try to enter EPP mode since vpoio failure put the bus in NIBBLE */ if (ppb_set_mode(ppbus, PPB_EPP) != -1) { imm_connect(vpo, PPB_DONTWAIT, &error, 0); } /* if connection failed try PS/2 then NIBBLE modes */ if (error) { if (ppb_set_mode(ppbus, PPB_PS2) != -1) { imm_connect(vpo, PPB_DONTWAIT, &error, 0); } if (error) { if (ppb_set_mode(ppbus, PPB_NIBBLE) != -1) { imm_connect(vpo, PPB_DONTWAIT, &error, 0); if (error) goto error; vpo->vpo_mode_found = VP0_MODE_NIBBLE; } else { device_printf(vpo->vpo_dev, "NIBBLE mode unavailable!\n"); goto error; } } else { vpo->vpo_mode_found = VP0_MODE_PS2; } } else { vpo->vpo_mode_found = VP0_MODE_EPP; } /* send SCSI reset signal */ ppb_MS_microseq(ppbus, vpo->vpo_dev, reset_microseq, NULL); /* release the bus now */ imm_disconnect(vpo, &error, 1); /* ensure we are disconnected or daisy chained peripheral * may cause serious problem to the disk */ if (error) { if (bootverbose) device_printf(vpo->vpo_dev, "can't disconnect from the drive\n"); goto error; } return (0); error: ppb_release_bus(ppbus, vpo->vpo_dev); return (VP0_EINITFAILED); } /* * imm_outstr() */ static int imm_outstr(struct vpoio_data *vpo, char *buffer, int size) { device_t ppbus = device_get_parent(vpo->vpo_dev); int error = 0; if (PPB_IN_EPP_MODE(ppbus)) ppb_reset_epp_timeout(ppbus); ppb_MS_exec(ppbus, vpo->vpo_dev, MS_OP_PUT, (union ppb_insarg)buffer, (union ppb_insarg)size, (union ppb_insarg)MS_UNKNOWN, &error); return (error); } /* * imm_instr() */ static int imm_instr(struct vpoio_data *vpo, char *buffer, int size) { device_t ppbus = device_get_parent(vpo->vpo_dev); int error = 0; if (PPB_IN_EPP_MODE(ppbus)) ppb_reset_epp_timeout(ppbus); ppb_MS_exec(ppbus, vpo->vpo_dev, MS_OP_GET, (union ppb_insarg)buffer, (union ppb_insarg)size, (union ppb_insarg)MS_UNKNOWN, &error); return (error); } static char imm_select(struct vpoio_data *vpo, int initiator, int target) { DECLARE_SELECT_MICROSEQUENCE; device_t ppbus = device_get_parent(vpo->vpo_dev); int ret; /* initialize the select microsequence */ ppb_MS_init_msq(select_microseq, 1, SELECT_TARGET, 1 << initiator | 1 << target); ppb_MS_microseq(ppbus, vpo->vpo_dev, select_microseq, &ret); return (ret); } /* * imm_wait() * * H_SELIN must be low. * */ static char imm_wait(struct vpoio_data *vpo, int tmo) { DECLARE_WAIT_MICROSEQUENCE; device_t ppbus = device_get_parent(vpo->vpo_dev); int ret, err; /* * Return some status information. 
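 * * (Editorial note, not part of this change: the connect/disconnect paths * above decide success from the three status bytes sampled while the CPP * magic pattern 0xaa/0x55/0x00/0xff is played on the data lines; the * adapter is considered present only when * *	s1 == 0xb8 && s2 == 0x18 && s3 == 0x30 (connect) or 0x38 (disconnect) * * and anything else is reported as VP0_ECONNECT.) *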
* Semantics : 0x88 = ZIP+ wants more data * 0x98 = ZIP+ wants to send more data * 0xa8 = ZIP+ wants command * 0xb8 = end of transfer, ZIP+ is sending status */ ppb_MS_init_msq(wait_microseq, 2, WAIT_RET, (void *)&ret, WAIT_TMO, tmo); ppb_MS_microseq(ppbus, vpo->vpo_dev, wait_microseq, &err); if (err) return (0); /* command timed out */ return(ret); } static int imm_negociate(struct vpoio_data *vpo) { DECLARE_NEGOCIATE_MICROSEQ; device_t ppbus = device_get_parent(vpo->vpo_dev); int negociate_mode; int ret; if (PPB_IN_NIBBLE_MODE(ppbus)) negociate_mode = 0; else if (PPB_IN_PS2_MODE(ppbus)) negociate_mode = 1; else return (0); #if 0 /* XXX use standalone code not to depend on ppb_1284 code yet */ ret = ppb_1284_negociate(ppbus, negociate_mode); if (ret) return (VP0_ENEGOCIATE); #endif ppb_MS_init_msq(negociate_microseq, 1, NEGOCIATED_MODE, negociate_mode); ppb_MS_microseq(ppbus, vpo->vpo_dev, negociate_microseq, &ret); return (ret); } /* * imm_probe() * * Low level probe of vpo device * */ int imm_probe(device_t dev, struct vpoio_data *vpo) { int error; /* ppbus dependent initialisation */ vpo->vpo_dev = dev; /* now, try to initialise the drive */ if ((error = imm_detect(vpo))) { return (error); } return (0); } /* * imm_attach() * * Low level attachment of vpo device * */ int imm_attach(struct vpoio_data *vpo) { DECLARE_NIBBLE_INBYTE_SUBMICROSEQ; device_t ppbus = device_get_parent(vpo->vpo_dev); int error = 0; /* * Initialize microsequence code */ vpo->vpo_nibble_inbyte_msq = (struct ppb_microseq *)malloc( sizeof(nibble_inbyte_submicroseq), M_DEVBUF, M_NOWAIT); if (!vpo->vpo_nibble_inbyte_msq) return (ENXIO); bcopy((void *)nibble_inbyte_submicroseq, (void *)vpo->vpo_nibble_inbyte_msq, sizeof(nibble_inbyte_submicroseq)); ppb_MS_init_msq(vpo->vpo_nibble_inbyte_msq, 4, INB_NIBBLE_H, (void *)&(vpo)->vpo_nibble.h, INB_NIBBLE_L, (void *)&(vpo)->vpo_nibble.l, INB_NIBBLE_F, nibble_inbyte_hook, INB_NIBBLE_P, (void *)&(vpo)->vpo_nibble); /* * Initialize mode dependent in/out microsequences */ ppb_lock(ppbus); if ((error = ppb_request_bus(ppbus, vpo->vpo_dev, PPB_WAIT))) goto error; /* ppbus automatically restores the last mode entered during detection */ switch (vpo->vpo_mode_found) { case VP0_MODE_EPP: ppb_MS_GET_init(ppbus, vpo->vpo_dev, epp17_instr); ppb_MS_PUT_init(ppbus, vpo->vpo_dev, epp17_outstr); device_printf(vpo->vpo_dev, "EPP mode\n"); break; case VP0_MODE_PS2: ppb_MS_GET_init(ppbus, vpo->vpo_dev, ps2_inbyte_submicroseq); ppb_MS_PUT_init(ppbus, vpo->vpo_dev, spp_outbyte_submicroseq); device_printf(vpo->vpo_dev, "PS2 mode\n"); break; case VP0_MODE_NIBBLE: ppb_MS_GET_init(ppbus, vpo->vpo_dev, vpo->vpo_nibble_inbyte_msq); ppb_MS_PUT_init(ppbus, vpo->vpo_dev, spp_outbyte_submicroseq); device_printf(vpo->vpo_dev, "NIBBLE mode\n"); break; default: panic("imm: unknown mode %d", vpo->vpo_mode_found); } ppb_release_bus(ppbus, vpo->vpo_dev); error: ppb_unlock(ppbus); return (error); } /* * imm_reset_bus() * */ int imm_reset_bus(struct vpoio_data *vpo) { device_t ppbus = device_get_parent(vpo->vpo_dev); int disconnected; /* first, connect to the drive and request the bus */ imm_connect(vpo, PPB_WAIT|PPB_INTR, &disconnected, 1); if (!disconnected) { /* reset the SCSI bus */ ppb_MS_microseq(ppbus, vpo->vpo_dev, reset_microseq, NULL); /* then disconnect */ imm_disconnect(vpo, NULL, 1); } return (0); } /* * imm_do_scsi() * * Send a SCSI command * */ int imm_do_scsi(struct vpoio_data *vpo, int host, int target, char *command, int clen, char *buffer, int blen, int *result, int *count, int *ret) { device_t
ppbus = device_get_parent(vpo->vpo_dev); - register char r; + char r; char l, h = 0; int len, error = 0, not_connected = 0; - register int k; + int k; int negociated = 0; /* * enter disk state, allocate the ppbus * * XXX * Should we allow this call to be interruptible? * The only way to report the interruption is to return * EIO to upper SCSI code :^( */ if ((error = imm_connect(vpo, PPB_WAIT|PPB_INTR, ¬_connected, 1))) return (error); if (not_connected) { *ret = VP0_ECONNECT; goto error; } /* * Select the drive ... */ if ((*ret = imm_select(vpo,host,target))) goto error; /* * Send the command ... */ for (k = 0; k < clen; k+=2) { if (imm_wait(vpo, VP0_FAST_SPINTMO) != (char)0xa8) { *ret = VP0_ECMD_TIMEOUT; goto error; } if (imm_outstr(vpo, &command[k], 2)) { *ret = VP0_EPPDATA_TIMEOUT; goto error; } } if (!(r = imm_wait(vpo, VP0_LOW_SPINTMO))) { *ret = VP0_ESTATUS_TIMEOUT; goto error; } if ((r & 0x30) == 0x10) { if (imm_negociate(vpo)) { *ret = VP0_ENEGOCIATE; goto error; } else negociated = 1; } /* * Complete transfer ... */ *count = 0; for (;;) { if (!(r = imm_wait(vpo, VP0_LOW_SPINTMO))) { *ret = VP0_ESTATUS_TIMEOUT; goto error; } /* stop when the ZIP+ wants to send status */ if (r == (char)0xb8) break; if (*count >= blen) { *ret = VP0_EDATA_OVERFLOW; goto error; } /* ZIP+ wants to send data? */ if (r == (char)0x88) { len = (((blen - *count) >= VP0_SECTOR_SIZE)) ? VP0_SECTOR_SIZE : 2; error = imm_outstr(vpo, &buffer[*count], len); } else { if (!PPB_IN_EPP_MODE(ppbus)) len = 1; else len = (((blen - *count) >= VP0_SECTOR_SIZE)) ? VP0_SECTOR_SIZE : 1; error = imm_instr(vpo, &buffer[*count], len); } if (error) { *ret = error; goto error; } *count += len; } if ((PPB_IN_NIBBLE_MODE(ppbus) || PPB_IN_PS2_MODE(ppbus)) && negociated) ppb_MS_microseq(ppbus, vpo->vpo_dev, transfer_epilog, NULL); /* * Retrieve status ... */ if (imm_negociate(vpo)) { *ret = VP0_ENEGOCIATE; goto error; } else negociated = 1; if (imm_instr(vpo, &l, 1)) { *ret = VP0_EOTHER; goto error; } /* check if the ZIP+ wants to send more status */ if (imm_wait(vpo, VP0_FAST_SPINTMO) == (char)0xb8) if (imm_instr(vpo, &h, 1)) { *ret = VP0_EOTHER + 2; goto error; } /* Experience showed that we should discard this */ if (h == (char) -1) h = 0; *result = ((int) h << 8) | ((int) l & 0xff); error: if ((PPB_IN_NIBBLE_MODE(ppbus) || PPB_IN_PS2_MODE(ppbus)) && negociated) ppb_MS_microseq(ppbus, vpo->vpo_dev, transfer_epilog, NULL); /* return to printer state, release the ppbus */ imm_disconnect(vpo, NULL, 1); return (0); } Index: stable/11/sys/dev/ppbus/vpoio.c =================================================================== --- stable/11/sys/dev/ppbus/vpoio.c (revision 331642) +++ stable/11/sys/dev/ppbus/vpoio.c (revision 331643) @@ -1,789 +1,789 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1998, 1999 Nicolas Souchu * Copyright (c) 2000 Alcove - Nicolas Souchu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * */ #include __FBSDID("$FreeBSD$"); #ifdef _KERNEL #include #include #include #include #include #endif #include "opt_vpo.h" #include #include #include #include #include "ppbus_if.h" /* * The driver polls the drive. We may add a timeout queue to avoid * active polling on nACK. I've tried this but it leads to unreliable * transfers. */ #define VP0_SELTMO 5000 /* select timeout */ #define VP0_FAST_SPINTMO 500000 /* wait status timeout */ #define VP0_LOW_SPINTMO 5000000 /* wait status timeout */ /* * Actually, VP0 timings are more accurate (about a few 16 MHz cycles), * but succeeding in respecting such timings leads to architecture * dependent considerations. */ #define VP0_PULSE 1 #define VP0_SECTOR_SIZE 512 #define VP0_BUFFER_SIZE 0x12000 #define n(flags) (~(flags) & (flags)) /* * VP0 connections. */ #define H_AUTO n(AUTOFEED) #define H_nAUTO AUTOFEED #define H_STROBE n(STROBE) #define H_nSTROBE STROBE #define H_BSY n(nBUSY) #define H_nBSY nBUSY #define H_SEL SELECT #define H_nSEL n(SELECT) #define H_ERR PERROR #define H_nERR n(PERROR) #define H_ACK nACK #define H_nACK n(nACK) #define H_FLT nFAULT #define H_nFLT n(nFAULT) #define H_SELIN n(SELECTIN) #define H_nSELIN SELECTIN #define H_INIT nINIT #define H_nINIT n(nINIT) /* * Microcode to execute very fast I/O sequences at the lowest bus level.
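 * * Editorial note (not part of this change): the H_* names above rely on * n(flags) expanding to (~(flags) & (flags)), which is always 0. For every * control line one of the two names therefore carries the register bit and * the other carries 0, matching the line's hardware inversion, so a control * byte can be written as a readable OR of line states, e.g.: * *	ppb_wctr(ppbus, H_AUTO | H_nSELIN | H_INIT | H_STROBE); *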
*/ #define WAIT_RET MS_PARAM(4, 2, MS_TYP_PTR) #define WAIT_TMO MS_PARAM(0, 0, MS_TYP_INT) #define DECLARE_WAIT_MICROSEQUENCE \ struct ppb_microseq wait_microseq[] = { \ MS_SET(MS_UNKNOWN), \ /* loop */ \ MS_BRSET(nBUSY, 2 /* ready */), \ MS_DBRA(-2 /* loop */), \ MS_RET(1), /* timed out */ \ /* ready */ \ MS_RFETCH(MS_REG_STR, 0xf0, MS_UNKNOWN), \ MS_RET(0) /* no error */ \ } /* call this macro to initialize connect/disconnect microsequences */ #define INIT_TRIG_MICROSEQ { \ int i; \ for (i=1; i <= 7; i+=2) { \ disconnect_microseq[i].arg[2] = (union ppb_insarg)d_pulse; \ connect_epp_microseq[i].arg[2] = \ connect_spp_microseq[i].arg[2] = (union ppb_insarg)c_pulse; \ } \ } #define trig_d_pulse MS_TRIG(MS_REG_CTR,5,MS_UNKNOWN /* d_pulse */) static char d_pulse[] = { H_AUTO | H_nSELIN | H_INIT | H_STROBE, 0, H_nAUTO | H_nSELIN | H_INIT | H_STROBE, VP0_PULSE, H_AUTO | H_nSELIN | H_INIT | H_STROBE, 0, H_AUTO | H_SELIN | H_INIT | H_STROBE, VP0_PULSE, H_AUTO | H_nSELIN | H_INIT | H_STROBE, VP0_PULSE }; #define trig_c_pulse MS_TRIG(MS_REG_CTR,5,MS_UNKNOWN /* c_pulse */) static char c_pulse[] = { H_AUTO | H_nSELIN | H_INIT | H_STROBE, 0, H_AUTO | H_SELIN | H_INIT | H_STROBE, 0, H_nAUTO | H_SELIN | H_INIT | H_STROBE, VP0_PULSE, H_AUTO | H_SELIN | H_INIT | H_STROBE, 0, H_AUTO | H_nSELIN | H_INIT | H_STROBE, VP0_PULSE }; static struct ppb_microseq disconnect_microseq[] = { MS_DASS(0x0), trig_d_pulse, MS_DASS(0x3c), trig_d_pulse, MS_DASS(0x20), trig_d_pulse, MS_DASS(0xf), trig_d_pulse, MS_RET(0) }; static struct ppb_microseq connect_epp_microseq[] = { MS_DASS(0x0), trig_c_pulse, MS_DASS(0x3c), trig_c_pulse, MS_DASS(0x20), trig_c_pulse, MS_DASS(0xcf), trig_c_pulse, MS_RET(0) }; static struct ppb_microseq connect_spp_microseq[] = { MS_DASS(0x0), trig_c_pulse, MS_DASS(0x3c), trig_c_pulse, MS_DASS(0x20), trig_c_pulse, MS_DASS(0x8f), trig_c_pulse, MS_RET(0) }; /* * nibble_inbyte_hook() * * Formats high and low nibble into a character */ static int nibble_inbyte_hook (void *p, char *ptr) { struct vpo_nibble *s = (struct vpo_nibble *)p; /* increment the buffer pointer */ *ptr++ = ((s->l >> 4) & 0x0f) + (s->h & 0xf0); return (0); } #define INB_NIBBLE_H MS_PARAM(2, 2, MS_TYP_PTR) #define INB_NIBBLE_L MS_PARAM(4, 2, MS_TYP_PTR) #define INB_NIBBLE_F MS_PARAM(5, 0, MS_TYP_FUN) #define INB_NIBBLE_P MS_PARAM(5, 1, MS_TYP_PTR) /* * This is the sub-microsequence for MS_GET in NIBBLE mode * Retrieve the two nibbles and call the C function to generate the character * and store it in the buffer (see nibble_inbyte_hook()) */ #define DECLARE_NIBBLE_INBYTE_SUBMICROSEQ \ struct ppb_microseq nibble_inbyte_submicroseq[] = { \ /* loop: */ \ MS_CASS( H_AUTO | H_SELIN | H_INIT | H_STROBE), \ MS_DELAY(VP0_PULSE), \ MS_RFETCH(MS_REG_STR, MS_FETCH_ALL, MS_UNKNOWN /* high nibble */),\ MS_CASS(H_nAUTO | H_SELIN | H_INIT | H_STROBE), \ MS_RFETCH(MS_REG_STR, MS_FETCH_ALL, MS_UNKNOWN /* low nibble */),\ /* do a C call to format the received nibbles */ \ MS_C_CALL(MS_UNKNOWN /* C hook */, MS_UNKNOWN /* param */),\ MS_DBRA(-7 /* loop */), \ MS_CASS(H_AUTO | H_nSELIN | H_INIT | H_STROBE), \ MS_RET(0) \ } /* * This is the sub-microsequence for MS_GET in PS2 mode */ static struct ppb_microseq ps2_inbyte_submicroseq[] = { MS_CASS(PCD | H_AUTO | H_SELIN | H_INIT | H_nSTROBE), /* loop: */ MS_RFETCH_P(1, MS_REG_DTR, MS_FETCH_ALL), MS_CASS(PCD | H_nAUTO | H_SELIN | H_INIT | H_nSTROBE), MS_CASS(PCD | H_AUTO | H_SELIN | H_INIT | H_nSTROBE), MS_DBRA(-4 /* loop */), MS_CASS(H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_RET(0) }; /* * This is the
sub-microsequence for MS_PUT in both NIBBLE and PS2 modes */ static struct ppb_microseq spp_outbyte_submicroseq[] = { /* loop: */ MS_RASSERT_P(1, MS_REG_DTR), MS_CASS(H_nAUTO | H_nSELIN | H_INIT | H_STROBE), MS_CASS( H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_DELAY(VP0_PULSE), MS_DBRA(-5 /* loop */), /* return from the put call */ MS_RET(0) }; /* EPP 1.7 microsequences, ptr and len set at runtime */ static struct ppb_microseq epp17_outstr_body[] = { MS_CASS(H_AUTO | H_SELIN | H_INIT | H_STROBE), /* loop: */ MS_RASSERT_P(1, MS_REG_EPP_D), MS_BRSET(TIMEOUT, 3 /* error */), /* EPP timeout? */ MS_DBRA(-3 /* loop */), MS_CASS(H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_RET(0), /* error: */ MS_CASS(H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_RET(1) }; static struct ppb_microseq epp17_instr_body[] = { MS_CASS(PCD | H_AUTO | H_SELIN | H_INIT | H_STROBE), /* loop: */ MS_RFETCH_P(1, MS_REG_EPP_D, MS_FETCH_ALL), MS_BRSET(TIMEOUT, 3 /* error */), /* EPP timeout? */ MS_DBRA(-3 /* loop */), MS_CASS(PCD | H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_RET(0), /* error: */ MS_CASS(PCD | H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_RET(1) }; static struct ppb_microseq in_disk_mode[] = { MS_CASS( H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_CASS(H_nAUTO | H_nSELIN | H_INIT | H_STROBE), MS_BRCLEAR(H_FLT, 3 /* error */), MS_CASS( H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_BRSET(H_FLT, 1 /* error */), MS_RET(1), /* error: */ MS_RET(0) }; static int vpoio_disconnect(struct vpoio_data *vpo) { device_t ppbus = device_get_parent(vpo->vpo_dev); int ret; ppb_MS_microseq(ppbus, vpo->vpo_dev, disconnect_microseq, &ret); return (ppb_release_bus(ppbus, vpo->vpo_dev)); } /* * how : PPB_WAIT or PPB_DONTWAIT */ static int vpoio_connect(struct vpoio_data *vpo, int how) { device_t ppbus = device_get_parent(vpo->vpo_dev); int error; int ret; if ((error = ppb_request_bus(ppbus, vpo->vpo_dev, how))) { #ifdef VP0_DEBUG printf("%s: can't request bus!\n", __func__); #endif return (error); } if (PPB_IN_EPP_MODE(ppbus)) ppb_MS_microseq(ppbus, vpo->vpo_dev, connect_epp_microseq, &ret); else ppb_MS_microseq(ppbus, vpo->vpo_dev, connect_spp_microseq, &ret); return (0); } /* * vpoio_reset() * * SCSI reset signal, the drive must be in disk mode */ static void vpoio_reset(struct vpoio_data *vpo) { device_t ppbus = device_get_parent(vpo->vpo_dev); int ret; struct ppb_microseq reset_microseq[] = { #define INITIATOR MS_PARAM(0, 1, MS_TYP_INT) MS_DASS(MS_UNKNOWN), MS_CASS(H_AUTO | H_nSELIN | H_nINIT | H_STROBE), MS_DELAY(25), MS_CASS(H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_RET(0) }; ppb_MS_init_msq(reset_microseq, 1, INITIATOR, 1 << VP0_INITIATOR); ppb_MS_microseq(ppbus, vpo->vpo_dev, reset_microseq, &ret); return; } /* * vpoio_in_disk_mode() */ static int vpoio_in_disk_mode(struct vpoio_data *vpo) { device_t ppbus = device_get_parent(vpo->vpo_dev); int ret; ppb_MS_microseq(ppbus, vpo->vpo_dev, in_disk_mode, &ret); return (ret); } /* * vpoio_detect() * * Detect and initialise the VP0 adapter. 
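 * * Editorial note (not part of this change): the detection below tries the * fastest mode first and falls back, checking each attempt with * vpoio_in_disk_mode(): * *	EPP    -> connect_epp_microseq *	PS/2   -> connect_spp_microseq *	NIBBLE -> connect_spp_microseq * * PS/2 and SPP share the same connect sequence, so PS/2 capability cannot * be proven here; as the comment below notes, PS/2 mode may have to be * suppressed with boot flags if it misbehaves. *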
*/ static int vpoio_detect(struct vpoio_data *vpo) { device_t ppbus = device_get_parent(vpo->vpo_dev); int error, ret; /* allocate the bus, then apply microsequences */ if ((error = ppb_request_bus(ppbus, vpo->vpo_dev, PPB_DONTWAIT))) return (error); /* Force disconnection */ ppb_MS_microseq(ppbus, vpo->vpo_dev, disconnect_microseq, &ret); /* Try to enter EPP mode, then connect to the drive in EPP mode */ if (ppb_set_mode(ppbus, PPB_EPP) != -1) { /* call manually the microseq instead of using the appropriate function * since we already requested the ppbus */ ppb_MS_microseq(ppbus, vpo->vpo_dev, connect_epp_microseq, &ret); } /* If EPP mode switch failed or ZIP connection in EPP mode failed, * try to connect in NIBBLE mode */ if (!vpoio_in_disk_mode(vpo)) { /* The interface must be at least PS/2 or NIBBLE capable. * There is no way to know if the ZIP will work with * PS/2 mode since PS/2 and SPP both use the same connect * sequence. One must suppress PS/2 with boot flags if * PS/2 mode fails (see ppc(4)). */ if (ppb_set_mode(ppbus, PPB_PS2) != -1) { vpo->vpo_mode_found = VP0_MODE_PS2; } else { if (ppb_set_mode(ppbus, PPB_NIBBLE) == -1) goto error; vpo->vpo_mode_found = VP0_MODE_NIBBLE; } /* Can't know if the interface is capable of PS/2 yet */ ppb_MS_microseq(ppbus, vpo->vpo_dev, connect_spp_microseq, &ret); if (!vpoio_in_disk_mode(vpo)) { vpo->vpo_mode_found = VP0_MODE_UNDEFINED; if (bootverbose) device_printf(vpo->vpo_dev, "can't connect to the drive\n"); /* disconnect and release the bus */ ppb_MS_microseq(ppbus, vpo->vpo_dev, disconnect_microseq, &ret); goto error; } } else { vpo->vpo_mode_found = VP0_MODE_EPP; } /* send SCSI reset signal */ vpoio_reset(vpo); ppb_MS_microseq(ppbus, vpo->vpo_dev, disconnect_microseq, &ret); /* ensure we are disconnected or daisy chained peripheral * may cause serious problem to the disk */ if (vpoio_in_disk_mode(vpo)) { if (bootverbose) device_printf(vpo->vpo_dev, "can't disconnect from the drive\n"); goto error; } ppb_release_bus(ppbus, vpo->vpo_dev); return (0); error: ppb_release_bus(ppbus, vpo->vpo_dev); return (VP0_EINITFAILED); } /* * vpoio_outstr() */ static int vpoio_outstr(struct vpoio_data *vpo, char *buffer, int size) { device_t ppbus = device_get_parent(vpo->vpo_dev); int error = 0; ppb_MS_exec(ppbus, vpo->vpo_dev, MS_OP_PUT, (union ppb_insarg)buffer, (union ppb_insarg)size, (union ppb_insarg)MS_UNKNOWN, &error); ppb_ecp_sync(ppbus); return (error); } /* * vpoio_instr() */ static int vpoio_instr(struct vpoio_data *vpo, char *buffer, int size) { device_t ppbus = device_get_parent(vpo->vpo_dev); int error = 0; ppb_MS_exec(ppbus, vpo->vpo_dev, MS_OP_GET, (union ppb_insarg)buffer, (union ppb_insarg)size, (union ppb_insarg)MS_UNKNOWN, &error); ppb_ecp_sync(ppbus); return (error); } static char vpoio_select(struct vpoio_data *vpo, int initiator, int target) { device_t ppbus = device_get_parent(vpo->vpo_dev); int ret; struct ppb_microseq select_microseq[] = { /* parameter list */ #define SELECT_TARGET MS_PARAM(0, 1, MS_TYP_INT) #define SELECT_INITIATOR MS_PARAM(3, 1, MS_TYP_INT) /* send the select command to the drive */ MS_DASS(MS_UNKNOWN), MS_CASS(H_nAUTO | H_nSELIN | H_INIT | H_STROBE), MS_CASS( H_AUTO | H_nSELIN | H_INIT | H_STROBE), MS_DASS(MS_UNKNOWN), MS_CASS( H_AUTO | H_nSELIN | H_nINIT | H_STROBE), /* now, wait until the drive is ready */ MS_SET(VP0_SELTMO), /* loop: */ MS_BRSET(H_ACK, 2 /* ready */), MS_DBRA(-2 /* loop */), /* error: */ MS_RET(1), /* ready: */ MS_RET(0) }; /* initialize the select microsequence */ 
ppb_MS_init_msq(select_microseq, 2, SELECT_TARGET, 1 << target, SELECT_INITIATOR, 1 << initiator); ppb_MS_microseq(ppbus, vpo->vpo_dev, select_microseq, &ret); if (ret) return (VP0_ESELECT_TIMEOUT); return (0); } /* * vpoio_wait() * * H_SELIN must be low. * * XXX should be ported to microseq */ static char vpoio_wait(struct vpoio_data *vpo, int tmo) { DECLARE_WAIT_MICROSEQUENCE; device_t ppbus = device_get_parent(vpo->vpo_dev); int ret, err; #if 0 /* broken */ if (ppb_poll_device(ppbus, 150, nBUSY, nBUSY, PPB_INTR)) return (0); return (ppb_rstr(ppbus) & 0xf0); #endif /* * Return some status information. * Semantics : 0xc0 = ZIP wants more data * 0xd0 = ZIP wants to send more data * 0xe0 = ZIP wants command * 0xf0 = end of transfer, ZIP is sending status */ ppb_MS_init_msq(wait_microseq, 2, WAIT_RET, (void *)&ret, WAIT_TMO, tmo); ppb_MS_microseq(ppbus, vpo->vpo_dev, wait_microseq, &err); if (err) return (0); /* command timed out */ return(ret); } /* * vpoio_probe() * * Low level probe of vpo device * */ int vpoio_probe(device_t dev, struct vpoio_data *vpo) { int error; /* ppbus dependent initialisation */ vpo->vpo_dev = dev; /* * Initialize microsequence code */ INIT_TRIG_MICROSEQ; /* now, try to initialise the drive */ if ((error = vpoio_detect(vpo))) { return (error); } return (0); } /* * vpoio_attach() * * Low level attachment of vpo device * */ int vpoio_attach(struct vpoio_data *vpo) { DECLARE_NIBBLE_INBYTE_SUBMICROSEQ; device_t ppbus = device_get_parent(vpo->vpo_dev); int error = 0; vpo->vpo_nibble_inbyte_msq = (struct ppb_microseq *)malloc( sizeof(nibble_inbyte_submicroseq), M_DEVBUF, M_NOWAIT); if (!vpo->vpo_nibble_inbyte_msq) return (ENXIO); bcopy((void *)nibble_inbyte_submicroseq, (void *)vpo->vpo_nibble_inbyte_msq, sizeof(nibble_inbyte_submicroseq)); ppb_MS_init_msq(vpo->vpo_nibble_inbyte_msq, 4, INB_NIBBLE_H, (void *)&(vpo)->vpo_nibble.h, INB_NIBBLE_L, (void *)&(vpo)->vpo_nibble.l, INB_NIBBLE_F, nibble_inbyte_hook, INB_NIBBLE_P, (void *)&(vpo)->vpo_nibble); /* * Initialize mode dependent in/out microsequences */ ppb_lock(ppbus); if ((error = ppb_request_bus(ppbus, vpo->vpo_dev, PPB_WAIT))) goto error; /* ppbus automatically sets the last mode entered during detection */ switch (vpo->vpo_mode_found) { case VP0_MODE_EPP: ppb_MS_GET_init(ppbus, vpo->vpo_dev, epp17_instr_body); ppb_MS_PUT_init(ppbus, vpo->vpo_dev, epp17_outstr_body); device_printf(vpo->vpo_dev, "EPP mode\n"); break; case VP0_MODE_PS2: ppb_MS_GET_init(ppbus, vpo->vpo_dev, ps2_inbyte_submicroseq); ppb_MS_PUT_init(ppbus, vpo->vpo_dev, spp_outbyte_submicroseq); device_printf(vpo->vpo_dev, "PS2 mode\n"); break; case VP0_MODE_NIBBLE: ppb_MS_GET_init(ppbus, vpo->vpo_dev, vpo->vpo_nibble_inbyte_msq); ppb_MS_PUT_init(ppbus, vpo->vpo_dev, spp_outbyte_submicroseq); device_printf(vpo->vpo_dev, "NIBBLE mode\n"); break; default: panic("vpo: unknown mode %d", vpo->vpo_mode_found); } ppb_release_bus(ppbus, vpo->vpo_dev); error: ppb_unlock(ppbus); return (error); } /* * vpoio_reset_bus() * */ int vpoio_reset_bus(struct vpoio_data *vpo) { /* first, connect to the drive */ if (vpoio_connect(vpo, PPB_WAIT|PPB_INTR) || !vpoio_in_disk_mode(vpo)) { #ifdef VP0_DEBUG printf("%s: not in disk mode!\n", __func__); #endif /* release ppbus */ vpoio_disconnect(vpo); return (1); } /* reset the SCSI bus */ vpoio_reset(vpo); /* then disconnect */ vpoio_disconnect(vpo); return (0); } /* * vpoio_do_scsi() * * Send a SCSI command * */ int vpoio_do_scsi(struct vpoio_data *vpo, int host, int target, char *command, int clen, char *buffer, int blen, int
*result, int *count, int *ret) { device_t ppbus = device_get_parent(vpo->vpo_dev); - register char r; + char r; char l, h = 0; int len, error = 0; - register int k; + int k; /* * enter disk state, allocate the ppbus * * XXX * Should we allow this call to be interruptible? * The only way to report the interruption is to return * EIO do upper SCSI code :^( */ if ((error = vpoio_connect(vpo, PPB_WAIT|PPB_INTR))) return (error); if (!vpoio_in_disk_mode(vpo)) { *ret = VP0_ECONNECT; goto error; } if ((*ret = vpoio_select(vpo,host,target))) goto error; /* * Send the command ... * * set H_SELIN low for vpoio_wait(). */ ppb_wctr(ppbus, H_AUTO | H_nSELIN | H_INIT | H_STROBE); for (k = 0; k < clen; k++) { if (vpoio_wait(vpo, VP0_FAST_SPINTMO) != (char)0xe0) { *ret = VP0_ECMD_TIMEOUT; goto error; } if (vpoio_outstr(vpo, &command[k], 1)) { *ret = VP0_EPPDATA_TIMEOUT; goto error; } } /* * Completion ... */ *count = 0; for (;;) { if (!(r = vpoio_wait(vpo, VP0_LOW_SPINTMO))) { *ret = VP0_ESTATUS_TIMEOUT; goto error; } /* stop when the ZIP wants to send status */ if (r == (char)0xf0) break; if (*count >= blen) { *ret = VP0_EDATA_OVERFLOW; goto error; } /* if in EPP mode or writing bytes, try to transfer a sector * otherwise, just send one byte */ if (PPB_IN_EPP_MODE(ppbus) || r == (char)0xc0) len = (((blen - *count) >= VP0_SECTOR_SIZE)) ? VP0_SECTOR_SIZE : 1; else len = 1; /* ZIP wants to send data? */ if (r == (char)0xc0) error = vpoio_outstr(vpo, &buffer[*count], len); else error = vpoio_instr(vpo, &buffer[*count], len); if (error) { *ret = error; goto error; } *count += len; } if (vpoio_instr(vpo, &l, 1)) { *ret = VP0_EOTHER; goto error; } /* check if the ZIP wants to send more status */ if (vpoio_wait(vpo, VP0_FAST_SPINTMO) == (char)0xf0) if (vpoio_instr(vpo, &h, 1)) { *ret = VP0_EOTHER + 2; goto error; } *result = ((int) h << 8) | ((int) l & 0xff); error: /* return to printer state, release the ppbus */ vpoio_disconnect(vpo); return (0); } Index: stable/11/sys/dev/ppc/ppc.c =================================================================== --- stable/11/sys/dev/ppc/ppc.c (revision 331642) +++ stable/11/sys/dev/ppc/ppc.c (revision 331643) @@ -1,2055 +1,2055 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997-2000 Nicolas Souchu * Copyright (c) 2001 Alcove - Nicolas Souchu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ppc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #include #include #include #endif #include #include #include #include #include "ppbus_if.h" static void ppcintr(void *arg); #define IO_LPTSIZE_EXTENDED 8 /* "Extended" LPT controllers */ #define IO_LPTSIZE_NORMAL 4 /* "Normal" LPT controllers */ #define LOG_PPC(function, ppc, string) \ if (bootverbose) printf("%s: %s\n", function, string) #if defined(__i386__) && defined(PC98) #define PC98_IEEE_1284_DISABLE 0x100 #define PC98_IEEE_1284_PORT 0x140 #endif #define DEVTOSOFTC(dev) ((struct ppc_data *)device_get_softc(dev)) /* * We use critical enter/exit for the simple config locking needed to * detect the devices. We just want to make sure that both of our writes * happen without someone else also writing to those config registers. Since * we just do this at startup, Giant keeps multiple threads from executing, * and critical_enter() then is all that's needed to keep us from being preempted * during the critical sequences with the hardware. * * Note: this doesn't prevent multiple threads from putting the chips into * config mode, but since we only do that to detect the type at startup the * extra overhead isn't needed since Giant protects us from multiple entry * and no other code changes these registers. */ #define PPC_CONFIG_LOCK(ppc) critical_enter() #define PPC_CONFIG_UNLOCK(ppc) critical_exit() devclass_t ppc_devclass; const char ppc_driver_name[] = "ppc"; static char *ppc_models[] = { "SMC-like", "SMC FDC37C665GT", "SMC FDC37C666GT", "PC87332", "PC87306", "82091AA", "Generic", "W83877F", "W83877AF", "Winbond", "PC87334", "SMC FDC37C935", "PC87303", 0 }; /* list of available modes */ static char *ppc_avms[] = { "COMPATIBLE", "NIBBLE-only", "PS2-only", "PS2/NIBBLE", "EPP-only", "EPP/NIBBLE", "EPP/PS2", "EPP/PS2/NIBBLE", "ECP-only", "ECP/NIBBLE", "ECP/PS2", "ECP/PS2/NIBBLE", "ECP/EPP", "ECP/EPP/NIBBLE", "ECP/EPP/PS2", "ECP/EPP/PS2/NIBBLE", 0 }; /* list of current executing modes * Note that few modes do not actually exist. */ static char *ppc_modes[] = { "COMPATIBLE", "NIBBLE", "PS/2", "PS/2", "EPP", "EPP", "EPP", "EPP", "ECP", "ECP", "ECP+PS2", "ECP+PS2", "ECP+EPP", "ECP+EPP", "ECP+EPP", "ECP+EPP", 0 }; static char *ppc_epp_protocol[] = { " (EPP 1.9)", " (EPP 1.7)", 0 }; #ifdef __i386__ /* * BIOS printer list - used by BIOS probe. 
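 *
 * (Editorial note: 0x408 is the BIOS Data Area slot holding up to four
 * 16-bit LPT base I/O addresses; when no port is configured, the probe
 * below walks this table through the kernel's KERNBASE mapping.)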
*/ #define BIOS_PPC_PORTS 0x408 #define BIOS_PORTS (short *)(KERNBASE+BIOS_PPC_PORTS) #define BIOS_MAX_PPC 4 #endif /* * ppc_ecp_sync() XXX */ int ppc_ecp_sync(device_t dev) { int i, r; struct ppc_data *ppc = DEVTOSOFTC(dev); PPC_ASSERT_LOCKED(ppc); if (!(ppc->ppc_avm & PPB_ECP) && !(ppc->ppc_dtm & PPB_ECP)) return 0; r = r_ecr(ppc); if ((r & 0xe0) != PPC_ECR_EPP) return 0; for (i = 0; i < 100; i++) { r = r_ecr(ppc); if (r & 0x1) return 0; DELAY(100); } device_printf(dev, "ECP sync failed as data still present in FIFO.\n"); return 0; } /* * ppc_detect_fifo() * * Detect parallel port FIFO */ static int ppc_detect_fifo(struct ppc_data *ppc) { char ecr_sav; char ctr_sav, ctr, cc; short i; /* save registers */ ecr_sav = r_ecr(ppc); ctr_sav = r_ctr(ppc); /* enter ECP configuration mode, no interrupt, no DMA */ w_ecr(ppc, 0xf4); /* read PWord size - transfers in FIFO mode must be PWord aligned */ ppc->ppc_pword = (r_cnfgA(ppc) & PPC_PWORD_MASK); /* XXX 16 and 32 bits implementations not supported */ if (ppc->ppc_pword != PPC_PWORD_8) { LOG_PPC(__func__, ppc, "PWord not supported"); goto error; } w_ecr(ppc, 0x34); /* byte mode, no interrupt, no DMA */ ctr = r_ctr(ppc); w_ctr(ppc, ctr | PCD); /* set direction to 1 */ /* enter ECP test mode, no interrupt, no DMA */ w_ecr(ppc, 0xd4); /* flush the FIFO */ for (i=0; i<1024; i++) { if (r_ecr(ppc) & PPC_FIFO_EMPTY) break; cc = r_fifo(ppc); } if (i >= 1024) { LOG_PPC(__func__, ppc, "can't flush FIFO"); goto error; } /* enable interrupts, no DMA */ w_ecr(ppc, 0xd0); /* determine readIntrThreshold * fill the FIFO until serviceIntr is set */ for (i=0; i<1024; i++) { w_fifo(ppc, (char)i); if (!ppc->ppc_rthr && (r_ecr(ppc) & PPC_SERVICE_INTR)) { /* readThreshold reached */ ppc->ppc_rthr = i+1; } if (r_ecr(ppc) & PPC_FIFO_FULL) { ppc->ppc_fifo = i+1; break; } } if (i >= 1024) { LOG_PPC(__func__, ppc, "can't fill FIFO"); goto error; } w_ecr(ppc, 0xd4); /* test mode, no interrupt, no DMA */ w_ctr(ppc, ctr & ~PCD); /* set direction to 0 */ w_ecr(ppc, 0xd0); /* enable interrupts */ /* determine writeIntrThreshold * empty the FIFO until serviceIntr is set */ for (i=ppc->ppc_fifo; i>0; i--) { if (r_fifo(ppc) != (char)(ppc->ppc_fifo-i)) { LOG_PPC(__func__, ppc, "invalid data in FIFO"); goto error; } if (r_ecr(ppc) & PPC_SERVICE_INTR) { /* writeIntrThreshold reached */ ppc->ppc_wthr = ppc->ppc_fifo - i+1; } /* if FIFO empty before the last byte, error */ if (i>1 && (r_ecr(ppc) & PPC_FIFO_EMPTY)) { LOG_PPC(__func__, ppc, "data lost in FIFO"); goto error; } } /* FIFO must be empty after the last byte */ if (!(r_ecr(ppc) & PPC_FIFO_EMPTY)) { LOG_PPC(__func__, ppc, "can't empty the FIFO"); goto error; } w_ctr(ppc, ctr_sav); w_ecr(ppc, ecr_sav); return (0); error: w_ctr(ppc, ctr_sav); w_ecr(ppc, ecr_sav); return (EINVAL); } static int ppc_detect_port(struct ppc_data *ppc) { w_ctr(ppc, 0x0c); /* To avoid missing PS2 ports */ w_dtr(ppc, 0xaa); if (r_dtr(ppc) != 0xaa) return (0); return (1); } /* * EPP timeout, according to the PC87332 manual * Semantics of clearing EPP timeout bit. * PC87332 - reading SPP_STR does it... * SMC - write 1 to EPP timeout bit XXX * Others - (?) 
write 0 to EPP timeout bit */ static void ppc_reset_epp_timeout(struct ppc_data *ppc) { - register char r; + char r; r = r_str(ppc); w_str(ppc, r | 0x1); w_str(ppc, r & 0xfe); return; } static int ppc_check_epp_timeout(struct ppc_data *ppc) { ppc_reset_epp_timeout(ppc); return (!(r_str(ppc) & TIMEOUT)); } /* * Configure current operating mode */ static int ppc_generic_setmode(struct ppc_data *ppc, int mode) { u_char ecr = 0; /* check if mode is available */ if (mode && !(ppc->ppc_avm & mode)) return (EINVAL); /* if ECP mode, configure ecr register */ if ((ppc->ppc_avm & PPB_ECP) || (ppc->ppc_dtm & PPB_ECP)) { /* return to byte mode (keeping direction bit), * no interrupt, no DMA to be able to change to * ECP */ w_ecr(ppc, PPC_ECR_RESET); ecr = PPC_DISABLE_INTR; if (mode & PPB_EPP) return (EINVAL); else if (mode & PPB_ECP) /* select ECP mode */ ecr |= PPC_ECR_ECP; else if (mode & PPB_PS2) /* select PS2 mode with ECP */ ecr |= PPC_ECR_PS2; else /* select COMPATIBLE/NIBBLE mode */ ecr |= PPC_ECR_STD; w_ecr(ppc, ecr); } ppc->ppc_mode = mode; return (0); } /* * The ppc driver is free to choose options like FIFO or DMA * if ECP mode is available. * * The 'RAW' option allows the upper drivers to force the ppc mode * even with FIFO, DMA available. */ static int ppc_smclike_setmode(struct ppc_data *ppc, int mode) { u_char ecr = 0; /* check if mode is available */ if (mode && !(ppc->ppc_avm & mode)) return (EINVAL); /* if ECP mode, configure ecr register */ if ((ppc->ppc_avm & PPB_ECP) || (ppc->ppc_dtm & PPB_ECP)) { /* return to byte mode (keeping direction bit), * no interrupt, no DMA to be able to change to * ECP or EPP mode */ w_ecr(ppc, PPC_ECR_RESET); ecr = PPC_DISABLE_INTR; if (mode & PPB_EPP) /* select EPP mode */ ecr |= PPC_ECR_EPP; else if (mode & PPB_ECP) /* select ECP mode */ ecr |= PPC_ECR_ECP; else if (mode & PPB_PS2) /* select PS2 mode with ECP */ ecr |= PPC_ECR_PS2; else /* select COMPATIBLE/NIBBLE mode */ ecr |= PPC_ECR_STD; w_ecr(ppc, ecr); } ppc->ppc_mode = mode; return (0); } #ifdef PPC_PROBE_CHIPSET /* * ppc_pc873xx_detect * * Probe for a Natsemi PC873xx-family part. * * References in this function are to the National Semiconductor * PC87332 datasheet TL/C/11930, May 1995 revision. */ static int pc873xx_basetab[] = {0x0398, 0x026e, 0x015c, 0x002e, 0}; static int pc873xx_porttab[] = {0x0378, 0x03bc, 0x0278, 0}; static int pc873xx_irqtab[] = {5, 7, 5, 0}; static int pc873xx_regstab[] = { PC873_FER, PC873_FAR, PC873_PTR, PC873_FCR, PC873_PCR, PC873_PMC, PC873_TUP, PC873_SID, PC873_PNP0, PC873_PNP1, PC873_LPTBA, -1 }; static char *pc873xx_rnametab[] = { "FER", "FAR", "PTR", "FCR", "PCR", "PMC", "TUP", "SID", "PNP0", "PNP1", "LPTBA", NULL }; static int ppc_pc873xx_detect(struct ppc_data *ppc, int chipset_mode) /* XXX mode never forced */ { static int index = 0; int idport, irq; int ptr, pcr, val, i; while ((idport = pc873xx_basetab[index++])) { /* XXX should check first to see if this location is already claimed */ /* * Pull the 873xx through the power-on ID cycle (2.2,1.). * We can't use this to locate the chip as it may already have * been used by the BIOS. */ (void)inb(idport); (void)inb(idport); (void)inb(idport); (void)inb(idport); /* * Read the SID byte. 
Possible values are : * * 01010xxx PC87334 * 0001xxxx PC87332 * 01110xxx PC87306 * 00110xxx PC87303 */ outb(idport, PC873_SID); val = inb(idport + 1); if ((val & 0xf0) == 0x10) { ppc->ppc_model = NS_PC87332; } else if ((val & 0xf8) == 0x70) { ppc->ppc_model = NS_PC87306; } else if ((val & 0xf8) == 0x50) { ppc->ppc_model = NS_PC87334; } else if ((val & 0xf8) == 0x40) { /* Should be 0x30 by the documentation, but probing yielded 0x40... */ ppc->ppc_model = NS_PC87303; } else { if (bootverbose && (val != 0xff)) printf("PC873xx probe at 0x%x got unknown ID 0x%x\n", idport, val); continue ; /* not recognised */ } /* print registers */ if (bootverbose) { printf("PC873xx"); for (i=0; pc873xx_regstab[i] != -1; i++) { outb(idport, pc873xx_regstab[i]); printf(" %s=0x%x", pc873xx_rnametab[i], inb(idport + 1) & 0xff); } printf("\n"); } /* * We think we have one. Is it enabled and where we want it to be? */ outb(idport, PC873_FER); val = inb(idport + 1); if (!(val & PC873_PPENABLE)) { if (bootverbose) printf("PC873xx parallel port disabled\n"); continue; } outb(idport, PC873_FAR); val = inb(idport + 1); /* XXX we should create a driver instance for every port found */ if (pc873xx_porttab[val & 0x3] != ppc->ppc_base) { /* First try to change the port address to that requested... */ switch (ppc->ppc_base) { case 0x378: val &= 0xfc; break; case 0x3bc: val &= 0xfd; break; case 0x278: val &= 0xfe; break; default: val &= 0xfd; break; } outb(idport, PC873_FAR); outb(idport + 1, val); outb(idport + 1, val); /* Check for success by reading back the value we supposedly wrote and comparing...*/ outb(idport, PC873_FAR); val = inb(idport + 1) & 0x3; /* If we fail, report the failure... */ if (pc873xx_porttab[val] != ppc->ppc_base) { if (bootverbose) printf("PC873xx at 0x%x not for driver at port 0x%x\n", pc873xx_porttab[val], ppc->ppc_base); } continue; } outb(idport, PC873_PTR); ptr = inb(idport + 1); /* get irq settings */ if (ppc->ppc_base == 0x378) irq = (ptr & PC873_LPTBIRQ7) ? 
7 : 5; else irq = pc873xx_irqtab[val]; if (bootverbose) printf("PC873xx irq %d at 0x%x\n", irq, ppc->ppc_base); /* * Check if irq settings are correct */ if (irq != ppc->ppc_irq) { /* * If the chipset is not locked and base address is 0x378, * we have another chance */ if (ppc->ppc_base == 0x378 && !(ptr & PC873_CFGLOCK)) { if (ppc->ppc_irq == 7) { outb(idport + 1, (ptr | PC873_LPTBIRQ7)); outb(idport + 1, (ptr | PC873_LPTBIRQ7)); } else { outb(idport + 1, (ptr & ~PC873_LPTBIRQ7)); outb(idport + 1, (ptr & ~PC873_LPTBIRQ7)); } if (bootverbose) printf("PC873xx irq set to %d\n", ppc->ppc_irq); } else { if (bootverbose) printf("PC873xx sorry, can't change irq setting\n"); } } else { if (bootverbose) printf("PC873xx irq settings are correct\n"); } outb(idport, PC873_PCR); pcr = inb(idport + 1); if ((ptr & PC873_CFGLOCK) || !chipset_mode) { if (bootverbose) printf("PC873xx %s", (ptr & PC873_CFGLOCK)?"locked":"unlocked"); ppc->ppc_avm |= PPB_NIBBLE; if (bootverbose) printf(", NIBBLE"); if (pcr & PC873_EPPEN) { ppc->ppc_avm |= PPB_EPP; if (bootverbose) printf(", EPP"); if (pcr & PC873_EPP19) ppc->ppc_epp = EPP_1_9; else ppc->ppc_epp = EPP_1_7; if ((ppc->ppc_model == NS_PC87332) && bootverbose) { outb(idport, PC873_PTR); ptr = inb(idport + 1); if (ptr & PC873_EPPRDIR) printf(", Regular mode"); else printf(", Automatic mode"); } } else if (pcr & PC873_ECPEN) { ppc->ppc_avm |= PPB_ECP; if (bootverbose) printf(", ECP"); if (pcr & PC873_ECPCLK) { /* XXX */ ppc->ppc_avm |= PPB_PS2; if (bootverbose) printf(", PS/2"); } } else { outb(idport, PC873_PTR); ptr = inb(idport + 1); if (ptr & PC873_EXTENDED) { ppc->ppc_avm |= PPB_SPP; if (bootverbose) printf(", SPP"); } } } else { if (bootverbose) printf("PC873xx unlocked"); if (chipset_mode & PPB_ECP) { if ((chipset_mode & PPB_EPP) && bootverbose) printf(", ECP+EPP not supported"); pcr &= ~PC873_EPPEN; pcr |= (PC873_ECPEN | PC873_ECPCLK); /* XXX */ outb(idport + 1, pcr); outb(idport + 1, pcr); if (bootverbose) printf(", ECP"); } else if (chipset_mode & PPB_EPP) { pcr &= ~(PC873_ECPEN | PC873_ECPCLK); pcr |= (PC873_EPPEN | PC873_EPP19); outb(idport + 1, pcr); outb(idport + 1, pcr); ppc->ppc_epp = EPP_1_9; /* XXX */ if (bootverbose) printf(", EPP1.9"); /* enable automatic direction turnover */ if (ppc->ppc_model == NS_PC87332) { outb(idport, PC873_PTR); ptr = inb(idport + 1); ptr &= ~PC873_EPPRDIR; outb(idport + 1, ptr); outb(idport + 1, ptr); if (bootverbose) printf(", Automatic mode"); } } else { pcr &= ~(PC873_ECPEN | PC873_ECPCLK | PC873_EPPEN); outb(idport + 1, pcr); outb(idport + 1, pcr); /* configure extended bit in PTR */ outb(idport, PC873_PTR); ptr = inb(idport + 1); if (chipset_mode & PPB_PS2) { ptr |= PC873_EXTENDED; if (bootverbose) printf(", PS/2"); } else { /* default to NIBBLE mode */ ptr &= ~PC873_EXTENDED; if (bootverbose) printf(", NIBBLE"); } outb(idport + 1, ptr); outb(idport + 1, ptr); } ppc->ppc_avm = chipset_mode; } if (bootverbose) printf("\n"); ppc->ppc_type = PPC_TYPE_GENERIC; ppc_generic_setmode(ppc, chipset_mode); return(chipset_mode); } return(-1); } /* * ppc_smc37c66xgt_detect * * SMC FDC37C66xGT configuration. */ static int ppc_smc37c66xgt_detect(struct ppc_data *ppc, int chipset_mode) { int i; u_char r; int type = -1; int csr = SMC66x_CSR; /* initial value is 0x3F0 */ int port_address[] = { -1 /* disabled */ , 0x3bc, 0x378, 0x278 }; #define cio csr+1 /* config IO port is either 0x3F1 or 0x371 */ /* * Detection: enter configuration mode and read CRD register. 
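 *
 * (Editorial sketch of the handshake used below, assuming the outb()/inb()
 * accessors and the config data port cio = csr + 1:
 *
 *	outb(csr, SMC665_iCODE);	write the key twice in a row
 *	outb(csr, SMC665_iCODE);	to enter configuration mode
 *	outb(csr, 0xd);			select CRD, the device ID register
 *	id = inb(cio);			0x65 -> 665GT, 0x66 -> 666GT
 *	outb(csr, 0xaa);		writing 0xaa ends config mode
 *
 * The two key writes must not be interleaved with other config I/O,
 * hence PPC_CONFIG_LOCK().)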
*/ PPC_CONFIG_LOCK(ppc); outb(csr, SMC665_iCODE); outb(csr, SMC665_iCODE); PPC_CONFIG_UNLOCK(ppc); outb(csr, 0xd); if (inb(cio) == 0x65) { type = SMC_37C665GT; goto config; } for (i = 0; i < 2; i++) { PPC_CONFIG_LOCK(ppc); outb(csr, SMC666_iCODE); outb(csr, SMC666_iCODE); PPC_CONFIG_UNLOCK(ppc); outb(csr, 0xd); if (inb(cio) == 0x66) { type = SMC_37C666GT; break; } /* Another chance, CSR may be hard-configured to be at 0x370 */ csr = SMC666_CSR; } config: /* * If chipset not found, do not continue. */ if (type == -1) { outb(csr, 0xaa); /* end config mode */ return (-1); } /* select CR1 */ outb(csr, 0x1); /* read the port's address: bits 0 and 1 of CR1 */ r = inb(cio) & SMC_CR1_ADDR; if (port_address[(int)r] != ppc->ppc_base) { outb(csr, 0xaa); /* end config mode */ return (-1); } ppc->ppc_model = type; /* * CR1 and CR4 registers bits 3 and 0/1 for mode configuration * If SPP mode is detected, try to set ECP+EPP mode */ if (bootverbose) { outb(csr, 0x1); device_printf(ppc->ppc_dev, "SMC registers CR1=0x%x", inb(cio) & 0xff); outb(csr, 0x4); printf(" CR4=0x%x", inb(cio) & 0xff); } /* select CR1 */ outb(csr, 0x1); if (!chipset_mode) { /* autodetect mode */ /* 666GT is ~certainly~ hardwired to an extended ECP+EPP mode */ if (type == SMC_37C666GT) { ppc->ppc_avm |= PPB_ECP | PPB_EPP | PPB_SPP; if (bootverbose) printf(" configuration hardwired, supposing " \ "ECP+EPP SPP"); } else if ((inb(cio) & SMC_CR1_MODE) == 0) { /* already in extended parallel port mode, read CR4 */ outb(csr, 0x4); r = (inb(cio) & SMC_CR4_EMODE); switch (r) { case SMC_SPP: ppc->ppc_avm |= PPB_SPP; if (bootverbose) printf(" SPP"); break; case SMC_EPPSPP: ppc->ppc_avm |= PPB_EPP | PPB_SPP; if (bootverbose) printf(" EPP SPP"); break; case SMC_ECP: ppc->ppc_avm |= PPB_ECP | PPB_SPP; if (bootverbose) printf(" ECP SPP"); break; case SMC_ECPEPP: ppc->ppc_avm |= PPB_ECP | PPB_EPP | PPB_SPP; if (bootverbose) printf(" ECP+EPP SPP"); break; } } else { /* not an extended port mode */ ppc->ppc_avm |= PPB_SPP; if (bootverbose) printf(" SPP"); } } else { /* mode forced */ ppc->ppc_avm = chipset_mode; /* 666GT is ~certainly~ hardwired to an extended ECP+EPP mode */ if (type == SMC_37C666GT) goto end_detect; r = inb(cio); if ((chipset_mode & (PPB_ECP | PPB_EPP)) == 0) { /* do not use ECP when the mode is not forced to */ outb(cio, r | SMC_CR1_MODE); if (bootverbose) printf(" SPP"); } else { /* an extended mode is selected */ outb(cio, r & ~SMC_CR1_MODE); /* read CR4 register and reset mode field */ outb(csr, 0x4); r = inb(cio) & ~SMC_CR4_EMODE; if (chipset_mode & PPB_ECP) { if (chipset_mode & PPB_EPP) { outb(cio, r | SMC_ECPEPP); if (bootverbose) printf(" ECP+EPP"); } else { outb(cio, r | SMC_ECP); if (bootverbose) printf(" ECP"); } } else { /* PPB_EPP is set */ outb(cio, r | SMC_EPPSPP); if (bootverbose) printf(" EPP SPP"); } } ppc->ppc_avm = chipset_mode; } /* set FIFO threshold to 16 */ if (ppc->ppc_avm & PPB_ECP) { /* select CRA */ outb(csr, 0xa); outb(cio, 16); } end_detect: if (bootverbose) printf ("\n"); if (ppc->ppc_avm & PPB_EPP) { /* select CR4 */ outb(csr, 0x4); r = inb(cio); /* * Set the EPP protocol... 
* Low=EPP 1.9 (1284 standard) and High=EPP 1.7 */ if (ppc->ppc_epp == EPP_1_9) outb(cio, (r & ~SMC_CR4_EPPTYPE)); else outb(cio, (r | SMC_CR4_EPPTYPE)); } outb(csr, 0xaa); /* end config mode */ ppc->ppc_type = PPC_TYPE_SMCLIKE; ppc_smclike_setmode(ppc, chipset_mode); return (chipset_mode); } /* * SMC FDC37C935 configuration * Found on many Alpha machines */ static int ppc_smc37c935_detect(struct ppc_data *ppc, int chipset_mode) { int type = -1; PPC_CONFIG_LOCK(ppc); outb(SMC935_CFG, 0x55); /* enter config mode */ outb(SMC935_CFG, 0x55); PPC_CONFIG_UNLOCK(ppc); outb(SMC935_IND, SMC935_ID); /* check device id */ if (inb(SMC935_DAT) == 0x2) type = SMC_37C935; if (type == -1) { outb(SMC935_CFG, 0xaa); /* exit config mode */ return (-1); } ppc->ppc_model = type; outb(SMC935_IND, SMC935_LOGDEV); /* select parallel port, */ outb(SMC935_DAT, 3); /* which is logical device 3 */ /* set io port base */ outb(SMC935_IND, SMC935_PORTHI); outb(SMC935_DAT, (u_char)((ppc->ppc_base & 0xff00) >> 8)); outb(SMC935_IND, SMC935_PORTLO); outb(SMC935_DAT, (u_char)(ppc->ppc_base & 0xff)); if (!chipset_mode) ppc->ppc_avm = PPB_COMPATIBLE; /* default mode */ else { ppc->ppc_avm = chipset_mode; outb(SMC935_IND, SMC935_PPMODE); outb(SMC935_DAT, SMC935_CENT); /* start in compatible mode */ /* SPP + EPP or just plain SPP */ if (chipset_mode & (PPB_SPP)) { if (chipset_mode & PPB_EPP) { if (ppc->ppc_epp == EPP_1_9) { outb(SMC935_IND, SMC935_PPMODE); outb(SMC935_DAT, SMC935_EPP19SPP); } if (ppc->ppc_epp == EPP_1_7) { outb(SMC935_IND, SMC935_PPMODE); outb(SMC935_DAT, SMC935_EPP17SPP); } } else { outb(SMC935_IND, SMC935_PPMODE); outb(SMC935_DAT, SMC935_SPP); } } /* ECP + EPP or just plain ECP */ if (chipset_mode & PPB_ECP) { if (chipset_mode & PPB_EPP) { if (ppc->ppc_epp == EPP_1_9) { outb(SMC935_IND, SMC935_PPMODE); outb(SMC935_DAT, SMC935_ECPEPP19); } if (ppc->ppc_epp == EPP_1_7) { outb(SMC935_IND, SMC935_PPMODE); outb(SMC935_DAT, SMC935_ECPEPP17); } } else { outb(SMC935_IND, SMC935_PPMODE); outb(SMC935_DAT, SMC935_ECP); } } } outb(SMC935_CFG, 0xaa); /* exit config mode */ ppc->ppc_type = PPC_TYPE_SMCLIKE; ppc_smclike_setmode(ppc, chipset_mode); return (chipset_mode); } /* * Winbond W83877F stuff * * EFER: extended function enable register * EFIR: extended function index register * EFDR: extended function data register */ #define efir ((efer == 0x250) ? 0x251 : 0x3f0) #define efdr ((efer == 0x250) ? 
0x252 : 0x3f1) static int w83877f_efers[] = { 0x250, 0x3f0, 0x3f0, 0x250 }; static int w83877f_keys[] = { 0x89, 0x86, 0x87, 0x88 }; static int w83877f_keyiter[] = { 1, 2, 2, 1 }; static int w83877f_hefs[] = { WINB_HEFERE, WINB_HEFRAS, WINB_HEFERE | WINB_HEFRAS, 0 }; static int ppc_w83877f_detect(struct ppc_data *ppc, int chipset_mode) { int i, j, efer; unsigned char r, hefere, hefras; for (i = 0; i < 4; i ++) { /* first try to enable configuration registers */ efer = w83877f_efers[i]; /* write the key to the EFER */ for (j = 0; j < w83877f_keyiter[i]; j ++) outb (efer, w83877f_keys[i]); /* then check HEFERE and HEFRAS bits */ outb (efir, 0x0c); hefere = inb(efdr) & WINB_HEFERE; outb (efir, 0x16); hefras = inb(efdr) & WINB_HEFRAS; /* * HEFRAS HEFERE * 0 1 write 89h to 250h (power-on default) * 1 0 write 86h twice to 3f0h * 1 1 write 87h twice to 3f0h * 0 0 write 88h to 250h */ if ((hefere | hefras) == w83877f_hefs[i]) goto found; } return (-1); /* failed */ found: /* check base port address - read from CR23 */ outb(efir, 0x23); if (ppc->ppc_base != inb(efdr) * 4) /* 4 bytes boundaries */ return (-1); /* read CHIP ID from CR9/bits0-3 */ outb(efir, 0x9); switch (inb(efdr) & WINB_CHIPID) { case WINB_W83877F_ID: ppc->ppc_model = WINB_W83877F; break; case WINB_W83877AF_ID: ppc->ppc_model = WINB_W83877AF; break; default: ppc->ppc_model = WINB_UNKNOWN; } if (bootverbose) { /* dump of registers */ device_printf(ppc->ppc_dev, "0x%x - ", w83877f_keys[i]); for (i = 0; i <= 0xd; i ++) { outb(efir, i); printf("0x%x ", inb(efdr)); } for (i = 0x10; i <= 0x17; i ++) { outb(efir, i); printf("0x%x ", inb(efdr)); } outb(efir, 0x1e); printf("0x%x ", inb(efdr)); for (i = 0x20; i <= 0x29; i ++) { outb(efir, i); printf("0x%x ", inb(efdr)); } printf("\n"); } ppc->ppc_type = PPC_TYPE_GENERIC; if (!chipset_mode) { /* autodetect mode */ /* select CR0 */ outb(efir, 0x0); r = inb(efdr) & (WINB_PRTMODS0 | WINB_PRTMODS1); /* select CR9 */ outb(efir, 0x9); r |= (inb(efdr) & WINB_PRTMODS2); switch (r) { case WINB_W83757: if (bootverbose) device_printf(ppc->ppc_dev, "W83757 compatible mode\n"); return (-1); /* generic or SMC-like */ case WINB_EXTFDC: case WINB_EXTADP: case WINB_EXT2FDD: case WINB_JOYSTICK: if (bootverbose) device_printf(ppc->ppc_dev, "not in parallel port mode\n"); return (-1); case (WINB_PARALLEL | WINB_EPP_SPP): ppc->ppc_avm |= PPB_EPP | PPB_SPP; if (bootverbose) device_printf(ppc->ppc_dev, "EPP SPP\n"); break; case (WINB_PARALLEL | WINB_ECP): ppc->ppc_avm |= PPB_ECP | PPB_SPP; if (bootverbose) device_printf(ppc->ppc_dev, "ECP SPP\n"); break; case (WINB_PARALLEL | WINB_ECP_EPP): ppc->ppc_avm |= PPB_ECP | PPB_EPP | PPB_SPP; ppc->ppc_type = PPC_TYPE_SMCLIKE; if (bootverbose) device_printf(ppc->ppc_dev, "ECP+EPP SPP\n"); break; default: printf("%s: unknown case (0x%x)!\n", __func__, r); } } else { /* mode forced */ /* select CR9 and set PRTMODS2 bit */ outb(efir, 0x9); outb(efdr, inb(efdr) & ~WINB_PRTMODS2); /* select CR0 and reset PRTMODSx bits */ outb(efir, 0x0); outb(efdr, inb(efdr) & ~(WINB_PRTMODS0 | WINB_PRTMODS1)); if (chipset_mode & PPB_ECP) { if (chipset_mode & PPB_EPP) { outb(efdr, inb(efdr) | WINB_ECP_EPP); if (bootverbose) device_printf(ppc->ppc_dev, "ECP+EPP\n"); ppc->ppc_type = PPC_TYPE_SMCLIKE; } else { outb(efdr, inb(efdr) | WINB_ECP); if (bootverbose) device_printf(ppc->ppc_dev, "ECP\n"); } } else { /* select EPP_SPP otherwise */ outb(efdr, inb(efdr) | WINB_EPP_SPP); if (bootverbose) device_printf(ppc->ppc_dev, "EPP SPP\n"); } ppc->ppc_avm = chipset_mode; } /* exit configuration mode */ 
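/*
 * (Editorial sketch, not in the original source: once unlocked, the Winbond
 * parts use classic index/data access -- write a CR number to EFIR, then
 * transfer the value through EFDR, e.g.
 *
 *	outb(efir, 0x23);
 *	base = inb(efdr) * 4;		CR23 stores the base in 4-byte units
 *	outb(efir, 0x09);
 *	id = inb(efdr) & WINB_CHIPID;	chip ID lives in CR9 bits 0-3
 *
 * as the probe above does throughout.  Writing 0xaa to EFER, below, locks
 * the configuration registers again.)
 */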
outb(efer, 0xaa); switch (ppc->ppc_type) { case PPC_TYPE_SMCLIKE: ppc_smclike_setmode(ppc, chipset_mode); break; default: ppc_generic_setmode(ppc, chipset_mode); break; } return (chipset_mode); } #endif /* * ppc_generic_detect */ static int ppc_generic_detect(struct ppc_data *ppc, int chipset_mode) { /* default to generic */ ppc->ppc_type = PPC_TYPE_GENERIC; if (bootverbose) device_printf(ppc->ppc_dev, "SPP"); /* first, check for ECP */ w_ecr(ppc, PPC_ECR_PS2); if ((r_ecr(ppc) & 0xe0) == PPC_ECR_PS2) { ppc->ppc_dtm |= PPB_ECP | PPB_SPP; if (bootverbose) printf(" ECP "); /* search for SMC style ECP+EPP mode */ w_ecr(ppc, PPC_ECR_EPP); } /* try to reset EPP timeout bit */ if (ppc_check_epp_timeout(ppc)) { ppc->ppc_dtm |= PPB_EPP; if (ppc->ppc_dtm & PPB_ECP) { /* SMC like chipset found */ ppc->ppc_model = SMC_LIKE; ppc->ppc_type = PPC_TYPE_SMCLIKE; if (bootverbose) printf(" ECP+EPP"); } else { if (bootverbose) printf(" EPP"); } } else { /* restore to standard mode */ w_ecr(ppc, PPC_ECR_STD); } /* XXX try to detect NIBBLE and PS2 modes */ ppc->ppc_dtm |= PPB_NIBBLE; if (chipset_mode) ppc->ppc_avm = chipset_mode; else ppc->ppc_avm = ppc->ppc_dtm; if (bootverbose) printf("\n"); switch (ppc->ppc_type) { case PPC_TYPE_SMCLIKE: ppc_smclike_setmode(ppc, chipset_mode); break; default: ppc_generic_setmode(ppc, chipset_mode); break; } return (chipset_mode); } /* * ppc_detect() * * mode is the mode suggested at boot */ static int ppc_detect(struct ppc_data *ppc, int chipset_mode) { #ifdef PPC_PROBE_CHIPSET int i, mode; /* list of supported chipsets */ int (*chipset_detect[])(struct ppc_data *, int) = { ppc_pc873xx_detect, ppc_smc37c66xgt_detect, ppc_w83877f_detect, ppc_smc37c935_detect, ppc_generic_detect, NULL }; #endif /* if can't find the port and mode not forced return error */ if (!ppc_detect_port(ppc) && chipset_mode == 0) return (EIO); /* failed, port not present */ /* assume centronics compatible mode is supported */ ppc->ppc_avm = PPB_COMPATIBLE; #ifdef PPC_PROBE_CHIPSET /* we have to differenciate available chipset modes, * chipset running modes and IEEE-1284 operating modes * * after detection, the port must support running in compatible mode */ if (ppc->ppc_flags & 0x40) { if (bootverbose) printf("ppc: chipset forced to generic\n"); #endif ppc->ppc_mode = ppc_generic_detect(ppc, chipset_mode); #ifdef PPC_PROBE_CHIPSET } else { for (i=0; chipset_detect[i] != NULL; i++) { if ((mode = chipset_detect[i](ppc, chipset_mode)) != -1) { ppc->ppc_mode = mode; break; } } } #endif /* configure/detect ECP FIFO */ if ((ppc->ppc_avm & PPB_ECP) && !(ppc->ppc_flags & 0x80)) ppc_detect_fifo(ppc); return (0); } /* * ppc_exec_microseq() * * Execute a microsequence. * Microsequence mechanism is supposed to handle fast I/O operations. 
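 *
 * (Editorial sketch: callers pass the program counter by reference and get
 * it back updated when the sequence yields, e.g., with msq being any
 * initialized microsequence:
 *
 *	struct ppb_microseq *pc = &msq[0];
 *	error = ppc_exec_microseq(dev, &pc);
 *
 * pc is left on the MS_OP_PUT/MS_OP_GET/MS_OP_RET instruction that handed
 * control back, so the ppb layer can resume execution later.)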
*/
int
ppc_exec_microseq(device_t dev, struct ppb_microseq **p_msq)
{
	struct ppc_data *ppc = DEVTOSOFTC(dev);
	struct ppb_microseq *mi;
	char cc, *p;
	int i, iter, len;
	int error;

-	register int reg;
-	register char mask;
-	register int accum = 0;
-	register char *ptr = NULL;
+	int reg;
+	char mask;
+	int accum = 0;
+	char *ptr = NULL;

	struct ppb_microseq *stack = NULL;

	/* microsequence registers are equivalent to PC-like port registers */

#define r_reg(reg,ppc) (bus_read_1((ppc)->res_ioport, reg))
#define w_reg(reg, ppc, byte) (bus_write_1((ppc)->res_ioport, reg, byte))

#define INCR_PC (mi ++)		/* increment program counter */

	PPC_ASSERT_LOCKED(ppc);
	mi = *p_msq;
	for (;;) {
		switch (mi->opcode) {
		case MS_OP_RSET:
			cc = r_reg(mi->arg[0].i, ppc);
			cc &= (char)mi->arg[2].i;	/* clear mask */
			cc |= (char)mi->arg[1].i;	/* assert mask */
			w_reg(mi->arg[0].i, ppc, cc);
			INCR_PC;
			break;

		case MS_OP_RASSERT_P:
			reg = mi->arg[1].i;
			ptr = ppc->ppc_ptr;

			if ((len = mi->arg[0].i) == MS_ACCUM) {
				accum = ppc->ppc_accum;
				for (; accum; accum--)
					w_reg(reg, ppc, *ptr++);
				ppc->ppc_accum = accum;
			} else
				for (i = 0; i < len; i++)
					w_reg(reg, ppc, *ptr++);
			ppc->ppc_ptr = ptr;
			INCR_PC;
			break;

		case MS_OP_RFETCH_P:
			reg = mi->arg[1].i;
			mask = (char)mi->arg[2].i;
			ptr = ppc->ppc_ptr;

			if ((len = mi->arg[0].i) == MS_ACCUM) {
				accum = ppc->ppc_accum;
				for (; accum; accum--)
					*ptr++ = r_reg(reg, ppc) & mask;
				ppc->ppc_accum = accum;
			} else
				for (i = 0; i < len; i++)
					*ptr++ = r_reg(reg, ppc) & mask;
			ppc->ppc_ptr = ptr;
			INCR_PC;
			break;

		case MS_OP_RFETCH:
			*((char *) mi->arg[2].p) = r_reg(mi->arg[0].i, ppc) &
			    (char)mi->arg[1].i;
			INCR_PC;
			break;

		case MS_OP_RASSERT:
		case MS_OP_DELAY:

		/* let's suppose the next instr. is the same */
		prefetch:
			for (; mi->opcode == MS_OP_RASSERT; INCR_PC)
				w_reg(mi->arg[0].i, ppc, (char)mi->arg[1].i);

			if (mi->opcode == MS_OP_DELAY) {
				DELAY(mi->arg[0].i);
				INCR_PC;
				goto prefetch;
			}
			break;

		case MS_OP_ADELAY:
			if (mi->arg[0].i) {
				PPC_UNLOCK(ppc);
				pause("ppbdelay", mi->arg[0].i * (hz/1000));
				PPC_LOCK(ppc);
			}
			INCR_PC;
			break;

		case MS_OP_TRIG:
			reg = mi->arg[0].i;
			iter = mi->arg[1].i;
			p = (char *)mi->arg[2].p;

			/* XXX delay limited to 255 us */
			for (i = 0; i < iter; i++) {
				w_reg(reg, ppc, *p++);
				DELAY((unsigned char)*p++);
			}
			INCR_PC;
			break;

		case MS_OP_SET:
			ppc->ppc_accum = mi->arg[0].i;
			INCR_PC;
			break;

		case MS_OP_DBRA:
			if (--ppc->ppc_accum > 0)
				mi += mi->arg[0].i;
			INCR_PC;
			break;

		case MS_OP_BRSET:
			cc = r_str(ppc);
			if ((cc & (char)mi->arg[0].i) == (char)mi->arg[0].i)
				mi += mi->arg[1].i;
			INCR_PC;
			break;

		case MS_OP_BRCLEAR:
			cc = r_str(ppc);
			if ((cc & (char)mi->arg[0].i) == 0)
				mi += mi->arg[1].i;
			INCR_PC;
			break;

		case MS_OP_BRSTAT:
			cc = r_str(ppc);
			if ((cc & ((char)mi->arg[0].i | (char)mi->arg[1].i)) ==
			    (char)mi->arg[0].i)
				mi += mi->arg[2].i;
			INCR_PC;
			break;

		case MS_OP_C_CALL:
			/*
			 * If the C call returns !0 then end the microseq.
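			 * (Editorial note: the hook is a plain C function of
			 * the shape
			 *	int hook(void *arg, char *ptr);
			 * invoked below as mi->arg[0].f(mi->arg[1].p, ...);
			 * 'hook' and 'arg' are hypothetical names.)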
* The current state of ptr is passed to the C function */ if ((error = mi->arg[0].f(mi->arg[1].p, ppc->ppc_ptr))) return (error); INCR_PC; break; case MS_OP_PTR: ppc->ppc_ptr = (char *)mi->arg[0].p; INCR_PC; break; case MS_OP_CALL: if (stack) panic("%s: too much calls", __func__); if (mi->arg[0].p) { /* store the state of the actual * microsequence */ stack = mi; /* jump to the new microsequence */ mi = (struct ppb_microseq *)mi->arg[0].p; } else INCR_PC; break; case MS_OP_SUBRET: /* retrieve microseq and pc state before the call */ mi = stack; /* reset the stack */ stack = NULL; /* XXX return code */ INCR_PC; break; case MS_OP_PUT: case MS_OP_GET: case MS_OP_RET: /* can't return to ppb level during the execution * of a submicrosequence */ if (stack) panic("%s: can't return to ppb level", __func__); /* update pc for ppb level of execution */ *p_msq = mi; /* return to ppb level of execution */ return (0); default: panic("%s: unknown microsequence opcode 0x%x", __func__, mi->opcode); } } /* unreached */ } static void ppcintr(void *arg) { struct ppc_data *ppc = arg; u_char ctr, ecr, str; /* * If we have any child interrupt handlers registered, let * them handle this interrupt. * * XXX: If DMA is in progress should we just complete that w/o * doing this? */ PPC_LOCK(ppc); if (ppc->ppc_intr_hook != NULL && ppc->ppc_intr_hook(ppc->ppc_intr_arg) == 0) { PPC_UNLOCK(ppc); return; } str = r_str(ppc); ctr = r_ctr(ppc); ecr = r_ecr(ppc); #if defined(PPC_DEBUG) && PPC_DEBUG > 1 printf("![%x/%x/%x]", ctr, ecr, str); #endif /* don't use ecp mode with IRQENABLE set */ if (ctr & IRQENABLE) { PPC_UNLOCK(ppc); return; } /* interrupts are generated by nFault signal * only in ECP mode */ if ((str & nFAULT) && (ppc->ppc_mode & PPB_ECP)) { /* check if ppc driver has programmed the * nFault interrupt */ if (ppc->ppc_irqstat & PPC_IRQ_nFAULT) { w_ecr(ppc, ecr | PPC_nFAULT_INTR); ppc->ppc_irqstat &= ~PPC_IRQ_nFAULT; } else { /* shall be handled by underlying layers XXX */ PPC_UNLOCK(ppc); return; } } if (ppc->ppc_irqstat & PPC_IRQ_DMA) { /* disable interrupts (should be done by hardware though) */ w_ecr(ppc, ecr | PPC_SERVICE_INTR); ppc->ppc_irqstat &= ~PPC_IRQ_DMA; ecr = r_ecr(ppc); /* check if DMA completed */ if ((ppc->ppc_avm & PPB_ECP) && (ecr & PPC_ENABLE_DMA)) { #ifdef PPC_DEBUG printf("a"); #endif /* stop DMA */ w_ecr(ppc, ecr & ~PPC_ENABLE_DMA); ecr = r_ecr(ppc); if (ppc->ppc_dmastat == PPC_DMA_STARTED) { #ifdef PPC_DEBUG printf("d"); #endif ppc->ppc_dmadone(ppc); ppc->ppc_dmastat = PPC_DMA_COMPLETE; /* wakeup the waiting process */ wakeup(ppc); } } } else if (ppc->ppc_irqstat & PPC_IRQ_FIFO) { /* classic interrupt I/O */ ppc->ppc_irqstat &= ~PPC_IRQ_FIFO; } PPC_UNLOCK(ppc); return; } int ppc_read(device_t dev, char *buf, int len, int mode) { return (EINVAL); } int ppc_write(device_t dev, char *buf, int len, int how) { return (EINVAL); } int ppc_reset_epp(device_t dev) { struct ppc_data *ppc = DEVTOSOFTC(dev); PPC_ASSERT_LOCKED(ppc); ppc_reset_epp_timeout(ppc); return 0; } int ppc_setmode(device_t dev, int mode) { struct ppc_data *ppc = DEVTOSOFTC(dev); PPC_ASSERT_LOCKED(ppc); switch (ppc->ppc_type) { case PPC_TYPE_SMCLIKE: return (ppc_smclike_setmode(ppc, mode)); break; case PPC_TYPE_GENERIC: default: return (ppc_generic_setmode(ppc, mode)); break; } /* not reached */ return (ENXIO); } int ppc_probe(device_t dev, int rid) { #ifdef __i386__ static short next_bios_ppc = 0; #ifdef PC98 unsigned int pc98_ieee_mode = 0x00; unsigned int tmp; #endif #endif struct ppc_data *ppc; int error; rman_res_t port; /* * 
Allocate the ppc_data structure. */ ppc = DEVTOSOFTC(dev); bzero(ppc, sizeof(struct ppc_data)); ppc->rid_ioport = rid; /* retrieve ISA parameters */ error = bus_get_resource(dev, SYS_RES_IOPORT, rid, &port, NULL); #ifdef __i386__ /* * If port not specified, use bios list. */ if (error) { #ifdef PC98 if (next_bios_ppc == 0) { /* Use default IEEE-1284 port of NEC PC-98x1 */ port = PC98_IEEE_1284_PORT; next_bios_ppc += 1; if (bootverbose) device_printf(dev, "parallel port found at 0x%jx\n", port); } #else if ((next_bios_ppc < BIOS_MAX_PPC) && (*(BIOS_PORTS + next_bios_ppc) != 0)) { port = *(BIOS_PORTS + next_bios_ppc++); if (bootverbose) device_printf(dev, "parallel port found at 0x%jx\n", port); } else { device_printf(dev, "parallel port not found.\n"); return (ENXIO); } #endif /* PC98 */ bus_set_resource(dev, SYS_RES_IOPORT, rid, port, IO_LPTSIZE_EXTENDED); } #endif /* IO port is mandatory */ /* Try "extended" IO port range...*/ ppc->res_ioport = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &ppc->rid_ioport, IO_LPTSIZE_EXTENDED, RF_ACTIVE); if (ppc->res_ioport != 0) { if (bootverbose) device_printf(dev, "using extended I/O port range\n"); } else { /* Failed? If so, then try the "normal" IO port range... */ ppc->res_ioport = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &ppc->rid_ioport, IO_LPTSIZE_NORMAL, RF_ACTIVE); if (ppc->res_ioport != 0) { if (bootverbose) device_printf(dev, "using normal I/O port range\n"); } else { device_printf(dev, "cannot reserve I/O port range\n"); goto error; } } ppc->ppc_base = rman_get_start(ppc->res_ioport); ppc->ppc_flags = device_get_flags(dev); if (!(ppc->ppc_flags & 0x20)) { ppc->res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ppc->rid_irq, RF_SHAREABLE); ppc->res_drq = bus_alloc_resource_any(dev, SYS_RES_DRQ, &ppc->rid_drq, RF_ACTIVE); } if (ppc->res_irq) ppc->ppc_irq = rman_get_start(ppc->res_irq); if (ppc->res_drq) ppc->ppc_dmachan = rman_get_start(ppc->res_drq); ppc->ppc_dev = dev; ppc->ppc_model = GENERIC; ppc->ppc_mode = PPB_COMPATIBLE; ppc->ppc_epp = (ppc->ppc_flags & 0x10) >> 4; ppc->ppc_type = PPC_TYPE_GENERIC; #if defined(__i386__) && defined(PC98) /* * IEEE STD 1284 Function Check and Enable * for default IEEE-1284 port of NEC PC-98x1 */ if (ppc->ppc_base == PC98_IEEE_1284_PORT && !(ppc->ppc_flags & PC98_IEEE_1284_DISABLE)) { tmp = inb(ppc->ppc_base + PPC_1284_ENABLE); pc98_ieee_mode = tmp; if ((tmp & 0x10) == 0x10) { outb(ppc->ppc_base + PPC_1284_ENABLE, tmp & ~0x10); tmp = inb(ppc->ppc_base + PPC_1284_ENABLE); if ((tmp & 0x10) == 0x10) goto error; } else { outb(ppc->ppc_base + PPC_1284_ENABLE, tmp | 0x10); tmp = inb(ppc->ppc_base + PPC_1284_ENABLE); if ((tmp & 0x10) != 0x10) goto error; } outb(ppc->ppc_base + PPC_1284_ENABLE, pc98_ieee_mode | 0x10); } #endif /* * Try to detect the chipset and its mode. 
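 *
 * (Editorial summary of the device flag bits as used in this file: the low
 * nibble forces a chipset mode, 0x10 selects the EPP protocol variant,
 * 0x20 skips IRQ/DRQ allocation, 0x40 forces the generic chipset probe and
 * 0x80 disables FIFO detection.)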
*/ if (ppc_detect(ppc, ppc->ppc_flags & 0xf)) goto error; return (0); error: #if defined(__i386__) && defined(PC98) if (ppc->ppc_base == PC98_IEEE_1284_PORT && !(ppc->ppc_flags & PC98_IEEE_1284_DISABLE)) { outb(ppc->ppc_base + PPC_1284_ENABLE, pc98_ieee_mode); } #endif if (ppc->res_irq != 0) { bus_release_resource(dev, SYS_RES_IRQ, ppc->rid_irq, ppc->res_irq); } if (ppc->res_ioport != 0) { bus_release_resource(dev, SYS_RES_IOPORT, ppc->rid_ioport, ppc->res_ioport); } if (ppc->res_drq != 0) { bus_release_resource(dev, SYS_RES_DRQ, ppc->rid_drq, ppc->res_drq); } return (ENXIO); } int ppc_attach(device_t dev) { struct ppc_data *ppc = DEVTOSOFTC(dev); int error; mtx_init(&ppc->ppc_lock, device_get_nameunit(dev), "ppc", MTX_DEF); device_printf(dev, "%s chipset (%s) in %s mode%s\n", ppc_models[ppc->ppc_model], ppc_avms[ppc->ppc_avm], ppc_modes[ppc->ppc_mode], (PPB_IS_EPP(ppc->ppc_mode)) ? ppc_epp_protocol[ppc->ppc_epp] : ""); if (ppc->ppc_fifo) device_printf(dev, "FIFO with %d/%d/%d bytes threshold\n", ppc->ppc_fifo, ppc->ppc_wthr, ppc->ppc_rthr); if (ppc->res_irq) { /* default to the tty mask for registration */ /* XXX */ error = bus_setup_intr(dev, ppc->res_irq, INTR_TYPE_TTY | INTR_MPSAFE, NULL, ppcintr, ppc, &ppc->intr_cookie); if (error) { device_printf(dev, "failed to register interrupt handler: %d\n", error); mtx_destroy(&ppc->ppc_lock); return (error); } } /* add ppbus as a child of this isa to parallel bridge */ ppc->ppbus = device_add_child(dev, "ppbus", -1); /* * Probe the ppbus and attach devices found. */ device_probe_and_attach(ppc->ppbus); return (0); } int ppc_detach(device_t dev) { struct ppc_data *ppc = DEVTOSOFTC(dev); if (ppc->res_irq == 0) { return (ENXIO); } /* detach & delete all children */ device_delete_children(dev); if (ppc->res_irq != 0) { bus_teardown_intr(dev, ppc->res_irq, ppc->intr_cookie); bus_release_resource(dev, SYS_RES_IRQ, ppc->rid_irq, ppc->res_irq); } if (ppc->res_ioport != 0) { bus_release_resource(dev, SYS_RES_IOPORT, ppc->rid_ioport, ppc->res_ioport); } if (ppc->res_drq != 0) { bus_release_resource(dev, SYS_RES_DRQ, ppc->rid_drq, ppc->res_drq); } mtx_destroy(&ppc->ppc_lock); return (0); } u_char ppc_io(device_t ppcdev, int iop, u_char *addr, int cnt, u_char byte) { struct ppc_data *ppc = DEVTOSOFTC(ppcdev); PPC_ASSERT_LOCKED(ppc); switch (iop) { case PPB_OUTSB_EPP: bus_write_multi_1(ppc->res_ioport, PPC_EPP_DATA, addr, cnt); break; case PPB_OUTSW_EPP: bus_write_multi_2(ppc->res_ioport, PPC_EPP_DATA, (u_int16_t *)addr, cnt); break; case PPB_OUTSL_EPP: bus_write_multi_4(ppc->res_ioport, PPC_EPP_DATA, (u_int32_t *)addr, cnt); break; case PPB_INSB_EPP: bus_read_multi_1(ppc->res_ioport, PPC_EPP_DATA, addr, cnt); break; case PPB_INSW_EPP: bus_read_multi_2(ppc->res_ioport, PPC_EPP_DATA, (u_int16_t *)addr, cnt); break; case PPB_INSL_EPP: bus_read_multi_4(ppc->res_ioport, PPC_EPP_DATA, (u_int32_t *)addr, cnt); break; case PPB_RDTR: return (r_dtr(ppc)); case PPB_RSTR: return (r_str(ppc)); case PPB_RCTR: return (r_ctr(ppc)); case PPB_REPP_A: return (r_epp_A(ppc)); case PPB_REPP_D: return (r_epp_D(ppc)); case PPB_RECR: return (r_ecr(ppc)); case PPB_RFIFO: return (r_fifo(ppc)); case PPB_WDTR: w_dtr(ppc, byte); break; case PPB_WSTR: w_str(ppc, byte); break; case PPB_WCTR: w_ctr(ppc, byte); break; case PPB_WEPP_A: w_epp_A(ppc, byte); break; case PPB_WEPP_D: w_epp_D(ppc, byte); break; case PPB_WECR: w_ecr(ppc, byte); break; case PPB_WFIFO: w_fifo(ppc, byte); break; default: panic("%s: unknown I/O operation", __func__); break; } return (0); /* not significative */ } int 
ppc_read_ivar(device_t bus, device_t dev, int index, uintptr_t *val) { struct ppc_data *ppc = (struct ppc_data *)device_get_softc(bus); switch (index) { case PPC_IVAR_EPP_PROTO: PPC_ASSERT_LOCKED(ppc); *val = (u_long)ppc->ppc_epp; break; case PPC_IVAR_LOCK: *val = (uintptr_t)&ppc->ppc_lock; break; default: return (ENOENT); } return (0); } int ppc_write_ivar(device_t bus, device_t dev, int index, uintptr_t val) { struct ppc_data *ppc = (struct ppc_data *)device_get_softc(bus); switch (index) { case PPC_IVAR_INTR_HANDLER: PPC_ASSERT_LOCKED(ppc); if (dev != ppc->ppbus) return (EINVAL); if (val == 0) { ppc->ppc_intr_hook = NULL; break; } if (ppc->ppc_intr_hook != NULL) return (EBUSY); ppc->ppc_intr_hook = (void *)val; ppc->ppc_intr_arg = device_get_softc(dev); break; default: return (ENOENT); } return (0); } /* * We allow child devices to allocate an IRQ resource at rid 0 for their * interrupt handlers. */ struct resource * ppc_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct ppc_data *ppc = DEVTOSOFTC(bus); switch (type) { case SYS_RES_IRQ: if (*rid == 0) return (ppc->res_irq); break; } return (NULL); } int ppc_release_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { #ifdef INVARIANTS struct ppc_data *ppc = DEVTOSOFTC(bus); #endif switch (type) { case SYS_RES_IRQ: if (rid == 0) { KASSERT(r == ppc->res_irq, ("ppc child IRQ resource mismatch")); return (0); } break; } return (EINVAL); } MODULE_DEPEND(ppc, ppbus, 1, 1, 1); Index: stable/11/sys/dev/qlxgb/qla_os.c =================================================================== --- stable/11/sys/dev/qlxgb/qla_os.c (revision 331642) +++ stable/11/sys/dev/qlxgb/qla_os.c (revision 331643) @@ -1,1489 +1,1489 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011-2013 Qlogic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * File: qla_os.c * Author : David C Somayajulu, Qlogic Corporation, Aliso Viejo, CA 92656. 
*/ #include __FBSDID("$FreeBSD$"); #include "qla_os.h" #include "qla_reg.h" #include "qla_hw.h" #include "qla_def.h" #include "qla_inline.h" #include "qla_ver.h" #include "qla_glbl.h" #include "qla_dbg.h" /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif #ifndef PCI_PRODUCT_QLOGIC_ISP8020 #define PCI_PRODUCT_QLOGIC_ISP8020 0x8020 #endif #define PCI_QLOGIC_ISP8020 \ ((PCI_PRODUCT_QLOGIC_ISP8020 << 16) | PCI_VENDOR_QLOGIC) /* * static functions */ static int qla_alloc_parent_dma_tag(qla_host_t *ha); static void qla_free_parent_dma_tag(qla_host_t *ha); static int qla_alloc_xmt_bufs(qla_host_t *ha); static void qla_free_xmt_bufs(qla_host_t *ha); static int qla_alloc_rcv_bufs(qla_host_t *ha); static void qla_free_rcv_bufs(qla_host_t *ha); static void qla_init_ifnet(device_t dev, qla_host_t *ha); static int qla_sysctl_get_stats(SYSCTL_HANDLER_ARGS); static void qla_release(qla_host_t *ha); static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void qla_stop(qla_host_t *ha); static int qla_send(qla_host_t *ha, struct mbuf **m_headp); static void qla_tx_done(void *context, int pending); /* * Hooks to the Operating Systems */ static int qla_pci_probe (device_t); static int qla_pci_attach (device_t); static int qla_pci_detach (device_t); static void qla_init(void *arg); static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qla_media_change(struct ifnet *ifp); static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static device_method_t qla_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qla_pci_probe), DEVMETHOD(device_attach, qla_pci_attach), DEVMETHOD(device_detach, qla_pci_detach), { 0, 0 } }; static driver_t qla_pci_driver = { "ql", qla_pci_methods, sizeof (qla_host_t), }; static devclass_t qla80xx_devclass; DRIVER_MODULE(qla80xx, pci, qla_pci_driver, qla80xx_devclass, 0, 0); MODULE_DEPEND(qla80xx, pci, 1, 1, 1); MODULE_DEPEND(qla80xx, ether, 1, 1, 1); MALLOC_DEFINE(M_QLA8XXXBUF, "qla80xxbuf", "Buffers for qla80xx driver"); uint32_t std_replenish = 8; uint32_t jumbo_replenish = 2; uint32_t rcv_pkt_thres = 128; uint32_t rcv_pkt_thres_d = 32; uint32_t snd_pkt_thres = 16; uint32_t free_pkt_thres = (NUM_TX_DESCRIPTORS / 2); static char dev_str[64]; /* * Name: qla_pci_probe * Function: Validate the PCI device to be a QLA80XX device */ static int qla_pci_probe(device_t dev) { switch ((pci_get_device(dev) << 16) | (pci_get_vendor(dev))) { case PCI_QLOGIC_ISP8020: snprintf(dev_str, sizeof(dev_str), "%s v%d.%d.%d", "Qlogic ISP 80xx PCI CNA Adapter-Ethernet Function", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); device_set_desc(dev, dev_str); break; default: return (ENXIO); } if (bootverbose) printf("%s: %s\n ", __func__, dev_str); return (BUS_PROBE_DEFAULT); } static void qla_add_sysctls(qla_host_t *ha) { device_t dev = ha->pci_dev; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RD, (void *)ha, 0, qla_sysctl_get_stats, "I", "Statistics"); SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fw_version", CTLFLAG_RD, ha->fw_ver_str, 0, "firmware version"); dbg_level = 0; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &dbg_level, dbg_level, "Debug Level"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), 
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "std_replenish", CTLFLAG_RW, &std_replenish, std_replenish, "Threshold for Replenishing Standard Frames"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "jumbo_replenish", CTLFLAG_RW, &jumbo_replenish, jumbo_replenish, "Threshold for Replenishing Jumbo Frames"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rcv_pkt_thres", CTLFLAG_RW, &rcv_pkt_thres, rcv_pkt_thres, "Threshold for # of rcv pkts to trigger indication isr"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rcv_pkt_thres_d", CTLFLAG_RW, &rcv_pkt_thres_d, rcv_pkt_thres_d, "Threshold for # of rcv pkts to trigger indication defered"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "snd_pkt_thres", CTLFLAG_RW, &snd_pkt_thres, snd_pkt_thres, "Threshold for # of snd packets"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "free_pkt_thres", CTLFLAG_RW, &free_pkt_thres, free_pkt_thres, "Threshold for # of packets to free at a time"); return; } static void qla_watchdog(void *arg) { qla_host_t *ha = arg; qla_hw_t *hw; struct ifnet *ifp; hw = &ha->hw; ifp = ha->ifp; if (ha->flags.qla_watchdog_exit) return; if (!ha->flags.qla_watchdog_pause) { if (qla_le32_to_host(*(hw->tx_cons)) != hw->txr_comp) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); } else if ((ifp->if_snd.ifq_head != NULL) && QL_RUNNING(ifp)) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); } } ha->watchdog_ticks = ha->watchdog_ticks++ % 1000; callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); } /* * Name: qla_pci_attach * Function: attaches the device to the operating system */ static int qla_pci_attach(device_t dev) { qla_host_t *ha = NULL; uint32_t rsrc_len, i; QL_DPRINT2((dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qla_host_t)); if (pci_get_device(dev) != PCI_PRODUCT_QLOGIC_ISP8020) { device_printf(dev, "device is not ISP8020\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev); ha->pci_dev = dev; pci_enable_busmaster(dev); ha->reg_rid = PCIR_BAR(0); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map any ports\n"); goto qla_pci_attach_err; } rsrc_len = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->reg_rid); mtx_init(&ha->hw_lock, "qla80xx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->tx_lock, "qla80xx_tx_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->rx_lock, "qla80xx_rx_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->rxj_lock, "qla80xx_rxj_lock", MTX_NETWORK_LOCK, MTX_DEF); ha->flags.lock_init = 1; ha->msix_count = pci_msix_count(dev); if (ha->msix_count < qla_get_msix_count(ha)) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto qla_pci_attach_err; } QL_DPRINT2((dev, "%s: ha %p irq %p pci_func 0x%x rsrc_count 0x%08x" " msix_count 0x%x pci_reg %p\n", __func__, ha, ha->irq, ha->pci_func, rsrc_len, ha->msix_count, ha->pci_reg)); ha->msix_count = qla_get_msix_count(ha); if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msi[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qla_pci_attach_err; } TASK_INIT(&ha->tx_task, 0, 
qla_tx_done, ha); ha->tx_tq = taskqueue_create_fast("qla_txq", M_NOWAIT, taskqueue_thread_enqueue, &ha->tx_tq); taskqueue_start_threads(&ha->tx_tq, 1, PI_NET, "%s txq", device_get_nameunit(ha->pci_dev)); for (i = 0; i < ha->msix_count; i++) { ha->irq_vec[i].irq_rid = i+1; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); goto qla_pci_attach_err; } if (bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, qla_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle)) { device_printf(dev, "could not setup interrupt\n"); goto qla_pci_attach_err; } TASK_INIT(&ha->irq_vec[i].rcv_task, 0, qla_rcv,\ &ha->irq_vec[i]); ha->irq_vec[i].rcv_tq = taskqueue_create_fast("qla_rcvq", M_NOWAIT, taskqueue_thread_enqueue, &ha->irq_vec[i].rcv_tq); taskqueue_start_threads(&ha->irq_vec[i].rcv_tq, 1, PI_NET, "%s rcvq", device_get_nameunit(ha->pci_dev)); } qla_add_sysctls(ha); /* add hardware specific sysctls */ qla_hw_add_sysctls(ha); /* initialize hardware */ if (qla_init_hw(ha)) { device_printf(dev, "%s: qla_init_hw failed\n", __func__); goto qla_pci_attach_err; } device_printf(dev, "%s: firmware[%d.%d.%d.%d]\n", __func__, ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); snprintf(ha->fw_ver_str, sizeof(ha->fw_ver_str), "%d.%d.%d.%d", ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); //qla_get_hw_caps(ha); qla_read_mac_addr(ha); /* allocate parent dma tag */ if (qla_alloc_parent_dma_tag(ha)) { device_printf(dev, "%s: qla_alloc_parent_dma_tag failed\n", __func__); goto qla_pci_attach_err; } /* alloc all dma buffers */ if (qla_alloc_dma(ha)) { device_printf(dev, "%s: qla_alloc_dma failed\n", __func__); goto qla_pci_attach_err; } /* create the o.s ethernet interface */ qla_init_ifnet(dev, ha); ha->flags.qla_watchdog_active = 1; ha->flags.qla_watchdog_pause = 1; callout_init(&ha->tx_callout, 1); /* create ioctl device interface */ if (qla_make_cdev(ha)) { device_printf(dev, "%s: qla_make_cdev failed\n", __func__); goto qla_pci_attach_err; } callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); QL_DPRINT2((dev, "%s: exit 0\n", __func__)); return (0); qla_pci_attach_err: qla_release(ha); QL_DPRINT2((dev, "%s: exit ENXIO\n", __func__)); return (ENXIO); } /* * Name: qla_pci_detach * Function: Unhooks the device from the operating system */ static int qla_pci_detach(device_t dev) { qla_host_t *ha = NULL; struct ifnet *ifp; int i; QL_DPRINT2((dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } ifp = ha->ifp; QLA_LOCK(ha, __func__); qla_stop(ha); QLA_UNLOCK(ha, __func__); if (ha->tx_tq) { taskqueue_drain(ha->tx_tq, &ha->tx_task); taskqueue_free(ha->tx_tq); } for (i = 0; i < ha->msix_count; i++) { taskqueue_drain(ha->irq_vec[i].rcv_tq, &ha->irq_vec[i].rcv_task); taskqueue_free(ha->irq_vec[i].rcv_tq); } qla_release(ha); QL_DPRINT2((dev, "%s: exit\n", __func__)); return (0); } /* * SYSCTL Related Callbacks */ static int qla_sysctl_get_stats(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qla_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err) return (err); ha = (qla_host_t *)arg1; //qla_get_stats(ha); QL_DPRINT2((ha->pci_dev, "%s: called ret %d\n", __func__, ret)); return (err); } /* * Name: qla_release * Function: Releases the resources allocated for the device */ static void 
qla_release(qla_host_t *ha) { device_t dev; int i; dev = ha->pci_dev; qla_del_cdev(ha); if (ha->flags.qla_watchdog_active) ha->flags.qla_watchdog_exit = 1; callout_stop(&ha->tx_callout); qla_mdelay(__func__, 100); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); qla_free_dma(ha); qla_free_parent_dma_tag(ha); for (i = 0; i < ha->msix_count; i++) { if (ha->irq_vec[i].handle) (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); if (ha->irq_vec[i].irq) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } if (ha->msix_count) pci_release_msi(dev); if (ha->flags.lock_init) { mtx_destroy(&ha->tx_lock); mtx_destroy(&ha->rx_lock); mtx_destroy(&ha->rxj_lock); mtx_destroy(&ha->hw_lock); } if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); } /* * DMA Related Functions */ static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } QL_ASSERT((nsegs == 1), ("%s: %d segments returned!", __func__, nsegs)); *((bus_addr_t *)arg) = segs[0].ds_addr; return; } int qla_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; QL_DPRINT2((dev, "%s: enter\n", __func__)); ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { device_printf(dev, "%s: could not create dma tag\n", __func__); goto qla_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); device_printf(dev, "%s: bus_dmamem_alloc failed\n", __func__); goto qla_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qla_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto qla_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; qla_alloc_dmabuf_exit: QL_DPRINT2((dev, "%s: exit ret 0x%08x tag %p map %p b %p sz 0x%x\n", __func__, ret, (void *)dma_buf->dma_tag, (void *)dma_buf->dma_map, (void *)dma_buf->dma_b, dma_buf->size)); return ret; } void qla_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { bus_dmamap_unload(dma_buf->dma_tag, dma_buf->dma_map); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); } static int qla_alloc_parent_dma_tag(qla_host_t *ha) { int ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { device_printf(dev, "%s: could not create parent dma tag\n", __func__); return (-1); } ha->flags.parent_tag = 1; return (0); } static void 
qla_free_parent_dma_tag(qla_host_t *ha) { if (ha->flags.parent_tag) { bus_dma_tag_destroy(ha->parent_tag); ha->flags.parent_tag = 0; } } /* * Name: qla_init_ifnet * Function: Creates the Network Device Interface and Registers it with the O.S */ static void qla_init_ifnet(device_t dev, qla_host_t *ha) { struct ifnet *ifp; QL_DPRINT2((dev, "%s: enter\n", __func__)); ifp = ha->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: cannot if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = IF_Gbps(10); ifp->if_init = qla_init; ifp->if_softc = ha; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = qla_ioctl; ifp->if_start = qla_start; IFQ_SET_MAXLEN(&ifp->if_snd, qla_get_ifq_snd_maxlen(ha)); ifp->if_snd.ifq_drv_maxlen = qla_get_ifq_snd_maxlen(ha); IFQ_SET_READY(&ifp->if_snd); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ether_ifattach(ifp, qla_get_mac_addr(ha)); ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_LINKSTATE; #if defined(__FreeBSD_version) && (__FreeBSD_version < 900002) ifp->if_timer = 0; ifp->if_watchdog = NULL; #endif /* #if defined(__FreeBSD_version) && (__FreeBSD_version < 900002) */ ifp->if_capenable = ifp->if_capabilities; ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifmedia_init(&ha->media, IFM_IMASK, qla_media_change, qla_media_status); ifmedia_add(&ha->media, (IFM_ETHER | qla_get_optics(ha) | IFM_FDX), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO)); QL_DPRINT2((dev, "%s: exit\n", __func__)); return; } static void qla_init_locked(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; qla_stop(ha); if (qla_alloc_xmt_bufs(ha) != 0) return; if (qla_alloc_rcv_bufs(ha) != 0) return; if (qla_config_lro(ha)) return; bcopy(IF_LLADDR(ha->ifp), ha->hw.mac_addr, ETHER_ADDR_LEN); ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; ha->flags.stop_rcv = 0; if (qla_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ha->flags.qla_watchdog_pause = 0; } return; } static void qla_init(void *arg) { qla_host_t *ha; ha = (qla_host_t *)arg; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); QLA_LOCK(ha, __func__); qla_init_locked(ha); QLA_UNLOCK(ha, __func__); QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); } static void qla_set_multi(qla_host_t *ha, uint32_t add_multi) { uint8_t mta[Q8_MAX_NUM_MULTICAST_ADDRS * Q8_MAC_ADDR_LEN]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == Q8_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * Q8_MAC_ADDR_LEN], Q8_MAC_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); qla_hw_set_multi(ha, mta, mcnt, add_multi); return; } static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; switch (cmd) { case SIOCSIFADDR: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFADDR (0x%lx)\n", __func__, cmd)); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { QLA_LOCK(ha, __func__); qla_init_locked(ha); QLA_UNLOCK(ha, 
__func__); } QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", __func__, cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr))); arp_ifinit(ifp, ifa); if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) { qla_config_ipv4_addr(ha, (IA_SIN(ifa)->sin_addr.s_addr)); } } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFMTU (0x%lx)\n", __func__, cmd)); if (ifr->ifr_mtu > QLA_MAX_FRAME_SIZE - ETHER_HDR_LEN) { ret = EINVAL; } else { QLA_LOCK(ha, __func__); ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { ret = qla_set_max_mtu(ha, ha->max_frame_size, (ha->hw.rx_cntxt_rsp)->rx_rsp.cntxt_id); } QLA_UNLOCK(ha, __func__); if (ret) ret = EINVAL; } break; case SIOCSIFFLAGS: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFFLAGS (0x%lx)\n", __func__, cmd)); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { qla_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { qla_set_allmulti(ha); } } else { QLA_LOCK(ha, __func__); qla_init_locked(ha); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ret = qla_set_max_mtu(ha, ha->max_frame_size, (ha->hw.rx_cntxt_rsp)->rx_rsp.cntxt_id); QLA_UNLOCK(ha, __func__); } } else { QLA_LOCK(ha, __func__); if (ifp->if_drv_flags & IFF_DRV_RUNNING) qla_stop(ha); ha->if_flags = ifp->if_flags; QLA_UNLOCK(ha, __func__); } break; case SIOCADDMULTI: QL_DPRINT4((ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCADDMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qla_set_multi(ha, 1); } break; case SIOCDELMULTI: QL_DPRINT4((ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCDELMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qla_set_multi(ha, 0); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", __func__, cmd)); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFCAP (0x%lx)\n", __func__, cmd)); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) qla_init(ha); VLAN_CAPABILITIES(ifp); break; } default: QL_DPRINT4((ha->pci_dev, "%s: default (0x%lx)\n", __func__, cmd)); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qla_media_change(struct ifnet *ifp) { qla_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; qla_update_link_state(ha); if (ha->hw.flags.link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qla_get_optics(ha)); } QL_DPRINT2((ha->pci_dev, "%s: exit (%s)\n", __func__,\ (ha->hw.flags.link_up ? 
"link_up" : "link_down"))); return; } void qla_start(struct ifnet *ifp) { struct mbuf *m_head; qla_host_t *ha = (qla_host_t *)ifp->if_softc; QL_DPRINT8((ha->pci_dev, "%s: enter\n", __func__)); if (!mtx_trylock(&ha->tx_lock)) { QL_DPRINT8((ha->pci_dev, "%s: mtx_trylock(&ha->tx_lock) failed\n", __func__)); return; } if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { QL_DPRINT8((ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); QLA_TX_UNLOCK(ha); return; } if (!ha->watchdog_ticks) qla_update_link_state(ha); if (!ha->hw.flags.link_up) { QL_DPRINT8((ha->pci_dev, "%s: link down\n", __func__)); QLA_TX_UNLOCK(ha); return; } while (ifp->if_snd.ifq_head != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) { QL_DPRINT8((ha->pci_dev, "%s: m_head == NULL\n", __func__)); break; } if (qla_send(ha, &m_head)) { if (m_head == NULL) break; QL_DPRINT8((ha->pci_dev, "%s: PREPEND\n", __func__)); ifp->if_drv_flags |= IFF_DRV_OACTIVE; IF_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } QLA_TX_UNLOCK(ha); QL_DPRINT8((ha->pci_dev, "%s: exit\n", __func__)); return; } static int qla_send(qla_host_t *ha, struct mbuf **m_headp) { bus_dma_segment_t segs[QLA_MAX_SEGMENTS]; bus_dmamap_t map; int nsegs; int ret = -1; uint32_t tx_idx; struct mbuf *m_head = *m_headp; QL_DPRINT8((ha->pci_dev, "%s: enter\n", __func__)); if ((ret = bus_dmamap_create(ha->tx_tag, BUS_DMA_NOWAIT, &map))) { ha->err_tx_dmamap_create++; device_printf(ha->pci_dev, "%s: bus_dmamap_create failed[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); return (ret); } ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ret == EFBIG) { struct mbuf *m; QL_DPRINT8((ha->pci_dev, "%s: EFBIG [%d]\n", __func__, m_head->m_pkthdr.len)); m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { ha->err_tx_defrag++; m_freem(m_head); *m_headp = NULL; device_printf(ha->pci_dev, "%s: m_defrag() = NULL [%d]\n", __func__, ret); return (ENOBUFS); } m_head = m; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed0[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); bus_dmamap_destroy(ha->tx_tag, map); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } } else if (ret) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed1[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); bus_dmamap_destroy(ha->tx_tag, map); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } QL_ASSERT((nsegs != 0), ("qla_send: empty packet")); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); if (!(ret = qla_hw_send(ha, segs, nsegs, &tx_idx, m_head))) { ha->tx_buf[tx_idx].m_head = m_head; ha->tx_buf[tx_idx].map = map; } else { if (ret == EINVAL) { m_freem(m_head); *m_headp = NULL; } } QL_DPRINT8((ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qla_stop(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; dev = ha->pci_dev; ha->flags.qla_watchdog_pause = 1; qla_mdelay(__func__, 100); ha->flags.stop_rcv = 1; qla_hw_stop_rcv(ha); qla_del_hw_if(ha); qla_free_lro(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); return; } /* * Buffer Management Functions for Transmit and Receive Rings */ static int qla_alloc_xmt_bufs(qla_host_t *ha) { if (bus_dma_tag_create(NULL, /* parent */ 
1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ QLA_MAX_TSO_FRAME_SIZE, /* maxsize */ QLA_MAX_SEGMENTS, /* nsegments */ PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->tx_tag)) { device_printf(ha->pci_dev, "%s: tx_tag alloc failed\n", __func__); return (ENOMEM); } bzero((void *)ha->tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); return 0; } /* * Release mbuf after it is sent on the wire */ static void qla_clear_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb) { QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); if (txb->m_head) { bus_dmamap_unload(ha->tx_tag, txb->map); bus_dmamap_destroy(ha->tx_tag, txb->map); m_freem(txb->m_head); txb->m_head = NULL; } QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); } static void qla_free_xmt_bufs(qla_host_t *ha) { int i; for (i = 0; i < NUM_TX_DESCRIPTORS; i++) qla_clear_tx_buf(ha, &ha->tx_buf[i]); if (ha->tx_tag != NULL) { bus_dma_tag_destroy(ha->tx_tag); ha->tx_tag = NULL; } bzero((void *)ha->tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); return; } static int qla_alloc_rcv_bufs(qla_host_t *ha) { int i, j, ret = 0; qla_rx_buf_t *rxb; if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->rx_tag)) { device_printf(ha->pci_dev, "%s: rx_tag alloc failed\n", __func__); return (ENOMEM); } bzero((void *)ha->rx_buf, (sizeof(qla_rx_buf_t) * NUM_RX_DESCRIPTORS)); bzero((void *)ha->rx_jbuf, (sizeof(qla_rx_buf_t) * NUM_RX_JUMBO_DESCRIPTORS)); for (i = 0; i < MAX_SDS_RINGS; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; ha->hw.sds[i].rxjb_free = NULL; ha->hw.sds[i].rxj_free = 0; } for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_buf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d] failed\n", __func__, i); for (j = 0; j < i; j++) { bus_dmamap_destroy(ha->rx_tag, ha->rx_buf[j].map); } goto qla_alloc_rcv_bufs_failed; } } qla_init_hw_rcv_descriptors(ha, RDS_RING_INDEX_NORMAL); for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_buf[i]; rxb->handle = i; if (!(ret = qla_get_mbuf(ha, rxb, NULL, 0))) { /* * set the physical address in the corresponding * descriptor entry in the receive ring/queue for the * hba */ qla_set_hw_rcv_desc(ha, RDS_RING_INDEX_NORMAL, i, rxb->handle, rxb->paddr, (rxb->m_head)->m_pkthdr.len); } else { device_printf(ha->pci_dev, "%s: qla_get_mbuf [standard(%d)] failed\n", __func__, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qla_alloc_rcv_bufs_failed; } } for (i = 0; i < NUM_RX_JUMBO_DESCRIPTORS; i++) { rxb = &ha->rx_jbuf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d] failed\n", __func__, i); for (j = 0; j < i; j++) { bus_dmamap_destroy(ha->rx_tag, ha->rx_jbuf[j].map); } goto qla_alloc_rcv_bufs_failed; } } qla_init_hw_rcv_descriptors(ha, RDS_RING_INDEX_JUMBO); for (i = 0; i < NUM_RX_JUMBO_DESCRIPTORS; i++) { rxb = &ha->rx_jbuf[i]; rxb->handle = i; if (!(ret = qla_get_mbuf(ha, rxb, NULL, 1))) { /* * set the physical address in the corresponding * descriptor entry in the receive ring/queue for the * hba */ 
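qla_alloc_rcv_bufs() above is careful to unwind on partial failure: when the i-th bus_dmamap_create() fails, only the maps 0..i-1 that were actually created are destroyed before jumping to the common failure label. A standalone sketch of that pattern, with hypothetical alloc_one()/free_one() stubs in place of the DMA calls:

#include <stdio.h>

#define NDESC	8
#define FAIL_AT	5	/* simulate a failure on the 6th descriptor */

static int  alloc_one(int i) { return (i == FAIL_AT ? -1 : 0); }
static void free_one(int i)  { printf("free %d\n", i); }

static int
alloc_all(void)
{
	int i, j;

	for (i = 0; i < NDESC; i++) {
		if (alloc_one(i)) {
			/* Unwind only what was successfully allocated. */
			for (j = 0; j < i; j++)
				free_one(j);
			return (-1);
		}
	}
	return (0);
}

int
main(void)
{
	printf("alloc_all = %d\n", alloc_all());
	return (0);
}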
qla_set_hw_rcv_desc(ha, RDS_RING_INDEX_JUMBO, i, rxb->handle, rxb->paddr, (rxb->m_head)->m_pkthdr.len); } else { device_printf(ha->pci_dev, "%s: qla_get_mbuf [jumbo(%d)] failed\n", __func__, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qla_alloc_rcv_bufs_failed; } } return (0); qla_alloc_rcv_bufs_failed: qla_free_rcv_bufs(ha); return (ret); } static void qla_free_rcv_bufs(qla_host_t *ha) { int i; qla_rx_buf_t *rxb; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_buf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); rxb->m_head = NULL; } } for (i = 0; i < NUM_RX_JUMBO_DESCRIPTORS; i++) { rxb = &ha->rx_jbuf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); rxb->m_head = NULL; } } if (ha->rx_tag != NULL) { bus_dma_tag_destroy(ha->rx_tag); ha->rx_tag = NULL; } bzero((void *)ha->rx_buf, (sizeof(qla_rx_buf_t) * NUM_RX_DESCRIPTORS)); bzero((void *)ha->rx_jbuf, (sizeof(qla_rx_buf_t) * NUM_RX_JUMBO_DESCRIPTORS)); for (i = 0; i < MAX_SDS_RINGS; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; ha->hw.sds[i].rxjb_free = NULL; ha->hw.sds[i].rxj_free = 0; } return; } int qla_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp, uint32_t jumbo) { - register struct mbuf *mp = nmp; + struct mbuf *mp = nmp; struct ifnet *ifp; int ret = 0; uint32_t offset; QL_DPRINT2((ha->pci_dev, "%s: jumbo(0x%x) enter\n", __func__, jumbo)); ifp = ha->ifp; if (mp == NULL) { if (!jumbo) { mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mp == NULL) { ha->err_m_getcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getcl failed\n", __func__); goto exit_qla_get_mbuf; } mp->m_len = mp->m_pkthdr.len = MCLBYTES; } else { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (mp == NULL) { ha->err_m_getjcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getjcl failed\n", __func__); goto exit_qla_get_mbuf; } mp->m_len = mp->m_pkthdr.len = MJUM9BYTES; } } else { if (!jumbo) mp->m_len = mp->m_pkthdr.len = MCLBYTES; else mp->m_len = mp->m_pkthdr.len = MJUM9BYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } offset = (uint32_t)((unsigned long long)mp->m_data & 0x7ULL); if (offset) { offset = 8 - offset; m_adj(mp, offset); } /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ ret = bus_dmamap_load(ha->rx_tag, rxb->map, mtod(mp, void *), mp->m_len, qla_dmamap_callback, &rxb->paddr, BUS_DMA_NOWAIT); if (ret || !rxb->paddr) { m_free(mp); rxb->m_head = NULL; device_printf(ha->pci_dev, "%s: bus_dmamap_load failed\n", __func__); ret = -1; goto exit_qla_get_mbuf; } rxb->m_head = mp; bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_PREREAD); exit_qla_get_mbuf: QL_DPRINT2((ha->pci_dev, "%s: exit ret = 0x%08x\n", __func__, ret)); return (ret); } static void qla_tx_done(void *context, int pending) { qla_host_t *ha = context; qla_hw_tx_done(ha); qla_start(ha->ifp); } Index: stable/11/sys/dev/qlxgbe/ql_os.c =================================================================== --- stable/11/sys/dev/qlxgbe/ql_os.c (revision 331642) +++ stable/11/sys/dev/qlxgbe/ql_os.c (revision 331643) @@ -1,2299 +1,2299 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013-2016 Qlogic Corporation * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * and ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * File: ql_os.c * Author : David C Somayajulu, Qlogic Corporation, Aliso Viejo, CA 92656. */ #include __FBSDID("$FreeBSD$"); #include "ql_os.h" #include "ql_hw.h" #include "ql_def.h" #include "ql_inline.h" #include "ql_ver.h" #include "ql_glbl.h" #include "ql_dbg.h" #include /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif #ifndef PCI_PRODUCT_QLOGIC_ISP8030 #define PCI_PRODUCT_QLOGIC_ISP8030 0x8030 #endif #define PCI_QLOGIC_ISP8030 \ ((PCI_PRODUCT_QLOGIC_ISP8030 << 16) | PCI_VENDOR_QLOGIC) /* * static functions */ static int qla_alloc_parent_dma_tag(qla_host_t *ha); static void qla_free_parent_dma_tag(qla_host_t *ha); static int qla_alloc_xmt_bufs(qla_host_t *ha); static void qla_free_xmt_bufs(qla_host_t *ha); static int qla_alloc_rcv_bufs(qla_host_t *ha); static void qla_free_rcv_bufs(qla_host_t *ha); static void qla_clear_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb); static void qla_init_ifnet(device_t dev, qla_host_t *ha); static int qla_sysctl_get_link_status(SYSCTL_HANDLER_ARGS); static void qla_release(qla_host_t *ha); static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void qla_stop(qla_host_t *ha); static void qla_get_peer(qla_host_t *ha); static void qla_error_recovery(void *context, int pending); static void qla_async_event(void *context, int pending); static void qla_stats(void *context, int pending); static int qla_send(qla_host_t *ha, struct mbuf **m_headp, uint32_t txr_idx, uint32_t iscsi_pdu); /* * Hooks to the Operating Systems */ static int qla_pci_probe (device_t); static int qla_pci_attach (device_t); static int qla_pci_detach (device_t); static void qla_init(void *arg); static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qla_media_change(struct ifnet *ifp); static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static int qla_transmit(struct ifnet *ifp, struct mbuf *mp); static void qla_qflush(struct ifnet *ifp); static int qla_alloc_tx_br(qla_host_t *ha, qla_tx_fp_t *tx_fp); static void qla_free_tx_br(qla_host_t *ha, qla_tx_fp_t *tx_fp); static int qla_create_fp_taskqueues(qla_host_t *ha); static void qla_destroy_fp_taskqueues(qla_host_t *ha); static void 
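The PCI_QLOGIC_ISP8030 constant above packs the device and vendor IDs into a single 32-bit key, and qla_pci_probe() rebuilds the same key from the device under probe. A standalone model of that match, where the stub getters are hypothetical stand-ins for pci_get_device()/pci_get_vendor() and the uint32_t cast avoids the signed-shift overflow the kernel macro tolerates:

#include <stdio.h>
#include <stdint.h>

#define PCI_VENDOR_QLOGIC		0x1077
#define PCI_PRODUCT_QLOGIC_ISP8030	0x8030
#define PCI_QLOGIC_ISP8030 \
	(((uint32_t)PCI_PRODUCT_QLOGIC_ISP8030 << 16) | PCI_VENDOR_QLOGIC)

/* Hypothetical stand-ins for pci_get_device()/pci_get_vendor(). */
static uint16_t stub_get_device(void) { return (0x8030); }
static uint16_t stub_get_vendor(void) { return (0x1077); }

int
main(void)
{
	uint32_t key = ((uint32_t)stub_get_device() << 16) | stub_get_vendor();

	/* The probe succeeds only when both halves match. */
	printf("match = %d\n", key == PCI_QLOGIC_ISP8030);
	return (0);
}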
qla_drain_fp_taskqueues(qla_host_t *ha); static device_method_t qla_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qla_pci_probe), DEVMETHOD(device_attach, qla_pci_attach), DEVMETHOD(device_detach, qla_pci_detach), { 0, 0 } }; static driver_t qla_pci_driver = { "ql", qla_pci_methods, sizeof (qla_host_t), }; static devclass_t qla83xx_devclass; DRIVER_MODULE(qla83xx, pci, qla_pci_driver, qla83xx_devclass, 0, 0); MODULE_DEPEND(qla83xx, pci, 1, 1, 1); MODULE_DEPEND(qla83xx, ether, 1, 1, 1); MALLOC_DEFINE(M_QLA83XXBUF, "qla83xxbuf", "Buffers for qla83xx driver"); #define QL_STD_REPLENISH_THRES 0 #define QL_JUMBO_REPLENISH_THRES 32 static char dev_str[64]; static char ver_str[64]; /* * Name: qla_pci_probe * Function: Validate the PCI device to be a QLA80XX device */ static int qla_pci_probe(device_t dev) { switch ((pci_get_device(dev) << 16) | (pci_get_vendor(dev))) { case PCI_QLOGIC_ISP8030: snprintf(dev_str, sizeof(dev_str), "%s v%d.%d.%d", "Qlogic ISP 83xx PCI CNA Adapter-Ethernet Function", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); snprintf(ver_str, sizeof(ver_str), "v%d.%d.%d", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); device_set_desc(dev, dev_str); break; default: return (ENXIO); } if (bootverbose) printf("%s: %s\n ", __func__, dev_str); return (BUS_PROBE_DEFAULT); } static void qla_add_sysctls(qla_host_t *ha) { device_t dev = ha->pci_dev; SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "version", CTLFLAG_RD, ver_str, 0, "Driver Version"); SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fw_version", CTLFLAG_RD, ha->fw_ver_str, 0, "firmware version"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "link_status", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qla_sysctl_get_link_status, "I", "Link Status"); ha->dbg_level = 0; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &ha->dbg_level, ha->dbg_level, "Debug Level"); ha->enable_minidump = 1; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_minidump", CTLFLAG_RW, &ha->enable_minidump, ha->enable_minidump, "Minidump retrieval prior to error recovery " "is enabled only when this is set"); ha->enable_driverstate_dump = 1; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_driverstate_dump", CTLFLAG_RW, &ha->enable_driverstate_dump, ha->enable_driverstate_dump, "Driver State retrieval prior to error recovery " "is enabled only when this is set"); ha->enable_error_recovery = 1; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_error_recovery", CTLFLAG_RW, &ha->enable_error_recovery, ha->enable_error_recovery, "when set error recovery is enabled on fatal errors " "otherwise the port is turned offline"); ha->ms_delay_after_init = 1000; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ms_delay_after_init", CTLFLAG_RW, &ha->ms_delay_after_init, ha->ms_delay_after_init, "millisecond delay after hw_init"); ha->std_replenish = QL_STD_REPLENISH_THRES; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "std_replenish", CTLFLAG_RW, &ha->std_replenish, ha->std_replenish, "Threshold for Replenishing Standard 
Frames"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ipv4_lro", CTLFLAG_RD, &ha->ipv4_lro, "number of ipv4 lro completions"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ipv6_lro", CTLFLAG_RD, &ha->ipv6_lro, "number of ipv6 lro completions"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "tx_tso_frames", CTLFLAG_RD, &ha->tx_tso_frames, "number of Tx TSO Frames"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "hw_vlan_tx_frames", CTLFLAG_RD, &ha->hw_vlan_tx_frames, "number of Tx VLAN Frames"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "hw_lock_failed", CTLFLAG_RD, &ha->hw_lock_failed, "number of hw_lock failures"); return; } static void qla_watchdog(void *arg) { qla_host_t *ha = arg; qla_hw_t *hw; struct ifnet *ifp; hw = &ha->hw; ifp = ha->ifp; if (ha->qla_watchdog_exit) { ha->qla_watchdog_exited = 1; return; } ha->qla_watchdog_exited = 0; if (!ha->qla_watchdog_pause) { if (!ha->offline && (ql_hw_check_health(ha) || ha->qla_initiate_recovery || (ha->msg_from_peer == QL_PEER_MSG_RESET))) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ql_update_link_state(ha); if (ha->enable_error_recovery) { ha->qla_watchdog_paused = 1; ha->qla_watchdog_pause = 1; ha->err_inject = 0; device_printf(ha->pci_dev, "%s: taskqueue_enqueue(err_task) \n", __func__); taskqueue_enqueue(ha->err_tq, &ha->err_task); } else { if (ifp != NULL) ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ha->offline = 1; } return; } else { if (ha->qla_interface_up) { ha->watchdog_ticks++; if (ha->watchdog_ticks > 1000) ha->watchdog_ticks = 0; if (!ha->watchdog_ticks && QL_RUNNING(ifp)) { taskqueue_enqueue(ha->stats_tq, &ha->stats_task); } if (ha->async_event) { taskqueue_enqueue(ha->async_event_tq, &ha->async_event_task); } } ha->qla_watchdog_paused = 0; } } else { ha->qla_watchdog_paused = 1; } callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); } /* * Name: qla_pci_attach * Function: attaches the device to the operating system */ static int qla_pci_attach(device_t dev) { qla_host_t *ha = NULL; uint32_t rsrc_len; int i; uint32_t num_rcvq = 0; if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qla_host_t)); if (pci_get_device(dev) != PCI_PRODUCT_QLOGIC_ISP8030) { device_printf(dev, "device is not ISP8030\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev) & 0x1; ha->pci_dev = dev; pci_enable_busmaster(dev); ha->reg_rid = PCIR_BAR(0); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map any ports\n"); goto qla_pci_attach_err; } rsrc_len = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->reg_rid); mtx_init(&ha->hw_lock, "qla83xx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->sp_log_lock, "qla83xx_sp_log_lock", MTX_NETWORK_LOCK, MTX_DEF); ha->flags.lock_init = 1; qla_add_sysctls(ha); ha->hw.num_sds_rings = MAX_SDS_RINGS; ha->hw.num_rds_rings = MAX_RDS_RINGS; ha->hw.num_tx_rings = NUM_TX_RINGS; ha->reg_rid1 = PCIR_BAR(2); ha->pci_reg1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid1, RF_ACTIVE); ha->msix_count = pci_msix_count(dev); if (ha->msix_count < 1 ) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto 
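qla_watchdog() above is a self-rearming callout: each tick it either acknowledges a pause request, escalates to the error-recovery task (which re-arms the callout itself once recovery finishes), or does periodic housekeeping and re-arms via callout_reset(). A simplified userland model of that state machine; the struct fields loosely mirror the qla_host_t flags and everything else is a stub:

#include <stdio.h>

struct host {
	int exit, pause, paused, unhealthy, ticks;
};

static void
watchdog_tick(struct host *ha)
{
	if (ha->exit)
		return;			/* detach path: stop, do not re-arm */
	if (!ha->pause && ha->unhealthy) {
		ha->pause = ha->paused = 1;
		printf("enqueue error-recovery task\n");
		return;			/* recovery re-arms the callout */
	}
	if (ha->pause) {
		ha->paused = 1;		/* acknowledge the pause request */
	} else {
		ha->paused = 0;
		if (++ha->ticks > 1000)
			ha->ticks = 0;	/* periodic stats refresh point */
	}
	/* driver: callout_reset(&ha->tx_callout, ..., qla_watchdog, ha); */
}

int
main(void)
{
	struct host ha = { 0 };

	for (int i = 0; i < 3; i++)
		watchdog_tick(&ha);
	ha.unhealthy = 1;
	watchdog_tick(&ha);
	printf("paused=%d ticks=%d\n", ha.paused, ha.ticks);
	return (0);
}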
qla_pci_attach_err; } if (ha->msix_count < (ha->hw.num_sds_rings + 1)) { ha->hw.num_sds_rings = ha->msix_count - 1; } QL_DPRINT2(ha, (dev, "%s: ha %p pci_func 0x%x rsrc_count 0x%08x" " msix_count 0x%x pci_reg %p pci_reg1 %p\n", __func__, ha, ha->pci_func, rsrc_len, ha->msix_count, ha->pci_reg, ha->pci_reg1)); /* initialize hardware */ if (ql_init_hw(ha)) { device_printf(dev, "%s: ql_init_hw failed\n", __func__); goto qla_pci_attach_err; } device_printf(dev, "%s: firmware[%d.%d.%d.%d]\n", __func__, ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); snprintf(ha->fw_ver_str, sizeof(ha->fw_ver_str), "%d.%d.%d.%d", ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); if (qla_get_nic_partition(ha, NULL, &num_rcvq)) { device_printf(dev, "%s: qla_get_nic_partition failed\n", __func__); goto qla_pci_attach_err; } device_printf(dev, "%s: ha %p pci_func 0x%x rsrc_count 0x%08x" " msix_count 0x%x pci_reg %p pci_reg1 %p num_rcvq = %d\n", __func__, ha, ha->pci_func, rsrc_len, ha->msix_count, ha->pci_reg, ha->pci_reg1, num_rcvq); if ((ha->msix_count < 64) || (num_rcvq != 32)) { if (ha->hw.num_sds_rings > 15) { ha->hw.num_sds_rings = 15; } } ha->hw.num_rds_rings = ha->hw.num_sds_rings; ha->hw.num_tx_rings = ha->hw.num_sds_rings; #ifdef QL_ENABLE_ISCSI_TLV ha->hw.num_tx_rings = ha->hw.num_sds_rings * 2; #endif /* #ifdef QL_ENABLE_ISCSI_TLV */ ql_hw_add_sysctls(ha); ha->msix_count = ha->hw.num_sds_rings + 1; if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msi[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qla_pci_attach_err; } ha->mbx_irq_rid = 1; ha->mbx_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->mbx_irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->mbx_irq == NULL) { device_printf(dev, "could not allocate mbx interrupt\n"); goto qla_pci_attach_err; } if (bus_setup_intr(dev, ha->mbx_irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, ql_mbx_isr, ha, &ha->mbx_handle)) { device_printf(dev, "could not setup mbx interrupt\n"); goto qla_pci_attach_err; } for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->irq_vec[i].sds_idx = i; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq_rid = 2 + i; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); goto qla_pci_attach_err; } if (bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, ql_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle)) { device_printf(dev, "could not setup interrupt\n"); goto qla_pci_attach_err; } ha->tx_fp[i].ha = ha; ha->tx_fp[i].txr_idx = i; if (qla_alloc_tx_br(ha, &ha->tx_fp[i])) { device_printf(dev, "%s: could not allocate tx_br[%d]\n", __func__, i); goto qla_pci_attach_err; } } if (qla_create_fp_taskqueues(ha) != 0) goto qla_pci_attach_err; printf("%s: mp__ncpus %d sds %d rds %d msi-x %d\n", __func__, mp_ncpus, ha->hw.num_sds_rings, ha->hw.num_rds_rings, ha->msix_count); ql_read_mac_addr(ha); /* allocate parent dma tag */ if (qla_alloc_parent_dma_tag(ha)) { device_printf(dev, "%s: qla_alloc_parent_dma_tag failed\n", __func__); goto qla_pci_attach_err; } /* alloc all dma buffers */ if (ql_alloc_dma(ha)) { device_printf(dev, "%s: ql_alloc_dma failed\n", __func__); goto qla_pci_attach_err; } qla_get_peer(ha); if (ql_minidump_init(ha) != 0) { device_printf(dev, "%s: ql_minidump_init failed\n", __func__); goto qla_pci_attach_err; } ql_alloc_drvr_state_buffer(ha); ql_alloc_sp_log_buffer(ha); /* create the o.s 
ethernet interface */ qla_init_ifnet(dev, ha); ha->flags.qla_watchdog_active = 1; ha->qla_watchdog_pause = 0; callout_init(&ha->tx_callout, TRUE); ha->flags.qla_callout_init = 1; /* create ioctl device interface */ if (ql_make_cdev(ha)) { device_printf(dev, "%s: ql_make_cdev failed\n", __func__); goto qla_pci_attach_err; } callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); TASK_INIT(&ha->err_task, 0, qla_error_recovery, ha); ha->err_tq = taskqueue_create("qla_errq", M_NOWAIT, taskqueue_thread_enqueue, &ha->err_tq); taskqueue_start_threads(&ha->err_tq, 1, PI_NET, "%s errq", device_get_nameunit(ha->pci_dev)); TASK_INIT(&ha->async_event_task, 0, qla_async_event, ha); ha->async_event_tq = taskqueue_create("qla_asyncq", M_NOWAIT, taskqueue_thread_enqueue, &ha->async_event_tq); taskqueue_start_threads(&ha->async_event_tq, 1, PI_NET, "%s asyncq", device_get_nameunit(ha->pci_dev)); TASK_INIT(&ha->stats_task, 0, qla_stats, ha); ha->stats_tq = taskqueue_create("qla_statsq", M_NOWAIT, taskqueue_thread_enqueue, &ha->stats_tq); taskqueue_start_threads(&ha->stats_tq, 1, PI_NET, "%s taskq", device_get_nameunit(ha->pci_dev)); QL_DPRINT2(ha, (dev, "%s: exit 0\n", __func__)); return (0); qla_pci_attach_err: qla_release(ha); if (ha->flags.lock_init) { mtx_destroy(&ha->hw_lock); mtx_destroy(&ha->sp_log_lock); } QL_DPRINT2(ha, (dev, "%s: exit ENXIO\n", __func__)); return (ENXIO); } /* * Name: qla_pci_detach * Function: Unhooks the device from the operating system */ static int qla_pci_detach(device_t dev) { qla_host_t *ha = NULL; struct ifnet *ifp; if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); ifp = ha->ifp; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; QLA_LOCK(ha, __func__, -1, 0); ha->qla_detach_active = 1; qla_stop(ha); qla_release(ha); QLA_UNLOCK(ha, __func__); if (ha->flags.lock_init) { mtx_destroy(&ha->hw_lock); mtx_destroy(&ha->sp_log_lock); } QL_DPRINT2(ha, (dev, "%s: exit\n", __func__)); return (0); } /* * SYSCTL Related Callbacks */ static int qla_sysctl_get_link_status(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qla_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qla_host_t *)arg1; ql_hw_link_status(ha); } return (err); } /* * Name: qla_release * Function: Releases the resources allocated for the device */ static void qla_release(qla_host_t *ha) { device_t dev; int i; dev = ha->pci_dev; if (ha->async_event_tq) { taskqueue_drain_all(ha->async_event_tq); taskqueue_free(ha->async_event_tq); } if (ha->err_tq) { taskqueue_drain_all(ha->err_tq); taskqueue_free(ha->err_tq); } if (ha->stats_tq) { taskqueue_drain_all(ha->stats_tq); taskqueue_free(ha->stats_tq); } ql_del_cdev(ha); if (ha->flags.qla_watchdog_active) { ha->qla_watchdog_exit = 1; while (ha->qla_watchdog_exited == 0) qla_mdelay(__func__, 1); } if (ha->flags.qla_callout_init) callout_stop(&ha->tx_callout); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); ql_free_drvr_state_buffer(ha); ql_free_sp_log_buffer(ha); ql_free_dma(ha); qla_free_parent_dma_tag(ha); if (ha->mbx_handle) (void)bus_teardown_intr(dev, ha->mbx_irq, ha->mbx_handle); if (ha->mbx_irq) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->mbx_irq_rid, ha->mbx_irq); for (i = 0; i < ha->hw.num_sds_rings; i++) { if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); } if (ha->irq_vec[i].irq) { (void)bus_release_resource(dev, SYS_RES_IRQ, 
ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } qla_free_tx_br(ha, &ha->tx_fp[i]); } qla_destroy_fp_taskqueues(ha); if (ha->msix_count) pci_release_msi(dev); if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); if (ha->pci_reg1) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid1, ha->pci_reg1); return; } /* * DMA Related Functions */ static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } *((bus_addr_t *)arg) = segs[0].ds_addr; return; } int ql_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { device_printf(dev, "%s: could not create dma tag\n", __func__); goto ql_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); device_printf(dev, "%s: bus_dmamem_alloc failed\n", __func__); goto ql_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qla_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto ql_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; ql_alloc_dmabuf_exit: QL_DPRINT2(ha, (dev, "%s: exit ret 0x%08x tag %p map %p b %p sz 0x%x\n", __func__, ret, (void *)dma_buf->dma_tag, (void *)dma_buf->dma_map, (void *)dma_buf->dma_b, dma_buf->size)); return ret; } void ql_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { bus_dmamap_unload(dma_buf->dma_tag, dma_buf->dma_map); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); } static int qla_alloc_parent_dma_tag(qla_host_t *ha) { int ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { device_printf(dev, "%s: could not create parent dma tag\n", __func__); return (-1); } ha->flags.parent_tag = 1; return (0); } static void qla_free_parent_dma_tag(qla_host_t *ha) { if (ha->flags.parent_tag) { bus_dma_tag_destroy(ha->parent_tag); ha->flags.parent_tag = 0; } } /* * Name: qla_init_ifnet * Function: Creates the Network Device Interface and Registers it with the O.S */ static void qla_init_ifnet(device_t dev, qla_host_t *ha) { struct ifnet *ifp; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); ifp = ha->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: cannot if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 
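qla_release() above tears resources down in roughly the reverse of attach order and keys each step off state recorded at attach time (flags.qla_callout_init, msix_count, per-resource NULL checks), which is what makes it safe to call from a half-completed attach. A minimal model of that flag-guarded, reverse-order teardown, with hypothetical resources:

#include <stdio.h>

struct softc {
	int has_irq, has_lock, has_mem;	/* set as each step succeeds */
};

static int
attach(struct softc *sc, int fail_after)
{
	sc->has_mem = 1;
	if (fail_after < 2)
		return (-1);
	sc->has_lock = 1;
	if (fail_after < 3)
		return (-1);
	sc->has_irq = 1;
	return (0);
}

static void
release(struct softc *sc)
{
	/* Reverse of attach order; each step guarded by its flag. */
	if (sc->has_irq)  { printf("teardown irq\n");  sc->has_irq = 0; }
	if (sc->has_lock) { printf("destroy lock\n");  sc->has_lock = 0; }
	if (sc->has_mem)  { printf("release mem\n");   sc->has_mem = 0; }
}

int
main(void)
{
	struct softc sc = { 0 };

	if (attach(&sc, 2))	/* fail after two steps */
		release(&sc);	/* only completed steps are undone */
	return (0);
}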
ifp->if_baudrate = IF_Gbps(10); ifp->if_capabilities = IFCAP_LINKSTATE; ifp->if_mtu = ETHERMTU; ifp->if_init = qla_init; ifp->if_softc = ha; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = qla_ioctl; ifp->if_transmit = qla_transmit; ifp->if_qflush = qla_qflush; IFQ_SET_MAXLEN(&ifp->if_snd, qla_get_ifq_snd_maxlen(ha)); ifp->if_snd.ifq_drv_maxlen = qla_get_ifq_snd_maxlen(ha); IFQ_SET_READY(&ifp->if_snd); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ether_ifattach(ifp, qla_get_mac_addr(ha)); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_JUMBO_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTSO | IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifmedia_init(&ha->media, IFM_IMASK, qla_media_change, qla_media_status); ifmedia_add(&ha->media, (IFM_ETHER | qla_get_optics(ha) | IFM_FDX), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO)); QL_DPRINT2(ha, (dev, "%s: exit\n", __func__)); return; } static void qla_init_locked(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; ql_sp_log(ha, 14, 0, 0, 0, 0, 0, 0); qla_stop(ha); if (qla_alloc_xmt_bufs(ha) != 0) return; qla_confirm_9kb_enable(ha); if (qla_alloc_rcv_bufs(ha) != 0) return; bcopy(IF_LLADDR(ha->ifp), ha->hw.mac_addr, ETHER_ADDR_LEN); ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; ha->stop_rcv = 0; if (ql_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ha->hw_vlan_tx_frames = 0; ha->tx_tso_frames = 0; ha->qla_interface_up = 1; ql_update_link_state(ha); } else { if (ha->hw.sp_log_stop_events & Q8_SP_LOG_STOP_IF_START_FAILURE) ha->hw.sp_log_stop = -1; } ha->qla_watchdog_pause = 0; return; } static void qla_init(void *arg) { qla_host_t *ha; ha = (qla_host_t *)arg; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); if (QLA_LOCK(ha, __func__, -1, 0) != 0) return; qla_init_locked(ha); QLA_UNLOCK(ha, __func__); QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); } static int qla_set_multi(qla_host_t *ha, uint32_t add_multi) { uint8_t mta[Q8_MAX_NUM_MULTICAST_ADDRS * Q8_MAC_ADDR_LEN]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; int ret = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == Q8_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * Q8_MAC_ADDR_LEN], Q8_MAC_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, QLA_LOCK_NO_SLEEP) != 0) return (-1); ql_sp_log(ha, 12, 4, ifp->if_drv_flags, (ifp->if_drv_flags & IFF_DRV_RUNNING), add_multi, (uint32_t)mcnt, 0); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (!add_multi) { ret = qla_hw_del_all_mcast(ha); if (ret) device_printf(ha->pci_dev, "%s: qla_hw_del_all_mcast() failed\n", __func__); } if (!ret) ret = ql_hw_set_multi(ha, mta, mcnt, 1); } QLA_UNLOCK(ha, __func__); return (ret); } static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; if (ha->offline || ha->qla_initiate_recovery) return (ret); switch (cmd) { case SIOCSIFADDR: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFADDR (0x%lx)\n", __func__, cmd)); if (ifa->ifa_addr->sa_family == AF_INET) { ret = 
QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, QLA_LOCK_NO_SLEEP); if (ret) break; ifp->if_flags |= IFF_UP; ql_sp_log(ha, 8, 3, ifp->if_drv_flags, (ifp->if_drv_flags & IFF_DRV_RUNNING), ntohl(IA_SIN(ifa)->sin_addr.s_addr), 0, 0); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { qla_init_locked(ha); } QLA_UNLOCK(ha, __func__); QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", __func__, cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr))); arp_ifinit(ifp, ifa); } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFMTU (0x%lx)\n", __func__, cmd)); if (ifr->ifr_mtu > QLA_MAX_MTU) { ret = EINVAL; } else { ret = QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, QLA_LOCK_NO_SLEEP); if (ret) break; ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ql_sp_log(ha, 9, 4, ifp->if_drv_flags, (ifp->if_drv_flags & IFF_DRV_RUNNING), ha->max_frame_size, ifp->if_mtu, 0); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qla_init_locked(ha); } if (ifp->if_mtu > ETHERMTU) ha->std_replenish = QL_JUMBO_REPLENISH_THRES; else ha->std_replenish = QL_STD_REPLENISH_THRES; QLA_UNLOCK(ha, __func__); } break; case SIOCSIFFLAGS: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFFLAGS (0x%lx)\n", __func__, cmd)); ret = QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, QLA_LOCK_NO_SLEEP); if (ret) break; ql_sp_log(ha, 10, 4, ifp->if_drv_flags, (ifp->if_drv_flags & IFF_DRV_RUNNING), ha->if_flags, ifp->if_flags, 0); if (ifp->if_flags & IFF_UP) { ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; qla_init_locked(ha); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { ret = ql_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { ret = ql_set_allmulti(ha); } } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) qla_stop(ha); ha->if_flags = ifp->if_flags; } QLA_UNLOCK(ha, __func__); break; case SIOCADDMULTI: QL_DPRINT4(ha, (ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCADDMULTI", cmd)); if (qla_set_multi(ha, 1)) ret = EINVAL; break; case SIOCDELMULTI: QL_DPRINT4(ha, (ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCDELMULTI", cmd)); if (qla_set_multi(ha, 0)) ret = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", __func__, cmd)); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFCAP (0x%lx)\n", __func__, cmd)); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ret = QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, QLA_LOCK_NO_SLEEP); if (ret) break; ql_sp_log(ha, 11, 4, ifp->if_drv_flags, (ifp->if_drv_flags & IFF_DRV_RUNNING), mask, ifp->if_capenable, 0); qla_init_locked(ha); QLA_UNLOCK(ha, __func__); } VLAN_CAPABILITIES(ifp); break; } default: QL_DPRINT4(ha, (ha->pci_dev, "%s: default (0x%lx)\n", __func__, cmd)); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qla_media_change(struct ifnet *ifp) { qla_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qla_host_t *)ifp->if_softc; 
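The SIOCSIFCAP handler above computes mask = ifr_reqcap ^ if_capenable and toggles only the capability bits that actually changed and that the driver knows how to honor. The same logic standalone, with a few hypothetical capability bits in place of the IFCAP_* flags:

#include <stdio.h>
#include <stdint.h>

#define CAP_HWCSUM	0x01
#define CAP_TSO4	0x02
#define CAP_LRO		0x04

static uint32_t
update_caps(uint32_t capenable, uint32_t reqcap)
{
	uint32_t mask = reqcap ^ capenable;	/* bits the caller changed */

	if (mask & CAP_HWCSUM)
		capenable ^= CAP_HWCSUM;
	if (mask & CAP_TSO4)
		capenable ^= CAP_TSO4;
	if (mask & CAP_LRO)
		capenable ^= CAP_LRO;
	return (capenable);
}

int
main(void)
{
	/* Start with csum+tso enabled; request tso off, lro on. */
	uint32_t cur = CAP_HWCSUM | CAP_TSO4;
	uint32_t req = CAP_HWCSUM | CAP_LRO;

	printf("capenable = 0x%x\n", update_caps(cur, req));	/* 0x5 */
	return (0);
}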
QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; ql_update_link_state(ha); if (ha->hw.link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qla_get_optics(ha)); } QL_DPRINT2(ha, (ha->pci_dev, "%s: exit (%s)\n", __func__,\ (ha->hw.link_up ? "link_up" : "link_down"))); return; } static int qla_send(qla_host_t *ha, struct mbuf **m_headp, uint32_t txr_idx, uint32_t iscsi_pdu) { bus_dma_segment_t segs[QLA_MAX_SEGMENTS]; bus_dmamap_t map; int nsegs; int ret = -1; uint32_t tx_idx; struct mbuf *m_head = *m_headp; QL_DPRINT8(ha, (ha->pci_dev, "%s: enter\n", __func__)); tx_idx = ha->hw.tx_cntxt[txr_idx].txr_next; if (NULL != ha->tx_ring[txr_idx].tx_buf[tx_idx].m_head) { QL_ASSERT(ha, 0, ("%s [%d]: txr_idx = %d tx_idx = %d "\ "mbuf = %p\n", __func__, __LINE__, txr_idx, tx_idx,\ ha->tx_ring[txr_idx].tx_buf[tx_idx].m_head)); if (m_head) m_freem(m_head); *m_headp = NULL; return (ret); } map = ha->tx_ring[txr_idx].tx_buf[tx_idx].map; ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ret == EFBIG) { struct mbuf *m; QL_DPRINT8(ha, (ha->pci_dev, "%s: EFBIG [%d]\n", __func__, m_head->m_pkthdr.len)); m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { ha->err_tx_defrag++; m_freem(m_head); *m_headp = NULL; device_printf(ha->pci_dev, "%s: m_defrag() = NULL [%d]\n", __func__, ret); return (ENOBUFS); } m_head = m; *m_headp = m_head; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed0[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } } else if (ret) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed1[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } QL_ASSERT(ha, (nsegs != 0), ("qla_send: empty packet")); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); if (!(ret = ql_hw_send(ha, segs, nsegs, tx_idx, m_head, txr_idx, iscsi_pdu))) { ha->tx_ring[txr_idx].count++; if (iscsi_pdu) ha->tx_ring[txr_idx].iscsi_pkt_count++; ha->tx_ring[txr_idx].tx_buf[tx_idx].m_head = m_head; } else { bus_dmamap_unload(ha->tx_tag, map); if (ret == EINVAL) { if (m_head) m_freem(m_head); *m_headp = NULL; } } QL_DPRINT8(ha, (ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static int qla_alloc_tx_br(qla_host_t *ha, qla_tx_fp_t *fp) { snprintf(fp->tx_mtx_name, sizeof(fp->tx_mtx_name), "qla%d_fp%d_tx_mq_lock", ha->pci_func, fp->txr_idx); mtx_init(&fp->tx_mtx, fp->tx_mtx_name, NULL, MTX_DEF); fp->tx_br = buf_ring_alloc(NUM_TX_DESCRIPTORS, M_DEVBUF, M_NOWAIT, &fp->tx_mtx); if (fp->tx_br == NULL) { QL_DPRINT1(ha, (ha->pci_dev, "buf_ring_alloc failed for " " fp[%d, %d]\n", ha->pci_func, fp->txr_idx)); return (-ENOMEM); } return 0; } static void qla_free_tx_br(qla_host_t *ha, qla_tx_fp_t *fp) { struct mbuf *mp; struct ifnet *ifp = ha->ifp; if (mtx_initialized(&fp->tx_mtx)) { if (fp->tx_br != NULL) { mtx_lock(&fp->tx_mtx); while ((mp = drbr_dequeue(ifp, fp->tx_br)) != NULL) { 
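qla_send() above handles EFBIG from bus_dmamap_load_mbuf_sg() by defragmenting the chain into fewer clusters and retrying the load exactly once; a second failure frees the mbuf unless the error is ENOMEM, a transient condition the caller may retry. A userland model of that control flow, where the mbuf struct and the two stubs are hypothetical stand-ins for the kernel calls:

#include <stdio.h>
#include <errno.h>

struct mbuf { int nfrags; };

/* Stub: the "load" succeeds only if the chain fits the segment limit. */
static int
stub_load_sg(struct mbuf *m, int max_segs)
{
	return (m->nfrags > max_segs ? EFBIG : 0);
}

/* Stub defrag: coalesces the chain into a single cluster. */
static struct mbuf *
stub_defrag(struct mbuf *m)
{
	m->nfrags = 1;
	return (m);
}

static int
send_model(struct mbuf **m_headp, int max_segs)
{
	struct mbuf *m_head = *m_headp;
	int ret;

	ret = stub_load_sg(m_head, max_segs);
	if (ret == EFBIG) {
		struct mbuf *m = stub_defrag(m_head);

		if (m == NULL) {
			/* driver: m_freem(m_head), bump err_tx_defrag */
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m;
		*m_headp = m_head;
		ret = stub_load_sg(m_head, max_segs);	/* one retry only */
	}
	return (ret);
}

int
main(void)
{
	struct mbuf m = { 40 };
	struct mbuf *mp = &m;

	/* 40 fragments against a 32-segment limit forces the defrag path. */
	printf("ret = %d\n", send_model(&mp, 32));
	return (0);
}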
m_freem(mp); } mtx_unlock(&fp->tx_mtx); buf_ring_free(fp->tx_br, M_DEVBUF); fp->tx_br = NULL; } mtx_destroy(&fp->tx_mtx); } return; } static void qla_fp_taskqueue(void *context, int pending) { qla_tx_fp_t *fp; qla_host_t *ha; struct ifnet *ifp; struct mbuf *mp; int ret; uint32_t txr_idx; uint32_t iscsi_pdu = 0; uint32_t rx_pkts_left = -1; fp = context; if (fp == NULL) return; ha = (qla_host_t *)fp->ha; ifp = ha->ifp; txr_idx = fp->txr_idx; mtx_lock(&fp->tx_mtx); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING) || (!ha->hw.link_up)) { mtx_unlock(&fp->tx_mtx); goto qla_fp_taskqueue_exit; } while (rx_pkts_left && !ha->stop_rcv && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { rx_pkts_left = ql_rcv_isr(ha, fp->txr_idx, 64); #ifdef QL_ENABLE_ISCSI_TLV ql_hw_tx_done_locked(ha, fp->txr_idx); ql_hw_tx_done_locked(ha, (fp->txr_idx + (ha->hw.num_tx_rings >> 1))); #else ql_hw_tx_done_locked(ha, fp->txr_idx); #endif /* #ifdef QL_ENABLE_ISCSI_TLV */ mp = drbr_peek(ifp, fp->tx_br); while (mp != NULL) { if (M_HASHTYPE_GET(mp) != M_HASHTYPE_NONE) { #ifdef QL_ENABLE_ISCSI_TLV if (ql_iscsi_pdu(ha, mp) == 0) { txr_idx = txr_idx + (ha->hw.num_tx_rings >> 1); iscsi_pdu = 1; } else { iscsi_pdu = 0; txr_idx = fp->txr_idx; } #endif /* #ifdef QL_ENABLE_ISCSI_TLV */ } ret = qla_send(ha, &mp, txr_idx, iscsi_pdu); if (ret) { if (mp != NULL) drbr_putback(ifp, fp->tx_br, mp); else { drbr_advance(ifp, fp->tx_br); } mtx_unlock(&fp->tx_mtx); goto qla_fp_taskqueue_exit0; } else { drbr_advance(ifp, fp->tx_br); } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, mp); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; mp = drbr_peek(ifp, fp->tx_br); } } mtx_unlock(&fp->tx_mtx); qla_fp_taskqueue_exit0: if (rx_pkts_left || ((mp != NULL) && ret)) { taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); } else { if (!ha->stop_rcv) { QL_ENABLE_INTERRUPTS(ha, fp->txr_idx); } } qla_fp_taskqueue_exit: QL_DPRINT2(ha, (ha->pci_dev, "%s: exit ret = %d\n", __func__, ret)); return; } static int qla_create_fp_taskqueues(qla_host_t *ha) { int i; uint8_t tq_name[32]; for (i = 0; i < ha->hw.num_sds_rings; i++) { qla_tx_fp_t *fp = &ha->tx_fp[i]; bzero(tq_name, sizeof (tq_name)); snprintf(tq_name, sizeof (tq_name), "ql_fp_tq_%d", i); TASK_INIT(&fp->fp_task, 0, qla_fp_taskqueue, fp); fp->fp_taskqueue = taskqueue_create_fast(tq_name, M_NOWAIT, taskqueue_thread_enqueue, &fp->fp_taskqueue); if (fp->fp_taskqueue == NULL) return (-1); taskqueue_start_threads(&fp->fp_taskqueue, 1, PI_NET, "%s", tq_name); QL_DPRINT1(ha, (ha->pci_dev, "%s: %p\n", __func__, fp->fp_taskqueue)); } return (0); } static void qla_destroy_fp_taskqueues(qla_host_t *ha) { int i; for (i = 0; i < ha->hw.num_sds_rings; i++) { qla_tx_fp_t *fp = &ha->tx_fp[i]; if (fp->fp_taskqueue != NULL) { taskqueue_drain_all(fp->fp_taskqueue); taskqueue_free(fp->fp_taskqueue); fp->fp_taskqueue = NULL; } } return; } static void qla_drain_fp_taskqueues(qla_host_t *ha) { int i; for (i = 0; i < ha->hw.num_sds_rings; i++) { qla_tx_fp_t *fp = &ha->tx_fp[i]; if (fp->fp_taskqueue != NULL) { taskqueue_drain_all(fp->fp_taskqueue); } } return; } static int qla_transmit(struct ifnet *ifp, struct mbuf *mp) { qla_host_t *ha = (qla_host_t *)ifp->if_softc; qla_tx_fp_t *fp; int rss_id = 0; int ret = 0; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); #if __FreeBSD_version >= 1100000 if (M_HASHTYPE_GET(mp) != M_HASHTYPE_NONE) #else if (mp->m_flags & M_FLOWID) #endif rss_id = (mp->m_pkthdr.flowid & Q8_RSS_IND_TBL_MAX_IDX) % ha->hw.num_sds_rings; fp = &ha->tx_fp[rss_id]; if (fp->tx_br == NULL) { ret = 
EINVAL; goto qla_transmit_exit; } if (mp != NULL) { ret = drbr_enqueue(ifp, fp->tx_br, mp); } if (fp->fp_taskqueue != NULL) taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); ret = 0; qla_transmit_exit: QL_DPRINT2(ha, (ha->pci_dev, "%s: exit ret = %d\n", __func__, ret)); return ret; } static void qla_qflush(struct ifnet *ifp) { int i; qla_tx_fp_t *fp; struct mbuf *mp; qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); for (i = 0; i < ha->hw.num_sds_rings; i++) { fp = &ha->tx_fp[i]; if (fp == NULL) continue; if (fp->tx_br) { mtx_lock(&fp->tx_mtx); while ((mp = drbr_dequeue(ifp, fp->tx_br)) != NULL) { m_freem(mp); } mtx_unlock(&fp->tx_mtx); } } QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); return; } static void qla_stop(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; int i = 0; ql_sp_log(ha, 13, 0, 0, 0, 0, 0, 0); dev = ha->pci_dev; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ha->qla_watchdog_pause = 1; for (i = 0; i < ha->hw.num_sds_rings; i++) { qla_tx_fp_t *fp; fp = &ha->tx_fp[i]; if (fp == NULL) continue; if (fp->tx_br != NULL) { mtx_lock(&fp->tx_mtx); mtx_unlock(&fp->tx_mtx); } } while (!ha->qla_watchdog_paused) qla_mdelay(__func__, 1); ha->qla_interface_up = 0; qla_drain_fp_taskqueues(ha); ql_del_hw_if(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); return; } /* * Buffer Management Functions for Transmit and Receive Rings */ static int qla_alloc_xmt_bufs(qla_host_t *ha) { int ret = 0; uint32_t i, j; qla_tx_buf_t *txb; if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ QLA_MAX_TSO_FRAME_SIZE, /* maxsize */ QLA_MAX_SEGMENTS, /* nsegments */ PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->tx_tag)) { device_printf(ha->pci_dev, "%s: tx_tag alloc failed\n", __func__); return (ENOMEM); } for (i = 0; i < ha->hw.num_tx_rings; i++) { bzero((void *)ha->tx_ring[i].tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); } for (j = 0; j < ha->hw.num_tx_rings; j++) { for (i = 0; i < NUM_TX_DESCRIPTORS; i++) { txb = &ha->tx_ring[j].tx_buf[i]; if ((ret = bus_dmamap_create(ha->tx_tag, BUS_DMA_NOWAIT, &txb->map))) { ha->err_tx_dmamap_create++; device_printf(ha->pci_dev, "%s: bus_dmamap_create failed[%d]\n", __func__, ret); qla_free_xmt_bufs(ha); return (ret); } } } return 0; } /* * Release mbuf after it is sent on the wire */ static void qla_clear_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb) { QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); if (txb->m_head) { bus_dmamap_sync(ha->tx_tag, txb->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ha->tx_tag, txb->map); m_freem(txb->m_head); txb->m_head = NULL; bus_dmamap_destroy(ha->tx_tag, txb->map); txb->map = NULL; } if (txb->map) { bus_dmamap_unload(ha->tx_tag, txb->map); bus_dmamap_destroy(ha->tx_tag, txb->map); txb->map = NULL; } QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); } static void qla_free_xmt_bufs(qla_host_t *ha) { int i, j; for (j = 0; j < ha->hw.num_tx_rings; j++) { for (i = 0; i < NUM_TX_DESCRIPTORS; i++) qla_clear_tx_buf(ha, &ha->tx_ring[j].tx_buf[i]); } if (ha->tx_tag != NULL) { bus_dma_tag_destroy(ha->tx_tag); ha->tx_tag = NULL; } for (i = 0; i < ha->hw.num_tx_rings; i++) { bzero((void *)ha->tx_ring[i].tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); } return; } static int qla_alloc_rcv_std(qla_host_t *ha) { int i, j, k, r, ret = 0; qla_rx_buf_t *rxb; qla_rx_ring_t *rx_ring; 
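qla_transmit() above hashes the mbuf's RSS flow id to a transmit ring (the flowid masked by Q8_RSS_IND_TBL_MAX_IDX, then taken modulo the ring count), enqueues to that ring's buf_ring, and kicks the per-ring taskqueue to do the actual send. A userland model of the selection and hand-off, where the mask value is an assumption and a plain array stands in for the buf_ring:

#include <stdio.h>
#include <stdint.h>

#define RSS_IND_TBL_MAX_IDX	0x7f	/* assumed mask, for illustration */
#define NUM_RINGS		4
#define RING_DEPTH		8

struct ring { int q[RING_DEPTH]; int n; };

static int
pick_ring(uint32_t flowid)
{
	return ((flowid & RSS_IND_TBL_MAX_IDX) % NUM_RINGS);
}

static int
transmit(struct ring *rings, uint32_t flowid, int pkt)
{
	struct ring *r = &rings[pick_ring(flowid)];

	if (r->n == RING_DEPTH)
		return (-1);	/* queue full: caller drops or backs off */
	r->q[r->n++] = pkt;	/* the per-ring taskqueue drains this */
	return (0);
}

int
main(void)
{
	static struct ring rings[NUM_RINGS];	/* zeroed static storage */

	transmit(rings, 0x1234, 1);	/* 0x34 % 4 == ring 0 */
	transmit(rings, 0x1235, 2);	/* ring 1 */
	for (int i = 0; i < NUM_RINGS; i++)
		printf("ring %d: %d queued\n", i, rings[i].n);
	return (0);
}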
for (r = 0; r < ha->hw.num_rds_rings; r++) { rx_ring = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rx_ring->rx_buf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d, %d] failed\n", __func__, r, i); for (k = 0; k < r; k++) { for (j = 0; j < NUM_RX_DESCRIPTORS; j++) { rxb = &ha->rx_ring[k].rx_buf[j]; bus_dmamap_destroy(ha->rx_tag, rxb->map); } } for (j = 0; j < i; j++) { bus_dmamap_destroy(ha->rx_tag, rx_ring->rx_buf[j].map); } goto qla_alloc_rcv_std_err; } } } qla_init_hw_rcv_descriptors(ha); for (r = 0; r < ha->hw.num_rds_rings; r++) { rx_ring = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rx_ring->rx_buf[i]; rxb->handle = i; if (!(ret = ql_get_mbuf(ha, rxb, NULL))) { /* * set the physical address in the * corresponding descriptor entry in the * receive ring/queue for the hba */ qla_set_hw_rcv_desc(ha, r, i, rxb->handle, rxb->paddr, (rxb->m_head)->m_pkthdr.len); } else { device_printf(ha->pci_dev, "%s: ql_get_mbuf [%d, %d] failed\n", __func__, r, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qla_alloc_rcv_std_err; } } } return 0; qla_alloc_rcv_std_err: return (-1); } static void qla_free_rcv_std(qla_host_t *ha) { int i, r; qla_rx_buf_t *rxb; for (r = 0; r < ha->hw.num_rds_rings; r++) { for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_ring[r].rx_buf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); rxb->m_head = NULL; } } } return; } static int qla_alloc_rcv_bufs(qla_host_t *ha) { int i, ret = 0; if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->rx_tag)) { device_printf(ha->pci_dev, "%s: rx_tag alloc failed\n", __func__); return (ENOMEM); } bzero((void *)ha->rx_ring, (sizeof(qla_rx_ring_t) * MAX_RDS_RINGS)); for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; } ret = qla_alloc_rcv_std(ha); return (ret); } static void qla_free_rcv_bufs(qla_host_t *ha) { int i; qla_free_rcv_std(ha); if (ha->rx_tag != NULL) { bus_dma_tag_destroy(ha->rx_tag); ha->rx_tag = NULL; } bzero((void *)ha->rx_ring, (sizeof(qla_rx_ring_t) * MAX_RDS_RINGS)); for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; } return; } int ql_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp) { - register struct mbuf *mp = nmp; + struct mbuf *mp = nmp; struct ifnet *ifp; int ret = 0; uint32_t offset; bus_dma_segment_t segs[1]; int nsegs, mbuf_size; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifp = ha->ifp; if (ha->hw.enable_9kb) mbuf_size = MJUM9BYTES; else mbuf_size = MCLBYTES; if (mp == NULL) { if (QL_ERR_INJECT(ha, INJCT_M_GETCL_M_GETJCL_FAILURE)) return(-1); if (ha->hw.enable_9kb) mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, mbuf_size); else mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mp == NULL) { ha->err_m_getcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getcl failed\n", __func__); goto exit_ql_get_mbuf; } mp->m_len = mp->m_pkthdr.len = mbuf_size; } else { mp->m_len = mp->m_pkthdr.len = mbuf_size; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } offset 
= (uint32_t)((unsigned long long)mp->m_data & 0x7ULL); if (offset) { offset = 8 - offset; m_adj(mp, offset); } /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, rxb->map, mp, segs, &nsegs, BUS_DMA_NOWAIT); rxb->paddr = segs[0].ds_addr; if (ret || !rxb->paddr || (nsegs != 1)) { m_free(mp); rxb->m_head = NULL; device_printf(ha->pci_dev, "%s: bus_dmamap_load failed[%d, 0x%016llx, %d]\n", __func__, ret, (long long unsigned int)rxb->paddr, nsegs); ret = -1; goto exit_ql_get_mbuf; } rxb->m_head = mp; bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_PREREAD); exit_ql_get_mbuf: QL_DPRINT2(ha, (ha->pci_dev, "%s: exit ret = 0x%08x\n", __func__, ret)); return (ret); } static void qla_get_peer(qla_host_t *ha) { device_t *peers; int count, i, slot; int my_slot = pci_get_slot(ha->pci_dev); if (device_get_children(device_get_parent(ha->pci_dev), &peers, &count)) return; for (i = 0; i < count; i++) { slot = pci_get_slot(peers[i]); if ((slot >= 0) && (slot == my_slot) && (pci_get_device(peers[i]) == pci_get_device(ha->pci_dev))) { if (ha->pci_dev != peers[i]) ha->peer_dev = peers[i]; } } } static void qla_send_msg_to_peer(qla_host_t *ha, uint32_t msg_to_peer) { qla_host_t *ha_peer; if (ha->peer_dev) { if ((ha_peer = device_get_softc(ha->peer_dev)) != NULL) { ha_peer->msg_from_peer = msg_to_peer; } } } void qla_set_error_recovery(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; if (!cold && ha->enable_error_recovery) { if (ifp) ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ha->qla_initiate_recovery = 1; } else ha->offline = 1; return; } static void qla_error_recovery(void *context, int pending) { qla_host_t *ha = context; uint32_t msecs_100 = 400; struct ifnet *ifp = ha->ifp; int i = 0; device_printf(ha->pci_dev, "%s: enter\n", __func__); ha->hw.imd_compl = 1; taskqueue_drain_all(ha->stats_tq); taskqueue_drain_all(ha->async_event_tq); if (QLA_LOCK(ha, __func__, -1, 0) != 0) return; device_printf(ha->pci_dev, "%s: ts_usecs = %ld start\n", __func__, qla_get_usec_timestamp()); if (ha->qla_interface_up) { qla_mdelay(__func__, 300); //ifp->if_drv_flags &= ~IFF_DRV_RUNNING; for (i = 0; i < ha->hw.num_sds_rings; i++) { qla_tx_fp_t *fp; fp = &ha->tx_fp[i]; if (fp == NULL) continue; if (fp->tx_br != NULL) { mtx_lock(&fp->tx_mtx); mtx_unlock(&fp->tx_mtx); } } } qla_drain_fp_taskqueues(ha); if ((ha->pci_func & 0x1) == 0) { if (!ha->msg_from_peer) { qla_send_msg_to_peer(ha, QL_PEER_MSG_RESET); while ((ha->msg_from_peer != QL_PEER_MSG_ACK) && msecs_100--) qla_mdelay(__func__, 100); } ha->msg_from_peer = 0; if (ha->enable_minidump) ql_minidump(ha); if (ha->enable_driverstate_dump) ql_capture_drvr_state(ha); if (ql_init_hw(ha)) { device_printf(ha->pci_dev, "%s: ts_usecs = %ld exit: ql_init_hw failed\n", __func__, qla_get_usec_timestamp()); ha->offline = 1; goto qla_error_recovery_exit; } if (ha->qla_interface_up) { qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); } if (!QL_ERR_INJECT(ha, INJCT_PEER_PORT_FAILURE_ERR_RECOVERY)) qla_send_msg_to_peer(ha, QL_PEER_MSG_ACK); } else { if (ha->msg_from_peer == QL_PEER_MSG_RESET) { ha->msg_from_peer = 0; if (!QL_ERR_INJECT(ha, INJCT_PEER_PORT_FAILURE_ERR_RECOVERY)) qla_send_msg_to_peer(ha, QL_PEER_MSG_ACK); } else { qla_send_msg_to_peer(ha, QL_PEER_MSG_RESET); } while ((ha->msg_from_peer != QL_PEER_MSG_ACK) && msecs_100--) qla_mdelay(__func__, 100); ha->msg_from_peer = 0; if (ha->enable_driverstate_dump) ql_capture_drvr_state(ha); if (msecs_100 == 0) { device_printf(ha->pci_dev, "%s: 
ts_usecs = %ld exit: QL_PEER_MSG_ACK not received\n", __func__, qla_get_usec_timestamp()); ha->offline = 1; goto qla_error_recovery_exit; } if (ql_init_hw(ha)) { device_printf(ha->pci_dev, "%s: ts_usecs = %ld exit: ql_init_hw failed\n", __func__, qla_get_usec_timestamp()); ha->offline = 1; goto qla_error_recovery_exit; } if (ha->qla_interface_up) { qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); } } qla_mdelay(__func__, ha->ms_delay_after_init); *((uint32_t *)&ha->hw.flags) = 0; ha->qla_initiate_recovery = 0; if (ha->qla_interface_up) { if (qla_alloc_xmt_bufs(ha) != 0) { ha->offline = 1; goto qla_error_recovery_exit; } qla_confirm_9kb_enable(ha); if (qla_alloc_rcv_bufs(ha) != 0) { ha->offline = 1; goto qla_error_recovery_exit; } ha->stop_rcv = 0; if (ql_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ha->qla_watchdog_pause = 0; ql_update_link_state(ha); } else { ha->offline = 1; if (ha->hw.sp_log_stop_events & Q8_SP_LOG_STOP_IF_START_FAILURE) ha->hw.sp_log_stop = -1; } } else { ha->qla_watchdog_pause = 0; } qla_error_recovery_exit: if (ha->offline ) { device_printf(ha->pci_dev, "%s: ts_usecs = %ld port offline\n", __func__, qla_get_usec_timestamp()); if (ha->hw.sp_log_stop_events & Q8_SP_LOG_STOP_ERR_RECOVERY_FAILURE) ha->hw.sp_log_stop = -1; } QLA_UNLOCK(ha, __func__); if (!ha->offline) callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); device_printf(ha->pci_dev, "%s: ts_usecs = %ld exit\n", __func__, qla_get_usec_timestamp()); return; } static void qla_async_event(void *context, int pending) { qla_host_t *ha = context; if (QLA_LOCK(ha, __func__, -1, 0) != 0) return; if (ha->async_event) { ha->async_event = 0; qla_hw_async_event(ha); } QLA_UNLOCK(ha, __func__); return; } static void qla_stats(void *context, int pending) { qla_host_t *ha; ha = context; ql_get_stats(ha); return; } Index: stable/11/sys/dev/qlxge/qls_os.c =================================================================== --- stable/11/sys/dev/qlxge/qls_os.c (revision 331642) +++ stable/11/sys/dev/qlxge/qls_os.c (revision 331643) @@ -1,1534 +1,1534 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013-2014 Qlogic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * and ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * File: qls_os.c * Author : David C Somayajulu, Qlogic Corporation, Aliso Viejo, CA 92656. */ #include __FBSDID("$FreeBSD$"); #include "qls_os.h" #include "qls_hw.h" #include "qls_def.h" #include "qls_inline.h" #include "qls_ver.h" #include "qls_glbl.h" #include "qls_dbg.h" #include /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif #ifndef PCI_DEVICE_QLOGIC_8000 #define PCI_DEVICE_QLOGIC_8000 0x8000 #endif #define PCI_QLOGIC_DEV8000 \ ((PCI_DEVICE_QLOGIC_8000 << 16) | PCI_VENDOR_QLOGIC) /* * static functions */ static int qls_alloc_parent_dma_tag(qla_host_t *ha); static void qls_free_parent_dma_tag(qla_host_t *ha); static void qls_flush_xmt_bufs(qla_host_t *ha); static int qls_alloc_rcv_bufs(qla_host_t *ha); static void qls_free_rcv_bufs(qla_host_t *ha); static void qls_init_ifnet(device_t dev, qla_host_t *ha); static void qls_release(qla_host_t *ha); static void qls_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void qls_stop(qla_host_t *ha); static int qls_send(qla_host_t *ha, struct mbuf **m_headp); static void qls_tx_done(void *context, int pending); static int qls_config_lro(qla_host_t *ha); static void qls_free_lro(qla_host_t *ha); static void qls_error_recovery(void *context, int pending); /* * Hooks to the Operating Systems */ static int qls_pci_probe (device_t); static int qls_pci_attach (device_t); static int qls_pci_detach (device_t); static void qls_start(struct ifnet *ifp); static void qls_init(void *arg); static int qls_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qls_media_change(struct ifnet *ifp); static void qls_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static device_method_t qla_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qls_pci_probe), DEVMETHOD(device_attach, qls_pci_attach), DEVMETHOD(device_detach, qls_pci_detach), { 0, 0 } }; static driver_t qla_pci_driver = { "ql", qla_pci_methods, sizeof (qla_host_t), }; static devclass_t qla8000_devclass; DRIVER_MODULE(qla8000, pci, qla_pci_driver, qla8000_devclass, 0, 0); MODULE_DEPEND(qla8000, pci, 1, 1, 1); MODULE_DEPEND(qla8000, ether, 1, 1, 1); MALLOC_DEFINE(M_QLA8000BUF, "qla8000buf", "Buffers for qla8000 driver"); static char dev_str[64]; static char ver_str[64]; /* * Name: qls_pci_probe * Function: Validate the PCI device to be a QLA80XX device */ static int qls_pci_probe(device_t dev) { switch ((pci_get_device(dev) << 16) | (pci_get_vendor(dev))) { case PCI_QLOGIC_DEV8000: snprintf(dev_str, sizeof(dev_str), "%s v%d.%d.%d", "Qlogic ISP 8000 PCI CNA Adapter-Ethernet Function", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); snprintf(ver_str, sizeof(ver_str), "v%d.%d.%d", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); device_set_desc(dev, dev_str); break; default: return (ENXIO); } if (bootverbose) printf("%s: %s\n ", __func__, dev_str); return (BUS_PROBE_DEFAULT); } static int qls_sysctl_get_drvr_stats(SYSCTL_HANDLER_ARGS) { int err = 0, ret; qla_host_t *ha; uint32_t i; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qla_host_t *)arg1; for (i = 0; i < ha->num_tx_rings; i++) { device_printf(ha->pci_dev, "%s: tx_ring[%d].tx_frames= %p\n", __func__, i, (void *)ha->tx_ring[i].tx_frames); device_printf(ha->pci_dev, "%s: tx_ring[%d].tx_tso_frames= %p\n", __func__, i, (void *)ha->tx_ring[i].tx_tso_frames); device_printf(ha->pci_dev, "%s: tx_ring[%d].tx_vlan_frames= %p\n", __func__, i, 
(void *)ha->tx_ring[i].tx_vlan_frames); device_printf(ha->pci_dev, "%s: tx_ring[%d].txr_free= 0x%08x\n", __func__, i, ha->tx_ring[i].txr_free); device_printf(ha->pci_dev, "%s: tx_ring[%d].txr_next= 0x%08x\n", __func__, i, ha->tx_ring[i].txr_next); device_printf(ha->pci_dev, "%s: tx_ring[%d].txr_done= 0x%08x\n", __func__, i, ha->tx_ring[i].txr_done); device_printf(ha->pci_dev, "%s: tx_ring[%d].txr_cons_idx= 0x%08x\n", __func__, i, *(ha->tx_ring[i].txr_cons_vaddr)); } for (i = 0; i < ha->num_rx_rings; i++) { device_printf(ha->pci_dev, "%s: rx_ring[%d].rx_int= %p\n", __func__, i, (void *)ha->rx_ring[i].rx_int); device_printf(ha->pci_dev, "%s: rx_ring[%d].rss_int= %p\n", __func__, i, (void *)ha->rx_ring[i].rss_int); device_printf(ha->pci_dev, "%s: rx_ring[%d].lbq_next= 0x%08x\n", __func__, i, ha->rx_ring[i].lbq_next); device_printf(ha->pci_dev, "%s: rx_ring[%d].lbq_free= 0x%08x\n", __func__, i, ha->rx_ring[i].lbq_free); device_printf(ha->pci_dev, "%s: rx_ring[%d].lbq_in= 0x%08x\n", __func__, i, ha->rx_ring[i].lbq_in); device_printf(ha->pci_dev, "%s: rx_ring[%d].sbq_next= 0x%08x\n", __func__, i, ha->rx_ring[i].sbq_next); device_printf(ha->pci_dev, "%s: rx_ring[%d].sbq_free= 0x%08x\n", __func__, i, ha->rx_ring[i].sbq_free); device_printf(ha->pci_dev, "%s: rx_ring[%d].sbq_in= 0x%08x\n", __func__, i, ha->rx_ring[i].sbq_in); } device_printf(ha->pci_dev, "%s: err_m_getcl = 0x%08x\n", __func__, ha->err_m_getcl); device_printf(ha->pci_dev, "%s: err_m_getjcl = 0x%08x\n", __func__, ha->err_m_getjcl); device_printf(ha->pci_dev, "%s: err_tx_dmamap_create = 0x%08x\n", __func__, ha->err_tx_dmamap_create); device_printf(ha->pci_dev, "%s: err_tx_dmamap_load = 0x%08x\n", __func__, ha->err_tx_dmamap_load); device_printf(ha->pci_dev, "%s: err_tx_defrag = 0x%08x\n", __func__, ha->err_tx_defrag); } return (err); } static void qls_add_sysctls(qla_host_t *ha) { device_t dev = ha->pci_dev; SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "version", CTLFLAG_RD, ver_str, 0, "Driver Version"); qls_dbg_level = 0; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &qls_dbg_level, qls_dbg_level, "Debug Level"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "drvr_stats", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qls_sysctl_get_drvr_stats, "I", "Driver Maintained Statistics"); return; } static void qls_watchdog(void *arg) { qla_host_t *ha = arg; struct ifnet *ifp; ifp = ha->ifp; if (ha->flags.qla_watchdog_exit) { ha->qla_watchdog_exited = 1; return; } ha->qla_watchdog_exited = 0; if (!ha->flags.qla_watchdog_pause) { if (ha->qla_initiate_recovery) { ha->qla_watchdog_paused = 1; ha->qla_initiate_recovery = 0; ha->err_inject = 0; taskqueue_enqueue(ha->err_tq, &ha->err_task); } else if ((ifp->if_snd.ifq_head != NULL) && QL_RUNNING(ifp)) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); } ha->qla_watchdog_paused = 0; } else { ha->qla_watchdog_paused = 1; } ha->watchdog_ticks = (ha->watchdog_ticks + 1) % 1000; callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qls_watchdog, ha); return; } /* * Name: qls_pci_attach * Function: attaches the device to the operating system */ static int qls_pci_attach(device_t dev) { qla_host_t *ha = NULL; int i; QL_DPRINT2((dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qla_host_t)); if (pci_get_device(dev) !=
PCI_DEVICE_QLOGIC_8000) { device_printf(dev, "device is not QLE8000\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev); ha->pci_dev = dev; pci_enable_busmaster(dev); ha->reg_rid = PCIR_BAR(1); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map any ports\n"); goto qls_pci_attach_err; } ha->reg_rid1 = PCIR_BAR(3); ha->pci_reg1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid1, RF_ACTIVE); if (ha->pci_reg1 == NULL) { device_printf(dev, "unable to map any ports\n"); goto qls_pci_attach_err; } mtx_init(&ha->hw_lock, "qla80xx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->tx_lock, "qla80xx_tx_lock", MTX_NETWORK_LOCK, MTX_DEF); qls_add_sysctls(ha); qls_hw_add_sysctls(ha); ha->flags.lock_init = 1; ha->msix_count = pci_msix_count(dev); if (ha->msix_count < qls_get_msix_count(ha)) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto qls_pci_attach_err; } ha->msix_count = qls_get_msix_count(ha); device_printf(dev, "\n%s: ha %p pci_func 0x%x msix_count 0x%x" " pci_reg %p pci_reg1 %p\n", __func__, ha, ha->pci_func, ha->msix_count, ha->pci_reg, ha->pci_reg1); if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msi[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qls_pci_attach_err; } for (i = 0; i < ha->num_rx_rings; i++) { ha->irq_vec[i].cq_idx = i; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq_rid = 1 + i; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); goto qls_pci_attach_err; } if (bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, qls_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle)) { device_printf(dev, "could not setup interrupt\n"); goto qls_pci_attach_err; } } qls_rd_nic_params(ha); /* allocate parent dma tag */ if (qls_alloc_parent_dma_tag(ha)) { device_printf(dev, "%s: qls_alloc_parent_dma_tag failed\n", __func__); goto qls_pci_attach_err; } /* alloc all dma buffers */ if (qls_alloc_dma(ha)) { device_printf(dev, "%s: qls_alloc_dma failed\n", __func__); goto qls_pci_attach_err; } /* create the o.s ethernet interface */ qls_init_ifnet(dev, ha); ha->flags.qla_watchdog_active = 1; ha->flags.qla_watchdog_pause = 1; TASK_INIT(&ha->tx_task, 0, qls_tx_done, ha); ha->tx_tq = taskqueue_create_fast("qla_txq", M_NOWAIT, taskqueue_thread_enqueue, &ha->tx_tq); taskqueue_start_threads(&ha->tx_tq, 1, PI_NET, "%s txq", device_get_nameunit(ha->pci_dev)); callout_init(&ha->tx_callout, 1); ha->flags.qla_callout_init = 1; /* create ioctl device interface */ if (qls_make_cdev(ha)) { device_printf(dev, "%s: qls_make_cdev failed\n", __func__); goto qls_pci_attach_err; } callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qls_watchdog, ha); TASK_INIT(&ha->err_task, 0, qls_error_recovery, ha); ha->err_tq = taskqueue_create_fast("qla_errq", M_NOWAIT, taskqueue_thread_enqueue, &ha->err_tq); taskqueue_start_threads(&ha->err_tq, 1, PI_NET, "%s errq", device_get_nameunit(ha->pci_dev)); QL_DPRINT2((dev, "%s: exit 0\n", __func__)); return (0); qls_pci_attach_err: qls_release(ha); QL_DPRINT2((dev, "%s: exit ENXIO\n", __func__)); return (ENXIO); } /* * Name: qls_pci_detach * Function: Unhooks the device from the operating system */ static int qls_pci_detach(device_t dev) { qla_host_t *ha = NULL; struct ifnet *ifp; QL_DPRINT2((dev, "%s: enter\n", 
__func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } ifp = ha->ifp; (void)QLA_LOCK(ha, __func__, 0); qls_stop(ha); QLA_UNLOCK(ha, __func__); qls_release(ha); QL_DPRINT2((dev, "%s: exit\n", __func__)); return (0); } /* * Name: qls_release * Function: Releases the resources allocated for the device */ static void qls_release(qla_host_t *ha) { device_t dev; int i; dev = ha->pci_dev; if (ha->err_tq) { taskqueue_drain(ha->err_tq, &ha->err_task); taskqueue_free(ha->err_tq); } if (ha->tx_tq) { taskqueue_drain(ha->tx_tq, &ha->tx_task); taskqueue_free(ha->tx_tq); } qls_del_cdev(ha); if (ha->flags.qla_watchdog_active) { ha->flags.qla_watchdog_exit = 1; while (ha->qla_watchdog_exited == 0) qls_mdelay(__func__, 1); } if (ha->flags.qla_callout_init) callout_stop(&ha->tx_callout); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); qls_free_dma(ha); qls_free_parent_dma_tag(ha); for (i = 0; i < ha->num_rx_rings; i++) { if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); } if (ha->irq_vec[i].irq) { (void)bus_release_resource(dev, SYS_RES_IRQ, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } } if (ha->msix_count) pci_release_msi(dev); if (ha->flags.lock_init) { mtx_destroy(&ha->tx_lock); mtx_destroy(&ha->hw_lock); } if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); if (ha->pci_reg1) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid1, ha->pci_reg1); } /* * DMA Related Functions */ static void qls_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } *((bus_addr_t *)arg) = segs[0].ds_addr; return; } int qls_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; QL_DPRINT2((dev, "%s: enter\n", __func__)); ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { device_printf(dev, "%s: could not create dma tag\n", __func__); goto qls_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); device_printf(dev, "%s: bus_dmamem_alloc failed\n", __func__); goto qls_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qls_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto qls_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; qls_alloc_dmabuf_exit: QL_DPRINT2((dev, "%s: exit ret 0x%08x tag %p map %p b %p sz 0x%x\n", __func__, ret, (void *)dma_buf->dma_tag, (void *)dma_buf->dma_map, (void *)dma_buf->dma_b, dma_buf->size)); return ret; } void qls_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { bus_dmamap_unload(dma_buf->dma_tag, dma_buf->dma_map); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); } static int qls_alloc_parent_dma_tag(qla_host_t *ha) { int 
ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { device_printf(dev, "%s: could not create parent dma tag\n", __func__); return (-1); } ha->flags.parent_tag = 1; return (0); } static void qls_free_parent_dma_tag(qla_host_t *ha) { if (ha->flags.parent_tag) { bus_dma_tag_destroy(ha->parent_tag); ha->flags.parent_tag = 0; } } /* * Name: qls_init_ifnet * Function: Creates the Network Device Interface and Registers it with the O.S */ static void qls_init_ifnet(device_t dev, qla_host_t *ha) { struct ifnet *ifp; QL_DPRINT2((dev, "%s: enter\n", __func__)); ifp = ha->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: cannot if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = IF_Gbps(10); ifp->if_init = qls_init; ifp->if_softc = ha; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = qls_ioctl; ifp->if_start = qls_start; IFQ_SET_MAXLEN(&ifp->if_snd, qls_get_ifq_snd_maxlen(ha)); ifp->if_snd.ifq_drv_maxlen = qls_get_ifq_snd_maxlen(ha); IFQ_SET_READY(&ifp->if_snd); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if (ha->max_frame_size <= MCLBYTES) { ha->msize = MCLBYTES; } else if (ha->max_frame_size <= MJUMPAGESIZE) { ha->msize = MJUMPAGESIZE; } else ha->msize = MJUM9BYTES; ether_ifattach(ifp, qls_get_mac_addr(ha)); ifp->if_capabilities = IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_TSO4; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable = ifp->if_capabilities; ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifmedia_init(&ha->media, IFM_IMASK, qls_media_change, qls_media_status); ifmedia_add(&ha->media, (IFM_ETHER | qls_get_optics(ha) | IFM_FDX), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO)); QL_DPRINT2((dev, "%s: exit\n", __func__)); return; } static void qls_init_locked(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; qls_stop(ha); qls_flush_xmt_bufs(ha); if (qls_alloc_rcv_bufs(ha) != 0) return; if (qls_config_lro(ha)) return; bcopy(IF_LLADDR(ha->ifp), ha->mac_addr, ETHER_ADDR_LEN); ifp->if_hwassist = CSUM_IP; ifp->if_hwassist |= CSUM_TCP; ifp->if_hwassist |= CSUM_UDP; ifp->if_hwassist |= CSUM_TSO; if (qls_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ha->flags.qla_watchdog_pause = 0; } return; } static void qls_init(void *arg) { qla_host_t *ha; ha = (qla_host_t *)arg; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); (void)QLA_LOCK(ha, __func__, 0); qls_init_locked(ha); QLA_UNLOCK(ha, __func__); QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); } static void qls_set_multi(qla_host_t *ha, uint32_t add_multi) { uint8_t mta[Q8_MAX_NUM_MULTICAST_ADDRS * Q8_MAC_ADDR_LEN]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; 
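/*
 * Only link-layer (AF_LINK) entries are collected here; each 6-byte
 * address is appended to the flat mta[] array that is later handed to
 * qls_hw_set_multi(), capped at Q8_MAX_NUM_MULTICAST_ADDRS entries.
 */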
if (mcnt == Q8_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * Q8_MAC_ADDR_LEN], Q8_MAC_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); if (QLA_LOCK(ha, __func__, 1) == 0) { qls_hw_set_multi(ha, mta, mcnt, add_multi); QLA_UNLOCK(ha, __func__); } return; } static int qls_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; switch (cmd) { case SIOCSIFADDR: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFADDR (0x%lx)\n", __func__, cmd)); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { (void)QLA_LOCK(ha, __func__, 0); qls_init_locked(ha); QLA_UNLOCK(ha, __func__); } QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", __func__, cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr))); arp_ifinit(ifp, ifa); } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFMTU (0x%lx)\n", __func__, cmd)); if (ifr->ifr_mtu > QLA_MAX_MTU) { ret = EINVAL; } else { (void) QLA_LOCK(ha, __func__, 0); ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; QLA_UNLOCK(ha, __func__); if (ret) ret = EINVAL; } break; case SIOCSIFFLAGS: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFFLAGS (0x%lx)\n", __func__, cmd)); (void)QLA_LOCK(ha, __func__, 0); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { ret = qls_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { ret = qls_set_allmulti(ha); } } else { ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; qls_init_locked(ha); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) qls_stop(ha); ha->if_flags = ifp->if_flags; } QLA_UNLOCK(ha, __func__); break; case SIOCADDMULTI: QL_DPRINT4((ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCADDMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qls_set_multi(ha, 1); } break; case SIOCDELMULTI: QL_DPRINT4((ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCDELMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qls_set_multi(ha, 0); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", __func__, cmd)); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFCAP (0x%lx)\n", __func__, cmd)); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) qls_init(ha); VLAN_CAPABILITIES(ifp); break; } default: QL_DPRINT4((ha->pci_dev, "%s: default (0x%lx)\n", __func__, cmd)); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qls_media_change(struct ifnet *ifp) { qla_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qls_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; 
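/*
 * Refresh the cached link state from the hardware first; the ifmedia
 * status reported below is derived from ha->link_up rather than from
 * a direct register read at this point.
 */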
QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; qls_update_link_state(ha); if (ha->link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qls_get_optics(ha)); } QL_DPRINT2((ha->pci_dev, "%s: exit (%s)\n", __func__,\ (ha->link_up ? "link_up" : "link_down"))); return; } static void qls_start(struct ifnet *ifp) { int i, ret = 0; struct mbuf *m_head; qla_host_t *ha = (qla_host_t *)ifp->if_softc; QL_DPRINT8((ha->pci_dev, "%s: enter\n", __func__)); if (!mtx_trylock(&ha->tx_lock)) { QL_DPRINT8((ha->pci_dev, "%s: mtx_trylock(&ha->tx_lock) failed\n", __func__)); return; } if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == IFF_DRV_RUNNING) { for (i = 0; i < ha->num_tx_rings; i++) { ret |= qls_hw_tx_done(ha, i); } if (ret == 0) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { QL_DPRINT8((ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); QLA_TX_UNLOCK(ha); return; } if (!ha->link_up) { qls_update_link_state(ha); if (!ha->link_up) { QL_DPRINT8((ha->pci_dev, "%s: link down\n", __func__)); QLA_TX_UNLOCK(ha); return; } } while (ifp->if_snd.ifq_head != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) { QL_DPRINT8((ha->pci_dev, "%s: m_head == NULL\n", __func__)); break; } if (qls_send(ha, &m_head)) { if (m_head == NULL) break; QL_DPRINT8((ha->pci_dev, "%s: PREPEND\n", __func__)); ifp->if_drv_flags |= IFF_DRV_OACTIVE; IF_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } QLA_TX_UNLOCK(ha); QL_DPRINT8((ha->pci_dev, "%s: exit\n", __func__)); return; } static int qls_send(qla_host_t *ha, struct mbuf **m_headp) { bus_dma_segment_t segs[QLA_MAX_SEGMENTS]; bus_dmamap_t map; int nsegs; int ret = -1; uint32_t tx_idx; struct mbuf *m_head = *m_headp; uint32_t txr_idx = 0; QL_DPRINT8((ha->pci_dev, "%s: enter\n", __func__)); /* check if flowid is set */ if (M_HASHTYPE_GET(m_head) != M_HASHTYPE_NONE) txr_idx = m_head->m_pkthdr.flowid & (ha->num_tx_rings - 1); tx_idx = ha->tx_ring[txr_idx].txr_next; map = ha->tx_ring[txr_idx].tx_buf[tx_idx].map; ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ret == EFBIG) { struct mbuf *m; QL_DPRINT8((ha->pci_dev, "%s: EFBIG [%d]\n", __func__, m_head->m_pkthdr.len)); m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { ha->err_tx_defrag++; m_freem(m_head); *m_headp = NULL; device_printf(ha->pci_dev, "%s: m_defrag() = NULL [%d]\n", __func__, ret); return (ENOBUFS); } m_head = m; *m_headp = m_head; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed0[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } } else if (ret) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed1[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } QL_ASSERT(ha, (nsegs != 0), ("qls_send: empty packet")); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); if (!(ret = qls_hw_send(ha, segs, nsegs, tx_idx, m_head, txr_idx))) { ha->tx_ring[txr_idx].count++; ha->tx_ring[txr_idx].tx_buf[tx_idx].m_head = m_head; ha->tx_ring[txr_idx].tx_buf[tx_idx].map = map; } else { if (ret == EINVAL) { if (m_head) m_freem(m_head); 
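/*
 * EINVAL from qls_hw_send() apparently marks the frame itself as
 * untransmittable, so it is freed and *m_headp cleared; qls_start()
 * only requeues a frame when *m_headp is left non-NULL.
 */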
*m_headp = NULL; } } QL_DPRINT8((ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qls_stop(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; dev = ha->pci_dev; ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); ha->flags.qla_watchdog_pause = 1; while (!ha->qla_watchdog_paused) qls_mdelay(__func__, 1); qls_del_hw_if(ha); qls_free_lro(ha); qls_flush_xmt_bufs(ha); qls_free_rcv_bufs(ha); return; } /* * Buffer Management Functions for Transmit and Receive Rings */ /* * Release mbuf after it sent on the wire */ static void qls_flush_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb) { QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); if (txb->m_head) { bus_dmamap_unload(ha->tx_tag, txb->map); m_freem(txb->m_head); txb->m_head = NULL; } QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); } static void qls_flush_xmt_bufs(qla_host_t *ha) { int i, j; for (j = 0; j < ha->num_tx_rings; j++) { for (i = 0; i < NUM_TX_DESCRIPTORS; i++) qls_flush_tx_buf(ha, &ha->tx_ring[j].tx_buf[i]); } return; } static int qls_alloc_rcv_mbufs(qla_host_t *ha, int r) { int i, j, ret = 0; qla_rx_buf_t *rxb; qla_rx_ring_t *rx_ring; volatile q81_bq_addr_e_t *sbq_e; rx_ring = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rx_ring->rx_buf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d, %d] failed\n", __func__, r, i); for (j = 0; j < i; j++) { rxb = &rx_ring->rx_buf[j]; bus_dmamap_destroy(ha->rx_tag, rxb->map); } goto qls_alloc_rcv_mbufs_err; } } rx_ring = &ha->rx_ring[r]; sbq_e = rx_ring->sbq_vaddr; rxb = &rx_ring->rx_buf[0]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { if (!(ret = qls_get_mbuf(ha, rxb, NULL))) { /* * set the physical address in the * corresponding descriptor entry in the * receive ring/queue for the hba */ sbq_e->addr_lo = rxb->paddr & 0xFFFFFFFF; sbq_e->addr_hi = (rxb->paddr >> 32) & 0xFFFFFFFF; } else { device_printf(ha->pci_dev, "%s: qls_get_mbuf [%d, %d] failed\n", __func__, r, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qls_alloc_rcv_mbufs_err; } rxb++; sbq_e++; } return 0; qls_alloc_rcv_mbufs_err: return (-1); } static void qls_free_rcv_bufs(qla_host_t *ha) { int i, r; qla_rx_buf_t *rxb; qla_rx_ring_t *rxr; for (r = 0; r < ha->num_rx_rings; r++) { rxr = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rxr->rx_buf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); } } bzero(rxr->rx_buf, (sizeof(qla_rx_buf_t) * NUM_RX_DESCRIPTORS)); } return; } static int qls_alloc_rcv_bufs(qla_host_t *ha) { int r, ret = 0; qla_rx_ring_t *rxr; for (r = 0; r < ha->num_rx_rings; r++) { rxr = &ha->rx_ring[r]; bzero(rxr->rx_buf, (sizeof(qla_rx_buf_t) * NUM_RX_DESCRIPTORS)); } for (r = 0; r < ha->num_rx_rings; r++) { ret = qls_alloc_rcv_mbufs(ha, r); if (ret) qls_free_rcv_bufs(ha); } return (ret); } int qls_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp) { - register struct mbuf *mp = nmp; + struct mbuf *mp = nmp; struct ifnet *ifp; int ret = 0; uint32_t offset; bus_dma_segment_t segs[1]; int nsegs; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifp = ha->ifp; if (mp == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ha->msize); if (mp == NULL) { if (ha->msize == MCLBYTES) ha->err_m_getcl++; else ha->err_m_getjcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getcl failed\n", __func__); goto exit_qls_get_mbuf; } mp->m_len = mp->m_pkthdr.len = ha->msize; } else { mp->m_len = 
mp->m_pkthdr.len = ha->msize; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } /* align the receive buffers to 8 byte boundary */ offset = (uint32_t)((unsigned long long)mp->m_data & 0x7ULL); if (offset) { offset = 8 - offset; m_adj(mp, offset); } /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, rxb->map, mp, segs, &nsegs, BUS_DMA_NOWAIT); rxb->paddr = segs[0].ds_addr; if (ret || !rxb->paddr || (nsegs != 1)) { m_freem(mp); rxb->m_head = NULL; device_printf(ha->pci_dev, "%s: bus_dmamap_load failed[%d, 0x%016llx, %d]\n", __func__, ret, (long long unsigned int)rxb->paddr, nsegs); ret = -1; goto exit_qls_get_mbuf; } rxb->m_head = mp; bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_PREREAD); exit_qls_get_mbuf: QL_DPRINT2((ha->pci_dev, "%s: exit ret = 0x%08x\n", __func__, ret)); return (ret); } static void qls_tx_done(void *context, int pending) { qla_host_t *ha = context; struct ifnet *ifp; ifp = ha->ifp; if (!ifp) return; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { QL_DPRINT8((ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); return; } qls_start(ha->ifp); return; } static int qls_config_lro(qla_host_t *ha) { int i; struct lro_ctrl *lro; for (i = 0; i < ha->num_rx_rings; i++) { lro = &ha->rx_ring[i].lro; if (tcp_lro_init(lro)) { device_printf(ha->pci_dev, "%s: tcp_lro_init failed\n", __func__); return (-1); } lro->ifp = ha->ifp; } ha->flags.lro_init = 1; QL_DPRINT2((ha->pci_dev, "%s: LRO initialized\n", __func__)); return (0); } static void qls_free_lro(qla_host_t *ha) { int i; struct lro_ctrl *lro; if (!ha->flags.lro_init) return; for (i = 0; i < ha->num_rx_rings; i++) { lro = &ha->rx_ring[i].lro; tcp_lro_free(lro); } ha->flags.lro_init = 0; } static void qls_error_recovery(void *context, int pending) { qla_host_t *ha = context; qls_init(ha); return; } Index: stable/11/sys/dev/rl/if_rl.c =================================================================== --- stable/11/sys/dev/rl/if_rl.c (revision 331642) +++ stable/11/sys/dev/rl/if_rl.c (revision 331643) @@ -1,2124 +1,2124 @@ /*- * Copyright (c) 1997, 1998 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * RealTek 8129/8139 PCI NIC driver * * Supports several extremely cheap PCI 10/100 adapters based on * the RealTek chipset. Datasheets can be obtained from * www.realtek.com.tw. * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ /* * The RealTek 8139 PCI NIC redefines the meaning of 'low end.' This is * probably the worst PCI ethernet controller ever made, with the possible * exception of the FEAST chip made by SMC. The 8139 supports bus-master * DMA, but it has a terrible interface that nullifies any performance * gains that bus-master DMA usually offers. * * For transmission, the chip offers a series of four TX descriptor * registers. Each transmit frame must be in a contiguous buffer, aligned * on a longword (32-bit) boundary. This means we almost always have to * do mbuf copies in order to transmit a frame, except in the unlikely * case where a) the packet fits into a single mbuf, and b) the packet * is 32-bit aligned within the mbuf's data area. The presence of only * four descriptor registers means that we can never have more than four * packets queued for transmission at any one time. * * Reception is not much better. The driver has to allocate a single large * buffer area (up to 64K in size) into which the chip will DMA received * frames. Because we don't know where within this region received packets * will begin or end, we have no choice but to copy data from the buffer * area into mbufs in order to pass the packets up to the higher protocol * levels. * * It's impossible given this rotten design to really achieve decent * performance at 100Mbps, unless you happen to have a 400Mhz PII or * some equally overmuscled CPU to drive it. * * On the bright side, the 8139 does have a built-in PHY, although * rather than using an MDIO serial interface like most other NICs, the * PHY registers are directly accessible through the 8139's register * space. The 8139 supports autonegotiation, as well as a 64-bit multicast * filter. * * The 8129 chip is an older version of the 8139 that uses an external PHY * chip. The 8129 has a serial MDIO interface for accessing the MII where * the 8139 lets you directly access the on-board PHY registers. We need * to select which interface to use depending on the chip type. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(rl, pci, 1, 1, 1); MODULE_DEPEND(rl, ether, 1, 1, 1); MODULE_DEPEND(rl, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #include /* * Various supported device vendors/types and their names. 
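 * Each entry maps a PCI vendor/device ID pair to a base chip type
 * (RL_8129 or RL_8139); the base type is what later selects between
 * the external-PHY MDIO path and the 8139's direct PHY register
 * access in the miibus methods below.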
*/ static const struct rl_type rl_devs[] = { { RT_VENDORID, RT_DEVICEID_8129, RL_8129, "RealTek 8129 10/100BaseTX" }, { RT_VENDORID, RT_DEVICEID_8139, RL_8139, "RealTek 8139 10/100BaseTX" }, { RT_VENDORID, RT_DEVICEID_8139D, RL_8139, "RealTek 8139 10/100BaseTX" }, { RT_VENDORID, RT_DEVICEID_8138, RL_8139, "RealTek 8139 10/100BaseTX CardBus" }, { RT_VENDORID, RT_DEVICEID_8100, RL_8139, "RealTek 8100 10/100BaseTX" }, { ACCTON_VENDORID, ACCTON_DEVICEID_5030, RL_8139, "Accton MPX 5030/5038 10/100BaseTX" }, { DELTA_VENDORID, DELTA_DEVICEID_8139, RL_8139, "Delta Electronics 8139 10/100BaseTX" }, { ADDTRON_VENDORID, ADDTRON_DEVICEID_8139, RL_8139, "Addtron Technology 8139 10/100BaseTX" }, { DLINK_VENDORID, DLINK_DEVICEID_520TX_REVC1, RL_8139, "D-Link DFE-520TX (rev. C1) 10/100BaseTX" }, { DLINK_VENDORID, DLINK_DEVICEID_530TXPLUS, RL_8139, "D-Link DFE-530TX+ 10/100BaseTX" }, { DLINK_VENDORID, DLINK_DEVICEID_690TXD, RL_8139, "D-Link DFE-690TXD 10/100BaseTX" }, { NORTEL_VENDORID, ACCTON_DEVICEID_5030, RL_8139, "Nortel Networks 10/100BaseTX" }, { COREGA_VENDORID, COREGA_DEVICEID_FETHERCBTXD, RL_8139, "Corega FEther CB-TXD" }, { COREGA_VENDORID, COREGA_DEVICEID_FETHERIICBTXD, RL_8139, "Corega FEtherII CB-TXD" }, { PEPPERCON_VENDORID, PEPPERCON_DEVICEID_ROLF, RL_8139, "Peppercon AG ROL-F" }, { PLANEX_VENDORID, PLANEX_DEVICEID_FNW3603TX, RL_8139, "Planex FNW-3603-TX" }, { PLANEX_VENDORID, PLANEX_DEVICEID_FNW3800TX, RL_8139, "Planex FNW-3800-TX" }, { CP_VENDORID, RT_DEVICEID_8139, RL_8139, "Compaq HNE-300" }, { LEVEL1_VENDORID, LEVEL1_DEVICEID_FPC0106TX, RL_8139, "LevelOne FPC-0106TX" }, { EDIMAX_VENDORID, EDIMAX_DEVICEID_EP4103DL, RL_8139, "Edimax EP-4103DL CardBus" } }; static int rl_attach(device_t); static int rl_detach(device_t); static void rl_dmamap_cb(void *, bus_dma_segment_t *, int, int); static int rl_dma_alloc(struct rl_softc *); static void rl_dma_free(struct rl_softc *); static void rl_eeprom_putbyte(struct rl_softc *, int); static void rl_eeprom_getword(struct rl_softc *, int, uint16_t *); static int rl_encap(struct rl_softc *, struct mbuf **); static int rl_list_tx_init(struct rl_softc *); static int rl_list_rx_init(struct rl_softc *); static int rl_ifmedia_upd(struct ifnet *); static void rl_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int rl_ioctl(struct ifnet *, u_long, caddr_t); static void rl_intr(void *); static void rl_init(void *); static void rl_init_locked(struct rl_softc *sc); static int rl_miibus_readreg(device_t, int, int); static void rl_miibus_statchg(device_t); static int rl_miibus_writereg(device_t, int, int, int); #ifdef DEVICE_POLLING static int rl_poll(struct ifnet *ifp, enum poll_cmd cmd, int count); static int rl_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count); #endif static int rl_probe(device_t); static void rl_read_eeprom(struct rl_softc *, uint8_t *, int, int, int); static void rl_reset(struct rl_softc *); static int rl_resume(device_t); static int rl_rxeof(struct rl_softc *); static void rl_rxfilter(struct rl_softc *); static int rl_shutdown(device_t); static void rl_start(struct ifnet *); static void rl_start_locked(struct ifnet *); static void rl_stop(struct rl_softc *); static int rl_suspend(device_t); static void rl_tick(void *); static void rl_txeof(struct rl_softc *); static void rl_watchdog(struct rl_softc *); static void rl_setwol(struct rl_softc *); static void rl_clrwol(struct rl_softc *); /* * MII bit-bang glue */ static uint32_t rl_mii_bitbang_read(device_t); static void rl_mii_bitbang_write(device_t, uint32_t); static const 
struct mii_bitbang_ops rl_mii_bitbang_ops = { rl_mii_bitbang_read, rl_mii_bitbang_write, { RL_MII_DATAOUT, /* MII_BIT_MDO */ RL_MII_DATAIN, /* MII_BIT_MDI */ RL_MII_CLK, /* MII_BIT_MDC */ RL_MII_DIR, /* MII_BIT_DIR_HOST_PHY */ 0, /* MII_BIT_DIR_PHY_HOST */ } }; static device_method_t rl_methods[] = { /* Device interface */ DEVMETHOD(device_probe, rl_probe), DEVMETHOD(device_attach, rl_attach), DEVMETHOD(device_detach, rl_detach), DEVMETHOD(device_suspend, rl_suspend), DEVMETHOD(device_resume, rl_resume), DEVMETHOD(device_shutdown, rl_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, rl_miibus_readreg), DEVMETHOD(miibus_writereg, rl_miibus_writereg), DEVMETHOD(miibus_statchg, rl_miibus_statchg), DEVMETHOD_END }; static driver_t rl_driver = { "rl", rl_methods, sizeof(struct rl_softc) }; static devclass_t rl_devclass; DRIVER_MODULE(rl, pci, rl_driver, rl_devclass, 0, 0); DRIVER_MODULE(rl, cardbus, rl_driver, rl_devclass, 0, 0); DRIVER_MODULE(miibus, rl, miibus_driver, miibus_devclass, 0, 0); #define EE_SET(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) | x) #define EE_CLR(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) & ~x) /* * Send a read command and address to the EEPROM, check for ACK. */ static void rl_eeprom_putbyte(struct rl_softc *sc, int addr) { - register int d, i; + int d, i; d = addr | sc->rl_eecmd_read; /* * Feed in each bit and strobe the clock. */ for (i = 0x400; i; i >>= 1) { if (d & i) { EE_SET(RL_EE_DATAIN); } else { EE_CLR(RL_EE_DATAIN); } DELAY(100); EE_SET(RL_EE_CLK); DELAY(150); EE_CLR(RL_EE_CLK); DELAY(100); } } /* * Read a word of data stored in the EEPROM at address 'addr.' */ static void rl_eeprom_getword(struct rl_softc *sc, int addr, uint16_t *dest) { - register int i; + int i; uint16_t word = 0; /* Enter EEPROM access mode. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_PROGRAM|RL_EE_SEL); /* * Send address of word we want to read. */ rl_eeprom_putbyte(sc, addr); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_PROGRAM|RL_EE_SEL); /* * Start reading bits from EEPROM. */ for (i = 0x8000; i; i >>= 1) { EE_SET(RL_EE_CLK); DELAY(100); if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT) word |= i; EE_CLR(RL_EE_CLK); DELAY(100); } /* Turn off EEPROM access mode. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); *dest = word; } /* * Read a sequence of words from the EEPROM. */ static void rl_read_eeprom(struct rl_softc *sc, uint8_t *dest, int off, int cnt, int swap) { int i; uint16_t word = 0, *ptr; for (i = 0; i < cnt; i++) { rl_eeprom_getword(sc, off + i, &word); ptr = (uint16_t *)(dest + (i * 2)); if (swap) *ptr = ntohs(word); else *ptr = word; } } /* * Read the MII serial port for the MII bit-bang module. */ static uint32_t rl_mii_bitbang_read(device_t dev) { struct rl_softc *sc; uint32_t val; sc = device_get_softc(dev); val = CSR_READ_1(sc, RL_MII); CSR_BARRIER(sc, RL_MII, 1, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (val); } /* * Write the MII serial port for the MII bit-bang module. 
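 * Both accessors go through the single RL_MII register, and the
 * CSR_BARRIER() after each access keeps the bit-banged MDIO waveform
 * from being reordered on its way to the chip.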
*/ static void rl_mii_bitbang_write(device_t dev, uint32_t val) { struct rl_softc *sc; sc = device_get_softc(dev); CSR_WRITE_1(sc, RL_MII, val); CSR_BARRIER(sc, RL_MII, 1, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } static int rl_miibus_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; uint16_t rl8139_reg; sc = device_get_softc(dev); if (sc->rl_type == RL_8139) { switch (reg) { case MII_BMCR: rl8139_reg = RL_BMCR; break; case MII_BMSR: rl8139_reg = RL_BMSR; break; case MII_ANAR: rl8139_reg = RL_ANAR; break; case MII_ANER: rl8139_reg = RL_ANER; break; case MII_ANLPAR: rl8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); /* * Allow the rlphy driver to read the media status * register. If we have a link partner which does not * support NWAY, this is the register which will tell * us the results of parallel detection. */ case RL_MEDIASTAT: return (CSR_READ_1(sc, RL_MEDIASTAT)); default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } return (CSR_READ_2(sc, rl8139_reg)); } return (mii_bitbang_readreg(dev, &rl_mii_bitbang_ops, phy, reg)); } static int rl_miibus_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; uint16_t rl8139_reg; sc = device_get_softc(dev); if (sc->rl_type == RL_8139) { switch (reg) { case MII_BMCR: rl8139_reg = RL_BMCR; break; case MII_BMSR: rl8139_reg = RL_BMSR; break; case MII_ANAR: rl8139_reg = RL_ANAR; break; case MII_ANER: rl8139_reg = RL_ANER; break; case MII_ANLPAR: rl8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); break; default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } CSR_WRITE_2(sc, rl8139_reg, data); return (0); } mii_bitbang_writereg(dev, &rl_mii_bitbang_ops, phy, reg, data); return (0); } static void rl_miibus_statchg(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->rl_miibus); ifp = sc->rl_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc->rl_flags &= ~RL_FLAG_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->rl_flags |= RL_FLAG_LINK; break; default: break; } } /* * RealTek controllers do not provide any interface to * Tx/Rx MACs for resolved speed, duplex and flow-control * parameters. */ } /* * Program the 64-bit multicast hash filter. */ static void rl_rxfilter(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; int h = 0; uint32_t hashes[2] = { 0, 0 }; struct ifmultiaddr *ifma; uint32_t rxfilt; RL_LOCK_ASSERT(sc); rxfilt = CSR_READ_4(sc, RL_RXCFG); rxfilt &= ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_BROAD | RL_RXCFG_RX_MULTI); /* Always accept frames destined for this host. */ rxfilt |= RL_RXCFG_RX_INDIV; /* Set capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) rxfilt |= RL_RXCFG_RX_BROAD; if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { rxfilt |= RL_RXCFG_RX_MULTI; if (ifp->if_flags & IFF_PROMISC) rxfilt |= RL_RXCFG_RX_ALLPHYS; hashes[0] = 0xFFFFFFFF; hashes[1] = 0xFFFFFFFF; } else { /* Now program new ones. 
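 * In outline, for each link-layer multicast address:
 *
 *	h = ether_crc32_be(lladdr, ETHER_ADDR_LEN) >> 26;
 *	if (h < 32)
 *		hashes[0] |= (1 << h);
 *	else
 *		hashes[1] |= (1 << (h - 32));
 *
 * i.e. the top 6 bits of the big-endian CRC-32 pick one of the 64
 * filter bits split across the RL_MAR0/RL_MAR4 registers.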
*/ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } if_maddr_runlock(ifp); if (hashes[0] != 0 || hashes[1] != 0) rxfilt |= RL_RXCFG_RX_MULTI; } CSR_WRITE_4(sc, RL_MAR0, hashes[0]); CSR_WRITE_4(sc, RL_MAR4, hashes[1]); CSR_WRITE_4(sc, RL_RXCFG, rxfilt); } static void rl_reset(struct rl_softc *sc) { - register int i; + int i; RL_LOCK_ASSERT(sc); CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET); for (i = 0; i < RL_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET)) break; } if (i == RL_TIMEOUT) device_printf(sc->rl_dev, "reset never completed!\n"); } /* * Probe for a RealTek 8129/8139 chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int rl_probe(device_t dev) { const struct rl_type *t; uint16_t devid, revid, vendor; int i; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); revid = pci_get_revid(dev); if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) { if (revid == 0x20) { /* 8139C+, let re(4) take care of this device. */ return (ENXIO); } } t = rl_devs; for (i = 0; i < nitems(rl_devs); i++, t++) { if (vendor == t->rl_vid && devid == t->rl_did) { device_set_desc(dev, t->rl_name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } struct rl_dmamap_arg { bus_addr_t rl_busaddr; }; static void rl_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct rl_dmamap_arg *ctx; if (error != 0) return; KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); ctx = (struct rl_dmamap_arg *)arg; ctx->rl_busaddr = segs[0].ds_addr; } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int rl_attach(device_t dev) { uint8_t eaddr[ETHER_ADDR_LEN]; uint16_t as[3]; struct ifnet *ifp; struct rl_softc *sc; const struct rl_type *t; struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int error = 0, hwrev, i, phy, pmc, rid; int prefer_iomap, unit; uint16_t rl_did = 0; char tn[32]; sc = device_get_softc(dev); unit = device_get_unit(dev); sc->rl_dev = dev; sc->rl_twister_enable = 0; snprintf(tn, sizeof(tn), "dev.rl.%d.twister_enable", unit); TUNABLE_INT_FETCH(tn, &sc->rl_twister_enable); ctx = device_get_sysctl_ctx(sc->rl_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->rl_dev)); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "twister_enable", CTLFLAG_RD, &sc->rl_twister_enable, 0, ""); mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0); pci_enable_busmaster(dev); /* * Map control/status registers. * Default to using PIO access for this driver. On SMP systems, * there appear to be problems with memory mapped mode: it looks * like doing too many memory mapped access back to back in rapid * succession can hang the bus. I'm inclined to blame this on * crummy design/construction on the part of RealTek. Memory * mapped mode does appear to work on uniprocessor systems though. 
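 * The default can be overridden per unit through the
 * dev.rl.%d.prefer_iomap tunable, e.g. dev.rl.0.prefer_iomap="0" in
 * loader.conf to request memory mapped mode instead.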
*/ prefer_iomap = 1; snprintf(tn, sizeof(tn), "dev.rl.%d.prefer_iomap", unit); TUNABLE_INT_FETCH(tn, &prefer_iomap); if (prefer_iomap) { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); } if (prefer_iomap == 0 || sc->rl_res == NULL) { sc->rl_res_id = PCIR_BAR(1); sc->rl_res_type = SYS_RES_MEMORY; sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); } if (sc->rl_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } #ifdef notdef /* * Detect the Realtek 8139B. For some reason, this chip is very * unstable when left to autoselect the media * The best workaround is to set the device to the required * media type or to set it to the 10 Meg speed. */ if ((rman_get_end(sc->rl_res) - rman_get_start(sc->rl_res)) == 0xFF) device_printf(dev, "Realtek 8139B detected. Warning, this may be unstable in autoselect mode\n"); #endif sc->rl_btag = rman_get_bustag(sc->rl_res); sc->rl_bhandle = rman_get_bushandle(sc->rl_res); /* Allocate interrupt */ rid = 0; sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->rl_irq[0] == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } sc->rl_cfg0 = RL_8139_CFG0; sc->rl_cfg1 = RL_8139_CFG1; sc->rl_cfg2 = 0; sc->rl_cfg3 = RL_8139_CFG3; sc->rl_cfg4 = RL_8139_CFG4; sc->rl_cfg5 = RL_8139_CFG5; /* * Reset the adapter. Only take the lock here as it's needed in * order to call rl_reset(). */ RL_LOCK(sc); rl_reset(sc); RL_UNLOCK(sc); sc->rl_eecmd_read = RL_EECMD_READ_6BIT; rl_read_eeprom(sc, (uint8_t *)&rl_did, 0, 1, 0); if (rl_did != 0x8129) sc->rl_eecmd_read = RL_EECMD_READ_8BIT; /* * Get station address from the EEPROM. */ rl_read_eeprom(sc, (uint8_t *)as, RL_EE_EADDR, 3, 0); for (i = 0; i < 3; i++) { eaddr[(i * 2) + 0] = as[i] & 0xff; eaddr[(i * 2) + 1] = as[i] >> 8; } /* * Now read the exact device type from the EEPROM to find * out if it's an 8129 or 8139. */ rl_read_eeprom(sc, (uint8_t *)&rl_did, RL_EE_PCI_DID, 1, 0); t = rl_devs; sc->rl_type = 0; while(t->rl_name != NULL) { if (rl_did == t->rl_did) { sc->rl_type = t->rl_basetype; break; } t++; } if (sc->rl_type == 0) { device_printf(dev, "unknown device ID: %x assuming 8139\n", rl_did); sc->rl_type = RL_8139; /* * Read RL_IDR register to get ethernet address as accessing * EEPROM may not extract correct address. */ for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i); } if ((error = rl_dma_alloc(sc)) != 0) goto fail; ifp = sc->rl_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } #define RL_PHYAD_INTERNAL 0 /* Do MII setup */ phy = MII_PHY_ANY; if (sc->rl_type == RL_8139) phy = RL_PHYAD_INTERNAL; error = mii_attach(dev, &sc->rl_miibus, ifp, rl_ifmedia_upd, rl_ifmedia_sts, BMSR_DEFCAPMASK, phy, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = rl_ioctl; ifp->if_start = rl_start; ifp->if_init = rl_init; ifp->if_capabilities = IFCAP_VLAN_MTU; /* Check WOL for RTL8139B or newer controllers. 
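 * The hardware revision is taken from the RL_TXCFG_HWREV field; only
 * revisions known to have PCI power management (8139B/8130/8139C/
 * 8139D/8100/8101) advertise IFCAP_WOL, and WOL is switched off via
 * rl_clrwol() until explicitly enabled.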
*/ if (sc->rl_type == RL_8139 && pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) == 0) { hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV; switch (hwrev) { case RL_HWREV_8139B: case RL_HWREV_8130: case RL_HWREV_8139C: case RL_HWREV_8139D: case RL_HWREV_8101: case RL_HWREV_8100: ifp->if_capabilities |= IFCAP_WOL; /* Disable WOL. */ rl_clrwol(sc); break; default: break; } } ifp->if_capenable = ifp->if_capabilities; ifp->if_capenable &= ~(IFCAP_WOL_UCAST | IFCAP_WOL_MCAST); #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, rl_intr, sc, &sc->rl_intrhand[0]); if (error) { device_printf(sc->rl_dev, "couldn't set up irq\n"); ether_ifdetach(ifp); } fail: if (error) rl_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int rl_detach(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->rl_ifp; KASSERT(mtx_initialized(&sc->rl_mtx), ("rl mutex not initialized")); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { RL_LOCK(sc); rl_stop(sc); RL_UNLOCK(sc); callout_drain(&sc->rl_stat_callout); ether_ifdetach(ifp); } #if 0 sc->suspended = 1; #endif if (sc->rl_miibus) device_delete_child(dev, sc->rl_miibus); bus_generic_detach(dev); if (sc->rl_intrhand[0]) bus_teardown_intr(dev, sc->rl_irq[0], sc->rl_intrhand[0]); if (sc->rl_irq[0]) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->rl_irq[0]); if (sc->rl_res) bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id, sc->rl_res); if (ifp) if_free(ifp); rl_dma_free(sc); mtx_destroy(&sc->rl_mtx); return (0); } static int rl_dma_alloc(struct rl_softc *sc) { struct rl_dmamap_arg ctx; int error, i; /* * Allocate the parent bus DMA tag appropriate for PCI. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->rl_dev), /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, 0, /* maxsize, nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->rl_parent_tag); if (error) { device_printf(sc->rl_dev, "failed to create parent DMA tag.\n"); goto fail; } /* Create DMA tag for Rx memory block. */ error = bus_dma_tag_create(sc->rl_parent_tag, /* parent */ RL_RX_8139_BUF_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ RL_RXBUFLEN + RL_RX_8139_BUF_GUARD_SZ, 1, /* maxsize,nsegments */ RL_RXBUFLEN + RL_RX_8139_BUF_GUARD_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->rl_cdata.rl_rx_tag); if (error) { device_printf(sc->rl_dev, "failed to create Rx memory block DMA tag.\n"); goto fail; } /* Create DMA tag for Tx buffer. 
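* Transmit uses one physically contiguous buffer per packet (the chip does no scatter/gather), so the tag allows a single segment of at most MCLBYTES.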
*/ error = bus_dma_tag_create(sc->rl_parent_tag, /* parent */ RL_TX_8139_BUF_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, 1, /* maxsize, nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->rl_cdata.rl_tx_tag); if (error) { device_printf(sc->rl_dev, "failed to create Tx DMA tag.\n"); goto fail; } /* * Allocate DMA'able memory and load DMA map for Rx memory block. */ error = bus_dmamem_alloc(sc->rl_cdata.rl_rx_tag, (void **)&sc->rl_cdata.rl_rx_buf, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_cdata.rl_rx_dmamap); if (error != 0) { device_printf(sc->rl_dev, "failed to allocate Rx DMA memory block.\n"); goto fail; } ctx.rl_busaddr = 0; error = bus_dmamap_load(sc->rl_cdata.rl_rx_tag, sc->rl_cdata.rl_rx_dmamap, sc->rl_cdata.rl_rx_buf, RL_RXBUFLEN + RL_RX_8139_BUF_GUARD_SZ, rl_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0 || ctx.rl_busaddr == 0) { device_printf(sc->rl_dev, "could not load Rx DMA memory block.\n"); goto fail; } sc->rl_cdata.rl_rx_buf_paddr = ctx.rl_busaddr; /* Create DMA maps for Tx buffers. */ for (i = 0; i < RL_TX_LIST_CNT; i++) { sc->rl_cdata.rl_tx_chain[i] = NULL; sc->rl_cdata.rl_tx_dmamap[i] = NULL; error = bus_dmamap_create(sc->rl_cdata.rl_tx_tag, 0, &sc->rl_cdata.rl_tx_dmamap[i]); if (error != 0) { device_printf(sc->rl_dev, "could not create Tx dmamap.\n"); goto fail; } } /* Leave a few bytes before the start of the RX ring buffer. */ sc->rl_cdata.rl_rx_buf_ptr = sc->rl_cdata.rl_rx_buf; sc->rl_cdata.rl_rx_buf += RL_RX_8139_BUF_RESERVE; fail: return (error); } static void rl_dma_free(struct rl_softc *sc) { int i; /* Rx memory block. */ if (sc->rl_cdata.rl_rx_tag != NULL) { if (sc->rl_cdata.rl_rx_buf_paddr != 0) bus_dmamap_unload(sc->rl_cdata.rl_rx_tag, sc->rl_cdata.rl_rx_dmamap); if (sc->rl_cdata.rl_rx_buf_ptr != NULL) bus_dmamem_free(sc->rl_cdata.rl_rx_tag, sc->rl_cdata.rl_rx_buf_ptr, sc->rl_cdata.rl_rx_dmamap); sc->rl_cdata.rl_rx_buf_ptr = NULL; sc->rl_cdata.rl_rx_buf = NULL; sc->rl_cdata.rl_rx_buf_paddr = 0; bus_dma_tag_destroy(sc->rl_cdata.rl_rx_tag); sc->rl_cdata.rl_rx_tag = NULL; } /* Tx buffers. */ if (sc->rl_cdata.rl_tx_tag != NULL) { for (i = 0; i < RL_TX_LIST_CNT; i++) { if (sc->rl_cdata.rl_tx_dmamap[i] != NULL) { bus_dmamap_destroy( sc->rl_cdata.rl_tx_tag, sc->rl_cdata.rl_tx_dmamap[i]); sc->rl_cdata.rl_tx_dmamap[i] = NULL; } } bus_dma_tag_destroy(sc->rl_cdata.rl_tx_tag); sc->rl_cdata.rl_tx_tag = NULL; } if (sc->rl_parent_tag != NULL) { bus_dma_tag_destroy(sc->rl_parent_tag); sc->rl_parent_tag = NULL; } } /* * Initialize the transmit descriptors. */ static int rl_list_tx_init(struct rl_softc *sc) { struct rl_chain_data *cd; int i; RL_LOCK_ASSERT(sc); cd = &sc->rl_cdata; for (i = 0; i < RL_TX_LIST_CNT; i++) { cd->rl_tx_chain[i] = NULL; CSR_WRITE_4(sc, RL_TXADDR0 + (i * sizeof(uint32_t)), 0x0000000); } sc->rl_cdata.cur_tx = 0; sc->rl_cdata.last_tx = 0; return (0); } static int rl_list_rx_init(struct rl_softc *sc) { RL_LOCK_ASSERT(sc); bzero(sc->rl_cdata.rl_rx_buf_ptr, RL_RXBUFLEN + RL_RX_8139_BUF_GUARD_SZ); bus_dmamap_sync(sc->rl_cdata.rl_rx_tag, sc->rl_cdata.rl_rx_dmamap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. * * You know there's something wrong with a PCI bus-master chip design * when you have to use m_devget().
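* (m_devget() allocates a fresh mbuf chain and copies the frame out of the fixed receive ring, since the ring itself can never be handed up the stack.)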
* * The receive operation is badly documented in the datasheet, so I'll * attempt to document it here. The driver provides a buffer area and * places its base address in the RX buffer start address register. * The chip then begins copying frames into the RX buffer. Each frame * is preceded by a 32-bit RX status word which specifies the length * of the frame and certain other status bits. Each frame (starting with * the status word) is also 32-bit aligned. The frame length is in the * first 16 bits of the status word; the lower 15 bits correspond with * the 'rx status register' mentioned in the datasheet. * * Note: to make the Alpha happy, the frame payload needs to be aligned * on a 32-bit boundary. To achieve this, we pass RL_ETHER_ALIGN (2 bytes) * as the offset argument to m_devget(). */ static int rl_rxeof(struct rl_softc *sc) { struct mbuf *m; struct ifnet *ifp = sc->rl_ifp; uint8_t *rxbufpos; int total_len = 0; int wrap = 0; int rx_npkts = 0; uint32_t rxstat; uint16_t cur_rx; uint16_t limit; uint16_t max_bytes, rx_bytes = 0; RL_LOCK_ASSERT(sc); bus_dmamap_sync(sc->rl_cdata.rl_rx_tag, sc->rl_cdata.rl_rx_dmamap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); cur_rx = (CSR_READ_2(sc, RL_CURRXADDR) + 16) % RL_RXBUFLEN; /* Do not try to read past this point. */ limit = CSR_READ_2(sc, RL_CURRXBUF) % RL_RXBUFLEN; if (limit < cur_rx) max_bytes = (RL_RXBUFLEN - cur_rx) + limit; else max_bytes = limit - cur_rx; while((CSR_READ_1(sc, RL_COMMAND) & RL_CMD_EMPTY_RXBUF) == 0) { #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { if (sc->rxcycles <= 0) break; sc->rxcycles--; } #endif rxbufpos = sc->rl_cdata.rl_rx_buf + cur_rx; rxstat = le32toh(*(uint32_t *)rxbufpos); /* * Here's a totally undocumented fact for you. When the * RealTek chip is in the process of copying a packet into * RAM for you, the length will be 0xfff0. If you spot a * packet header with this value, you need to stop. The * datasheet makes absolutely no mention of this and * RealTek should be shot for this. */ total_len = rxstat >> 16; if (total_len == RL_RXSTAT_UNFINISHED) break; if (!(rxstat & RL_RXSTAT_RXOK) || total_len < ETHER_MIN_LEN || total_len > ETHER_MAX_LEN + ETHER_VLAN_ENCAP_LEN) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; rl_init_locked(sc); return (rx_npkts); } /* No errors; receive the packet. */ rx_bytes += total_len + 4; /* * XXX The RealTek chip includes the CRC with every * received frame, and there's no way to turn this * behavior off (at least, I can't find anything in * the manual that explains how to do it) so we have * to trim off the CRC manually. */ total_len -= ETHER_CRC_LEN; /* * Avoid trying to read more bytes than we know * the chip has prepared for us. */ if (rx_bytes > max_bytes) break; rxbufpos = sc->rl_cdata.rl_rx_buf + ((cur_rx + sizeof(uint32_t)) % RL_RXBUFLEN); if (rxbufpos == (sc->rl_cdata.rl_rx_buf + RL_RXBUFLEN)) rxbufpos = sc->rl_cdata.rl_rx_buf; wrap = (sc->rl_cdata.rl_rx_buf + RL_RXBUFLEN) - rxbufpos; if (total_len > wrap) { m = m_devget(rxbufpos, total_len, RL_ETHER_ALIGN, ifp, NULL); if (m != NULL) m_copyback(m, wrap, total_len - wrap, sc->rl_cdata.rl_rx_buf); cur_rx = (total_len - wrap + ETHER_CRC_LEN); } else { m = m_devget(rxbufpos, total_len, RL_ETHER_ALIGN, ifp, NULL); cur_rx += total_len + 4 + ETHER_CRC_LEN; } /* Round up to 32-bit boundary. 
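* Frames always begin dword-aligned in the ring, so the software read pointer is advanced the same way; RL_CURRXADDR is kept 16 bytes behind the true offset, mirroring the +16 applied when it was read at the top of this function.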
*/ cur_rx = (cur_rx + 3) & ~3; CSR_WRITE_2(sc, RL_CURRXADDR, cur_rx - 16); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); continue; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); RL_UNLOCK(sc); (*ifp->if_input)(ifp, m); RL_LOCK(sc); rx_npkts++; } /* No need to sync Rx memory block as we didn't modify it. */ return (rx_npkts); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void rl_txeof(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; uint32_t txstat; RL_LOCK_ASSERT(sc); /* * Go through our tx list and free mbufs for those * frames that have been uploaded. */ do { if (RL_LAST_TXMBUF(sc) == NULL) break; txstat = CSR_READ_4(sc, RL_LAST_TXSTAT(sc)); if (!(txstat & (RL_TXSTAT_TX_OK| RL_TXSTAT_TX_UNDERRUN|RL_TXSTAT_TXABRT))) break; if_inc_counter(ifp, IFCOUNTER_COLLISIONS, (txstat & RL_TXSTAT_COLLCNT) >> 24); bus_dmamap_sync(sc->rl_cdata.rl_tx_tag, RL_LAST_DMAMAP(sc), BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_cdata.rl_tx_tag, RL_LAST_DMAMAP(sc)); m_freem(RL_LAST_TXMBUF(sc)); RL_LAST_TXMBUF(sc) = NULL; /* * If there was a transmit underrun, bump the TX threshold. * Make sure not to overflow the 63 * 32-byte units we can address * with the 6 available bits. */ if ((txstat & RL_TXSTAT_TX_UNDERRUN) && (sc->rl_txthresh < 2016)) sc->rl_txthresh += 32; if (txstat & RL_TXSTAT_TX_OK) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); else { int oldthresh; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if ((txstat & RL_TXSTAT_TXABRT) || (txstat & RL_TXSTAT_OUTOFWIN)) CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG); oldthresh = sc->rl_txthresh; /* error recovery */ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; rl_init_locked(sc); /* restore original threshold */ sc->rl_txthresh = oldthresh; return; } RL_INC(sc->rl_cdata.last_tx); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } while (sc->rl_cdata.last_tx != sc->rl_cdata.cur_tx); if (RL_LAST_TXMBUF(sc) == NULL) sc->rl_watchdog_timer = 0; } static void rl_twister_update(struct rl_softc *sc) { uint16_t linktest; /* * Table provided by RealTek (Kinston) for * Linux driver. Values undocumented otherwise. */ static const uint32_t param[4][4] = { {0xcb39de43, 0xcb39ce43, 0xfb38de03, 0xcb38de43}, {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83}, {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83}, {0xbb39de43, 0xbb39ce43, 0xbb39ce83, 0xbb39ce83} }; /* * Tune the so-called twister registers of the RTL8139. These * are used to compensate for impedance mismatches. The * method for tuning these registers is undocumented and the * following procedure is collected from public sources. */ switch (sc->rl_twister) { case CHK_LINK: /* * If we have a sufficient link, then we can proceed in * the state machine to the next stage. If not, then * disable further tuning after writing sane defaults. */ if (CSR_READ_2(sc, RL_CSCFG) & RL_CSCFG_LINK_OK) { CSR_WRITE_2(sc, RL_CSCFG, RL_CSCFG_LINK_DOWN_OFF_CMD); sc->rl_twister = FIND_ROW; } else { CSR_WRITE_2(sc, RL_CSCFG, RL_CSCFG_LINK_DOWN_CMD); CSR_WRITE_4(sc, RL_NWAYTST, RL_NWAYTST_CBL_TEST); CSR_WRITE_4(sc, RL_PARA78, RL_PARA78_DEF); CSR_WRITE_4(sc, RL_PARA7C, RL_PARA7C_DEF); sc->rl_twister = DONE; } break; case FIND_ROW: /* * Read how long it took to see the echo to find the tuning * row to use.
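* The echo time encoded in the CSCFG status bits selects one of the four rows of the parameter table above; row 3 is the long-cable case that RECHK_LONG below double-checks.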
*/ linktest = CSR_READ_2(sc, RL_CSCFG) & RL_CSCFG_STATUS; if (linktest == RL_CSCFG_ROW3) sc->rl_twist_row = 3; else if (linktest == RL_CSCFG_ROW2) sc->rl_twist_row = 2; else if (linktest == RL_CSCFG_ROW1) sc->rl_twist_row = 1; else sc->rl_twist_row = 0; sc->rl_twist_col = 0; sc->rl_twister = SET_PARAM; break; case SET_PARAM: if (sc->rl_twist_col == 0) CSR_WRITE_4(sc, RL_NWAYTST, RL_NWAYTST_RESET); CSR_WRITE_4(sc, RL_PARA7C, param[sc->rl_twist_row][sc->rl_twist_col]); if (++sc->rl_twist_col == 4) { if (sc->rl_twist_row == 3) sc->rl_twister = RECHK_LONG; else sc->rl_twister = DONE; } break; case RECHK_LONG: /* * For long cables, we have to double check to make sure we * don't mistune. */ linktest = CSR_READ_2(sc, RL_CSCFG) & RL_CSCFG_STATUS; if (linktest == RL_CSCFG_ROW3) sc->rl_twister = DONE; else { CSR_WRITE_4(sc, RL_PARA7C, RL_PARA7C_RETUNE); sc->rl_twister = RETUNE; } break; case RETUNE: /* Retune for a shorter cable (try column 2) */ CSR_WRITE_4(sc, RL_NWAYTST, RL_NWAYTST_CBL_TEST); CSR_WRITE_4(sc, RL_PARA78, RL_PARA78_DEF); CSR_WRITE_4(sc, RL_PARA7C, RL_PARA7C_DEF); CSR_WRITE_4(sc, RL_NWAYTST, RL_NWAYTST_RESET); sc->rl_twist_row--; sc->rl_twist_col = 0; sc->rl_twister = SET_PARAM; break; case DONE: break; } } static void rl_tick(void *xsc) { struct rl_softc *sc = xsc; struct mii_data *mii; int ticks; RL_LOCK_ASSERT(sc); /* * If we're doing the twister cable calibration, then we need to defer * watchdog timeouts. This is a no-op in normal operations, but * can falsely trigger when the cable calibration takes a while and * there was traffic ready to go when rl was started. * * We don't defer mii_tick since that updates the mii status, which * helps the twister process, at least according to similar patches * for the Linux driver I found online while doing the fixes. Worst * case is a few extra mii reads during calibration. */ mii = device_get_softc(sc->rl_miibus); mii_tick(mii); if ((sc->rl_flags & RL_FLAG_LINK) == 0) rl_miibus_statchg(sc->rl_dev); if (sc->rl_twister_enable) { if (sc->rl_twister == DONE) rl_watchdog(sc); else rl_twister_update(sc); if (sc->rl_twister == DONE) ticks = hz; else ticks = hz / 10; } else { rl_watchdog(sc); ticks = hz; } callout_reset(&sc->rl_stat_callout, ticks, rl_tick, sc); } #ifdef DEVICE_POLLING static int rl_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts = 0; RL_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rx_npkts = rl_poll_locked(ifp, cmd, count); RL_UNLOCK(sc); return (rx_npkts); } static int rl_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts; RL_LOCK_ASSERT(sc); sc->rxcycles = count; rx_npkts = rl_rxeof(sc); rl_txeof(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) rl_start_locked(ifp); if (cmd == POLL_AND_CHECK_STATUS) { uint16_t status; /* We should also check the status register. */ status = CSR_READ_2(sc, RL_ISR); if (status == 0xffff) return (rx_npkts); if (status != 0) CSR_WRITE_2(sc, RL_ISR, status); /* XXX We should check behaviour on receiver stalls. 
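* (a wedged receiver would presumably want the same reinitialization that the RL_ISR_SYSTEM_ERR case below gets).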
*/ if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; rl_init_locked(sc); } } return (rx_npkts); } #endif /* DEVICE_POLLING */ static void rl_intr(void *arg) { struct rl_softc *sc = arg; struct ifnet *ifp = sc->rl_ifp; uint16_t status; int count; RL_LOCK(sc); if (sc->suspended) goto done_locked; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) goto done_locked; #endif if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done_locked2; status = CSR_READ_2(sc, RL_ISR); if (status == 0xffff || (status & RL_INTRS) == 0) goto done_locked; /* * Ours, disable further interrupts. */ CSR_WRITE_2(sc, RL_IMR, 0); for (count = 16; count > 0; count--) { CSR_WRITE_2(sc, RL_ISR, status); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (status & (RL_ISR_RX_OK | RL_ISR_RX_ERR)) rl_rxeof(sc); if (status & (RL_ISR_TX_OK | RL_ISR_TX_ERR)) rl_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; rl_init_locked(sc); RL_UNLOCK(sc); return; } } status = CSR_READ_2(sc, RL_ISR); /* If the card has gone away, the read returns 0xffff. */ if (status == 0xffff || (status & RL_INTRS) == 0) break; } if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) rl_start_locked(ifp); done_locked2: if (ifp->if_drv_flags & IFF_DRV_RUNNING) CSR_WRITE_2(sc, RL_IMR, RL_INTRS); done_locked: RL_UNLOCK(sc); } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int rl_encap(struct rl_softc *sc, struct mbuf **m_head) { struct mbuf *m; bus_dma_segment_t txsegs[1]; int error, nsegs, padlen; RL_LOCK_ASSERT(sc); m = *m_head; padlen = 0; /* * Hardware doesn't auto-pad, so we have to make sure * pad short frames out to the minimum frame length. */ if (m->m_pkthdr.len < RL_MIN_FRAMELEN) padlen = RL_MIN_FRAMELEN - m->m_pkthdr.len; /* * The RealTek is brain damaged and wants longword-aligned * TX buffers, plus we can only have one fragment buffer * per packet. We have to copy pretty much all the time. */ if (m->m_next != NULL || (mtod(m, uintptr_t) & 3) != 0 || (padlen > 0 && M_TRAILINGSPACE(m) < padlen)) { m = m_defrag(*m_head, M_NOWAIT); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOMEM); } } *m_head = m; if (padlen > 0) { /* * Make security-conscious people happy: zero out the * bytes in the pad area, since we don't know what * this mbuf cluster buffer's previous user might * have left in it. */ bzero(mtod(m, char *) + m->m_pkthdr.len, padlen); m->m_pkthdr.len += padlen; m->m_len = m->m_pkthdr.len; } error = bus_dmamap_load_mbuf_sg(sc->rl_cdata.rl_tx_tag, RL_CUR_DMAMAP(sc), m, txsegs, &nsegs, 0); if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } RL_CUR_TXMBUF(sc) = m; bus_dmamap_sync(sc->rl_cdata.rl_tx_tag, RL_CUR_DMAMAP(sc), BUS_DMASYNC_PREWRITE); CSR_WRITE_4(sc, RL_CUR_TXADDR(sc), RL_ADDR_LO(txsegs[0].ds_addr)); return (0); } /* * Main transmit routine. 
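* Frames are dequeued and handed to rl_encap() until every Tx slot is in flight; IFF_DRV_OACTIVE then throttles the queue until rl_txeof() retires a slot.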
*/ static void rl_start(struct ifnet *ifp) { struct rl_softc *sc = ifp->if_softc; RL_LOCK(sc); rl_start_locked(ifp); RL_UNLOCK(sc); } static void rl_start_locked(struct ifnet *ifp) { struct rl_softc *sc = ifp->if_softc; struct mbuf *m_head = NULL; RL_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) return; while (RL_CUR_TXMBUF(sc) == NULL) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (rl_encap(sc, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* Pass a copy of this mbuf chain to the bpf subsystem. */ BPF_MTAP(ifp, RL_CUR_TXMBUF(sc)); /* Transmit the frame. */ CSR_WRITE_4(sc, RL_CUR_TXSTAT(sc), RL_TXTHRESH(sc->rl_txthresh) | RL_CUR_TXMBUF(sc)->m_pkthdr.len); RL_INC(sc->rl_cdata.cur_tx); /* Set a timeout in case the chip goes out to lunch. */ sc->rl_watchdog_timer = 5; } /* * We broke out of the loop because all our TX slots are * full. Mark the NIC as busy until it drains some of the * packets from the queue. */ if (RL_CUR_TXMBUF(sc) != NULL) ifp->if_drv_flags |= IFF_DRV_OACTIVE; } static void rl_init(void *xsc) { struct rl_softc *sc = xsc; RL_LOCK(sc); rl_init_locked(sc); RL_UNLOCK(sc); } static void rl_init_locked(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mii_data *mii; uint32_t eaddr[2]; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; /* * Cancel pending I/O and free all RX/TX buffers. */ rl_stop(sc); rl_reset(sc); if (sc->rl_twister_enable) { /* * Reset twister register tuning state. The twister * registers and their tuning are undocumented, but * are necessary to cope with bad links. rl_twister = * DONE here will disable this entirely. */ sc->rl_twister = CHK_LINK; } /* * Init our MAC address. Even though the chipset * documentation doesn't mention it, we need to enter "Config * register write enable" mode to modify the ID registers. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); bzero(eaddr, sizeof(eaddr)); bcopy(IF_LLADDR(sc->rl_ifp), eaddr, ETHER_ADDR_LEN); CSR_WRITE_STREAM_4(sc, RL_IDR0, eaddr[0]); CSR_WRITE_STREAM_4(sc, RL_IDR4, eaddr[1]); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); /* Init the RX memory block pointer register. */ CSR_WRITE_4(sc, RL_RXADDR, sc->rl_cdata.rl_rx_buf_paddr + RL_RX_8139_BUF_RESERVE); /* Init TX descriptors. */ rl_list_tx_init(sc); /* Init Rx memory block. */ rl_list_rx_init(sc); /* * Enable transmit and receive. */ CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB); /* * Set the initial TX and RX configuration. */ CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG); CSR_WRITE_4(sc, RL_RXCFG, RL_RXCFG_CONFIG); /* Set RX filter. */ rl_rxfilter(sc); #ifdef DEVICE_POLLING /* Disable interrupts if we are polling. */ if (ifp->if_capenable & IFCAP_POLLING) CSR_WRITE_2(sc, RL_IMR, 0); else #endif /* Enable interrupts. */ CSR_WRITE_2(sc, RL_IMR, RL_INTRS); /* Set initial TX threshold */ sc->rl_txthresh = RL_TX_THRESH_INIT; /* Start RX/TX process. */ CSR_WRITE_4(sc, RL_MISSEDPKT, 0); /* Enable receiver and transmitter. */ CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB); sc->rl_flags &= ~RL_FLAG_LINK; mii_mediachg(mii); CSR_WRITE_1(sc, sc->rl_cfg1, RL_CFG1_DRVLOAD|RL_CFG1_FULLDUPLEX); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->rl_stat_callout, hz, rl_tick, sc); } /* * Set media options. 
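* Media selection is delegated entirely to the MII layer; the driver just wraps mii_mediachg() in the softc lock.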
*/ static int rl_ifmedia_upd(struct ifnet *ifp) { struct rl_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); mii_mediachg(mii); RL_UNLOCK(sc); return (0); } /* * Report current media status. */ static void rl_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct rl_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; RL_UNLOCK(sc); } static int rl_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct mii_data *mii; struct rl_softc *sc = ifp->if_softc; int error = 0, mask; switch (command) { case SIOCSIFFLAGS: RL_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING && ((ifp->if_flags ^ sc->rl_if_flags) & (IFF_PROMISC | IFF_ALLMULTI))) rl_rxfilter(sc); else rl_init_locked(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) rl_stop(sc); sc->rl_if_flags = ifp->if_flags; RL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: RL_LOCK(sc); rl_rxfilter(sc); RL_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->rl_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (ifr->ifr_reqcap & IFCAP_POLLING && !(ifp->if_capenable & IFCAP_POLLING)) { error = ether_poll_register(rl_poll, ifp); if (error) return(error); RL_LOCK(sc); /* Disable interrupts */ CSR_WRITE_2(sc, RL_IMR, 0x0000); ifp->if_capenable |= IFCAP_POLLING; RL_UNLOCK(sc); return (error); } if (!(ifr->ifr_reqcap & IFCAP_POLLING) && ifp->if_capenable & IFCAP_POLLING) { error = ether_poll_deregister(ifp); /* Enable interrupts. */ RL_LOCK(sc); CSR_WRITE_2(sc, RL_IMR, RL_INTRS); ifp->if_capenable &= ~IFCAP_POLLING; RL_UNLOCK(sc); return (error); } #endif /* DEVICE_POLLING */ if ((mask & IFCAP_WOL) != 0 && (ifp->if_capabilities & IFCAP_WOL) != 0) { if ((mask & IFCAP_WOL_UCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_UCAST; if ((mask & IFCAP_WOL_MCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_MCAST; if ((mask & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void rl_watchdog(struct rl_softc *sc) { RL_LOCK_ASSERT(sc); if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer >0) return; device_printf(sc->rl_dev, "watchdog timeout\n"); if_inc_counter(sc->rl_ifp, IFCOUNTER_OERRORS, 1); rl_txeof(sc); rl_rxeof(sc); sc->rl_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; rl_init_locked(sc); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void rl_stop(struct rl_softc *sc) { - register int i; + int i; struct ifnet *ifp = sc->rl_ifp; RL_LOCK_ASSERT(sc); sc->rl_watchdog_timer = 0; callout_stop(&sc->rl_stat_callout); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->rl_flags &= ~RL_FLAG_LINK; CSR_WRITE_1(sc, RL_COMMAND, 0x00); CSR_WRITE_2(sc, RL_IMR, 0x0000); for (i = 0; i < RL_TIMEOUT; i++) { DELAY(10); if ((CSR_READ_1(sc, RL_COMMAND) & (RL_CMD_RX_ENB | RL_CMD_TX_ENB)) == 0) break; } if (i == RL_TIMEOUT) device_printf(sc->rl_dev, "Unable to stop Tx/Rx MAC\n"); /* * Free the TX list buffers. 
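* Any mbufs still owned by the hardware are synced, unloaded and freed, and the Tx address registers are zeroed so a later rl_init() starts from a clean slate.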
*/ for (i = 0; i < RL_TX_LIST_CNT; i++) { if (sc->rl_cdata.rl_tx_chain[i] != NULL) { bus_dmamap_sync(sc->rl_cdata.rl_tx_tag, sc->rl_cdata.rl_tx_dmamap[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_cdata.rl_tx_tag, sc->rl_cdata.rl_tx_dmamap[i]); m_freem(sc->rl_cdata.rl_tx_chain[i]); sc->rl_cdata.rl_tx_chain[i] = NULL; CSR_WRITE_4(sc, RL_TXADDR0 + (i * sizeof(uint32_t)), 0x0000000); } } } /* * Device suspend routine. Stop the interface and save some PCI * settings in case the BIOS doesn't restore them properly on * resume. */ static int rl_suspend(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); rl_stop(sc); rl_setwol(sc); sc->suspended = 1; RL_UNLOCK(sc); return (0); } /* * Device resume routine. Restore some PCI settings in case the BIOS * doesn't, re-enable busmastering, and restart the interface if * appropriate. */ static int rl_resume(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; int pmc; uint16_t pmstat; sc = device_get_softc(dev); ifp = sc->rl_ifp; RL_LOCK(sc); if ((ifp->if_capabilities & IFCAP_WOL) != 0 && pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) == 0) { /* Disable PME and clear PME status. */ pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2); if ((pmstat & PCIM_PSTAT_PMEENABLE) != 0) { pmstat &= ~PCIM_PSTAT_PMEENABLE; pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2); } /* * Clear WOL matching such that normal Rx filtering * wouldn't interfere with WOL patterns. */ rl_clrwol(sc); } /* reinitialize interface if necessary */ if (ifp->if_flags & IFF_UP) rl_init_locked(sc); sc->suspended = 0; RL_UNLOCK(sc); return (0); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int rl_shutdown(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); rl_stop(sc); /* * Mark interface as down since otherwise we will panic if * interrupt comes in later on, which can happen in some * cases. */ sc->rl_ifp->if_flags &= ~IFF_UP; rl_setwol(sc); RL_UNLOCK(sc); return (0); } static void rl_setwol(struct rl_softc *sc) { struct ifnet *ifp; int pmc; uint16_t pmstat; uint8_t v; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; if ((ifp->if_capabilities & IFCAP_WOL) == 0) return; if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); /* Enable PME. */ v = CSR_READ_1(sc, sc->rl_cfg1); v &= ~RL_CFG1_PME; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, v); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) v |= RL_CFG3_WOL_MAGIC; CSR_WRITE_1(sc, sc->rl_cfg3, v); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST); v &= ~RL_CFG5_WOL_LANWAKE; if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0) v |= RL_CFG5_WOL_UCAST; if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); /* Config register write done. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); /* Request PME if WOL is requested. 
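* The PME status bit is cleared first so a stale event cannot fire the instant the enable bit is written back.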
*/ pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2); } static void rl_clrwol(struct rl_softc *sc) { struct ifnet *ifp; uint8_t v; ifp = sc->rl_ifp; if ((ifp->if_capabilities & IFCAP_WOL) == 0) return; /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); CSR_WRITE_1(sc, sc->rl_cfg3, v); /* Config register write done. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST); v &= ~RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); } Index: stable/11/sys/dev/sound/pci/fm801.c =================================================================== --- stable/11/sys/dev/sound/pci/fm801.c (revision 331642) +++ stable/11/sys/dev/sound/pci/fm801.c (revision 331643) @@ -1,768 +1,768 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 Dmitry Dicky diwil@dataart.com * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS `AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_snd.h" #endif #include #include #include #include SND_DECLARE_FILE("$FreeBSD$"); #define PCI_VENDOR_FORTEMEDIA 0x1319 #define PCI_DEVICE_FORTEMEDIA1 0x08011319 /* Audio controller */ #define PCI_DEVICE_FORTEMEDIA2 0x08021319 /* Joystick controller */ #define FM_PCM_VOLUME 0x00 #define FM_FM_VOLUME 0x02 #define FM_I2S_VOLUME 0x04 #define FM_RECORD_SOURCE 0x06 #define FM_PLAY_CTL 0x08 #define FM_PLAY_RATE_MASK 0x0f00 #define FM_PLAY_BUF1_LAST 0x0001 #define FM_PLAY_BUF2_LAST 0x0002 #define FM_PLAY_START 0x0020 #define FM_PLAY_PAUSE 0x0040 #define FM_PLAY_STOPNOW 0x0080 #define FM_PLAY_16BIT 0x4000 #define FM_PLAY_STEREO 0x8000 #define FM_PLAY_DMALEN 0x0a #define FM_PLAY_DMABUF1 0x0c #define FM_PLAY_DMABUF2 0x10 #define FM_REC_CTL 0x14 #define FM_REC_RATE_MASK 0x0f00 #define FM_REC_BUF1_LAST 0x0001 #define FM_REC_BUF2_LAST 0x0002 #define FM_REC_START 0x0020 #define FM_REC_PAUSE 0x0040 #define FM_REC_STOPNOW 0x0080 #define FM_REC_16BIT 0x4000 #define FM_REC_STEREO 0x8000 #define FM_REC_DMALEN 0x16 #define FM_REC_DMABUF1 0x18 #define FM_REC_DMABUF2 0x1c #define FM_CODEC_CTL 0x22 #define FM_VOLUME 0x26 #define FM_VOLUME_MUTE 0x8000 #define FM_CODEC_CMD 0x2a #define FM_CODEC_CMD_READ 0x0080 #define FM_CODEC_CMD_VALID 0x0100 #define FM_CODEC_CMD_BUSY 0x0200 #define FM_CODEC_DATA 0x2c #define FM_IO_CTL 0x52 #define FM_CARD_CTL 0x54 #define FM_INTMASK 0x56 #define FM_INTMASK_PLAY 0x0001 #define FM_INTMASK_REC 0x0002 #define FM_INTMASK_VOL 0x0040 #define FM_INTMASK_MPU 0x0080 #define FM_INTSTATUS 0x5a #define FM_INTSTATUS_PLAY 0x0100 #define FM_INTSTATUS_REC 0x0200 #define FM_INTSTATUS_VOL 0x4000 #define FM_INTSTATUS_MPU 0x8000 #define FM801_DEFAULT_BUFSZ 4096 /* Other values do not work!!! */ /* debug purposes */ #define DPRINT if(0) printf /* static int fm801ch_setup(struct pcm_channel *c); */ static u_int32_t fmts[] = { SND_FORMAT(AFMT_U8, 1, 0), SND_FORMAT(AFMT_U8, 2, 0), SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static struct pcmchan_caps fm801ch_caps = { 5500, 48000, fmts, 0 }; struct fm801_info; struct fm801_chinfo { struct fm801_info *parent; struct pcm_channel *channel; struct snd_dbuf *buffer; u_int32_t spd, dir, fmt; /* speed, direction, format */ u_int32_t shift; }; struct fm801_info { int type; bus_space_tag_t st; bus_space_handle_t sh; bus_dma_tag_t parent_dmat; device_t dev; int num; u_int32_t unit; struct resource *reg, *irq; int regtype, regid, irqid; void *ih; u_int32_t play_flip, play_nextblk, play_start, play_blksize, play_fmt, play_shift, play_size; u_int32_t rec_flip, rec_nextblk, rec_start, rec_blksize, rec_fmt, rec_shift, rec_size; unsigned int bufsz; struct fm801_chinfo pch, rch; device_t radio; }; /* Bus Read / Write routines */ static u_int32_t fm801_rd(struct fm801_info *fm801, int regno, int size) { switch(size) { case 1: return (bus_space_read_1(fm801->st, fm801->sh, regno)); case 2: return (bus_space_read_2(fm801->st, fm801->sh, regno)); case 4: return (bus_space_read_4(fm801->st, fm801->sh, regno)); default: return 0xffffffff; } } static void fm801_wr(struct fm801_info *fm801, int regno, u_int32_t data, int size) { switch(size) { case 1: bus_space_write_1(fm801->st, fm801->sh, regno, data); break; case 2: bus_space_write_2(fm801->st, fm801->sh, regno, data); break; case 4: bus_space_write_4(fm801->st, fm801->sh, regno, data); break; } } /* -------------------------------------------------------------------- */ /* * ac97 codec routines */ #define TIMO 50 static int fm801_rdcd(kobj_t obj, void 
*devinfo, int regno) { struct fm801_info *fm801 = (struct fm801_info *)devinfo; int i; for (i = 0; i < TIMO && fm801_rd(fm801,FM_CODEC_CMD,2) & FM_CODEC_CMD_BUSY; i++) { DELAY(10000); DPRINT("fm801 rdcd: 1 - DELAY\n"); } if (i >= TIMO) { printf("fm801 rdcd: codec busy\n"); return 0; } fm801_wr(fm801,FM_CODEC_CMD, regno|FM_CODEC_CMD_READ,2); for (i = 0; i < TIMO && !(fm801_rd(fm801,FM_CODEC_CMD,2) & FM_CODEC_CMD_VALID); i++) { DELAY(10000); DPRINT("fm801 rdcd: 2 - DELAY\n"); } if (i >= TIMO) { printf("fm801 rdcd: write codec invalid\n"); return 0; } return fm801_rd(fm801,FM_CODEC_DATA,2); } static int fm801_wrcd(kobj_t obj, void *devinfo, int regno, u_int32_t data) { struct fm801_info *fm801 = (struct fm801_info *)devinfo; int i; DPRINT("fm801_wrcd reg 0x%x val 0x%x\n",regno, data); /* if(regno == AC97_REG_RECSEL) return; */ /* Poll until codec is ready */ for (i = 0; i < TIMO && fm801_rd(fm801,FM_CODEC_CMD,2) & FM_CODEC_CMD_BUSY; i++) { DELAY(10000); DPRINT("fm801 rdcd: 1 - DELAY\n"); } if (i >= TIMO) { printf("fm801 wrcd: read codec busy\n"); return -1; } fm801_wr(fm801,FM_CODEC_DATA,data, 2); fm801_wr(fm801,FM_CODEC_CMD, regno,2); /* wait until codec is ready */ for (i = 0; i < TIMO && fm801_rd(fm801,FM_CODEC_CMD,2) & FM_CODEC_CMD_BUSY; i++) { DELAY(10000); DPRINT("fm801 wrcd: 2 - DELAY\n"); } if (i >= TIMO) { printf("fm801 wrcd: read codec busy\n"); return -1; } DPRINT("fm801 wrcd release reg 0x%x val 0x%x\n",regno, data); return 0; } static kobj_method_t fm801_ac97_methods[] = { KOBJMETHOD(ac97_read, fm801_rdcd), KOBJMETHOD(ac97_write, fm801_wrcd), DEVMETHOD_END }; AC97_DECLARE(fm801_ac97); /* -------------------------------------------------------------------- */ /* * The interrupt handler */ static void fm801_intr(void *p) { struct fm801_info *fm801 = (struct fm801_info *)p; u_int32_t intsrc = fm801_rd(fm801, FM_INTSTATUS, 2); DPRINT("\nfm801_intr intsrc 0x%x ", intsrc); if(intsrc & FM_INTSTATUS_PLAY) { fm801->play_flip++; if(fm801->play_flip & 1) { fm801_wr(fm801, FM_PLAY_DMABUF1, fm801->play_start,4); } else fm801_wr(fm801, FM_PLAY_DMABUF2, fm801->play_nextblk,4); chn_intr(fm801->pch.channel); } if(intsrc & FM_INTSTATUS_REC) { fm801->rec_flip++; if(fm801->rec_flip & 1) { fm801_wr(fm801, FM_REC_DMABUF1, fm801->rec_start,4); } else fm801_wr(fm801, FM_REC_DMABUF2, fm801->rec_nextblk,4); chn_intr(fm801->rch.channel); } if ( intsrc & FM_INTSTATUS_MPU ) { /* This is a TODOish thing... */ fm801_wr(fm801, FM_INTSTATUS, intsrc & FM_INTSTATUS_MPU,2); } if ( intsrc & FM_INTSTATUS_VOL ) { /* This is a TODOish thing... */ fm801_wr(fm801, FM_INTSTATUS, intsrc & FM_INTSTATUS_VOL,2); } DPRINT("fm801_intr clear\n\n"); fm801_wr(fm801, FM_INTSTATUS, intsrc & (FM_INTSTATUS_PLAY | FM_INTSTATUS_REC), 2); } /* -------------------------------------------------------------------- */ /* channel interface */ static void * fm801ch_init(kobj_t obj, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir) { struct fm801_info *fm801 = (struct fm801_info *)devinfo; struct fm801_chinfo *ch = (dir == PCMDIR_PLAY)? 
&fm801->pch : &fm801->rch; DPRINT("fm801ch_init, direction = %d\n", dir); ch->parent = fm801; ch->channel = c; ch->buffer = b; ch->dir = dir; if (sndbuf_alloc(ch->buffer, fm801->parent_dmat, 0, fm801->bufsz) != 0) return NULL; return (void *)ch; } static int fm801ch_setformat(kobj_t obj, void *data, u_int32_t format) { struct fm801_chinfo *ch = data; struct fm801_info *fm801 = ch->parent; DPRINT("fm801ch_setformat 0x%x : %s, %s, %s, %s\n", format, (AFMT_CHANNEL(format) > 1)?"stereo":"mono", (format & AFMT_16BIT) ? "16bit":"8bit", (format & AFMT_SIGNED)? "signed":"unsigned", (format & AFMT_BIGENDIAN)?"bigendian":"littleendian" ); if(ch->dir == PCMDIR_PLAY) { fm801->play_fmt = (AFMT_CHANNEL(format) > 1)? FM_PLAY_STEREO : 0; fm801->play_fmt |= (format & AFMT_16BIT) ? FM_PLAY_16BIT : 0; return 0; } if(ch->dir == PCMDIR_REC ) { fm801->rec_fmt = (AFMT_CHANNEL(format) > 1)? FM_REC_STEREO:0; fm801->rec_fmt |= (format & AFMT_16BIT) ? FM_REC_16BIT : 0; return 0; } return 0; } struct { u_int32_t limit; u_int32_t rate; } fm801_rates[11] = { { 6600, 5500 }, { 8750, 8000 }, { 10250, 9600 }, { 13200, 11025 }, { 17500, 16000 }, { 20500, 19200 }, { 26500, 22050 }, { 35000, 32000 }, { 41000, 38400 }, { 46000, 44100 }, { 48000, 48000 }, /* anything above -> 48000 */ }; static u_int32_t fm801ch_setspeed(kobj_t obj, void *data, u_int32_t speed) { struct fm801_chinfo *ch = data; struct fm801_info *fm801 = ch->parent; - register int i; + int i; for (i = 0; i < 10 && fm801_rates[i].limit <= speed; i++) ; if(ch->dir == PCMDIR_PLAY) { fm801->pch.spd = fm801_rates[i].rate; fm801->play_shift = (i<<8); fm801->play_shift &= FM_PLAY_RATE_MASK; } if(ch->dir == PCMDIR_REC ) { fm801->rch.spd = fm801_rates[i].rate; fm801->rec_shift = (i<<8); fm801->rec_shift &= FM_REC_RATE_MASK; } ch->spd = fm801_rates[i].rate; return fm801_rates[i].rate; } static u_int32_t fm801ch_setblocksize(kobj_t obj, void *data, u_int32_t blocksize) { struct fm801_chinfo *ch = data; struct fm801_info *fm801 = ch->parent; /* * Don't mind for play_flip, set the blocksize to the * desired values in any case - otherwise sound playback * breaks here.
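* (The value recorded here is what fm801ch_trigger() later programs into the FM_PLAY_DMALEN/FM_REC_DMALEN registers.)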
*/ if(ch->dir == PCMDIR_PLAY) fm801->play_blksize = blocksize; if(ch->dir == PCMDIR_REC) fm801->rec_blksize = blocksize; DPRINT("fm801ch_setblocksize %d (dir %d)\n",blocksize, ch->dir); return blocksize; } static int fm801ch_trigger(kobj_t obj, void *data, int go) { struct fm801_chinfo *ch = data; struct fm801_info *fm801 = ch->parent; u_int32_t baseaddr = sndbuf_getbufaddr(ch->buffer); u_int32_t k1; DPRINT("fm801ch_trigger go %d , ", go); if (!PCMTRIG_COMMON(go)) return 0; if (ch->dir == PCMDIR_PLAY) { if (go == PCMTRIG_START) { fm801->play_start = baseaddr; fm801->play_nextblk = fm801->play_start + fm801->play_blksize; fm801->play_flip = 0; fm801_wr(fm801, FM_PLAY_DMALEN, fm801->play_blksize - 1, 2); fm801_wr(fm801, FM_PLAY_DMABUF1,fm801->play_start,4); fm801_wr(fm801, FM_PLAY_DMABUF2,fm801->play_nextblk,4); fm801_wr(fm801, FM_PLAY_CTL, FM_PLAY_START | FM_PLAY_STOPNOW | fm801->play_fmt | fm801->play_shift, 2 ); } else { fm801->play_flip = 0; k1 = fm801_rd(fm801, FM_PLAY_CTL,2); fm801_wr(fm801, FM_PLAY_CTL, (k1 & ~(FM_PLAY_STOPNOW | FM_PLAY_START)) | FM_PLAY_BUF1_LAST | FM_PLAY_BUF2_LAST, 2 ); } } else if(ch->dir == PCMDIR_REC) { if (go == PCMTRIG_START) { fm801->rec_start = baseaddr; fm801->rec_nextblk = fm801->rec_start + fm801->rec_blksize; fm801->rec_flip = 0; fm801_wr(fm801, FM_REC_DMALEN, fm801->rec_blksize - 1, 2); fm801_wr(fm801, FM_REC_DMABUF1,fm801->rec_start,4); fm801_wr(fm801, FM_REC_DMABUF2,fm801->rec_nextblk,4); fm801_wr(fm801, FM_REC_CTL, FM_REC_START | FM_REC_STOPNOW | fm801->rec_fmt | fm801->rec_shift, 2 ); } else { fm801->rec_flip = 0; k1 = fm801_rd(fm801, FM_REC_CTL,2); fm801_wr(fm801, FM_REC_CTL, (k1 & ~(FM_REC_STOPNOW | FM_REC_START)) | FM_REC_BUF1_LAST | FM_REC_BUF2_LAST, 2); } } return 0; } /* Almost ALSA copy */ static u_int32_t fm801ch_getptr(kobj_t obj, void *data) { struct fm801_chinfo *ch = data; struct fm801_info *fm801 = ch->parent; u_int32_t result = 0; if (ch->dir == PCMDIR_PLAY) { result = fm801_rd(fm801, (fm801->play_flip&1) ? FM_PLAY_DMABUF2:FM_PLAY_DMABUF1, 4) - fm801->play_start; } if (ch->dir == PCMDIR_REC) { result = fm801_rd(fm801, (fm801->rec_flip&1) ? 
FM_REC_DMABUF2:FM_REC_DMABUF1, 4) - fm801->rec_start; } return result; } static struct pcmchan_caps * fm801ch_getcaps(kobj_t obj, void *data) { return &fm801ch_caps; } static kobj_method_t fm801ch_methods[] = { KOBJMETHOD(channel_init, fm801ch_init), KOBJMETHOD(channel_setformat, fm801ch_setformat), KOBJMETHOD(channel_setspeed, fm801ch_setspeed), KOBJMETHOD(channel_setblocksize, fm801ch_setblocksize), KOBJMETHOD(channel_trigger, fm801ch_trigger), KOBJMETHOD(channel_getptr, fm801ch_getptr), KOBJMETHOD(channel_getcaps, fm801ch_getcaps), DEVMETHOD_END }; CHANNEL_DECLARE(fm801ch); /* -------------------------------------------------------------------- */ /* * Init routine is taken from an original NetBSD driver */ static int fm801_init(struct fm801_info *fm801) { u_int32_t k1; /* reset codec */ fm801_wr(fm801, FM_CODEC_CTL, 0x0020,2); DELAY(100000); fm801_wr(fm801, FM_CODEC_CTL, 0x0000,2); DELAY(100000); fm801_wr(fm801, FM_PCM_VOLUME, 0x0808,2); fm801_wr(fm801, FM_FM_VOLUME, 0x0808,2); fm801_wr(fm801, FM_I2S_VOLUME, 0x0808,2); fm801_wr(fm801, 0x40,0x107f,2); /* enable legacy audio */ fm801_wr((void *)fm801, FM_RECORD_SOURCE, 0x0000,2); /* Unmask playback, record and mpu interrupts, mask the rest */ k1 = fm801_rd((void *)fm801, FM_INTMASK,2); fm801_wr(fm801, FM_INTMASK, (k1 & ~(FM_INTMASK_PLAY | FM_INTMASK_REC | FM_INTMASK_MPU)) | FM_INTMASK_VOL,2); fm801_wr(fm801, FM_INTSTATUS, FM_INTSTATUS_PLAY | FM_INTSTATUS_REC | FM_INTSTATUS_MPU | FM_INTSTATUS_VOL,2); DPRINT("FM801 init Ok\n"); return 0; } static int fm801_pci_attach(device_t dev) { struct ac97_info *codec = NULL; struct fm801_info *fm801; int i; int mapped = 0; char status[SND_STATUSLEN]; fm801 = malloc(sizeof(*fm801), M_DEVBUF, M_WAITOK | M_ZERO); fm801->type = pci_get_devid(dev); pci_enable_busmaster(dev); for (i = 0; (mapped == 0) && (i < PCI_MAXMAPS_0); i++) { fm801->regid = PCIR_BAR(i); fm801->regtype = SYS_RES_MEMORY; fm801->reg = bus_alloc_resource_any(dev, fm801->regtype, &fm801->regid, RF_ACTIVE); if(!fm801->reg) { fm801->regtype = SYS_RES_IOPORT; fm801->reg = bus_alloc_resource_any(dev, fm801->regtype, &fm801->regid, RF_ACTIVE); } if(fm801->reg) { fm801->st = rman_get_bustag(fm801->reg); fm801->sh = rman_get_bushandle(fm801->reg); mapped++; } } if (mapped == 0) { device_printf(dev, "unable to map register space\n"); goto oops; } fm801->bufsz = pcm_getbuffersize(dev, 4096, FM801_DEFAULT_BUFSZ, 65536); fm801_init(fm801); codec = AC97_CREATE(dev, fm801, fm801_ac97); if (codec == NULL) goto oops; if (mixer_init(dev, ac97_getmixerclass(), codec) == -1) goto oops; fm801->irqid = 0; fm801->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &fm801->irqid, RF_ACTIVE | RF_SHAREABLE); if (!fm801->irq || snd_setup_intr(dev, fm801->irq, 0, fm801_intr, fm801, &fm801->ih)) { device_printf(dev, "unable to map interrupt\n"); goto oops; } if (bus_dma_tag_create(/*parent*/bus_get_dma_tag(dev), /*alignment*/2, /*boundary*/0, /*lowaddr*/BUS_SPACE_MAXADDR_32BIT, /*highaddr*/BUS_SPACE_MAXADDR, /*filter*/NULL, /*filterarg*/NULL, /*maxsize*/fm801->bufsz, /*nsegments*/1, /*maxsegz*/0x3ffff, /*flags*/0, /*lockfunc*/busdma_lock_mutex, /*lockarg*/&Giant, &fm801->parent_dmat) != 0) { device_printf(dev, "unable to create dma tag\n"); goto oops; } snprintf(status, 64, "at %s 0x%jx irq %jd %s", (fm801->regtype == SYS_RES_IOPORT)? 
"io" : "memory", rman_get_start(fm801->reg), rman_get_start(fm801->irq),PCM_KLDSTRING(snd_fm801)); #define FM801_MAXPLAYCH 1 if (pcm_register(dev, fm801, FM801_MAXPLAYCH, 1)) goto oops; pcm_addchan(dev, PCMDIR_PLAY, &fm801ch_class, fm801); pcm_addchan(dev, PCMDIR_REC, &fm801ch_class, fm801); pcm_setstatus(dev, status); fm801->radio = device_add_child(dev, "radio", -1); bus_generic_attach(dev); return 0; oops: if (codec) ac97_destroy(codec); if (fm801->reg) bus_release_resource(dev, fm801->regtype, fm801->regid, fm801->reg); if (fm801->ih) bus_teardown_intr(dev, fm801->irq, fm801->ih); if (fm801->irq) bus_release_resource(dev, SYS_RES_IRQ, fm801->irqid, fm801->irq); if (fm801->parent_dmat) bus_dma_tag_destroy(fm801->parent_dmat); free(fm801, M_DEVBUF); return ENXIO; } static int fm801_pci_detach(device_t dev) { int r; struct fm801_info *fm801; DPRINT("Forte Media FM801 detach\n"); fm801 = pcm_getdevinfo(dev); r = bus_generic_detach(dev); if (r) return r; if (fm801->radio != NULL) { r = device_delete_child(dev, fm801->radio); if (r) return r; fm801->radio = NULL; } r = pcm_unregister(dev); if (r) return r; bus_release_resource(dev, fm801->regtype, fm801->regid, fm801->reg); bus_teardown_intr(dev, fm801->irq, fm801->ih); bus_release_resource(dev, SYS_RES_IRQ, fm801->irqid, fm801->irq); bus_dma_tag_destroy(fm801->parent_dmat); free(fm801, M_DEVBUF); return 0; } static int fm801_pci_probe( device_t dev ) { int id; if ((id = pci_get_devid(dev)) == PCI_DEVICE_FORTEMEDIA1 ) { device_set_desc(dev, "Forte Media FM801 Audio Controller"); return BUS_PROBE_DEFAULT; } /* if ((id = pci_get_devid(dev)) == PCI_DEVICE_FORTEMEDIA2 ) { device_set_desc(dev, "Forte Media FM801 Joystick (Not Supported)"); return ENXIO; } */ return ENXIO; } static struct resource * fm801_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct fm801_info *fm801; fm801 = pcm_getdevinfo(bus); if (type == SYS_RES_IOPORT && *rid == PCIR_BAR(0)) return (fm801->reg); return (NULL); } static int fm801_release_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { return (0); } static device_method_t fm801_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fm801_pci_probe), DEVMETHOD(device_attach, fm801_pci_attach), DEVMETHOD(device_detach, fm801_pci_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_alloc_resource, fm801_alloc_resource), DEVMETHOD(bus_release_resource, fm801_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD_END }; static driver_t fm801_driver = { "pcm", fm801_methods, PCM_SOFTC_SIZE, }; DRIVER_MODULE(snd_fm801, pci, fm801_driver, pcm_devclass, 0, 0); MODULE_DEPEND(snd_fm801, sound, SOUND_MINVER, SOUND_PREFVER, SOUND_MAXVER); MODULE_VERSION(snd_fm801, 1); Index: stable/11/sys/dev/speaker/spkr.c =================================================================== --- stable/11/sys/dev/speaker/spkr.c (revision 331642) +++ stable/11/sys/dev/speaker/spkr.c (revision 331643) @@ -1,551 +1,551 @@ /*- * spkr.c -- device driver for console speaker * * v1.4 by Eric S. Raymond (esr@snark.thyrsus.com) Aug 1993 * modified for FreeBSD by Andrew A. 
Chernov * modified for PC98 by Kakefuda */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include static d_open_t spkropen; static d_close_t spkrclose; static d_write_t spkrwrite; static d_ioctl_t spkrioctl; static struct cdevsw spkr_cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT, .d_open = spkropen, .d_close = spkrclose, .d_write = spkrwrite, .d_ioctl = spkrioctl, .d_name = "spkr", }; static MALLOC_DEFINE(M_SPKR, "spkr", "Speaker buffer"); /* **************** MACHINE DEPENDENT PART STARTS HERE ************************* * This section defines a function tone() which causes a tone of given * frequency and duration from the ISA console speaker. * Another function endtone() is defined to force sound off, and there is * also a rest() entry point to do pauses. * * Audible sound is generated using the Programmable Interval Timer (PIT) and * Programmable Peripheral Interface (PPI) attached to the ISA speaker. The * PPI controls whether sound is passed through at all; the PIT's channel 2 is * used to generate clicks (a square wave) of whatever frequency is desired. */ #define SPKRPRI PSOCK static char endtone, endrest; static void tone(unsigned int thz, unsigned int centisecs); static void rest(int centisecs); static void playinit(void); static void playtone(int pitch, int value, int sustain); static void playstring(char *cp, size_t slen); /* * Emit tone of frequency thz for given number of centisecs */ static void tone(unsigned int thz, unsigned int centisecs) { int sps, timo; if (thz <= 0) return; #ifdef DEBUG (void) printf("tone: thz=%d centisecs=%d\n", thz, centisecs); #endif /* DEBUG */ /* set timer to generate clicks at given frequency in Hertz */ sps = splclock(); if (timer_spkr_acquire()) { /* enter list of waiting procs ??? */ splx(sps); return; } splx(sps); disable_intr(); timer_spkr_setfreq(thz); enable_intr(); /* * Set timeout to endtone function, then give up the timeslice. * This is so other processes can execute while the tone is being * emitted. */ timo = centisecs * hz / 100; if (timo > 0) tsleep(&endtone, SPKRPRI | PCATCH, "spkrtn", timo); sps = splclock(); timer_spkr_release(); splx(sps); } /* * Rest for given number of centisecs */ static void rest(int centisecs) { int timo; /* * Set timeout to endrest function, then give up the timeslice. * This is so other processes can execute while the rest is being * waited out. */ #ifdef DEBUG (void) printf("rest: %d\n", centisecs); #endif /* DEBUG */ timo = centisecs * hz / 100; if (timo > 0) tsleep(&endrest, SPKRPRI | PCATCH, "spkrrs", timo); } /* **************** PLAY STRING INTERPRETER BEGINS HERE ********************** * Play string interpretation is modelled on IBM BASIC 2.0's PLAY statement; * M[LNS] are missing; the ~ synonym and the _ slur mark and the octave- * tracking facility are added. * Requires tone(), rest(), and endtone(). String play is not interruptible * except possibly at physical block boundaries. */ #ifndef __bool_true_false_are_defined typedef int bool; #endif #define TRUE 1 #define FALSE 0 #define dtoi(c) ((c) - '0') static int octave; /* currently selected octave */ static int whole; /* whole-note time at current tempo, in ticks */ static int value; /* whole divisor for note time, quarter note = 1 */ static int fill; /* controls spacing of notes */ static bool octtrack; /* octave-tracking on? */ static bool octprefix; /* override current octave-tracking state? */ /* * Magic number avoidance... 
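* (e.g. at the default tempo of 120 a whole note is (100 * 60 * 4) / 120 = 200 centisecond ticks, so a quarter note with NORMAL articulation fills 7/8 of its 50-tick slot and rests for the remainder)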
*/ #define SECS_PER_MIN 60 /* seconds per minute */ #define WHOLE_NOTE 4 /* quarter notes per whole note */ #define MIN_VALUE 64 /* the most we can divide a note by */ #define DFLT_VALUE 4 /* default value (quarter-note) */ #define FILLTIME 8 /* for articulation, break note in parts */ #define STACCATO 6 /* 6/8 = 3/4 of note is filled */ #define NORMAL 7 /* 7/8ths of note interval is filled */ #define LEGATO 8 /* all of note interval is filled */ #define DFLT_OCTAVE 4 /* default octave */ #define MIN_TEMPO 32 /* minimum tempo */ #define DFLT_TEMPO 120 /* default tempo */ #define MAX_TEMPO 255 /* max tempo */ #define NUM_MULT 3 /* numerator of dot multiplier */ #define DENOM_MULT 2 /* denominator of dot multiplier */ /* letter to half-tone: A B C D E F G */ static int notetab[8] = {9, 11, 0, 2, 4, 5, 7}; /* * This is the American Standard A440 Equal-Tempered scale with frequencies * rounded to nearest integer. Thank Goddess for the good ol' CRC Handbook... * our octave 0 is standard octave 2. */ #define OCTAVE_NOTES 12 /* semitones per octave */ static int pitchtab[] = { /* C C# D D# E F F# G G# A A# B*/ /* 0 */ 65, 69, 73, 78, 82, 87, 93, 98, 103, 110, 117, 123, /* 1 */ 131, 139, 147, 156, 165, 175, 185, 196, 208, 220, 233, 247, /* 2 */ 262, 277, 294, 311, 330, 349, 370, 392, 415, 440, 466, 494, /* 3 */ 523, 554, 587, 622, 659, 698, 740, 784, 831, 880, 932, 988, /* 4 */ 1047, 1109, 1175, 1245, 1319, 1397, 1480, 1568, 1661, 1760, 1865, 1975, /* 5 */ 2093, 2217, 2349, 2489, 2637, 2794, 2960, 3136, 3322, 3520, 3729, 3951, /* 6 */ 4186, 4435, 4698, 4978, 5274, 5588, 5920, 6272, 6644, 7040, 7459, 7902, }; static void playinit() { octave = DFLT_OCTAVE; whole = (100 * SECS_PER_MIN * WHOLE_NOTE) / DFLT_TEMPO; fill = NORMAL; value = DFLT_VALUE; octtrack = FALSE; octprefix = TRUE; /* act as though there was an initial O(n) */ } /* * Play tone of proper duration for current rhythm signature */ static void playtone(int pitch, int value, int sustain) { - register int sound, silence, snum = 1, sdenom = 1; + int sound, silence, snum = 1, sdenom = 1; /* this weirdness avoids floating-point arithmetic */ for (; sustain; sustain--) { /* See the BUGS section in the man page for discussion */ snum *= NUM_MULT; sdenom *= DENOM_MULT; } if (value == 0 || sdenom == 0) return; if (pitch == -1) rest(whole * snum / (value * sdenom)); else { sound = (whole * snum) / (value * sdenom) - (whole * (FILLTIME - fill)) / (value * FILLTIME); silence = whole * (FILLTIME-fill) * snum / (FILLTIME * value * sdenom); #ifdef DEBUG (void) printf("playtone: pitch %d for %d ticks, rest for %d ticks\n", pitch, sound, silence); #endif /* DEBUG */ tone(pitchtab[pitch], sound); if (fill != LEGATO) rest(silence); } } /* * Interpret and play an item from a notation string */ static void playstring(char *cp, size_t slen) { int pitch, oldfill, lastpitch = OCTAVE_NOTES * DFLT_OCTAVE; #define GETNUM(cp, v) for(v=0; isdigit(cp[1]) && slen > 0; ) \ {v = v * 10 + (*++cp - '0'); slen--;} for (; slen--; cp++) { int sustain, timeval, tempo; - register char c = toupper(*cp); + char c = toupper(*cp); #ifdef DEBUG (void) printf("playstring: %c (%x)\n", c, c); #endif /* DEBUG */ switch (c) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': /* compute pitch */ pitch = notetab[c - 'A'] + octave * OCTAVE_NOTES; /* this may be followed by an accidental sign */ if (cp[1] == '#' || cp[1] == '+') { ++pitch; ++cp; slen--; } else if (cp[1] == '-') { --pitch; ++cp; slen--; } /* * If octave-tracking mode is on, and there has been no octave- * 
setting prefix, find the version of the current letter note * closest to the last regardless of octave. */ if (octtrack && !octprefix) { if (abs(pitch-lastpitch) > abs(pitch+OCTAVE_NOTES - lastpitch)) { ++octave; pitch += OCTAVE_NOTES; } if (abs(pitch-lastpitch) > abs((pitch-OCTAVE_NOTES) - lastpitch)) { --octave; pitch -= OCTAVE_NOTES; } } octprefix = FALSE; lastpitch = pitch; /* ...which may in turn be followed by an override time value */ GETNUM(cp, timeval); if (timeval <= 0 || timeval > MIN_VALUE) timeval = value; /* ...and/or sustain dots */ for (sustain = 0; cp[1] == '.'; cp++) { slen--; sustain++; } /* ...and/or a slur mark */ oldfill = fill; if (cp[1] == '_') { fill = LEGATO; ++cp; slen--; } /* time to emit the actual tone */ playtone(pitch, timeval, sustain); fill = oldfill; break; case 'O': if (cp[1] == 'N' || cp[1] == 'n') { octprefix = octtrack = FALSE; ++cp; slen--; } else if (cp[1] == 'L' || cp[1] == 'l') { octtrack = TRUE; ++cp; slen--; } else { GETNUM(cp, octave); if (octave >= nitems(pitchtab) / OCTAVE_NOTES) octave = DFLT_OCTAVE; octprefix = TRUE; } break; case '>': if (octave < nitems(pitchtab) / OCTAVE_NOTES - 1) octave++; octprefix = TRUE; break; case '<': if (octave > 0) octave--; octprefix = TRUE; break; case 'N': GETNUM(cp, pitch); for (sustain = 0; cp[1] == '.'; cp++) { slen--; sustain++; } oldfill = fill; if (cp[1] == '_') { fill = LEGATO; ++cp; slen--; } playtone(pitch - 1, value, sustain); fill = oldfill; break; case 'L': GETNUM(cp, value); if (value <= 0 || value > MIN_VALUE) value = DFLT_VALUE; break; case 'P': case '~': /* this may be followed by an override time value */ GETNUM(cp, timeval); if (timeval <= 0 || timeval > MIN_VALUE) timeval = value; for (sustain = 0; cp[1] == '.'; cp++) { slen--; sustain++; } playtone(-1, timeval, sustain); break; case 'T': GETNUM(cp, tempo); if (tempo < MIN_TEMPO || tempo > MAX_TEMPO) tempo = DFLT_TEMPO; whole = (100 * SECS_PER_MIN * WHOLE_NOTE) / tempo; break; case 'M': if (cp[1] == 'N' || cp[1] == 'n') { fill = NORMAL; ++cp; slen--; } else if (cp[1] == 'L' || cp[1] == 'l') { fill = LEGATO; ++cp; slen--; } else if (cp[1] == 'S' || cp[1] == 's') { fill = STACCATO; ++cp; slen--; } break; } } } /* * ****************** UNIX DRIVER HOOKS BEGIN HERE ************************** * This section implements driver hooks to run playstring() and the tone(), * endtone(), and rest() functions defined above. 
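* Writes to the device are parsed as PLAY strings, while the SPKRTONE and SPKRTUNE ioctls bypass the interpreter and drive tone() and rest() directly.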
*/ static int spkr_active = FALSE; /* exclusion flag */ static char *spkr_inbuf; /* incoming buf */ static int spkropen(dev, flags, fmt, td) struct cdev *dev; int flags; int fmt; struct thread *td; { #ifdef DEBUG (void) printf("spkropen: entering with dev = %s\n", devtoname(dev)); #endif /* DEBUG */ if (spkr_active) return(EBUSY); else { #ifdef DEBUG (void) printf("spkropen: about to perform play initialization\n"); #endif /* DEBUG */ playinit(); spkr_inbuf = malloc(DEV_BSIZE, M_SPKR, M_WAITOK); spkr_active = TRUE; return(0); } } static int spkrwrite(dev, uio, ioflag) struct cdev *dev; struct uio *uio; int ioflag; { #ifdef DEBUG printf("spkrwrite: entering with dev = %s, count = %zd\n", devtoname(dev), uio->uio_resid); #endif /* DEBUG */ if (uio->uio_resid > (DEV_BSIZE - 1)) /* prevent system crashes */ return(E2BIG); else { unsigned n; char *cp; int error; n = uio->uio_resid; cp = spkr_inbuf; error = uiomove(cp, n, uio); if (!error) { cp[n] = '\0'; playstring(cp, n); } return(error); } } static int spkrclose(dev, flags, fmt, td) struct cdev *dev; int flags; int fmt; struct thread *td; { #ifdef DEBUG (void) printf("spkrclose: entering with dev = %s\n", devtoname(dev)); #endif /* DEBUG */ wakeup(&endtone); wakeup(&endrest); free(spkr_inbuf, M_SPKR); spkr_active = FALSE; return(0); } static int spkrioctl(dev, cmd, cmdarg, flags, td) struct cdev *dev; unsigned long cmd; caddr_t cmdarg; int flags; struct thread *td; { #ifdef DEBUG (void) printf("spkrioctl: entering with dev = %s, cmd = %lx\n", devtoname(dev), cmd); #endif /* DEBUG */ if (cmd == SPKRTONE) { tone_t *tp = (tone_t *)cmdarg; if (tp->frequency == 0) rest(tp->duration); else tone(tp->frequency, tp->duration); return 0; } else if (cmd == SPKRTUNE) { tone_t *tp = (tone_t *)(*(caddr_t *)cmdarg); tone_t ttp; int error; for (; ; tp++) { error = copyin(tp, &ttp, sizeof(tone_t)); if (error) return(error); if (ttp.duration == 0) break; if (ttp.frequency == 0) rest(ttp.duration); else tone(ttp.frequency, ttp.duration); } return(0); } return(EINVAL); } static struct cdev *speaker_dev; /* * Module handling */ static int speaker_modevent(module_t mod, int type, void *data) { int error = 0; switch(type) { case MOD_LOAD: speaker_dev = make_dev(&spkr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "speaker"); break; case MOD_SHUTDOWN: case MOD_UNLOAD: destroy_dev(speaker_dev); break; default: error = EOPNOTSUPP; } return (error); } DEV_MODULE(speaker, speaker_modevent, NULL); Index: stable/11/sys/dev/tl/if_tl.c =================================================================== --- stable/11/sys/dev/tl/if_tl.c (revision 331642) +++ stable/11/sys/dev/tl/if_tl.c (revision 331643) @@ -1,2278 +1,2278 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1997, 1998 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. 
Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Texas Instruments ThunderLAN driver for FreeBSD 2.2.6 and 3.x. * Supports many Compaq PCI NICs based on the ThunderLAN ethernet controller, * the National Semiconductor DP83840A physical interface and the * Microchip Technology 24Cxx series serial EEPROM. * * Written using the following four documents: * * Texas Instruments ThunderLAN Programmer's Guide (www.ti.com) * National Semiconductor DP83840A data sheet (www.national.com) * Microchip Technology 24C02C data sheet (www.microchip.com) * Micro Linear ML6692 100BaseTX only PHY data sheet (www.microlinear.com) * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ /* * Some notes about the ThunderLAN: * * The ThunderLAN controller is a single chip containing PCI controller * logic, approximately 3K of on-board SRAM, a LAN controller, and media * independent interface (MII) bus. The MII allows the ThunderLAN chip to * control up to 32 different physical interfaces (PHYs). The ThunderLAN * also has a built-in 10baseT PHY, allowing a single ThunderLAN controller * to act as a complete ethernet interface. * * Other PHYs may be attached to the ThunderLAN; the Compaq 10/100 cards * use a National Semiconductor DP83840A PHY that supports 10 or 100Mb/sec * in full or half duplex. Some of the Compaq Deskpro machines use a * Level 1 LXT970 PHY with the same capabilities. Certain Olicom adapters * use a Micro Linear ML6692 100BaseTX only PHY, which can be used in * concert with the ThunderLAN's internal PHY to provide full 10/100 * support. This is cheaper than using a standalone external PHY for both * 10/100 modes and letting the ThunderLAN's internal PHY go to waste. * A serial EEPROM is also attached to the ThunderLAN chip to provide * power-up default register settings and for storing the adapter's * station address. Although not supported by this driver, the ThunderLAN * chip can also be connected to token ring PHYs. * * The ThunderLAN has a set of registers which can be used to issue * commands, acknowledge interrupts, and to manipulate other internal * registers on its DIO bus. The primary registers can be accessed * using either programmed I/O (inb/outb) or via PCI memory mapping, * depending on how the card is configured during the PCI probing * phase. It is even possible to have both PIO and memory mapped * access turned on at the same time. * * Frame reception and transmission with the ThunderLAN chip is done * using frame 'lists.' 
A list structure looks more or less like this: * * struct tl_frag { * u_int32_t fragment_address; * u_int32_t fragment_size; * }; * struct tl_list { * u_int32_t forward_pointer; * u_int16_t cstat; * u_int16_t frame_size; * struct tl_frag fragments[10]; * }; * * The forward pointer in the list header can be either a 0 or the address * of another list, which allows several lists to be linked together. Each * list contains up to 10 fragment descriptors. This means the chip allows * ethernet frames to be broken up into up to 10 chunks for transfer to * and from the SRAM. Note that the forward pointer and fragment buffer * addresses are physical memory addresses, not virtual. Note also that * a single ethernet frame can not span lists: if the host wants to * transmit a frame and the frame data is split up over more than 10 * buffers, the frame has to be collapsed before it can be transmitted. * * To receive frames, the driver sets up a number of lists and populates * the fragment descriptors, then it sends an RX GO command to the chip. * When a frame is received, the chip will DMA it into the memory regions * specified by the fragment descriptors and then trigger an RX 'end of * frame interrupt' when done. The driver may choose to use only one * fragment per list; this may result in slightly less efficient use * of memory in exchange for improved performance. * * To transmit frames, the driver again sets up lists and fragment * descriptors, only this time the buffers contain frame data that * is to be DMA'ed into the chip instead of out of it. Once the chip * has transferred the data into its on-board SRAM, it will trigger a * TX 'end of frame' interrupt. It will also generate an 'end of channel' * interrupt when it reaches the end of the list. */ /* * Some notes about this driver: * * The ThunderLAN chip provides a couple of different ways to organize * reception, transmission and interrupt handling. The simplest approach * is to use one list each for transmission and reception. In this mode, * the ThunderLAN will generate two interrupts for every received frame * (one RX EOF and one RX EOC) and two for each transmitted frame (one * TX EOF and one TX EOC). This may make the driver simpler but it hurts * performance to have to handle so many interrupts. * * Initially I wanted to create a circular list of receive buffers so * that the ThunderLAN chip would think there was an infinitely long * receive channel and never deliver an RXEOC interrupt. However this * doesn't work correctly under heavy load: while the manual says the * chip will trigger an RXEOF interrupt each time a frame is copied into * memory, you can't count on the chip waiting around for you to acknowledge * the interrupt before it starts trying to DMA the next frame. The result * is that the chip might traverse the entire circular list and then wrap * around before you have a chance to do anything about it. Consequently, * the receive list is terminated (with a 0 in the forward pointer in the * last element). Each time an RXEOF interrupt arrives, the used list * is shifted to the end of the list, as sketched below. This gives the * appearance of an infinitely large RX chain so long as the driver * doesn't fall behind the chip and allow all of the lists to be filled up. * * If all the lists are filled, the adapter will deliver an RX 'end of * channel' interrupt when it hits the 0 forward pointer at the end of * the chain.
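 *
 * Schematically (an illustrative sketch, not literal driver state):
 *
 *	rx_head -> [L0] -> [L1] -> ... -> [Ln] -> 0
 *
 * and after an RXEOF on L0 recycles its mbuf and relinks the list:
 *
 *	rx_head -> [L1] -> ... -> [Ln] -> [L0] -> 0
 *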
The RXEOC handler then cleans out the RX chain and resets * the list head pointer in the ch_parm register and restarts the receiver. * * For frame transmission, it is possible to program the ThunderLAN's * transmit interrupt threshold so that the chip can acknowledge multiple * lists with only a single TX EOF interrupt. This allows the driver to * queue several frames in one shot, and only have to handle a total * of two interrupts (one TX EOF and one TX EOC) no matter how many frames * are transmitted. Frame transmission is done directly out of the * mbufs passed to the tl_start() routine via the interface send queue. * The driver simply sets up the fragment descriptors in the transmit * lists to point to the mbuf data regions and sends a TX GO command. * * Note that since the RX and TX lists themselves are always used * only by the driver, they are malloc()ed once at driver initialization * time and never free()ed. * * Also, in order to remain as platform independent as possible, this * driver uses memory mapped register access to manipulate the card * as opposed to programmed I/O. This avoids the use of the inb/outb * (and related) instructions which are specific to the i386 platform. * * Using these techniques, this driver achieves very high performance * by minimizing the number of interrupts generated during large * transfers and by completely avoiding buffer copies. Frame transfer * to and from the ThunderLAN chip is performed entirely by the chip * itself thereby reducing the load on the host CPU. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for vtophys */ #include /* for vtophys */ #include #include #include #include #include #include #include #include #include /* * Default to using PIO register access mode to pacify certain * laptop docking stations with built-in ThunderLAN chips that * don't seem to handle memory mapped mode properly. */ #define TL_USEIOSPACE #include MODULE_DEPEND(tl, pci, 1, 1, 1); MODULE_DEPEND(tl, ether, 1, 1, 1); MODULE_DEPEND(tl, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* * Various supported device vendors/types and their names.
*/ static const struct tl_type tl_devs[] = { { TI_VENDORID, TI_DEVICEID_THUNDERLAN, "Texas Instruments ThunderLAN" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETEL_10, "Compaq Netelligent 10" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETEL_10_100, "Compaq Netelligent 10/100" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETEL_10_100_PROLIANT, "Compaq Netelligent 10/100 Proliant" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETEL_10_100_DUAL, "Compaq Netelligent 10/100 Dual Port" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETFLEX_3P_INTEGRATED, "Compaq NetFlex-3/P Integrated" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETFLEX_3P, "Compaq NetFlex-3/P" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETFLEX_3P_BNC, "Compaq NetFlex 3/P w/ BNC" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETEL_10_100_EMBEDDED, "Compaq Netelligent 10/100 TX Embedded UTP" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETEL_10_T2_UTP_COAX, "Compaq Netelligent 10 T/2 PCI UTP/Coax" }, { COMPAQ_VENDORID, COMPAQ_DEVICEID_NETEL_10_100_TX_UTP, "Compaq Netelligent 10/100 TX UTP" }, { OLICOM_VENDORID, OLICOM_DEVICEID_OC2183, "Olicom OC-2183/2185" }, { OLICOM_VENDORID, OLICOM_DEVICEID_OC2325, "Olicom OC-2325" }, { OLICOM_VENDORID, OLICOM_DEVICEID_OC2326, "Olicom OC-2326 10/100 TX UTP" }, { 0, 0, NULL } }; static int tl_probe(device_t); static int tl_attach(device_t); static int tl_detach(device_t); static int tl_intvec_rxeoc(void *, u_int32_t); static int tl_intvec_txeoc(void *, u_int32_t); static int tl_intvec_txeof(void *, u_int32_t); static int tl_intvec_rxeof(void *, u_int32_t); static int tl_intvec_adchk(void *, u_int32_t); static int tl_intvec_netsts(void *, u_int32_t); static int tl_newbuf(struct tl_softc *, struct tl_chain_onefrag *); static void tl_stats_update(void *); static int tl_encap(struct tl_softc *, struct tl_chain *, struct mbuf *); static void tl_intr(void *); static void tl_start(struct ifnet *); static void tl_start_locked(struct ifnet *); static int tl_ioctl(struct ifnet *, u_long, caddr_t); static void tl_init(void *); static void tl_init_locked(struct tl_softc *); static void tl_stop(struct tl_softc *); static void tl_watchdog(struct tl_softc *); static int tl_shutdown(device_t); static int tl_ifmedia_upd(struct ifnet *); static void tl_ifmedia_sts(struct ifnet *, struct ifmediareq *); static u_int8_t tl_eeprom_putbyte(struct tl_softc *, int); static u_int8_t tl_eeprom_getbyte(struct tl_softc *, int, u_int8_t *); static int tl_read_eeprom(struct tl_softc *, caddr_t, int, int); static int tl_miibus_readreg(device_t, int, int); static int tl_miibus_writereg(device_t, int, int, int); static void tl_miibus_statchg(device_t); static void tl_setmode(struct tl_softc *, int); static uint32_t tl_mchash(const uint8_t *); static void tl_setmulti(struct tl_softc *); static void tl_setfilt(struct tl_softc *, caddr_t, int); static void tl_softreset(struct tl_softc *, int); static void tl_hardreset(device_t); static int tl_list_rx_init(struct tl_softc *); static int tl_list_tx_init(struct tl_softc *); static u_int8_t tl_dio_read8(struct tl_softc *, int); static u_int16_t tl_dio_read16(struct tl_softc *, int); static u_int32_t tl_dio_read32(struct tl_softc *, int); static void tl_dio_write8(struct tl_softc *, int, int); static void tl_dio_write16(struct tl_softc *, int, int); static void tl_dio_write32(struct tl_softc *, int, int); static void tl_dio_setbit(struct tl_softc *, int, int); static void tl_dio_clrbit(struct tl_softc *, int, int); static void tl_dio_setbit16(struct tl_softc *, int, int); static void tl_dio_clrbit16(struct tl_softc *, int, int); /* * MII 
bit-bang glue */ static uint32_t tl_mii_bitbang_read(device_t); static void tl_mii_bitbang_write(device_t, uint32_t); static const struct mii_bitbang_ops tl_mii_bitbang_ops = { tl_mii_bitbang_read, tl_mii_bitbang_write, { TL_SIO_MDATA, /* MII_BIT_MDO */ TL_SIO_MDATA, /* MII_BIT_MDI */ TL_SIO_MCLK, /* MII_BIT_MDC */ TL_SIO_MTXEN, /* MII_BIT_DIR_HOST_PHY */ 0, /* MII_BIT_DIR_PHY_HOST */ } }; #ifdef TL_USEIOSPACE #define TL_RES SYS_RES_IOPORT #define TL_RID TL_PCI_LOIO #else #define TL_RES SYS_RES_MEMORY #define TL_RID TL_PCI_LOMEM #endif static device_method_t tl_methods[] = { /* Device interface */ DEVMETHOD(device_probe, tl_probe), DEVMETHOD(device_attach, tl_attach), DEVMETHOD(device_detach, tl_detach), DEVMETHOD(device_shutdown, tl_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, tl_miibus_readreg), DEVMETHOD(miibus_writereg, tl_miibus_writereg), DEVMETHOD(miibus_statchg, tl_miibus_statchg), DEVMETHOD_END }; static driver_t tl_driver = { "tl", tl_methods, sizeof(struct tl_softc) }; static devclass_t tl_devclass; DRIVER_MODULE(tl, pci, tl_driver, tl_devclass, 0, 0); DRIVER_MODULE(miibus, tl, miibus_driver, miibus_devclass, 0, 0); static u_int8_t tl_dio_read8(sc, reg) struct tl_softc *sc; int reg; { CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return(CSR_READ_1(sc, TL_DIO_DATA + (reg & 3))); } static u_int16_t tl_dio_read16(sc, reg) struct tl_softc *sc; int reg; { CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return(CSR_READ_2(sc, TL_DIO_DATA + (reg & 3))); } static u_int32_t tl_dio_read32(sc, reg) struct tl_softc *sc; int reg; { CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return(CSR_READ_4(sc, TL_DIO_DATA + (reg & 3))); } static void tl_dio_write8(sc, reg, val) struct tl_softc *sc; int reg; int val; { CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_1(sc, TL_DIO_DATA + (reg & 3), val); } static void tl_dio_write16(sc, reg, val) struct tl_softc *sc; int reg; int val; { CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_DATA + (reg & 3), val); } static void tl_dio_write32(sc, reg, val) struct tl_softc *sc; int reg; int val; { CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_4(sc, TL_DIO_DATA + (reg & 3), val); } static void tl_dio_setbit(sc, reg, bit) struct tl_softc *sc; int reg; int bit; { u_int8_t f; CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); f = CSR_READ_1(sc, TL_DIO_DATA + (reg & 3)); f |= bit; CSR_BARRIER(sc, TL_DIO_DATA + (reg & 3), 1, BUS_SPACE_BARRIER_READ | 
BUS_SPACE_BARRIER_WRITE); CSR_WRITE_1(sc, TL_DIO_DATA + (reg & 3), f); } static void tl_dio_clrbit(sc, reg, bit) struct tl_softc *sc; int reg; int bit; { u_int8_t f; CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); f = CSR_READ_1(sc, TL_DIO_DATA + (reg & 3)); f &= ~bit; CSR_BARRIER(sc, TL_DIO_DATA + (reg & 3), 1, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_1(sc, TL_DIO_DATA + (reg & 3), f); } static void tl_dio_setbit16(sc, reg, bit) struct tl_softc *sc; int reg; int bit; { u_int16_t f; CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); f = CSR_READ_2(sc, TL_DIO_DATA + (reg & 3)); f |= bit; CSR_BARRIER(sc, TL_DIO_DATA + (reg & 3), 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_DATA + (reg & 3), f); } static void tl_dio_clrbit16(sc, reg, bit) struct tl_softc *sc; int reg; int bit; { u_int16_t f; CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_ADDR, reg); CSR_BARRIER(sc, TL_DIO_ADDR, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); f = CSR_READ_2(sc, TL_DIO_DATA + (reg & 3)); f &= ~bit; CSR_BARRIER(sc, TL_DIO_DATA + (reg & 3), 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); CSR_WRITE_2(sc, TL_DIO_DATA + (reg & 3), f); } /* * Send an instruction or address to the EEPROM, check for ACK. */ static u_int8_t tl_eeprom_putbyte(sc, byte) struct tl_softc *sc; int byte; { - register int i, ack = 0; + int i, ack = 0; /* * Make sure we're in TX mode. */ tl_dio_setbit(sc, TL_NETSIO, TL_SIO_ETXEN); /* * Feed in each bit and strobe the clock. */ for (i = 0x80; i; i >>= 1) { if (byte & i) { tl_dio_setbit(sc, TL_NETSIO, TL_SIO_EDATA); } else { tl_dio_clrbit(sc, TL_NETSIO, TL_SIO_EDATA); } DELAY(1); tl_dio_setbit(sc, TL_NETSIO, TL_SIO_ECLOK); DELAY(1); tl_dio_clrbit(sc, TL_NETSIO, TL_SIO_ECLOK); } /* * Turn off TX mode. */ tl_dio_clrbit(sc, TL_NETSIO, TL_SIO_ETXEN); /* * Check for ack. */ tl_dio_setbit(sc, TL_NETSIO, TL_SIO_ECLOK); ack = tl_dio_read8(sc, TL_NETSIO) & TL_SIO_EDATA; tl_dio_clrbit(sc, TL_NETSIO, TL_SIO_ECLOK); return(ack); } /* * Read a byte of data stored in the EEPROM at address 'addr.' */ static u_int8_t tl_eeprom_getbyte(sc, addr, dest) struct tl_softc *sc; int addr; u_int8_t *dest; { - register int i; + int i; u_int8_t byte = 0; device_t tl_dev = sc->tl_dev; tl_dio_write8(sc, TL_NETSIO, 0); EEPROM_START; /* * Send write control code to EEPROM. */ if (tl_eeprom_putbyte(sc, EEPROM_CTL_WRITE)) { device_printf(tl_dev, "failed to send write command, status: %x\n", tl_dio_read8(sc, TL_NETSIO)); return(1); } /* * Send address of byte we want to read. */ if (tl_eeprom_putbyte(sc, addr)) { device_printf(tl_dev, "failed to send address, status: %x\n", tl_dio_read8(sc, TL_NETSIO)); return(1); } EEPROM_STOP; EEPROM_START; /* * Send read control code to EEPROM. */ if (tl_eeprom_putbyte(sc, EEPROM_CTL_READ)) { device_printf(tl_dev, "failed to send read command, status: %x\n", tl_dio_read8(sc, TL_NETSIO)); return(1); } /* * Start reading bits from EEPROM.
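 * Bits come back MSB-first; each one is sampled on the high phase of
 * the clock -- raise ECLOK, read EDATA, drop ECLOK -- as the loop
 * below does.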
*/ tl_dio_clrbit(sc, TL_NETSIO, TL_SIO_ETXEN); for (i = 0x80; i; i >>= 1) { tl_dio_setbit(sc, TL_NETSIO, TL_SIO_ECLOK); DELAY(1); if (tl_dio_read8(sc, TL_NETSIO) & TL_SIO_EDATA) byte |= i; tl_dio_clrbit(sc, TL_NETSIO, TL_SIO_ECLOK); DELAY(1); } EEPROM_STOP; /* * No ACK generated for read, so just return byte. */ *dest = byte; return(0); } /* * Read a sequence of bytes from the EEPROM. */ static int tl_read_eeprom(sc, dest, off, cnt) struct tl_softc *sc; caddr_t dest; int off; int cnt; { int err = 0, i; u_int8_t byte = 0; for (i = 0; i < cnt; i++) { err = tl_eeprom_getbyte(sc, off + i, &byte); if (err) break; *(dest + i) = byte; } return(err ? 1 : 0); } #define TL_SIO_MII (TL_SIO_MCLK | TL_SIO_MDATA | TL_SIO_MTXEN) /* * Read the MII serial port for the MII bit-bang module. */ static uint32_t tl_mii_bitbang_read(device_t dev) { struct tl_softc *sc; uint32_t val; sc = device_get_softc(dev); val = tl_dio_read8(sc, TL_NETSIO) & TL_SIO_MII; CSR_BARRIER(sc, TL_NETSIO, 1, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (val); } /* * Write the MII serial port for the MII bit-bang module. */ static void tl_mii_bitbang_write(device_t dev, uint32_t val) { struct tl_softc *sc; sc = device_get_softc(dev); val = (tl_dio_read8(sc, TL_NETSIO) & ~TL_SIO_MII) | val; CSR_BARRIER(sc, TL_NETSIO, 1, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); tl_dio_write8(sc, TL_NETSIO, val); CSR_BARRIER(sc, TL_NETSIO, 1, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } static int tl_miibus_readreg(dev, phy, reg) device_t dev; int phy, reg; { struct tl_softc *sc; int minten, val; sc = device_get_softc(dev); /* * Turn off MII interrupt by forcing MINTEN low. */ minten = tl_dio_read8(sc, TL_NETSIO) & TL_SIO_MINTEN; if (minten) { tl_dio_clrbit(sc, TL_NETSIO, TL_SIO_MINTEN); } val = mii_bitbang_readreg(dev, &tl_mii_bitbang_ops, phy, reg); /* Reenable interrupts. */ if (minten) { tl_dio_setbit(sc, TL_NETSIO, TL_SIO_MINTEN); } return (val); } static int tl_miibus_writereg(dev, phy, reg, data) device_t dev; int phy, reg, data; { struct tl_softc *sc; int minten; sc = device_get_softc(dev); /* * Turn off MII interrupt by forcing MINTEN low. */ minten = tl_dio_read8(sc, TL_NETSIO) & TL_SIO_MINTEN; if (minten) { tl_dio_clrbit(sc, TL_NETSIO, TL_SIO_MINTEN); } mii_bitbang_writereg(dev, &tl_mii_bitbang_ops, phy, reg, data); /* Reenable interrupts. */ if (minten) { tl_dio_setbit(sc, TL_NETSIO, TL_SIO_MINTEN); } return(0); } static void tl_miibus_statchg(dev) device_t dev; { struct tl_softc *sc; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->tl_miibus); if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) { tl_dio_setbit(sc, TL_NETCMD, TL_CMD_DUPLEX); } else { tl_dio_clrbit(sc, TL_NETCMD, TL_CMD_DUPLEX); } } /* * Set modes for bitrate devices. */ static void tl_setmode(sc, media) struct tl_softc *sc; int media; { if (IFM_SUBTYPE(media) == IFM_10_5) tl_dio_setbit(sc, TL_ACOMMIT, TL_AC_MTXD1); if (IFM_SUBTYPE(media) == IFM_10_T) { tl_dio_clrbit(sc, TL_ACOMMIT, TL_AC_MTXD1); if ((media & IFM_GMASK) == IFM_FDX) { tl_dio_clrbit(sc, TL_ACOMMIT, TL_AC_MTXD3); tl_dio_setbit(sc, TL_NETCMD, TL_CMD_DUPLEX); } else { tl_dio_setbit(sc, TL_ACOMMIT, TL_AC_MTXD3); tl_dio_clrbit(sc, TL_NETCMD, TL_CMD_DUPLEX); } } } /* * Calculate the hash of a MAC address for programming the multicast hash * table. This hash is simply the address split into 6-bit chunks * XOR'd, e.g. 
* byte: 000000|00 1111|1111 22|222222|333333|33 4444|4444 55|555555 * bit: 765432|10 7654|3210 76|543210|765432|10 7654|3210 76|543210 * Bytes 0-2 and 3-5 are symmetrical, so are folded together. Then * the folded 24-bit value is split into 6-bit portions and XOR'd. */ static uint32_t tl_mchash(addr) const uint8_t *addr; { int t; t = (addr[0] ^ addr[3]) << 16 | (addr[1] ^ addr[4]) << 8 | (addr[2] ^ addr[5]); return ((t >> 18) ^ (t >> 12) ^ (t >> 6) ^ t) & 0x3f; } /* * The ThunderLAN has a perfect MAC address filter in addition to * the multicast hash filter. The perfect filter can be programmed * with up to four MAC addresses. The first one is always used to * hold the station address, which leaves us free to use the other * three for multicast addresses. */ static void tl_setfilt(sc, addr, slot) struct tl_softc *sc; caddr_t addr; int slot; { int i; u_int16_t regaddr; regaddr = TL_AREG0_B5 + (slot * ETHER_ADDR_LEN); for (i = 0; i < ETHER_ADDR_LEN; i++) tl_dio_write8(sc, regaddr + i, *(addr + i)); } /* * XXX In FreeBSD 3.0, multicast addresses are managed using a doubly * linked list. This is fine, except addresses are added from the head * end of the list. We want to arrange for 224.0.0.1 (the "all hosts") * group to always be in the perfect filter, but as more groups are added, * the 224.0.0.1 entry (which is always added first) gets pushed down * the list and ends up at the tail. So after 3 or 4 multicast groups * are added, the all-hosts entry gets pushed out of the perfect filter * and into the hash table. * * Because the multicast list is a doubly-linked list as opposed to a * circular queue, we don't have the ability to just grab the tail of * the list and traverse it backwards. Instead, we have to traverse * the list once to find the tail, then traverse it again backwards to * update the multicast filter. */ static void tl_setmulti(sc) struct tl_softc *sc; { struct ifnet *ifp; u_int32_t hashes[2] = { 0, 0 }; int h, i; struct ifmultiaddr *ifma; u_int8_t dummy[] = { 0, 0, 0, 0, 0, 0 }; ifp = sc->tl_ifp; /* First, zot all the existing filters. */ for (i = 1; i < 4; i++) tl_setfilt(sc, (caddr_t)&dummy, i); tl_dio_write32(sc, TL_HASH1, 0); tl_dio_write32(sc, TL_HASH2, 0); /* Now program new ones. */ if (ifp->if_flags & IFF_ALLMULTI) { hashes[0] = 0xFFFFFFFF; hashes[1] = 0xFFFFFFFF; } else { i = 1; if_maddr_rlock(ifp); TAILQ_FOREACH_REVERSE(ifma, &ifp->if_multiaddrs, ifmultihead, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; /* * Program the first three multicast groups * into the perfect filter. For all others, * use the hash table. */ if (i < 4) { tl_setfilt(sc, LLADDR((struct sockaddr_dl *)ifma->ifma_addr), i); i++; continue; } h = tl_mchash( LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } if_maddr_runlock(ifp); } tl_dio_write32(sc, TL_HASH1, hashes[0]); tl_dio_write32(sc, TL_HASH2, hashes[1]); } /* * This routine is recommended by the ThunderLAN manual to ensure that * the internal PHY is powered up correctly. It also recommends a one * second pause at the end to 'wait for the clocks to start' but in my * experience this isn't necessary.
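 * (The sequence below writes loopback/isolate/power-down to every PHY
 * address, then steps address 31 -- assumed here to be the ThunderLAN's
 * internal PHY -- through isolate and loopback+isolate, and finally
 * polls until BMCR_RESET reads back clear.)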
*/ static void tl_hardreset(dev) device_t dev; { int i; u_int16_t flags; mii_bitbang_sync(dev, &tl_mii_bitbang_ops); flags = BMCR_LOOP|BMCR_ISO|BMCR_PDOWN; for (i = 0; i < MII_NPHY; i++) tl_miibus_writereg(dev, i, MII_BMCR, flags); tl_miibus_writereg(dev, 31, MII_BMCR, BMCR_ISO); DELAY(50000); tl_miibus_writereg(dev, 31, MII_BMCR, BMCR_LOOP|BMCR_ISO); mii_bitbang_sync(dev, &tl_mii_bitbang_ops); while(tl_miibus_readreg(dev, 31, MII_BMCR) & BMCR_RESET); DELAY(50000); } static void tl_softreset(sc, internal) struct tl_softc *sc; int internal; { u_int32_t cmd, dummy, i; /* Assert the adapter reset bit. */ CMD_SET(sc, TL_CMD_ADRST); /* Turn off interrupts */ CMD_SET(sc, TL_CMD_INTSOFF); /* First, clear the stats registers. */ for (i = 0; i < 5; i++) dummy = tl_dio_read32(sc, TL_TXGOODFRAMES); /* Clear Areg and Hash registers */ for (i = 0; i < 8; i++) tl_dio_write32(sc, TL_AREG0_B5, 0x00000000); /* * Set up Netconfig register. Enable one channel and * one fragment mode. */ tl_dio_setbit16(sc, TL_NETCONFIG, TL_CFG_ONECHAN|TL_CFG_ONEFRAG); if (internal && !sc->tl_bitrate) { tl_dio_setbit16(sc, TL_NETCONFIG, TL_CFG_PHYEN); } else { tl_dio_clrbit16(sc, TL_NETCONFIG, TL_CFG_PHYEN); } /* Handle cards with bitrate devices. */ if (sc->tl_bitrate) tl_dio_setbit16(sc, TL_NETCONFIG, TL_CFG_BITRATE); /* * Load adapter irq pacing timer and tx threshold. * We make the transmit threshold 1 initially but we may * change that later. */ cmd = CSR_READ_4(sc, TL_HOSTCMD); cmd |= TL_CMD_NES; cmd &= ~(TL_CMD_RT|TL_CMD_EOC|TL_CMD_ACK_MASK|TL_CMD_CHSEL_MASK); CMD_PUT(sc, cmd | (TL_CMD_LDTHR | TX_THR)); CMD_PUT(sc, cmd | (TL_CMD_LDTMR | 0x00000003)); /* Unreset the MII */ tl_dio_setbit(sc, TL_NETSIO, TL_SIO_NMRST); /* Take the adapter out of reset */ tl_dio_setbit(sc, TL_NETCMD, TL_CMD_NRESET|TL_CMD_NWRAP); /* Wait for things to settle down a little. */ DELAY(500); } /* * Probe for a ThunderLAN chip. Check the PCI vendor and device IDs * against our list and return its name if we find a match. */ static int tl_probe(dev) device_t dev; { const struct tl_type *t; t = tl_devs; while(t->tl_name != NULL) { if ((pci_get_vendor(dev) == t->tl_vid) && (pci_get_device(dev) == t->tl_did)) { device_set_desc(dev, t->tl_name); return (BUS_PROBE_DEFAULT); } t++; } return(ENXIO); } static int tl_attach(dev) device_t dev; { u_int16_t did, vid; const struct tl_type *t; struct ifnet *ifp; struct tl_softc *sc; int error, flags, i, rid, unit; u_char eaddr[6]; vid = pci_get_vendor(dev); did = pci_get_device(dev); sc = device_get_softc(dev); sc->tl_dev = dev; unit = device_get_unit(dev); t = tl_devs; while(t->tl_name != NULL) { if (vid == t->tl_vid && did == t->tl_did) break; t++; } if (t->tl_name == NULL) { device_printf(dev, "unknown device!?\n"); return (ENXIO); } mtx_init(&sc->tl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); /* * Map control/status registers. */ pci_enable_busmaster(dev); #ifdef TL_USEIOSPACE rid = TL_PCI_LOIO; sc->tl_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); /* * Some cards have the I/O and memory mapped address registers * reversed. Try both combinations before giving up. 
if (sc->tl_res == NULL) { rid = TL_PCI_LOMEM; sc->tl_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); } #else rid = TL_PCI_LOMEM; sc->tl_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->tl_res == NULL) { rid = TL_PCI_LOIO; sc->tl_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); } #endif if (sc->tl_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } #ifdef notdef /* * The ThunderLAN manual suggests jacking the PCI latency * timer all the way up to its maximum value. I'm not sure * if this is really necessary, but what the manual wants, * the manual gets. */ command = pci_read_config(dev, TL_PCI_LATENCY_TIMER, 4); command |= 0x0000FF00; pci_write_config(dev, TL_PCI_LATENCY_TIMER, command, 4); #endif /* Allocate interrupt */ rid = 0; sc->tl_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->tl_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* * Now allocate memory for the TX and RX lists. */ sc->tl_ldata = contigmalloc(sizeof(struct tl_list_data), M_DEVBUF, M_NOWAIT, 0, 0xffffffff, PAGE_SIZE, 0); if (sc->tl_ldata == NULL) { device_printf(dev, "no memory for list buffers!\n"); error = ENXIO; goto fail; } bzero(sc->tl_ldata, sizeof(struct tl_list_data)); if (vid == COMPAQ_VENDORID || vid == TI_VENDORID) sc->tl_eeaddr = TL_EEPROM_EADDR; if (vid == OLICOM_VENDORID) sc->tl_eeaddr = TL_EEPROM_EADDR_OC; /* Reset the adapter. */ tl_softreset(sc, 1); tl_hardreset(dev); tl_softreset(sc, 1); /* * Get station address from the EEPROM. */ if (tl_read_eeprom(sc, eaddr, sc->tl_eeaddr, ETHER_ADDR_LEN)) { device_printf(dev, "failed to read station address\n"); error = ENXIO; goto fail; } /* * XXX Olicom, in its desire to be different from the * rest of the world, has done strange things with the * encoding of the station address in the EEPROM. First * of all, they store the address at offset 0xF8 rather * than at 0x83 like the ThunderLAN manual suggests. * Second, they store the address in three 16-bit words in * network byte order, as opposed to storing it sequentially * like all the other ThunderLAN cards. In order to get * the station address in a form that matches what the Olicom * diagnostic utility specifies, we have to byte-swap each * word. To make things even more confusing, neither 00:00:28 * nor 00:00:24 appear in the IEEE OUI database. */ if (vid == OLICOM_VENDORID) { for (i = 0; i < ETHER_ADDR_LEN; i += 2) { u_int16_t *p; p = (u_int16_t *)&eaddr[i]; *p = ntohs(*p); } } ifp = sc->tl_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = tl_ioctl; ifp->if_start = tl_start; ifp->if_init = tl_init; ifp->if_snd.ifq_maxlen = TL_TX_LIST_CNT - 1; ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_MTU; callout_init_mtx(&sc->tl_stat_callout, &sc->tl_mtx, 0); /* Reset the adapter again. */ tl_softreset(sc, 1); tl_hardreset(dev); tl_softreset(sc, 1); /* * Do MII setup. If no PHYs are found, then this is a * bitrate ThunderLAN chip that only supports 10baseT * and AUI/BNC. * XXX mii_attach() can fail for reasons other than * no PHYs found! */
*/ flags = 0; if (vid == COMPAQ_VENDORID) { if (did == COMPAQ_DEVICEID_NETEL_10_100_PROLIANT || did == COMPAQ_DEVICEID_NETFLEX_3P_INTEGRATED || did == COMPAQ_DEVICEID_NETFLEX_3P_BNC || did == COMPAQ_DEVICEID_NETEL_10_T2_UTP_COAX) flags |= MIIF_MACPRIV0; if (did == COMPAQ_DEVICEID_NETEL_10 || did == COMPAQ_DEVICEID_NETEL_10_100_DUAL || did == COMPAQ_DEVICEID_NETFLEX_3P || did == COMPAQ_DEVICEID_NETEL_10_100_EMBEDDED) flags |= MIIF_MACPRIV1; } else if (vid == OLICOM_VENDORID && did == OLICOM_DEVICEID_OC2183) flags |= MIIF_MACPRIV0 | MIIF_MACPRIV1; if (mii_attach(dev, &sc->tl_miibus, ifp, tl_ifmedia_upd, tl_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0)) { struct ifmedia *ifm; sc->tl_bitrate = 1; ifmedia_init(&sc->ifmedia, 0, tl_ifmedia_upd, tl_ifmedia_sts); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_HDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_5, 0, NULL); ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_10_T); /* Reset again, this time setting bitrate mode. */ tl_softreset(sc, 1); ifm = &sc->ifmedia; ifm->ifm_media = ifm->ifm_cur->ifm_media; tl_ifmedia_upd(ifp); } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(dev, sc->tl_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, tl_intr, sc, &sc->tl_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); goto fail; } fail: if (error) tl_detach(dev); return(error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int tl_detach(dev) device_t dev; { struct tl_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->tl_mtx), ("tl mutex not initialized")); ifp = sc->tl_ifp; /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { ether_ifdetach(ifp); TL_LOCK(sc); tl_stop(sc); TL_UNLOCK(sc); callout_drain(&sc->tl_stat_callout); } if (sc->tl_miibus) device_delete_child(dev, sc->tl_miibus); bus_generic_detach(dev); if (sc->tl_ldata) contigfree(sc->tl_ldata, sizeof(struct tl_list_data), M_DEVBUF); if (sc->tl_bitrate) ifmedia_removeall(&sc->ifmedia); if (sc->tl_intrhand) bus_teardown_intr(dev, sc->tl_irq, sc->tl_intrhand); if (sc->tl_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->tl_irq); if (sc->tl_res) bus_release_resource(dev, TL_RES, TL_RID, sc->tl_res); if (ifp) if_free(ifp); mtx_destroy(&sc->tl_mtx); return(0); } /* * Initialize the transmit lists. */ static int tl_list_tx_init(sc) struct tl_softc *sc; { struct tl_chain_data *cd; struct tl_list_data *ld; int i; cd = &sc->tl_cdata; ld = sc->tl_ldata; for (i = 0; i < TL_TX_LIST_CNT; i++) { cd->tl_tx_chain[i].tl_ptr = &ld->tl_tx_list[i]; if (i == (TL_TX_LIST_CNT - 1)) cd->tl_tx_chain[i].tl_next = NULL; else cd->tl_tx_chain[i].tl_next = &cd->tl_tx_chain[i + 1]; } cd->tl_tx_free = &cd->tl_tx_chain[0]; cd->tl_tx_tail = cd->tl_tx_head = NULL; sc->tl_txeoc = 1; return(0); } /* * Initialize the RX lists and allocate mbufs for them. 
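 * Each list's forward pointer (tlist_fptr) is loaded with the physical
 * address of the next list; the final entry keeps a 0 forward pointer
 * so the chip raises RXEOC once the whole chain has been consumed.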
*/ static int tl_list_rx_init(sc) struct tl_softc *sc; { struct tl_chain_data *cd; struct tl_list_data *ld; int i; cd = &sc->tl_cdata; ld = sc->tl_ldata; for (i = 0; i < TL_RX_LIST_CNT; i++) { cd->tl_rx_chain[i].tl_ptr = (struct tl_list_onefrag *)&ld->tl_rx_list[i]; if (tl_newbuf(sc, &cd->tl_rx_chain[i]) == ENOBUFS) return(ENOBUFS); if (i == (TL_RX_LIST_CNT - 1)) { cd->tl_rx_chain[i].tl_next = NULL; ld->tl_rx_list[i].tlist_fptr = 0; } else { cd->tl_rx_chain[i].tl_next = &cd->tl_rx_chain[i + 1]; ld->tl_rx_list[i].tlist_fptr = vtophys(&ld->tl_rx_list[i + 1]); } } cd->tl_rx_head = &cd->tl_rx_chain[0]; cd->tl_rx_tail = &cd->tl_rx_chain[TL_RX_LIST_CNT - 1]; return(0); } static int tl_newbuf(sc, c) struct tl_softc *sc; struct tl_chain_onefrag *c; { struct mbuf *m_new = NULL; m_new = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m_new == NULL) return(ENOBUFS); c->tl_mbuf = m_new; c->tl_next = NULL; c->tl_ptr->tlist_frsize = MCLBYTES; c->tl_ptr->tlist_fptr = 0; c->tl_ptr->tl_frag.tlist_dadr = vtophys(mtod(m_new, caddr_t)); c->tl_ptr->tl_frag.tlist_dcnt = MCLBYTES; c->tl_ptr->tlist_cstat = TL_CSTAT_READY; return(0); } /* * Interrupt handler for RX 'end of frame' condition (EOF). This * tells us that a full ethernet frame has been captured and we need * to handle it. * * Reception is done using 'lists' which consist of a header and a * series of 10 data count/data address pairs that point to buffers. * Initially you're supposed to create a list, populate it with pointers * to buffers, then load the physical address of the list into the * ch_parm register. The adapter is then supposed to DMA the received * frame into the buffers for you. * * To make things as fast as possible, we have the chip DMA directly * into mbufs. This saves us from having to do a buffer copy: we can * just hand the mbufs directly to ether_input(). Once the frame has * been sent on its way, the 'list' structure is assigned a new buffer * and moved to the end of the RX chain. As long as we stay ahead of * the chip, it will always think it has an endless receive channel. * * If we happen to fall behind and the chip manages to fill up all of * the buffers, it will generate an end of channel interrupt and wait * for us to empty the chain and restart the receiver. */ static int tl_intvec_rxeof(xsc, type) void *xsc; u_int32_t type; { struct tl_softc *sc; int r = 0, total_len = 0; struct ether_header *eh; struct mbuf *m; struct ifnet *ifp; struct tl_chain_onefrag *cur_rx; sc = xsc; ifp = sc->tl_ifp; TL_LOCK_ASSERT(sc); while(sc->tl_cdata.tl_rx_head != NULL) { cur_rx = sc->tl_cdata.tl_rx_head; if (!(cur_rx->tl_ptr->tlist_cstat & TL_CSTAT_FRAMECMP)) break; r++; sc->tl_cdata.tl_rx_head = cur_rx->tl_next; m = cur_rx->tl_mbuf; total_len = cur_rx->tl_ptr->tlist_frsize; if (tl_newbuf(sc, cur_rx) == ENOBUFS) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->tl_ptr->tlist_frsize = MCLBYTES; cur_rx->tl_ptr->tlist_cstat = TL_CSTAT_READY; cur_rx->tl_ptr->tl_frag.tlist_dcnt = MCLBYTES; continue; } sc->tl_cdata.tl_rx_tail->tl_ptr->tlist_fptr = vtophys(cur_rx->tl_ptr); sc->tl_cdata.tl_rx_tail->tl_next = cur_rx; sc->tl_cdata.tl_rx_tail = cur_rx; /* * Note: when the ThunderLAN chip is in 'capture all * frames' mode, it will receive its own transmissions. * We don't need to process our own transmissions, * so we drop them here and continue.
*/ eh = mtod(m, struct ether_header *); /*if (ifp->if_flags & IFF_PROMISC && */ if (!bcmp(eh->ether_shost, IF_LLADDR(sc->tl_ifp), ETHER_ADDR_LEN)) { m_freem(m); continue; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = total_len; TL_UNLOCK(sc); (*ifp->if_input)(ifp, m); TL_LOCK(sc); } return(r); } /* * The RX-EOC condition hits when the ch_parm address hasn't been * initialized or the adapter reached a list with a forward pointer * of 0 (which indicates the end of the chain). In our case, this means * the card has hit the end of the receive buffer chain and we need to * empty out the buffers and shift the pointer back to the beginning again. */ static int tl_intvec_rxeoc(xsc, type) void *xsc; u_int32_t type; { struct tl_softc *sc; int r; struct tl_chain_data *cd; sc = xsc; cd = &sc->tl_cdata; /* Flush out the receive queue and ack RXEOF interrupts. */ r = tl_intvec_rxeof(xsc, type); CMD_PUT(sc, TL_CMD_ACK | r | (type & ~(0x00100000))); r = 1; cd->tl_rx_head = &cd->tl_rx_chain[0]; cd->tl_rx_tail = &cd->tl_rx_chain[TL_RX_LIST_CNT - 1]; CSR_WRITE_4(sc, TL_CH_PARM, vtophys(sc->tl_cdata.tl_rx_head->tl_ptr)); r |= (TL_CMD_GO|TL_CMD_RT); return(r); } static int tl_intvec_txeof(xsc, type) void *xsc; u_int32_t type; { struct tl_softc *sc; int r = 0; struct tl_chain *cur_tx; sc = xsc; /* * Go through our tx list and free mbufs for those * frames that have been sent. */ while (sc->tl_cdata.tl_tx_head != NULL) { cur_tx = sc->tl_cdata.tl_tx_head; if (!(cur_tx->tl_ptr->tlist_cstat & TL_CSTAT_FRAMECMP)) break; sc->tl_cdata.tl_tx_head = cur_tx->tl_next; r++; m_freem(cur_tx->tl_mbuf); cur_tx->tl_mbuf = NULL; cur_tx->tl_next = sc->tl_cdata.tl_tx_free; sc->tl_cdata.tl_tx_free = cur_tx; if (!cur_tx->tl_ptr->tlist_fptr) break; } return(r); } /* * The transmit end of channel interrupt. The adapter triggers this * interrupt to tell us it hit the end of the current transmit list. * * A note about this: it's possible for a condition to arise where * tl_start() may try to send frames between TXEOF and TXEOC interrupts. * You have to avoid this since the chip expects things to go in a * particular order: transmit, acknowledge TXEOF, acknowledge TXEOC. * When the TXEOF handler is called, it will free all of the transmitted * frames and reset the tx_head pointer to NULL. However, a TXEOC * interrupt should be received and acknowledged before any more frames * are queued for transmission. If tl_start() is called after TXEOF * resets the tx_head pointer but _before_ the TXEOC interrupt arrives, * it could attempt to issue a transmit command prematurely. * * To guard against this, tl_start() will only issue transmit commands * if the tl_txeoc flag is set, and only the TXEOC interrupt handler * can set this flag once tl_start() has cleared it. */ static int tl_intvec_txeoc(xsc, type) void *xsc; u_int32_t type; { struct tl_softc *sc; struct ifnet *ifp; u_int32_t cmd; sc = xsc; ifp = sc->tl_ifp; /* Clear the timeout timer. */ sc->tl_timer = 0; if (sc->tl_cdata.tl_tx_head == NULL) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->tl_cdata.tl_tx_tail = NULL; sc->tl_txeoc = 1; } else { sc->tl_txeoc = 0; /* First we have to ack the EOC interrupt. */ CMD_PUT(sc, TL_CMD_ACK | 0x00000001 | type); /* Then load the address of the next TX list. */ CSR_WRITE_4(sc, TL_CH_PARM, vtophys(sc->tl_cdata.tl_tx_head->tl_ptr)); /* Restart TX channel.
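 * Clearing TL_CMD_RT selects the TX channel; issuing GO with INTSON
 * then restarts it, as done just below.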
*/ cmd = CSR_READ_4(sc, TL_HOSTCMD); cmd &= ~TL_CMD_RT; cmd |= TL_CMD_GO|TL_CMD_INTSON; CMD_PUT(sc, cmd); return(0); } return(1); } static int tl_intvec_adchk(xsc, type) void *xsc; u_int32_t type; { struct tl_softc *sc; sc = xsc; if (type) device_printf(sc->tl_dev, "adapter check: %x\n", (unsigned int)CSR_READ_4(sc, TL_CH_PARM)); tl_softreset(sc, 1); tl_stop(sc); tl_init_locked(sc); CMD_SET(sc, TL_CMD_INTSON); return(0); } static int tl_intvec_netsts(xsc, type) void *xsc; u_int32_t type; { struct tl_softc *sc; u_int16_t netsts; sc = xsc; netsts = tl_dio_read16(sc, TL_NETSTS); tl_dio_write16(sc, TL_NETSTS, netsts); device_printf(sc->tl_dev, "network status: %x\n", netsts); return(1); } static void tl_intr(xsc) void *xsc; { struct tl_softc *sc; struct ifnet *ifp; int r = 0; u_int32_t type = 0; u_int16_t ints = 0; u_int8_t ivec = 0; sc = xsc; TL_LOCK(sc); /* Disable interrupts */ ints = CSR_READ_2(sc, TL_HOST_INT); CSR_WRITE_2(sc, TL_HOST_INT, ints); type = (ints << 16) & 0xFFFF0000; ivec = (ints & TL_VEC_MASK) >> 5; ints = (ints & TL_INT_MASK) >> 2; ifp = sc->tl_ifp; switch(ints) { case (TL_INTR_INVALID): #ifdef DIAGNOSTIC device_printf(sc->tl_dev, "got an invalid interrupt!\n"); #endif /* Re-enable interrupts but don't ack this one. */ CMD_PUT(sc, type); r = 0; break; case (TL_INTR_TXEOF): r = tl_intvec_txeof((void *)sc, type); break; case (TL_INTR_TXEOC): r = tl_intvec_txeoc((void *)sc, type); break; case (TL_INTR_STATOFLOW): tl_stats_update(sc); r = 1; break; case (TL_INTR_RXEOF): r = tl_intvec_rxeof((void *)sc, type); break; case (TL_INTR_DUMMY): device_printf(sc->tl_dev, "got a dummy interrupt\n"); r = 1; break; case (TL_INTR_ADCHK): if (ivec) r = tl_intvec_adchk((void *)sc, type); else r = tl_intvec_netsts((void *)sc, type); break; case (TL_INTR_RXEOC): r = tl_intvec_rxeoc((void *)sc, type); break; default: device_printf(sc->tl_dev, "bogus interrupt type\n"); break; } /* Re-enable interrupts */ if (r) { CMD_PUT(sc, TL_CMD_ACK | r | type); } if (ifp->if_snd.ifq_head != NULL) tl_start_locked(ifp); TL_UNLOCK(sc); } static void tl_stats_update(xsc) void *xsc; { struct tl_softc *sc; struct ifnet *ifp; struct tl_stats tl_stats; struct mii_data *mii; u_int32_t *p; bzero((char *)&tl_stats, sizeof(struct tl_stats)); sc = xsc; TL_LOCK_ASSERT(sc); ifp = sc->tl_ifp; p = (u_int32_t *)&tl_stats; CSR_WRITE_2(sc, TL_DIO_ADDR, TL_TXGOODFRAMES|TL_DIO_ADDR_INC); *p++ = CSR_READ_4(sc, TL_DIO_DATA); *p++ = CSR_READ_4(sc, TL_DIO_DATA); *p++ = CSR_READ_4(sc, TL_DIO_DATA); *p++ = CSR_READ_4(sc, TL_DIO_DATA); *p++ = CSR_READ_4(sc, TL_DIO_DATA); if_inc_counter(ifp, IFCOUNTER_OPACKETS, tl_tx_goodframes(tl_stats)); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, tl_stats.tl_tx_single_collision + tl_stats.tl_tx_multi_collision); if_inc_counter(ifp, IFCOUNTER_IPACKETS, tl_rx_goodframes(tl_stats)); if_inc_counter(ifp, IFCOUNTER_IERRORS, tl_stats.tl_crc_errors + tl_stats.tl_code_errors + tl_rx_overrun(tl_stats)); if_inc_counter(ifp, IFCOUNTER_OERRORS, tl_tx_underrun(tl_stats)); if (tl_tx_underrun(tl_stats)) { u_int8_t tx_thresh; tx_thresh = tl_dio_read8(sc, TL_ACOMMIT) & TL_AC_TXTHRESH; if (tx_thresh != TL_AC_TXTHRESH_WHOLEPKT) { tx_thresh >>= 4; tx_thresh++; device_printf(sc->tl_dev, "tx underrun -- increasing " "tx threshold to %d bytes\n", (64 * (tx_thresh * 4))); tl_dio_clrbit(sc, TL_ACOMMIT, TL_AC_TXTHRESH); tl_dio_setbit(sc, TL_ACOMMIT, tx_thresh << 4); } } if (sc->tl_timer > 0 && --sc->tl_timer == 0) tl_watchdog(sc); callout_reset(&sc->tl_stat_callout, hz, tl_stats_update, sc); if (!sc->tl_bitrate) { mii = 
device_get_softc(sc->tl_miibus); mii_tick(mii); } } /* * Encapsulate an mbuf chain in a list by coupling the mbuf data * pointers to the fragment pointers. */ static int tl_encap(sc, c, m_head) struct tl_softc *sc; struct tl_chain *c; struct mbuf *m_head; { int frag = 0; struct tl_frag *f = NULL; int total_len; struct mbuf *m; struct ifnet *ifp = sc->tl_ifp; /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; total_len = 0; for (m = m_head, frag = 0; m != NULL; m = m->m_next) { if (m->m_len != 0) { if (frag == TL_MAXFRAGS) break; total_len += m->m_len; c->tl_ptr->tl_frag[frag].tlist_dadr = vtophys(mtod(m, vm_offset_t)); c->tl_ptr->tl_frag[frag].tlist_dcnt = m->m_len; frag++; } } /* * Handle special cases. * Special case #1: we used up all 10 fragments, but * we have more mbufs left in the chain. Copy the * data into an mbuf cluster. Note that we don't * bother clearing the values in the other fragment * pointers/counters; it wouldn't gain us anything, * and would waste cycles. */ if (m != NULL) { struct mbuf *m_new = NULL; MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { if_printf(ifp, "no memory for tx list\n"); return(1); } if (m_head->m_pkthdr.len > MHLEN) { if (!(MCLGET(m_new, M_NOWAIT))) { m_freem(m_new); if_printf(ifp, "no memory for tx list\n"); return(1); } } m_copydata(m_head, 0, m_head->m_pkthdr.len, mtod(m_new, caddr_t)); m_new->m_pkthdr.len = m_new->m_len = m_head->m_pkthdr.len; m_freem(m_head); m_head = m_new; f = &c->tl_ptr->tl_frag[0]; f->tlist_dadr = vtophys(mtod(m_new, caddr_t)); f->tlist_dcnt = total_len = m_new->m_len; frag = 1; } /* * Special case #2: the frame is smaller than the minimum * frame size. We have to pad it to make the chip happy. */ if (total_len < TL_MIN_FRAMELEN) { if (frag == TL_MAXFRAGS) if_printf(ifp, "all frags filled but frame still too small!\n"); f = &c->tl_ptr->tl_frag[frag]; f->tlist_dcnt = TL_MIN_FRAMELEN - total_len; f->tlist_dadr = vtophys(&sc->tl_ldata->tl_pad); total_len += f->tlist_dcnt; frag++; } c->tl_mbuf = m_head; c->tl_ptr->tl_frag[frag - 1].tlist_dcnt |= TL_LAST_FRAG; c->tl_ptr->tlist_frsize = total_len; c->tl_ptr->tlist_cstat = TL_CSTAT_READY; c->tl_ptr->tlist_fptr = 0; return(0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. */ static void tl_start(ifp) struct ifnet *ifp; { struct tl_softc *sc; sc = ifp->if_softc; TL_LOCK(sc); tl_start_locked(ifp); TL_UNLOCK(sc); } static void tl_start_locked(ifp) struct ifnet *ifp; { struct tl_softc *sc; struct mbuf *m_head = NULL; u_int32_t cmd; struct tl_chain *prev = NULL, *cur_tx = NULL, *start_tx; sc = ifp->if_softc; TL_LOCK_ASSERT(sc); /* * Check for an available queue slot. If there are none, * punt. */ if (sc->tl_cdata.tl_tx_free == NULL) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } start_tx = sc->tl_cdata.tl_tx_free; while(sc->tl_cdata.tl_tx_free != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* Pick a chain member off the free list. */ cur_tx = sc->tl_cdata.tl_tx_free; sc->tl_cdata.tl_tx_free = cur_tx->tl_next; cur_tx->tl_next = NULL; /* Pack the data into the list.
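 * tl_encap() maps, collapses or pads the mbuf chain as needed; note
 * that its failure return is not checked at this call site.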
*/ tl_encap(sc, cur_tx, m_head); /* Chain it together */ if (prev != NULL) { prev->tl_next = cur_tx; prev->tl_ptr->tlist_fptr = vtophys(cur_tx->tl_ptr); } prev = cur_tx; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ BPF_MTAP(ifp, cur_tx->tl_mbuf); } /* * If there are no packets queued, bail. */ if (cur_tx == NULL) return; /* * That's all we can stands, we can't stands no more. * If there are no other transfers pending, then issue the * TX GO command to the adapter to start things moving. * Otherwise, just leave the data in the queue and let * the EOF/EOC interrupt handler send. */ if (sc->tl_cdata.tl_tx_head == NULL) { sc->tl_cdata.tl_tx_head = start_tx; sc->tl_cdata.tl_tx_tail = cur_tx; if (sc->tl_txeoc) { sc->tl_txeoc = 0; CSR_WRITE_4(sc, TL_CH_PARM, vtophys(start_tx->tl_ptr)); cmd = CSR_READ_4(sc, TL_HOSTCMD); cmd &= ~TL_CMD_RT; cmd |= TL_CMD_GO|TL_CMD_INTSON; CMD_PUT(sc, cmd); } } else { sc->tl_cdata.tl_tx_tail->tl_next = start_tx; sc->tl_cdata.tl_tx_tail = cur_tx; } /* * Set a timeout in case the chip goes out to lunch. */ sc->tl_timer = 5; } static void tl_init(xsc) void *xsc; { struct tl_softc *sc = xsc; TL_LOCK(sc); tl_init_locked(sc); TL_UNLOCK(sc); } static void tl_init_locked(sc) struct tl_softc *sc; { struct ifnet *ifp = sc->tl_ifp; struct mii_data *mii; TL_LOCK_ASSERT(sc); ifp = sc->tl_ifp; /* * Cancel pending I/O. */ tl_stop(sc); /* Initialize TX FIFO threshold */ tl_dio_clrbit(sc, TL_ACOMMIT, TL_AC_TXTHRESH); tl_dio_setbit(sc, TL_ACOMMIT, TL_AC_TXTHRESH_16LONG); /* Set PCI burst size */ tl_dio_write8(sc, TL_BSIZEREG, TL_RXBURST_16LONG|TL_TXBURST_16LONG); /* * Set 'capture all frames' bit for promiscuous mode. */ if (ifp->if_flags & IFF_PROMISC) tl_dio_setbit(sc, TL_NETCMD, TL_CMD_CAF); else tl_dio_clrbit(sc, TL_NETCMD, TL_CMD_CAF); /* * Set capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) tl_dio_clrbit(sc, TL_NETCMD, TL_CMD_NOBRX); else tl_dio_setbit(sc, TL_NETCMD, TL_CMD_NOBRX); tl_dio_write16(sc, TL_MAXRX, MCLBYTES); /* Init our MAC address */ tl_setfilt(sc, IF_LLADDR(sc->tl_ifp), 0); /* Init multicast filter, if needed. */ tl_setmulti(sc); /* Init circular RX list. */ if (tl_list_rx_init(sc) == ENOBUFS) { device_printf(sc->tl_dev, "initialization failed: no memory for rx buffers\n"); tl_stop(sc); return; } /* Init TX pointers. */ tl_list_tx_init(sc); /* Enable PCI interrupts. */ CMD_SET(sc, TL_CMD_INTSON); /* Load the address of the rx list */ CMD_SET(sc, TL_CMD_RT); CSR_WRITE_4(sc, TL_CH_PARM, vtophys(&sc->tl_ldata->tl_rx_list[0])); if (!sc->tl_bitrate) { if (sc->tl_miibus != NULL) { mii = device_get_softc(sc->tl_miibus); mii_mediachg(mii); } } else { tl_ifmedia_upd(ifp); } /* Send the RX go command */ CMD_SET(sc, TL_CMD_GO|TL_CMD_NES|TL_CMD_RT); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* Start the stats update counter */ callout_reset(&sc->tl_stat_callout, hz, tl_stats_update, sc); } /* * Set media options. */ static int tl_ifmedia_upd(ifp) struct ifnet *ifp; { struct tl_softc *sc; struct mii_data *mii = NULL; sc = ifp->if_softc; TL_LOCK(sc); if (sc->tl_bitrate) tl_setmode(sc, sc->ifmedia.ifm_media); else { mii = device_get_softc(sc->tl_miibus); mii_mediachg(mii); } TL_UNLOCK(sc); return(0); } /* * Report current media status. 
*/ static void tl_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct tl_softc *sc; struct mii_data *mii; sc = ifp->if_softc; TL_LOCK(sc); ifmr->ifm_active = IFM_ETHER; if (sc->tl_bitrate) { if (tl_dio_read8(sc, TL_ACOMMIT) & TL_AC_MTXD1) ifmr->ifm_active = IFM_ETHER|IFM_10_5; else ifmr->ifm_active = IFM_ETHER|IFM_10_T; if (tl_dio_read8(sc, TL_ACOMMIT) & TL_AC_MTXD3) ifmr->ifm_active |= IFM_HDX; else ifmr->ifm_active |= IFM_FDX; TL_UNLOCK(sc); /* don't leak the lock on the early return */ return; } else { mii = device_get_softc(sc->tl_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; } TL_UNLOCK(sc); } static int tl_ioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct tl_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch(command) { case SIOCSIFFLAGS: TL_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->tl_if_flags & IFF_PROMISC)) { tl_dio_setbit(sc, TL_NETCMD, TL_CMD_CAF); tl_setmulti(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->tl_if_flags & IFF_PROMISC) { tl_dio_clrbit(sc, TL_NETCMD, TL_CMD_CAF); tl_setmulti(sc); } else tl_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { tl_stop(sc); } } sc->tl_if_flags = ifp->if_flags; TL_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: TL_LOCK(sc); tl_setmulti(sc); TL_UNLOCK(sc); error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: if (sc->tl_bitrate) error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); else { struct mii_data *mii; mii = device_get_softc(sc->tl_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } break; default: error = ether_ioctl(ifp, command, data); break; } return(error); } static void tl_watchdog(sc) struct tl_softc *sc; { struct ifnet *ifp; TL_LOCK_ASSERT(sc); ifp = sc->tl_ifp; if_printf(ifp, "device timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); tl_softreset(sc, 1); tl_init_locked(sc); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void tl_stop(sc) struct tl_softc *sc; { - register int i; + int i; struct ifnet *ifp; TL_LOCK_ASSERT(sc); ifp = sc->tl_ifp; /* Stop the stats updater. */ callout_stop(&sc->tl_stat_callout); /* Stop the transmitter */ CMD_CLR(sc, TL_CMD_RT); CMD_SET(sc, TL_CMD_STOP); CSR_WRITE_4(sc, TL_CH_PARM, 0); /* Stop the receiver */ CMD_SET(sc, TL_CMD_RT); CMD_SET(sc, TL_CMD_STOP); CSR_WRITE_4(sc, TL_CH_PARM, 0); /* * Disable host interrupts. */ CMD_SET(sc, TL_CMD_INTSOFF); /* * Clear list pointer. */ CSR_WRITE_4(sc, TL_CH_PARM, 0); /* * Free the RX lists. */ for (i = 0; i < TL_RX_LIST_CNT; i++) { if (sc->tl_cdata.tl_rx_chain[i].tl_mbuf != NULL) { m_freem(sc->tl_cdata.tl_rx_chain[i].tl_mbuf); sc->tl_cdata.tl_rx_chain[i].tl_mbuf = NULL; } } bzero((char *)&sc->tl_ldata->tl_rx_list, sizeof(sc->tl_ldata->tl_rx_list)); /* * Free the TX list buffers. */ for (i = 0; i < TL_TX_LIST_CNT; i++) { if (sc->tl_cdata.tl_tx_chain[i].tl_mbuf != NULL) { m_freem(sc->tl_cdata.tl_tx_chain[i].tl_mbuf); sc->tl_cdata.tl_tx_chain[i].tl_mbuf = NULL; } } bzero((char *)&sc->tl_ldata->tl_tx_list, sizeof(sc->tl_ldata->tl_tx_list)); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting.
*/ static int tl_shutdown(dev) device_t dev; { struct tl_softc *sc; sc = device_get_softc(dev); TL_LOCK(sc); tl_stop(sc); TL_UNLOCK(sc); return (0); } Index: stable/11/sys/dev/usb/usb_busdma.c =================================================================== --- stable/11/sys/dev/usb/usb_busdma.c (revision 331642) +++ stable/11/sys/dev/usb/usb_busdma.c (revision 331643) @@ -1,1119 +1,1119 @@ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008 Hans Petter Selasky. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef USB_GLOBAL_INCLUDE_FILE #include USB_GLOBAL_INCLUDE_FILE #else #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define USB_DEBUG_VAR usb_debug #include #include #include #include #include #include #include #include #include #endif /* USB_GLOBAL_INCLUDE_FILE */ #if USB_HAVE_BUSDMA static void usb_dma_tag_create(struct usb_dma_tag *, usb_size_t, usb_size_t); static void usb_dma_tag_destroy(struct usb_dma_tag *); static void usb_dma_lock_cb(void *, bus_dma_lock_op_t); static void usb_pc_alloc_mem_cb(void *, bus_dma_segment_t *, int, int); static void usb_pc_load_mem_cb(void *, bus_dma_segment_t *, int, int); static void usb_pc_common_mem_cb(void *, bus_dma_segment_t *, int, int, uint8_t); #endif /*------------------------------------------------------------------------* * usbd_get_page - lookup DMA-able memory for the given offset * * NOTE: Only call this function when the "page_cache" structure has * been properly initialized ! 
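 *
 * The core of usbd_get_page() below is the split of a byte offset into
 * a page index and an in-page remainder.  This is a minimal userland
 * model of that arithmetic, assuming a 4 KB USB_PAGE_SIZE; struct page
 * and lookup_phys() are invented names, not the USB stack's.
 */

#include <stdint.h>
#include <stdio.h>

#define USB_PAGE_SIZE 4096

struct page {
	uint64_t physaddr;		/* page-aligned physical address */
};

static uint64_t
lookup_phys(struct page *pages, uint32_t page_offset_buf, uint32_t offset,
    uint32_t *chunk_len)
{
	struct page *pg;

	offset += page_offset_buf;	/* buffer may start mid-page */
	pg = pages + (offset / USB_PAGE_SIZE);
	offset %= USB_PAGE_SIZE;
	*chunk_len = USB_PAGE_SIZE - offset;	/* bytes left in this page */
	return (pg->physaddr + offset);
}

int
main(void)
{
	struct page pages[2] = { { 0x10000 }, { 0x2a000 } };
	uint32_t len;
	uint64_t pa;

	/* offset 100 into a buffer that itself starts 4000 bytes into
	 * the first page: lands 4 bytes into the second page */
	pa = lookup_phys(pages, 4000, 100, &len);
	printf("pa=0x%llx len=%u\n", (unsigned long long)pa, len);
	return (0);
}

/*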
*------------------------------------------------------------------------*/ void usbd_get_page(struct usb_page_cache *pc, usb_frlength_t offset, struct usb_page_search *res) { #if USB_HAVE_BUSDMA struct usb_page *page; if (pc->page_start) { /* Case 1 - something has been loaded into DMA */ if (pc->buffer) { /* Case 1a - Kernel Virtual Address */ res->buffer = USB_ADD_BYTES(pc->buffer, offset); } offset += pc->page_offset_buf; /* compute destination page */ page = pc->page_start; if (pc->ismultiseg) { page += (offset / USB_PAGE_SIZE); offset %= USB_PAGE_SIZE; res->length = USB_PAGE_SIZE - offset; res->physaddr = page->physaddr + offset; } else { res->length = (usb_size_t)-1; res->physaddr = page->physaddr + offset; } if (!pc->buffer) { /* Case 1b - Non Kernel Virtual Address */ res->buffer = USB_ADD_BYTES(page->buffer, offset); } return; } #endif /* Case 2 - Plain PIO */ res->buffer = USB_ADD_BYTES(pc->buffer, offset); res->length = (usb_size_t)-1; #if USB_HAVE_BUSDMA res->physaddr = 0; #endif } /*------------------------------------------------------------------------* * usb_pc_buffer_is_aligned - verify alignment * * This function is used to check if a page cache buffer is properly * aligned to reduce the use of bounce buffers in PIO mode. *------------------------------------------------------------------------*/ uint8_t usb_pc_buffer_is_aligned(struct usb_page_cache *pc, usb_frlength_t offset, usb_frlength_t len, usb_frlength_t mask) { struct usb_page_search buf_res; while (len != 0) { usbd_get_page(pc, offset, &buf_res); if (buf_res.length > len) buf_res.length = len; if (USB_P2U(buf_res.buffer) & mask) return (0); if (buf_res.length & mask) return (0); offset += buf_res.length; len -= buf_res.length; } return (1); } /*------------------------------------------------------------------------* * usbd_copy_in - copy directly to DMA-able memory *------------------------------------------------------------------------*/ void usbd_copy_in(struct usb_page_cache *cache, usb_frlength_t offset, const void *ptr, usb_frlength_t len) { struct usb_page_search buf_res; while (len != 0) { usbd_get_page(cache, offset, &buf_res); if (buf_res.length > len) { buf_res.length = len; } memcpy(buf_res.buffer, ptr, buf_res.length); offset += buf_res.length; len -= buf_res.length; ptr = USB_ADD_BYTES(ptr, buf_res.length); } } /*------------------------------------------------------------------------* * usbd_copy_in_user - copy directly to DMA-able memory from userland * * Return values: * 0: Success * Else: Failure *------------------------------------------------------------------------*/ #if USB_HAVE_USER_IO int usbd_copy_in_user(struct usb_page_cache *cache, usb_frlength_t offset, const void *ptr, usb_frlength_t len) { struct usb_page_search buf_res; int error; while (len != 0) { usbd_get_page(cache, offset, &buf_res); if (buf_res.length > len) { buf_res.length = len; } error = copyin(ptr, buf_res.buffer, buf_res.length); if (error) return (error); offset += buf_res.length; len -= buf_res.length; ptr = USB_ADD_BYTES(ptr, buf_res.length); } return (0); /* success */ } #endif /*------------------------------------------------------------------------* * usbd_m_copy_in - copy a mbuf chain directly into DMA-able memory *------------------------------------------------------------------------*/ #if USB_HAVE_MBUF struct usb_m_copy_in_arg { struct usb_page_cache *cache; usb_frlength_t dst_offset; }; static int usbd_m_copy_in_cb(void *arg, void *src, uint32_t count) { - register struct usb_m_copy_in_arg *ua = arg; + 
struct usb_m_copy_in_arg *ua = arg; usbd_copy_in(ua->cache, ua->dst_offset, src, count); ua->dst_offset += count; return (0); } void usbd_m_copy_in(struct usb_page_cache *cache, usb_frlength_t dst_offset, struct mbuf *m, usb_size_t src_offset, usb_frlength_t src_len) { struct usb_m_copy_in_arg arg = {cache, dst_offset}; (void) m_apply(m, src_offset, src_len, &usbd_m_copy_in_cb, &arg); } #endif /*------------------------------------------------------------------------* * usb_uiomove - factored out code *------------------------------------------------------------------------*/ #if USB_HAVE_USER_IO int usb_uiomove(struct usb_page_cache *pc, struct uio *uio, usb_frlength_t pc_offset, usb_frlength_t len) { struct usb_page_search res; int error = 0; while (len != 0) { usbd_get_page(pc, pc_offset, &res); if (res.length > len) { res.length = len; } /* * "uiomove()" can sleep so one needs to make a wrapper, * exiting the mutex and checking things */ error = uiomove(res.buffer, res.length, uio); if (error) { break; } pc_offset += res.length; len -= res.length; } return (error); } #endif /*------------------------------------------------------------------------* * usbd_copy_out - copy directly from DMA-able memory *------------------------------------------------------------------------*/ void usbd_copy_out(struct usb_page_cache *cache, usb_frlength_t offset, void *ptr, usb_frlength_t len) { struct usb_page_search res; while (len != 0) { usbd_get_page(cache, offset, &res); if (res.length > len) { res.length = len; } memcpy(ptr, res.buffer, res.length); offset += res.length; len -= res.length; ptr = USB_ADD_BYTES(ptr, res.length); } } /*------------------------------------------------------------------------* * usbd_copy_out_user - copy directly from DMA-able memory to userland * * Return values: * 0: Success * Else: Failure *------------------------------------------------------------------------*/ #if USB_HAVE_USER_IO int usbd_copy_out_user(struct usb_page_cache *cache, usb_frlength_t offset, void *ptr, usb_frlength_t len) { struct usb_page_search res; int error; while (len != 0) { usbd_get_page(cache, offset, &res); if (res.length > len) { res.length = len; } error = copyout(res.buffer, ptr, res.length); if (error) return (error); offset += res.length; len -= res.length; ptr = USB_ADD_BYTES(ptr, res.length); } return (0); /* success */ } #endif /*------------------------------------------------------------------------* * usbd_frame_zero - zero DMA-able memory *------------------------------------------------------------------------*/ void usbd_frame_zero(struct usb_page_cache *cache, usb_frlength_t offset, usb_frlength_t len) { struct usb_page_search res; while (len != 0) { usbd_get_page(cache, offset, &res); if (res.length > len) { res.length = len; } memset(res.buffer, 0, res.length); offset += res.length; len -= res.length; } } #if USB_HAVE_BUSDMA /*------------------------------------------------------------------------* * usb_dma_lock_cb - dummy callback *------------------------------------------------------------------------*/ static void usb_dma_lock_cb(void *arg, bus_dma_lock_op_t op) { /* we use "mtx_owned()" instead of this function */ } /*------------------------------------------------------------------------* * usb_dma_tag_create - allocate a DMA tag * * NOTE: If the "align" parameter has a value of 1 the DMA-tag will * allow multi-segment mappings. Else all mappings are single-segment. 
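 *
 * usbd_copy_in(), usbd_copy_out(), usb_uiomove() and usbd_frame_zero()
 * above all share one loop shape: look up the current chunk, clamp its
 * length to what is left, act, advance.  The sketch below applies the
 * same shape to a toy checksum; get_chunk() is an invented stand-in
 * for usbd_get_page() over an 8-byte "page" size.
 */

#include <stdint.h>
#include <stdio.h>

struct chunk {
	uint8_t *buffer;
	uint32_t length;
};

static uint8_t backing[32];		/* toy backing store */

static void
get_chunk(uint32_t off, struct chunk *res)
{

	res->buffer = backing + off;
	res->length = 8 - (off % 8);	/* bytes left in this "page" */
}

static uint32_t
cache_sum(uint32_t offset, uint32_t len)
{
	struct chunk res;
	uint32_t i, sum = 0;

	while (len != 0) {
		get_chunk(offset, &res);
		if (res.length > len)
			res.length = len;	/* clamp to what is left */
		for (i = 0; i < res.length; i++)
			sum += res.buffer[i];
		offset += res.length;
		len -= res.length;
	}
	return (sum);
}

int
main(void)
{
	uint32_t i;

	for (i = 0; i < sizeof(backing); i++)
		backing[i] = (uint8_t)i;
	printf("sum=%u\n", cache_sum(3, 20));	/* crosses two boundaries */
	return (0);
}

/*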
*------------------------------------------------------------------------*/ static void usb_dma_tag_create(struct usb_dma_tag *udt, usb_size_t size, usb_size_t align) { bus_dma_tag_t tag; if (bus_dma_tag_create ( /* parent */ udt->tag_parent->tag, /* alignment */ align, /* boundary */ 0, /* lowaddr */ (2ULL << (udt->tag_parent->dma_bits - 1)) - 1, /* highaddr */ BUS_SPACE_MAXADDR, /* filter */ NULL, /* filterarg */ NULL, /* maxsize */ size, /* nsegments */ (align == 1 && size > 1) ? (2 + (size / USB_PAGE_SIZE)) : 1, /* maxsegsz */ (align == 1 && size > USB_PAGE_SIZE) ? USB_PAGE_SIZE : size, /* flags */ BUS_DMA_KEEP_PG_OFFSET, /* lockfn */ &usb_dma_lock_cb, /* lockarg */ NULL, &tag)) { tag = NULL; } udt->tag = tag; } /*------------------------------------------------------------------------* * usb_dma_tag_free - free a DMA tag *------------------------------------------------------------------------*/ static void usb_dma_tag_destroy(struct usb_dma_tag *udt) { bus_dma_tag_destroy(udt->tag); } /*------------------------------------------------------------------------* * usb_pc_alloc_mem_cb - BUS-DMA callback function *------------------------------------------------------------------------*/ static void usb_pc_alloc_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { usb_pc_common_mem_cb(arg, segs, nseg, error, 0); } /*------------------------------------------------------------------------* * usb_pc_load_mem_cb - BUS-DMA callback function *------------------------------------------------------------------------*/ static void usb_pc_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { usb_pc_common_mem_cb(arg, segs, nseg, error, 1); } /*------------------------------------------------------------------------* * usb_pc_common_mem_cb - BUS-DMA callback function *------------------------------------------------------------------------*/ static void usb_pc_common_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error, uint8_t isload) { struct usb_dma_parent_tag *uptag; struct usb_page_cache *pc; struct usb_page *pg; usb_size_t rem; bus_size_t off; uint8_t owned; pc = arg; uptag = pc->tag_parent; /* * XXX There is sometimes recursive locking here. * XXX We should try to find a better solution. * XXX Until further the "owned" variable does * XXX the trick. */ if (error) { goto done; } off = 0; pg = pc->page_start; pg->physaddr = rounddown2(segs->ds_addr, USB_PAGE_SIZE); rem = segs->ds_addr & (USB_PAGE_SIZE - 1); pc->page_offset_buf = rem; pc->page_offset_end += rem; #ifdef USB_DEBUG if (nseg > 1) { int x; for (x = 0; x != nseg - 1; x++) { if (((segs[x].ds_addr + segs[x].ds_len) & (USB_PAGE_SIZE - 1)) == ((segs[x + 1].ds_addr & (USB_PAGE_SIZE - 1)))) continue; /* * This check verifies there is no page offset * hole between any of the segments. See the * BUS_DMA_KEEP_PG_OFFSET flag. */ DPRINTFN(0, "Page offset was not preserved\n"); error = 1; goto done; } } #endif while (pc->ismultiseg) { off += USB_PAGE_SIZE; if (off >= (segs->ds_len + rem)) { /* page crossing */ nseg--; segs++; off = 0; rem = 0; if (nseg == 0) break; } pg++; pg->physaddr = rounddown2(segs->ds_addr + off, USB_PAGE_SIZE); } done: owned = mtx_owned(uptag->mtx); if (!owned) mtx_lock(uptag->mtx); uptag->dma_error = (error ? 
1 : 0); if (isload) { (uptag->func) (uptag); } else { cv_broadcast(uptag->cv); } if (!owned) mtx_unlock(uptag->mtx); } /*------------------------------------------------------------------------* * usb_pc_alloc_mem - allocate DMA'able memory * * Returns: * 0: Success * Else: Failure *------------------------------------------------------------------------*/ uint8_t usb_pc_alloc_mem(struct usb_page_cache *pc, struct usb_page *pg, usb_size_t size, usb_size_t align) { struct usb_dma_parent_tag *uptag; struct usb_dma_tag *utag; bus_dmamap_t map; void *ptr; int err; uptag = pc->tag_parent; if (align != 1) { /* * The alignment must be greater than or equal to the * "size" else the object can be split between two * memory pages and we get a problem! */ while (align < size) { align *= 2; if (align == 0) { goto error; } } #if 1 /* * XXX BUS-DMA workaround - FIXME later: * * We assume that the alignment at this point of * the code is greater than or equal to the size and * less than two times the size, so that if we double * the size, the size will be greater than the * alignment. * * The bus-dma system has a check for "alignment" * being less than "size". If that check fails we end * up using contigmalloc which is page based even for * small allocations. Try to avoid that to save * memory, hence we sometimes do a large number of * small allocations! */ if (size <= (USB_PAGE_SIZE / 2)) { size *= 2; } #endif } /* get the correct DMA tag */ utag = usb_dma_tag_find(uptag, size, align); if (utag == NULL) { goto error; } /* allocate memory */ if (bus_dmamem_alloc( utag->tag, &ptr, (BUS_DMA_WAITOK | BUS_DMA_COHERENT), &map)) { goto error; } /* setup page cache */ pc->buffer = ptr; pc->page_start = pg; pc->page_offset_buf = 0; pc->page_offset_end = size; pc->map = map; pc->tag = utag->tag; pc->ismultiseg = (align == 1); mtx_lock(uptag->mtx); /* load memory into DMA */ err = bus_dmamap_load( utag->tag, map, ptr, size, &usb_pc_alloc_mem_cb, pc, (BUS_DMA_WAITOK | BUS_DMA_COHERENT)); if (err == EINPROGRESS) { cv_wait(uptag->cv, uptag->mtx); err = 0; } mtx_unlock(uptag->mtx); if (err || uptag->dma_error) { bus_dmamem_free(utag->tag, ptr, map); goto error; } memset(ptr, 0, size); usb_pc_cpu_flush(pc); return (0); error: /* reset most of the page cache */ pc->buffer = NULL; pc->page_start = NULL; pc->page_offset_buf = 0; pc->page_offset_end = 0; pc->map = NULL; pc->tag = NULL; return (1); } /*------------------------------------------------------------------------* * usb_pc_free_mem - free DMA memory * * This function is NULL safe. *------------------------------------------------------------------------*/ void usb_pc_free_mem(struct usb_page_cache *pc) { if (pc && pc->buffer) { bus_dmamap_unload(pc->tag, pc->map); bus_dmamem_free(pc->tag, pc->buffer, pc->map); pc->buffer = NULL; } } /*------------------------------------------------------------------------* * usb_pc_load_mem - load virtual memory into DMA * * Return values: * 0: Success * Else: Error *------------------------------------------------------------------------*/ uint8_t usb_pc_load_mem(struct usb_page_cache *pc, usb_size_t size, uint8_t sync) { /* setup page cache */ pc->page_offset_buf = 0; pc->page_offset_end = size; pc->ismultiseg = 1; mtx_assert(pc->tag_parent->mtx, MA_OWNED); if (size > 0) { if (sync) { struct usb_dma_parent_tag *uptag; int err; uptag = pc->tag_parent; /* * We have to unload the previous loaded DMA * pages before trying to load a new one! */ bus_dmamap_unload(pc->tag, pc->map); /* * Try to load memory into DMA. 
*/ err = bus_dmamap_load( pc->tag, pc->map, pc->buffer, size, &usb_pc_alloc_mem_cb, pc, BUS_DMA_WAITOK); if (err == EINPROGRESS) { cv_wait(uptag->cv, uptag->mtx); err = 0; } if (err || uptag->dma_error) { return (1); } } else { /* * We have to unload the previous loaded DMA * pages before trying to load a new one! */ bus_dmamap_unload(pc->tag, pc->map); /* * Try to load memory into DMA. The callback * will be called in all cases: */ if (bus_dmamap_load( pc->tag, pc->map, pc->buffer, size, &usb_pc_load_mem_cb, pc, BUS_DMA_WAITOK)) { } } } else { if (!sync) { /* * Call callback so that refcount is decremented * properly: */ pc->tag_parent->dma_error = 0; (pc->tag_parent->func) (pc->tag_parent); } } return (0); } /*------------------------------------------------------------------------* * usb_pc_cpu_invalidate - invalidate CPU cache *------------------------------------------------------------------------*/ void usb_pc_cpu_invalidate(struct usb_page_cache *pc) { if (pc->page_offset_end == pc->page_offset_buf) { /* nothing has been loaded into this page cache! */ return; } /* * TODO: We currently do XXX_POSTREAD and XXX_PREREAD at the * same time, but in the future we should try to isolate the * different cases to optimise the code. --HPS */ bus_dmamap_sync(pc->tag, pc->map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(pc->tag, pc->map, BUS_DMASYNC_PREREAD); } /*------------------------------------------------------------------------* * usb_pc_cpu_flush - flush CPU cache *------------------------------------------------------------------------*/ void usb_pc_cpu_flush(struct usb_page_cache *pc) { if (pc->page_offset_end == pc->page_offset_buf) { /* nothing has been loaded into this page cache! */ return; } bus_dmamap_sync(pc->tag, pc->map, BUS_DMASYNC_PREWRITE); } /*------------------------------------------------------------------------* * usb_pc_dmamap_create - create a DMA map * * Returns: * 0: Success * Else: Failure *------------------------------------------------------------------------*/ uint8_t usb_pc_dmamap_create(struct usb_page_cache *pc, usb_size_t size) { struct usb_xfer_root *info; struct usb_dma_tag *utag; /* get info */ info = USB_DMATAG_TO_XROOT(pc->tag_parent); /* sanity check */ if (info == NULL) { goto error; } utag = usb_dma_tag_find(pc->tag_parent, size, 1); if (utag == NULL) { goto error; } /* create DMA map */ if (bus_dmamap_create(utag->tag, 0, &pc->map)) { goto error; } pc->tag = utag->tag; return 0; /* success */ error: pc->map = NULL; pc->tag = NULL; return 1; /* failure */ } /*------------------------------------------------------------------------* * usb_pc_dmamap_destroy * * This function is NULL safe. 
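 *
 * usb_pc_alloc_mem() above keeps doubling "align" until it reaches
 * "size" so the object cannot straddle two pages.  A minimal
 * standalone model of that loop, including the overflow bail-out,
 * follows; round_align() is an invented name.
 */

#include <stdint.h>
#include <stdio.h>

static uint32_t
round_align(uint32_t align, uint32_t size)
{

	while (align < size) {
		align *= 2;
		if (align == 0)
			return (0);	/* overflowed: caller must fail */
	}
	return (align);
}

int
main(void)
{

	printf("%u\n", round_align(4, 1000));	/* 1024 */
	printf("%u\n", round_align(0x80000000u, 0xffffffffu));	/* 0 */
	return (0);
}

/*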
*------------------------------------------------------------------------*/ void usb_pc_dmamap_destroy(struct usb_page_cache *pc) { if (pc && pc->tag) { bus_dmamap_destroy(pc->tag, pc->map); pc->tag = NULL; pc->map = NULL; } } /*------------------------------------------------------------------------* * usb_dma_tag_find - factored out code *------------------------------------------------------------------------*/ struct usb_dma_tag * usb_dma_tag_find(struct usb_dma_parent_tag *udpt, usb_size_t size, usb_size_t align) { struct usb_dma_tag *udt; uint8_t nudt; USB_ASSERT(align > 0, ("Invalid parameter align = 0\n")); USB_ASSERT(size > 0, ("Invalid parameter size = 0\n")); udt = udpt->utag_first; nudt = udpt->utag_max; while (nudt--) { if (udt->align == 0) { usb_dma_tag_create(udt, size, align); if (udt->tag == NULL) { return (NULL); } udt->align = align; udt->size = size; return (udt); } if ((udt->align == align) && (udt->size == size)) { return (udt); } udt++; } return (NULL); } /*------------------------------------------------------------------------* * usb_dma_tag_setup - initialise USB DMA tags *------------------------------------------------------------------------*/ void usb_dma_tag_setup(struct usb_dma_parent_tag *udpt, struct usb_dma_tag *udt, bus_dma_tag_t dmat, struct mtx *mtx, usb_dma_callback_t *func, uint8_t ndmabits, uint8_t nudt) { memset(udpt, 0, sizeof(*udpt)); /* sanity checking */ if ((nudt == 0) || (ndmabits == 0) || (mtx == NULL)) { /* something is corrupt */ return; } /* initialise condition variable */ cv_init(udpt->cv, "USB DMA CV"); /* store some information */ udpt->mtx = mtx; udpt->func = func; udpt->tag = dmat; udpt->utag_first = udt; udpt->utag_max = nudt; udpt->dma_bits = ndmabits; while (nudt--) { memset(udt, 0, sizeof(*udt)); udt->tag_parent = udpt; udt++; } } /*------------------------------------------------------------------------* * usb_bus_tag_unsetup - factored out code *------------------------------------------------------------------------*/ void usb_dma_tag_unsetup(struct usb_dma_parent_tag *udpt) { struct usb_dma_tag *udt; uint8_t nudt; udt = udpt->utag_first; nudt = udpt->utag_max; while (nudt--) { if (udt->align) { /* destroy the USB DMA tag */ usb_dma_tag_destroy(udt); udt->align = 0; } udt++; } if (udpt->utag_max) { /* destroy the condition variable */ cv_destroy(udpt->cv); } } /*------------------------------------------------------------------------* * usb_bdma_work_loop * * This function handles loading of virtual buffers into DMA and is * only called when "dma_refcount" is zero. *------------------------------------------------------------------------*/ void usb_bdma_work_loop(struct usb_xfer_queue *pq) { struct usb_xfer_root *info; struct usb_xfer *xfer; usb_frcount_t nframes; xfer = pq->curr; info = xfer->xroot; mtx_assert(info->xfer_mtx, MA_OWNED); if (xfer->error) { /* some error happened */ USB_BUS_LOCK(info->bus); usbd_transfer_done(xfer, 0); USB_BUS_UNLOCK(info->bus); return; } if (!xfer->flags_int.bdma_setup) { struct usb_page *pg; usb_frlength_t frlength_0; uint8_t isread; xfer->flags_int.bdma_setup = 1; /* reset BUS-DMA load state */ info->dma_error = 0; if (xfer->flags_int.isochronous_xfr) { /* only one frame buffer */ nframes = 1; frlength_0 = xfer->sumlen; } else { /* can be multiple frame buffers */ nframes = xfer->nframes; frlength_0 = xfer->frlengths[0]; } /* * Set DMA direction first. This is needed to * select the correct cache invalidate and cache * flush operations. 
*/ isread = USB_GET_DATA_ISREAD(xfer); pg = xfer->dma_page_ptr; if (xfer->flags_int.control_xfr && xfer->flags_int.control_hdr) { /* special case */ if (xfer->flags_int.usb_mode == USB_MODE_DEVICE) { /* The device controller writes to memory */ xfer->frbuffers[0].isread = 1; } else { /* The host controller reads from memory */ xfer->frbuffers[0].isread = 0; } } else { /* default case */ xfer->frbuffers[0].isread = isread; } /* * Setup the "page_start" pointer which points to an array of * USB pages where information about the physical address of a * page will be stored. Also initialise the "isread" field of * the USB page caches. */ xfer->frbuffers[0].page_start = pg; info->dma_nframes = nframes; info->dma_currframe = 0; info->dma_frlength_0 = frlength_0; pg += (frlength_0 / USB_PAGE_SIZE); pg += 2; while (--nframes > 0) { xfer->frbuffers[nframes].isread = isread; xfer->frbuffers[nframes].page_start = pg; pg += (xfer->frlengths[nframes] / USB_PAGE_SIZE); pg += 2; } } if (info->dma_error) { USB_BUS_LOCK(info->bus); usbd_transfer_done(xfer, USB_ERR_DMA_LOAD_FAILED); USB_BUS_UNLOCK(info->bus); return; } if (info->dma_currframe != info->dma_nframes) { if (info->dma_currframe == 0) { /* special case */ usb_pc_load_mem(xfer->frbuffers, info->dma_frlength_0, 0); } else { /* default case */ nframes = info->dma_currframe; usb_pc_load_mem(xfer->frbuffers + nframes, xfer->frlengths[nframes], 0); } /* advance frame index */ info->dma_currframe++; return; } /* go ahead */ usb_bdma_pre_sync(xfer); /* start loading next USB transfer, if any */ usb_command_wrapper(pq, NULL); /* finally start the hardware */ usbd_pipe_enter(xfer); } /*------------------------------------------------------------------------* * usb_bdma_done_event * * This function is called when the BUS-DMA has loaded virtual memory * into DMA, if any. *------------------------------------------------------------------------*/ void usb_bdma_done_event(struct usb_dma_parent_tag *udpt) { struct usb_xfer_root *info; info = USB_DMATAG_TO_XROOT(udpt); mtx_assert(info->xfer_mtx, MA_OWNED); /* copy error */ info->dma_error = udpt->dma_error; /* enter workloop again */ usb_command_wrapper(&info->dma_q, info->dma_q.curr); } /*------------------------------------------------------------------------* * usb_bdma_pre_sync * * This function handles DMA synchronisation that must be done before * an USB transfer is started. *------------------------------------------------------------------------*/ void usb_bdma_pre_sync(struct usb_xfer *xfer) { struct usb_page_cache *pc; usb_frcount_t nframes; if (xfer->flags_int.isochronous_xfr) { /* only one frame buffer */ nframes = 1; } else { /* can be multiple frame buffers */ nframes = xfer->nframes; } pc = xfer->frbuffers; while (nframes--) { if (pc->isread) { usb_pc_cpu_invalidate(pc); } else { usb_pc_cpu_flush(pc); } pc++; } } /*------------------------------------------------------------------------* * usb_bdma_post_sync * * This function handles DMA synchronisation that must be done after * an USB transfer is complete. 
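 *
 * The work loop above reserves "len / USB_PAGE_SIZE + 2" usb_page
 * slots per frame buffer.  The sketch below checks that this worst
 * case really covers an arbitrarily aligned buffer; both helper names
 * are invented and USB_PAGE_SIZE is assumed to be 4 KB.
 */

#include <stdint.h>
#include <stdio.h>

#define USB_PAGE_SIZE 4096

static uint32_t
max_pages(uint32_t len)
{

	/* One extra page of slack at each end covers a buffer that
	 * starts just before a page boundary. */
	return (len / USB_PAGE_SIZE + 2);
}

static uint32_t
exact_pages(uintptr_t va, uint32_t len)
{
	uintptr_t first = va / USB_PAGE_SIZE;
	uintptr_t last = (va + len - 1) / USB_PAGE_SIZE;

	return ((uint32_t)(last - first + 1));
}

int
main(void)
{

	/* 4096 bytes starting one byte before a boundary touch two
	 * pages; three are reserved, so the bound holds. */
	printf("%u <= %u\n", exact_pages(4095, 4096), max_pages(4096));
	return (0);
}

/*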
*------------------------------------------------------------------------*/ void usb_bdma_post_sync(struct usb_xfer *xfer) { struct usb_page_cache *pc; usb_frcount_t nframes; if (xfer->flags_int.isochronous_xfr) { /* only one frame buffer */ nframes = 1; } else { /* can be multiple frame buffers */ nframes = xfer->nframes; } pc = xfer->frbuffers; while (nframes--) { if (pc->isread) { usb_pc_cpu_invalidate(pc); } pc++; } } #endif Index: stable/11/sys/dev/wb/if_wb.c =================================================================== --- stable/11/sys/dev/wb/if_wb.c (revision 331642) +++ stable/11/sys/dev/wb/if_wb.c (revision 331643) @@ -1,1638 +1,1638 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1997, 1998 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Winbond fast ethernet PCI NIC driver * * Supports various cheap network adapters based on the Winbond W89C840F * fast ethernet controller chip. This includes adapters manufactured by * Winbond itself and some made by Linksys. * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ /* * The Winbond W89C840F chip is a bus master; in some ways it resembles * a DEC 'tulip' chip, only not as complicated. Unfortunately, it has * one major difference which is that while the registers do many of * the same things as a tulip adapter, the offsets are different: where * tulip registers are typically spaced 8 bytes apart, the Winbond * registers are spaced 4 bytes apart. The receiver filter is also * programmed differently. * * Like the tulip, the Winbond chip uses small descriptors containing * a status word, a control word and 32-bit areas that can either be used * to point to two external data blocks, or to point to a single block * and another descriptor in a linked list. 
Descriptors can be grouped * together in blocks to form fixed length rings or can be chained * together in linked lists. A single packet may be spread out over * several descriptors if necessary. * * For the receive ring, this driver uses a linked list of descriptors, * each pointing to a single mbuf cluster buffer, which is large enough * to hold an entire packet. The linked list is looped back to create a * closed ring. * * For transmission, the driver creates a linked list of 'super descriptors' * which each contain several individual descriptors linked together. * Each 'super descriptor' contains WB_MAXFRAGS descriptors, which we * abuse as fragment pointers. This allows us to use a buffer management * scheme very similar to that used in the ThunderLAN and Etherlink XL * drivers. * * Autonegotiation is performed using the external PHY via the MII bus. * The sample boards I have all use a Davicom PHY. * * Note: the author of the Linux driver for the Winbond chip alludes * to some sort of flaw in the chip's design that seems to mandate some * drastic workaround which significantly impairs transmit performance. * I have no idea what he's on about: transmit performance with all * three of my test boards seems fine. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for vtophys */ #include /* for vtophys */ #include #include #include #include #include #include #include #include #include /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #define WB_USEIOSPACE #include MODULE_DEPEND(wb, pci, 1, 1, 1); MODULE_DEPEND(wb, ether, 1, 1, 1); MODULE_DEPEND(wb, miibus, 1, 1, 1); /* * Various supported device vendors/types and their names. 
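 *
 * As an aside to the architecture description above, the 'super
 * descriptor' scheme is easiest to see as data structures.  This is
 * an illustrative sketch only: the field names are invented and the
 * real layouts live in if_wbreg.h.
 */

#include <stdint.h>

#define MAXFRAGS 16			/* stands in for WB_MAXFRAGS */

struct hw_frag {			/* one hardware descriptor */
	uint32_t status;		/* ownership + completion bits */
	uint32_t ctl;			/* length + link/interrupt flags */
	uint32_t data;			/* physical address of fragment */
	uint32_t next;			/* physical address of next desc */
};

struct super_desc {			/* one software TX slot owns a
					 * fixed run of hw descriptors,
					 * abused as fragment pointers */
	struct hw_frag frag[MAXFRAGS];
};

struct tx_slot {
	struct super_desc *ptr;		/* hardware-visible part */
	void *mbuf;			/* mbuf chain being transmitted */
	struct tx_slot *nextdesc;	/* software ring linkage */
};

int
main(void)
{

	return ((int)sizeof(struct super_desc) == MAXFRAGS * 16 ? 0 : 1);
}

/*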
*/ static const struct wb_type wb_devs[] = { { WB_VENDORID, WB_DEVICEID_840F, "Winbond W89C840F 10/100BaseTX" }, { CP_VENDORID, CP_DEVICEID_RL100, "Compex RL100-ATX 10/100baseTX" }, { 0, 0, NULL } }; static int wb_probe(device_t); static int wb_attach(device_t); static int wb_detach(device_t); static void wb_bfree(struct mbuf *, void *addr, void *args); static int wb_newbuf(struct wb_softc *, struct wb_chain_onefrag *, struct mbuf *); static int wb_encap(struct wb_softc *, struct wb_chain *, struct mbuf *); static void wb_rxeof(struct wb_softc *); static void wb_rxeoc(struct wb_softc *); static void wb_txeof(struct wb_softc *); static void wb_txeoc(struct wb_softc *); static void wb_intr(void *); static void wb_tick(void *); static void wb_start(struct ifnet *); static void wb_start_locked(struct ifnet *); static int wb_ioctl(struct ifnet *, u_long, caddr_t); static void wb_init(void *); static void wb_init_locked(struct wb_softc *); static void wb_stop(struct wb_softc *); static void wb_watchdog(struct wb_softc *); static int wb_shutdown(device_t); static int wb_ifmedia_upd(struct ifnet *); static void wb_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void wb_eeprom_putbyte(struct wb_softc *, int); static void wb_eeprom_getword(struct wb_softc *, int, u_int16_t *); static void wb_read_eeprom(struct wb_softc *, caddr_t, int, int, int); static void wb_setcfg(struct wb_softc *, u_int32_t); static void wb_setmulti(struct wb_softc *); static void wb_reset(struct wb_softc *); static void wb_fixmedia(struct wb_softc *); static int wb_list_rx_init(struct wb_softc *); static int wb_list_tx_init(struct wb_softc *); static int wb_miibus_readreg(device_t, int, int); static int wb_miibus_writereg(device_t, int, int, int); static void wb_miibus_statchg(device_t); /* * MII bit-bang glue */ static uint32_t wb_mii_bitbang_read(device_t); static void wb_mii_bitbang_write(device_t, uint32_t); static const struct mii_bitbang_ops wb_mii_bitbang_ops = { wb_mii_bitbang_read, wb_mii_bitbang_write, { WB_SIO_MII_DATAOUT, /* MII_BIT_MDO */ WB_SIO_MII_DATAIN, /* MII_BIT_MDI */ WB_SIO_MII_CLK, /* MII_BIT_MDC */ WB_SIO_MII_DIR, /* MII_BIT_DIR_HOST_PHY */ 0, /* MII_BIT_DIR_PHY_HOST */ } }; #ifdef WB_USEIOSPACE #define WB_RES SYS_RES_IOPORT #define WB_RID WB_PCI_LOIO #else #define WB_RES SYS_RES_MEMORY #define WB_RID WB_PCI_LOMEM #endif static device_method_t wb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, wb_probe), DEVMETHOD(device_attach, wb_attach), DEVMETHOD(device_detach, wb_detach), DEVMETHOD(device_shutdown, wb_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, wb_miibus_readreg), DEVMETHOD(miibus_writereg, wb_miibus_writereg), DEVMETHOD(miibus_statchg, wb_miibus_statchg), DEVMETHOD_END }; static driver_t wb_driver = { "wb", wb_methods, sizeof(struct wb_softc) }; static devclass_t wb_devclass; DRIVER_MODULE(wb, pci, wb_driver, wb_devclass, 0, 0); DRIVER_MODULE(miibus, wb, miibus_driver, miibus_devclass, 0, 0); #define WB_SETBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, \ CSR_READ_4(sc, reg) | (x)) #define WB_CLRBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, \ CSR_READ_4(sc, reg) & ~(x)) #define SIO_SET(x) \ CSR_WRITE_4(sc, WB_SIO, \ CSR_READ_4(sc, WB_SIO) | (x)) #define SIO_CLR(x) \ CSR_WRITE_4(sc, WB_SIO, \ CSR_READ_4(sc, WB_SIO) & ~(x)) /* * Send a read command and address to the EEPROM, check for ACK. */ static void wb_eeprom_putbyte(sc, addr) struct wb_softc *sc; int addr; { - register int d, i; + int d, i; d = addr | WB_EECMD_READ; /* * Feed in each bit and strobe the clock. 
*/ for (i = 0x400; i; i >>= 1) { if (d & i) { SIO_SET(WB_SIO_EE_DATAIN); } else { SIO_CLR(WB_SIO_EE_DATAIN); } DELAY(100); SIO_SET(WB_SIO_EE_CLK); DELAY(150); SIO_CLR(WB_SIO_EE_CLK); DELAY(100); } } /* * Read a word of data stored in the EEPROM at address 'addr.' */ static void wb_eeprom_getword(sc, addr, dest) struct wb_softc *sc; int addr; u_int16_t *dest; { - register int i; + int i; u_int16_t word = 0; /* Enter EEPROM access mode. */ CSR_WRITE_4(sc, WB_SIO, WB_SIO_EESEL|WB_SIO_EE_CS); /* * Send address of word we want to read. */ wb_eeprom_putbyte(sc, addr); CSR_WRITE_4(sc, WB_SIO, WB_SIO_EESEL|WB_SIO_EE_CS); /* * Start reading bits from EEPROM. */ for (i = 0x8000; i; i >>= 1) { SIO_SET(WB_SIO_EE_CLK); DELAY(100); if (CSR_READ_4(sc, WB_SIO) & WB_SIO_EE_DATAOUT) word |= i; SIO_CLR(WB_SIO_EE_CLK); DELAY(100); } /* Turn off EEPROM access mode. */ CSR_WRITE_4(sc, WB_SIO, 0); *dest = word; } /* * Read a sequence of words from the EEPROM. */ static void wb_read_eeprom(sc, dest, off, cnt, swap) struct wb_softc *sc; caddr_t dest; int off; int cnt; int swap; { int i; u_int16_t word = 0, *ptr; for (i = 0; i < cnt; i++) { wb_eeprom_getword(sc, off + i, &word); ptr = (u_int16_t *)(dest + (i * 2)); if (swap) *ptr = ntohs(word); else *ptr = word; } } /* * Read the MII serial port for the MII bit-bang module. */ static uint32_t wb_mii_bitbang_read(device_t dev) { struct wb_softc *sc; uint32_t val; sc = device_get_softc(dev); val = CSR_READ_4(sc, WB_SIO); CSR_BARRIER(sc, WB_SIO, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (val); } /* * Write the MII serial port for the MII bit-bang module. */ static void wb_mii_bitbang_write(device_t dev, uint32_t val) { struct wb_softc *sc; sc = device_get_softc(dev); CSR_WRITE_4(sc, WB_SIO, val); CSR_BARRIER(sc, WB_SIO, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } static int wb_miibus_readreg(dev, phy, reg) device_t dev; int phy, reg; { return (mii_bitbang_readreg(dev, &wb_mii_bitbang_ops, phy, reg)); } static int wb_miibus_writereg(dev, phy, reg, data) device_t dev; int phy, reg, data; { mii_bitbang_writereg(dev, &wb_mii_bitbang_ops, phy, reg, data); return(0); } static void wb_miibus_statchg(dev) device_t dev; { struct wb_softc *sc; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->wb_miibus); wb_setcfg(sc, mii->mii_media_active); } /* * Program the 64-bit multicast hash filter. 
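 *
 * wb_setmulti() below indexes the 64-bit filter with the top six bits
 * of an inverted big-endian CRC32 of the MAC address.  The standalone
 * model below reimplements that math; the bit-at-a-time CRC mirrors
 * what ether_crc32_be() computes, but this is a sketch, not the
 * kernel routine.
 */

#include <stdint.h>
#include <stdio.h>

static uint32_t
crc32_be(const uint8_t *buf, int len)
{
	uint32_t crc = 0xffffffffu;
	int i, bit;

	for (i = 0; i < len; i++) {
		crc ^= (uint32_t)buf[i] << 24;
		for (bit = 0; bit < 8; bit++)
			crc = (crc << 1) ^
			    ((crc & 0x80000000u) ? 0x04c11db7u : 0);
	}
	return (crc);
}

int
main(void)
{
	uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	uint32_t hashes[2] = { 0, 0 };
	int h;

	h = (int)(~crc32_be(mac, 6) >> 26);	/* bucket 0..63 */
	if (h < 32)
		hashes[0] |= 1u << h;		/* goes in WB_MAR0 */
	else
		hashes[1] |= 1u << (h - 32);	/* goes in WB_MAR1 */
	printf("bucket %d -> MAR%d bit %d\n", h, h < 32 ? 0 : 1, h & 31);
	return (0);
}

/*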
*/ static void wb_setmulti(sc) struct wb_softc *sc; { struct ifnet *ifp; int h = 0; u_int32_t hashes[2] = { 0, 0 }; struct ifmultiaddr *ifma; u_int32_t rxfilt; int mcnt = 0; ifp = sc->wb_ifp; rxfilt = CSR_READ_4(sc, WB_NETCFG); if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { rxfilt |= WB_NETCFG_RX_MULTI; CSR_WRITE_4(sc, WB_NETCFG, rxfilt); CSR_WRITE_4(sc, WB_MAR0, 0xFFFFFFFF); CSR_WRITE_4(sc, WB_MAR1, 0xFFFFFFFF); return; } /* first, zot all the existing hash bits */ CSR_WRITE_4(sc, WB_MAR0, 0); CSR_WRITE_4(sc, WB_MAR1, 0); /* now program new ones */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ~ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); mcnt++; } if_maddr_runlock(ifp); if (mcnt) rxfilt |= WB_NETCFG_RX_MULTI; else rxfilt &= ~WB_NETCFG_RX_MULTI; CSR_WRITE_4(sc, WB_MAR0, hashes[0]); CSR_WRITE_4(sc, WB_MAR1, hashes[1]); CSR_WRITE_4(sc, WB_NETCFG, rxfilt); } /* * The Winbond manual states that in order to fiddle with the * 'full-duplex' and '100Mbps' bits in the netconfig register, we * first have to put the transmit and/or receive logic in the idle state. */ static void wb_setcfg(sc, media) struct wb_softc *sc; u_int32_t media; { int i, restart = 0; if (CSR_READ_4(sc, WB_NETCFG) & (WB_NETCFG_TX_ON|WB_NETCFG_RX_ON)) { restart = 1; WB_CLRBIT(sc, WB_NETCFG, (WB_NETCFG_TX_ON|WB_NETCFG_RX_ON)); for (i = 0; i < WB_TIMEOUT; i++) { DELAY(10); if ((CSR_READ_4(sc, WB_ISR) & WB_ISR_TX_IDLE) && (CSR_READ_4(sc, WB_ISR) & WB_ISR_RX_IDLE)) break; } if (i == WB_TIMEOUT) device_printf(sc->wb_dev, "failed to force tx and rx to idle state\n"); } if (IFM_SUBTYPE(media) == IFM_10_T) WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_100MBPS); else WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_100MBPS); if ((media & IFM_GMASK) == IFM_FDX) WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_FULLDUPLEX); else WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_FULLDUPLEX); if (restart) WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON|WB_NETCFG_RX_ON); } static void wb_reset(sc) struct wb_softc *sc; { - register int i; + int i; struct mii_data *mii; struct mii_softc *miisc; CSR_WRITE_4(sc, WB_NETCFG, 0); CSR_WRITE_4(sc, WB_BUSCTL, 0); CSR_WRITE_4(sc, WB_TXADDR, 0); CSR_WRITE_4(sc, WB_RXADDR, 0); WB_SETBIT(sc, WB_BUSCTL, WB_BUSCTL_RESET); WB_SETBIT(sc, WB_BUSCTL, WB_BUSCTL_RESET); for (i = 0; i < WB_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_4(sc, WB_BUSCTL) & WB_BUSCTL_RESET)) break; } if (i == WB_TIMEOUT) device_printf(sc->wb_dev, "reset never completed!\n"); /* Wait a little while for the chip to get its brains in order. */ DELAY(1000); if (sc->wb_miibus == NULL) return; mii = device_get_softc(sc->wb_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); } static void wb_fixmedia(sc) struct wb_softc *sc; { struct mii_data *mii = NULL; struct ifnet *ifp; u_int32_t media; mii = device_get_softc(sc->wb_miibus); ifp = sc->wb_ifp; mii_pollstat(mii); if (IFM_SUBTYPE(mii->mii_media_active) == IFM_10_T) { media = mii->mii_media_active & ~IFM_10_T; media |= IFM_100_TX; } else if (IFM_SUBTYPE(mii->mii_media_active) == IFM_100_TX) { media = mii->mii_media_active & ~IFM_100_TX; media |= IFM_10_T; } else return; ifmedia_set(&mii->mii_media, media); } /* * Probe for a Winbond chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. 
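 *
 * wb_probe() below is the classic sentinel-terminated ID-table walk.
 * Here it is as a self-contained sketch; the numeric IDs are believed
 * to match WB_VENDORID/CP_VENDORID but should be treated as
 * illustrative, and probe() stands in for the device_probe method.
 */

#include <stddef.h>
#include <stdio.h>

struct dev_type {
	unsigned int vid, did;
	const char *name;
};

static const struct dev_type devs[] = {
	{ 0x1050, 0x0840, "Winbond W89C840F 10/100BaseTX" },
	{ 0x11f6, 0x2011, "Compex RL100-ATX 10/100baseTX" },
	{ 0, 0, NULL }			/* sentinel terminates the walk */
};

static const char *
probe(unsigned int vid, unsigned int did)
{
	const struct dev_type *t;

	for (t = devs; t->name != NULL; t++)
		if (t->vid == vid && t->did == did)
			return (t->name);
	return (NULL);			/* caller maps this to ENXIO */
}

int
main(void)
{
	const char *name = probe(0x1050, 0x0840);

	printf("%s\n", name != NULL ? name : "no match");
	return (0);
}

/*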
*/ static int wb_probe(dev) device_t dev; { const struct wb_type *t; t = wb_devs; while(t->wb_name != NULL) { if ((pci_get_vendor(dev) == t->wb_vid) && (pci_get_device(dev) == t->wb_did)) { device_set_desc(dev, t->wb_name); return (BUS_PROBE_DEFAULT); } t++; } return(ENXIO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int wb_attach(dev) device_t dev; { u_char eaddr[ETHER_ADDR_LEN]; struct wb_softc *sc; struct ifnet *ifp; int error = 0, rid; sc = device_get_softc(dev); sc->wb_dev = dev; mtx_init(&sc->wb_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->wb_stat_callout, &sc->wb_mtx, 0); /* * Map control/status registers. */ pci_enable_busmaster(dev); rid = WB_RID; sc->wb_res = bus_alloc_resource_any(dev, WB_RES, &rid, RF_ACTIVE); if (sc->wb_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } /* Allocate interrupt */ rid = 0; sc->wb_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->wb_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Save the cache line size. */ sc->wb_cachesize = pci_read_config(dev, WB_PCI_CACHELEN, 4) & 0xFF; /* Reset the adapter. */ wb_reset(sc); /* * Get station address from the EEPROM. */ wb_read_eeprom(sc, (caddr_t)&eaddr, 0, 3, 0); sc->wb_ldata = contigmalloc(sizeof(struct wb_list_data) + 8, M_DEVBUF, M_NOWAIT, 0, 0xffffffff, PAGE_SIZE, 0); if (sc->wb_ldata == NULL) { device_printf(dev, "no memory for list buffers!\n"); error = ENXIO; goto fail; } bzero(sc->wb_ldata, sizeof(struct wb_list_data)); ifp = sc->wb_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = wb_ioctl; ifp->if_start = wb_start; ifp->if_init = wb_init; ifp->if_snd.ifq_maxlen = WB_TX_LIST_CNT - 1; /* * Do MII setup. */ error = mii_attach(dev, &sc->wb_miibus, ifp, wb_ifmedia_upd, wb_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(dev, sc->wb_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, wb_intr, sc, &sc->wb_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); goto fail; } fail: if (error) wb_detach(dev); return(error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int wb_detach(dev) device_t dev; { struct wb_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->wb_mtx), ("wb mutex not initialized")); ifp = sc->wb_ifp; /* * Delete any miibus and phy devices attached to this interface. * This should only be done if attach succeeded. 
*/ if (device_is_attached(dev)) { ether_ifdetach(ifp); WB_LOCK(sc); wb_stop(sc); WB_UNLOCK(sc); callout_drain(&sc->wb_stat_callout); } if (sc->wb_miibus) device_delete_child(dev, sc->wb_miibus); bus_generic_detach(dev); if (sc->wb_intrhand) bus_teardown_intr(dev, sc->wb_irq, sc->wb_intrhand); if (sc->wb_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->wb_irq); if (sc->wb_res) bus_release_resource(dev, WB_RES, WB_RID, sc->wb_res); if (ifp) if_free(ifp); if (sc->wb_ldata) { contigfree(sc->wb_ldata, sizeof(struct wb_list_data) + 8, M_DEVBUF); } mtx_destroy(&sc->wb_mtx); return(0); } /* * Initialize the transmit descriptors. */ static int wb_list_tx_init(sc) struct wb_softc *sc; { struct wb_chain_data *cd; struct wb_list_data *ld; int i; cd = &sc->wb_cdata; ld = sc->wb_ldata; for (i = 0; i < WB_TX_LIST_CNT; i++) { cd->wb_tx_chain[i].wb_ptr = &ld->wb_tx_list[i]; if (i == (WB_TX_LIST_CNT - 1)) { cd->wb_tx_chain[i].wb_nextdesc = &cd->wb_tx_chain[0]; } else { cd->wb_tx_chain[i].wb_nextdesc = &cd->wb_tx_chain[i + 1]; } } cd->wb_tx_free = &cd->wb_tx_chain[0]; cd->wb_tx_tail = cd->wb_tx_head = NULL; return(0); } /* * Initialize the RX descriptors and allocate mbufs for them. Note that * we arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. */ static int wb_list_rx_init(sc) struct wb_softc *sc; { struct wb_chain_data *cd; struct wb_list_data *ld; int i; cd = &sc->wb_cdata; ld = sc->wb_ldata; for (i = 0; i < WB_RX_LIST_CNT; i++) { cd->wb_rx_chain[i].wb_ptr = (struct wb_desc *)&ld->wb_rx_list[i]; cd->wb_rx_chain[i].wb_buf = (void *)&ld->wb_rxbufs[i]; if (wb_newbuf(sc, &cd->wb_rx_chain[i], NULL) == ENOBUFS) return(ENOBUFS); if (i == (WB_RX_LIST_CNT - 1)) { cd->wb_rx_chain[i].wb_nextdesc = &cd->wb_rx_chain[0]; ld->wb_rx_list[i].wb_next = vtophys(&ld->wb_rx_list[0]); } else { cd->wb_rx_chain[i].wb_nextdesc = &cd->wb_rx_chain[i + 1]; ld->wb_rx_list[i].wb_next = vtophys(&ld->wb_rx_list[i + 1]); } } cd->wb_rx_head = &cd->wb_rx_chain[0]; return(0); } static void wb_bfree(struct mbuf *m, void *buf, void *args) { } /* * Initialize an RX descriptor and attach an MBUF cluster. */ static int wb_newbuf(sc, c, m) struct wb_softc *sc; struct wb_chain_onefrag *c; struct mbuf *m; { struct mbuf *m_new = NULL; if (m == NULL) { MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) return(ENOBUFS); m_new->m_data = c->wb_buf; m_new->m_pkthdr.len = m_new->m_len = WB_BUFBYTES; MEXTADD(m_new, c->wb_buf, WB_BUFBYTES, wb_bfree, c->wb_buf, NULL, 0, EXT_NET_DRV); } else { m_new = m; m_new->m_len = m_new->m_pkthdr.len = WB_BUFBYTES; m_new->m_data = m_new->m_ext.ext_buf; } m_adj(m_new, sizeof(u_int64_t)); c->wb_mbuf = m_new; c->wb_ptr->wb_data = vtophys(mtod(m_new, caddr_t)); c->wb_ptr->wb_ctl = WB_RXCTL_RLINK | 1536; c->wb_ptr->wb_status = WB_RXSTAT; return(0); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. 
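 *
 * wb_list_rx_init() above closes the descriptor list into a ring so
 * the chip never runs off the end.  A minimal model of that
 * construction follows; the "physical" link is faked with an array
 * index instead of vtophys().
 */

#include <stdio.h>

#define RX_CNT 8

struct rx_desc {
	int next;			/* models the vtophys() link */
};

int
main(void)
{
	struct rx_desc ring[RX_CNT];
	int i, cur;

	for (i = 0; i < RX_CNT; i++)	/* last slot links back to 0 */
		ring[i].next = (i + 1) % RX_CNT;

	/* follow the links twice around: the walk never terminates */
	for (cur = 0, i = 0; i < 2 * RX_CNT; i++)
		cur = ring[cur].next;
	printf("back at slot %d\n", cur);	/* -> 0 */
	return (0);
}

/*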
*/ static void wb_rxeof(sc) struct wb_softc *sc; { struct mbuf *m = NULL; struct ifnet *ifp; struct wb_chain_onefrag *cur_rx; int total_len = 0; u_int32_t rxstat; WB_LOCK_ASSERT(sc); ifp = sc->wb_ifp; while(!((rxstat = sc->wb_cdata.wb_rx_head->wb_ptr->wb_status) & WB_RXSTAT_OWN)) { struct mbuf *m0 = NULL; cur_rx = sc->wb_cdata.wb_rx_head; sc->wb_cdata.wb_rx_head = cur_rx->wb_nextdesc; m = cur_rx->wb_mbuf; if ((rxstat & WB_RXSTAT_MIIERR) || (WB_RXBYTES(cur_rx->wb_ptr->wb_status) < WB_MIN_FRAMELEN) || (WB_RXBYTES(cur_rx->wb_ptr->wb_status) > 1536) || !(rxstat & WB_RXSTAT_LASTFRAG) || !(rxstat & WB_RXSTAT_RXCMP)) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); wb_newbuf(sc, cur_rx, m); device_printf(sc->wb_dev, "receiver babbling: possible chip bug," " forcing reset\n"); wb_fixmedia(sc); wb_reset(sc); wb_init_locked(sc); return; } if (rxstat & WB_RXSTAT_RXERR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); wb_newbuf(sc, cur_rx, m); break; } /* No errors; receive the packet. */ total_len = WB_RXBYTES(cur_rx->wb_ptr->wb_status); /* * XXX The Winbond chip includes the CRC with every * received frame, and there's no way to turn this * behavior off (at least, I can't find anything in * the manual that explains how to do it) so we have * to trim off the CRC manually. */ total_len -= ETHER_CRC_LEN; m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp, NULL); wb_newbuf(sc, cur_rx, m); if (m0 == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } m = m0; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); WB_UNLOCK(sc); (*ifp->if_input)(ifp, m); WB_LOCK(sc); } } static void wb_rxeoc(sc) struct wb_softc *sc; { wb_rxeof(sc); WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_RX_ON); CSR_WRITE_4(sc, WB_RXADDR, vtophys(&sc->wb_ldata->wb_rx_list[0])); WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_RX_ON); if (CSR_READ_4(sc, WB_ISR) & WB_RXSTATE_SUSPEND) CSR_WRITE_4(sc, WB_RXSTART, 0xFFFFFFFF); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void wb_txeof(sc) struct wb_softc *sc; { struct wb_chain *cur_tx; struct ifnet *ifp; ifp = sc->wb_ifp; /* Clear the timeout timer. */ sc->wb_timer = 0; if (sc->wb_cdata.wb_tx_head == NULL) return; /* * Go through our tx list and free mbufs for those * frames that have been transmitted. */ while(sc->wb_cdata.wb_tx_head->wb_mbuf != NULL) { u_int32_t txstat; cur_tx = sc->wb_cdata.wb_tx_head; txstat = WB_TXSTATUS(cur_tx); if ((txstat & WB_TXSTAT_OWN) || txstat == WB_UNSENT) break; if (txstat & WB_TXSTAT_TXERR) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (txstat & WB_TXSTAT_ABORT) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); if (txstat & WB_TXSTAT_LATECOLL) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); } if_inc_counter(ifp, IFCOUNTER_COLLISIONS, (txstat & WB_TXSTAT_COLLCNT) >> 3); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); m_freem(cur_tx->wb_mbuf); cur_tx->wb_mbuf = NULL; if (sc->wb_cdata.wb_tx_head == sc->wb_cdata.wb_tx_tail) { sc->wb_cdata.wb_tx_head = NULL; sc->wb_cdata.wb_tx_tail = NULL; break; } sc->wb_cdata.wb_tx_head = cur_tx->wb_nextdesc; } } /* * TX 'end of channel' interrupt handler. 
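 *
 * wb_txeof() above reaps completed descriptors until it meets one the
 * chip still owns or one parked with the WB_UNSENT magic.  Below is a
 * standalone model of that stop condition; the constants and struct
 * are invented stand-ins.
 */

#include <stdint.h>
#include <stdio.h>

#define STAT_OWN	0x80000000u	/* chip still owns descriptor */
#define STAT_UNSENT	0x00001234u	/* parked, never started */

struct slot {
	uint32_t status;
	int inuse;			/* models wb_mbuf != NULL */
	struct slot *next;
};

static int
reap(struct slot **head)
{
	struct slot *s;
	int freed = 0;

	while ((s = *head) != NULL && s->inuse) {
		if ((s->status & STAT_OWN) || s->status == STAT_UNSENT)
			break;		/* chip (or driver) not done yet */
		s->inuse = 0;		/* stands in for m_freem() */
		freed++;
		*head = s->next;
	}
	return (freed);
}

int
main(void)
{
	struct slot c = { STAT_OWN, 1, NULL };
	struct slot b = { 0, 1, &c };
	struct slot a = { 0, 1, &b };
	struct slot *head = &a;

	printf("freed %d, stopped at OWN\n", reap(&head));	/* 2 */
	return (0);
}

/*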
*/ static void wb_txeoc(sc) struct wb_softc *sc; { struct ifnet *ifp; ifp = sc->wb_ifp; sc->wb_timer = 0; if (sc->wb_cdata.wb_tx_head == NULL) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->wb_cdata.wb_tx_tail = NULL; } else { if (WB_TXOWN(sc->wb_cdata.wb_tx_head) == WB_UNSENT) { WB_TXOWN(sc->wb_cdata.wb_tx_head) = WB_TXSTAT_OWN; sc->wb_timer = 5; CSR_WRITE_4(sc, WB_TXSTART, 0xFFFFFFFF); } } } static void wb_intr(arg) void *arg; { struct wb_softc *sc; struct ifnet *ifp; u_int32_t status; sc = arg; WB_LOCK(sc); ifp = sc->wb_ifp; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { WB_UNLOCK(sc); return; } /* Disable interrupts. */ CSR_WRITE_4(sc, WB_IMR, 0x00000000); for (;;) { status = CSR_READ_4(sc, WB_ISR); if (status) CSR_WRITE_4(sc, WB_ISR, status); if ((status & WB_INTRS) == 0) break; if ((status & WB_ISR_RX_NOBUF) || (status & WB_ISR_RX_ERR)) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); wb_reset(sc); if (status & WB_ISR_RX_ERR) wb_fixmedia(sc); wb_init_locked(sc); continue; } if (status & WB_ISR_RX_OK) wb_rxeof(sc); if (status & WB_ISR_RX_IDLE) wb_rxeoc(sc); if (status & WB_ISR_TX_OK) wb_txeof(sc); if (status & WB_ISR_TX_NOBUF) wb_txeoc(sc); if (status & WB_ISR_TX_IDLE) { wb_txeof(sc); if (sc->wb_cdata.wb_tx_head != NULL) { WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); CSR_WRITE_4(sc, WB_TXSTART, 0xFFFFFFFF); } } if (status & WB_ISR_TX_UNDERRUN) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); wb_txeof(sc); WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); /* Jack up TX threshold */ sc->wb_txthresh += WB_TXTHRESH_CHUNK; WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_THRESH); WB_SETBIT(sc, WB_NETCFG, WB_TXTHRESH(sc->wb_txthresh)); WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); } if (status & WB_ISR_BUS_ERR) { wb_reset(sc); wb_init_locked(sc); } } /* Re-enable interrupts. */ CSR_WRITE_4(sc, WB_IMR, WB_INTRS); if (ifp->if_snd.ifq_head != NULL) { wb_start_locked(ifp); } WB_UNLOCK(sc); } static void wb_tick(xsc) void *xsc; { struct wb_softc *sc; struct mii_data *mii; sc = xsc; WB_LOCK_ASSERT(sc); mii = device_get_softc(sc->wb_miibus); mii_tick(mii); if (sc->wb_timer > 0 && --sc->wb_timer == 0) wb_watchdog(sc); callout_reset(&sc->wb_stat_callout, hz, wb_tick, sc); } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int wb_encap(sc, c, m_head) struct wb_softc *sc; struct wb_chain *c; struct mbuf *m_head; { int frag = 0; struct wb_desc *f = NULL; int total_len; struct mbuf *m; /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; total_len = 0; for (m = m_head, frag = 0; m != NULL; m = m->m_next) { if (m->m_len != 0) { if (frag == WB_MAXFRAGS) break; total_len += m->m_len; f = &c->wb_ptr->wb_frag[frag]; f->wb_ctl = WB_TXCTL_TLINK | m->m_len; if (frag == 0) { f->wb_ctl |= WB_TXCTL_FIRSTFRAG; f->wb_status = 0; } else f->wb_status = WB_TXSTAT_OWN; f->wb_next = vtophys(&c->wb_ptr->wb_frag[frag + 1]); f->wb_data = vtophys(mtod(m, vm_offset_t)); frag++; } } /* * Handle special case: we used up all 16 fragments, * but we have more mbufs left in the chain. Copy the * data into an mbuf cluster. Note that we don't * bother clearing the values in the other fragment * pointers/counters; it wouldn't gain us anything, * and would waste cycles. 
*/ if (m != NULL) { struct mbuf *m_new = NULL; MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) return(1); if (m_head->m_pkthdr.len > MHLEN) { if (!(MCLGET(m_new, M_NOWAIT))) { m_freem(m_new); return(1); } } m_copydata(m_head, 0, m_head->m_pkthdr.len, mtod(m_new, caddr_t)); m_new->m_pkthdr.len = m_new->m_len = m_head->m_pkthdr.len; m_freem(m_head); m_head = m_new; f = &c->wb_ptr->wb_frag[0]; f->wb_status = 0; f->wb_data = vtophys(mtod(m_new, caddr_t)); f->wb_ctl = total_len = m_new->m_len; f->wb_ctl |= WB_TXCTL_TLINK|WB_TXCTL_FIRSTFRAG; frag = 1; } if (total_len < WB_MIN_FRAMELEN) { f = &c->wb_ptr->wb_frag[frag]; f->wb_ctl = WB_MIN_FRAMELEN - total_len; f->wb_data = vtophys(&sc->wb_cdata.wb_pad); f->wb_ctl |= WB_TXCTL_TLINK; f->wb_status = WB_TXSTAT_OWN; frag++; } c->wb_mbuf = m_head; c->wb_lastdesc = frag - 1; WB_TXCTL(c) |= WB_TXCTL_LASTFRAG; WB_TXNEXT(c) = vtophys(&c->wb_nextdesc->wb_ptr->wb_frag[0]); return(0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. */ static void wb_start(ifp) struct ifnet *ifp; { struct wb_softc *sc; sc = ifp->if_softc; WB_LOCK(sc); wb_start_locked(ifp); WB_UNLOCK(sc); } static void wb_start_locked(ifp) struct ifnet *ifp; { struct wb_softc *sc; struct mbuf *m_head = NULL; struct wb_chain *cur_tx = NULL, *start_tx; sc = ifp->if_softc; WB_LOCK_ASSERT(sc); /* * Check for an available queue slot. If there are none, * punt. */ if (sc->wb_cdata.wb_tx_free->wb_mbuf != NULL) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } start_tx = sc->wb_cdata.wb_tx_free; while(sc->wb_cdata.wb_tx_free->wb_mbuf == NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* Pick a descriptor off the free list. */ cur_tx = sc->wb_cdata.wb_tx_free; sc->wb_cdata.wb_tx_free = cur_tx->wb_nextdesc; /* Pack the data into the descriptor. */ wb_encap(sc, cur_tx, m_head); if (cur_tx != start_tx) WB_TXOWN(cur_tx) = WB_TXSTAT_OWN; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ BPF_MTAP(ifp, cur_tx->wb_mbuf); } /* * If there are no packets queued, bail. */ if (cur_tx == NULL) return; /* * Place the request for the upload interrupt * in the last descriptor in the chain. This way, if * we're chaining several packets at once, we'll only * get an interrupt once for the whole chain rather than * once for each packet. */ WB_TXCTL(cur_tx) |= WB_TXCTL_FINT; cur_tx->wb_ptr->wb_frag[0].wb_ctl |= WB_TXCTL_FINT; sc->wb_cdata.wb_tx_tail = cur_tx; if (sc->wb_cdata.wb_tx_head == NULL) { sc->wb_cdata.wb_tx_head = start_tx; WB_TXOWN(start_tx) = WB_TXSTAT_OWN; CSR_WRITE_4(sc, WB_TXSTART, 0xFFFFFFFF); } else { /* * We need to distinguish between the case where * the own bit is clear because the chip cleared it * and where the own bit is clear because we haven't * set it yet. The magic value WB_UNSENT is just some * randomly chosen number which doesn't have the own * bit set. When we actually transmit the frame, the * status word will have _only_ the own bit set, so * the txeoc handler will be able to tell if it needs * to initiate another transmission to flush out pending * frames. */ WB_TXOWN(start_tx) = WB_UNSENT; } /* * Set a timeout in case the chip goes out to lunch. 
*/ sc->wb_timer = 5; } static void wb_init(xsc) void *xsc; { struct wb_softc *sc = xsc; WB_LOCK(sc); wb_init_locked(sc); WB_UNLOCK(sc); } static void wb_init_locked(sc) struct wb_softc *sc; { struct ifnet *ifp = sc->wb_ifp; int i; struct mii_data *mii; WB_LOCK_ASSERT(sc); mii = device_get_softc(sc->wb_miibus); /* * Cancel pending I/O and free all RX/TX buffers. */ wb_stop(sc); wb_reset(sc); sc->wb_txthresh = WB_TXTHRESH_INIT; /* * Set cache alignment and burst length. */ #ifdef foo CSR_WRITE_4(sc, WB_BUSCTL, WB_BUSCTL_CONFIG); WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_THRESH); WB_SETBIT(sc, WB_NETCFG, WB_TXTHRESH(sc->wb_txthresh)); #endif CSR_WRITE_4(sc, WB_BUSCTL, WB_BUSCTL_MUSTBEONE|WB_BUSCTL_ARBITRATION); WB_SETBIT(sc, WB_BUSCTL, WB_BURSTLEN_16LONG); switch(sc->wb_cachesize) { case 32: WB_SETBIT(sc, WB_BUSCTL, WB_CACHEALIGN_32LONG); break; case 16: WB_SETBIT(sc, WB_BUSCTL, WB_CACHEALIGN_16LONG); break; case 8: WB_SETBIT(sc, WB_BUSCTL, WB_CACHEALIGN_8LONG); break; case 0: default: WB_SETBIT(sc, WB_BUSCTL, WB_CACHEALIGN_NONE); break; } /* This doesn't tend to work too well at 100Mbps. */ WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_EARLY_ON); /* Init our MAC address */ for (i = 0; i < ETHER_ADDR_LEN; i++) { CSR_WRITE_1(sc, WB_NODE0 + i, IF_LLADDR(sc->wb_ifp)[i]); } /* Init circular RX list. */ if (wb_list_rx_init(sc) == ENOBUFS) { device_printf(sc->wb_dev, "initialization failed: no memory for rx buffers\n"); wb_stop(sc); return; } /* Init TX descriptors. */ wb_list_tx_init(sc); /* If we want promiscuous mode, set the allframes bit. */ if (ifp->if_flags & IFF_PROMISC) { WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_RX_ALLPHYS); } else { WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_RX_ALLPHYS); } /* * Set capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) { WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_RX_BROAD); } else { WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_RX_BROAD); } /* * Program the multicast filter, if necessary. */ wb_setmulti(sc); /* * Load the address of the RX list. */ WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_RX_ON); CSR_WRITE_4(sc, WB_RXADDR, vtophys(&sc->wb_ldata->wb_rx_list[0])); /* * Enable interrupts. */ CSR_WRITE_4(sc, WB_IMR, WB_INTRS); CSR_WRITE_4(sc, WB_ISR, 0xFFFFFFFF); /* Enable receiver and transmitter. */ WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_RX_ON); CSR_WRITE_4(sc, WB_RXSTART, 0xFFFFFFFF); WB_CLRBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); CSR_WRITE_4(sc, WB_TXADDR, vtophys(&sc->wb_ldata->wb_tx_list[0])); WB_SETBIT(sc, WB_NETCFG, WB_NETCFG_TX_ON); mii_mediachg(mii); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->wb_stat_callout, hz, wb_tick, sc); } /* * Set media options. */ static int wb_ifmedia_upd(ifp) struct ifnet *ifp; { struct wb_softc *sc; sc = ifp->if_softc; WB_LOCK(sc); if (ifp->if_flags & IFF_UP) wb_init_locked(sc); WB_UNLOCK(sc); return(0); } /* * Report current media status. 
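 *
 * wb_init_locked() above maps the PCI cache line size saved at attach
 * time onto the bus-control alignment bits, defaulting to "none" for
 * anything it does not recognize.  A standalone sketch of that mapping
 * follows; the bit values here are invented, the real ones are in
 * if_wbreg.h.
 */

#include <stdio.h>

#define CACHEALIGN_NONE		0x00000000u	/* illustrative values */
#define CACHEALIGN_8LONG	0x00002000u
#define CACHEALIGN_16LONG	0x00004000u
#define CACHEALIGN_32LONG	0x00008000u

static unsigned int
cachealign_bits(int cachesize)
{

	switch (cachesize) {
	case 32:
		return (CACHEALIGN_32LONG);
	case 16:
		return (CACHEALIGN_16LONG);
	case 8:
		return (CACHEALIGN_8LONG);
	case 0:
	default:
		return (CACHEALIGN_NONE);
	}
}

int
main(void)
{

	printf("0x%08x\n", cachealign_bits(16));
	printf("0x%08x\n", cachealign_bits(64));	/* unknown: none */
	return (0);
}

/*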
*/ static void wb_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct wb_softc *sc; struct mii_data *mii; sc = ifp->if_softc; WB_LOCK(sc); mii = device_get_softc(sc->wb_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; WB_UNLOCK(sc); } static int wb_ioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct wb_softc *sc = ifp->if_softc; struct mii_data *mii; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch(command) { case SIOCSIFFLAGS: WB_LOCK(sc); if (ifp->if_flags & IFF_UP) { wb_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) wb_stop(sc); } WB_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: WB_LOCK(sc); wb_setmulti(sc); WB_UNLOCK(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->wb_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; default: error = ether_ioctl(ifp, command, data); break; } return(error); } static void wb_watchdog(sc) struct wb_softc *sc; { struct ifnet *ifp; WB_LOCK_ASSERT(sc); ifp = sc->wb_ifp; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if_printf(ifp, "watchdog timeout\n"); #ifdef foo if (!(wb_phy_readreg(sc, PHY_BMSR) & PHY_BMSR_LINKSTAT)) if_printf(ifp, "no carrier - transceiver cable problem?\n"); #endif wb_stop(sc); wb_reset(sc); wb_init_locked(sc); if (ifp->if_snd.ifq_head != NULL) wb_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void wb_stop(sc) struct wb_softc *sc; { - register int i; + int i; struct ifnet *ifp; WB_LOCK_ASSERT(sc); ifp = sc->wb_ifp; sc->wb_timer = 0; callout_stop(&sc->wb_stat_callout); WB_CLRBIT(sc, WB_NETCFG, (WB_NETCFG_RX_ON|WB_NETCFG_TX_ON)); CSR_WRITE_4(sc, WB_IMR, 0x00000000); CSR_WRITE_4(sc, WB_TXADDR, 0x00000000); CSR_WRITE_4(sc, WB_RXADDR, 0x00000000); /* * Free data in the RX lists. */ for (i = 0; i < WB_RX_LIST_CNT; i++) { if (sc->wb_cdata.wb_rx_chain[i].wb_mbuf != NULL) { m_freem(sc->wb_cdata.wb_rx_chain[i].wb_mbuf); sc->wb_cdata.wb_rx_chain[i].wb_mbuf = NULL; } } bzero((char *)&sc->wb_ldata->wb_rx_list, sizeof(sc->wb_ldata->wb_rx_list)); /* * Free the TX list buffers. */ for (i = 0; i < WB_TX_LIST_CNT; i++) { if (sc->wb_cdata.wb_tx_chain[i].wb_mbuf != NULL) { m_freem(sc->wb_cdata.wb_tx_chain[i].wb_mbuf); sc->wb_cdata.wb_tx_chain[i].wb_mbuf = NULL; } } bzero((char *)&sc->wb_ldata->wb_tx_list, sizeof(sc->wb_ldata->wb_tx_list)); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int wb_shutdown(dev) device_t dev; { struct wb_softc *sc; sc = device_get_softc(dev); WB_LOCK(sc); wb_stop(sc); WB_UNLOCK(sc); return (0); } Index: stable/11/sys/dev/xl/if_xl.c =================================================================== --- stable/11/sys/dev/xl/if_xl.c (revision 331642) +++ stable/11/sys/dev/xl/if_xl.c (revision 331643) @@ -1,3300 +1,3300 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1997, 1998, 1999 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * 3Com 3c90x Etherlink XL PCI NIC driver * * Supports the 3Com "boomerang", "cyclone" and "hurricane" PCI * bus-master chips (3c90x cards and embedded controllers) including * the following: * * 3Com 3c900-TPO 10Mbps/RJ-45 * 3Com 3c900-COMBO 10Mbps/RJ-45,AUI,BNC * 3Com 3c905-TX 10/100Mbps/RJ-45 * 3Com 3c905-T4 10/100Mbps/RJ-45 * 3Com 3c900B-TPO 10Mbps/RJ-45 * 3Com 3c900B-COMBO 10Mbps/RJ-45,AUI,BNC * 3Com 3c900B-TPC 10Mbps/RJ-45,BNC * 3Com 3c900B-FL 10Mbps/Fiber-optic * 3Com 3c905B-COMBO 10/100Mbps/RJ-45,AUI,BNC * 3Com 3c905B-TX 10/100Mbps/RJ-45 * 3Com 3c905B-FL/FX 10/100Mbps/Fiber-optic * 3Com 3c905C-TX 10/100Mbps/RJ-45 (Tornado ASIC) * 3Com 3c980-TX 10/100Mbps server adapter (Hurricane ASIC) * 3Com 3c980C-TX 10/100Mbps server adapter (Tornado ASIC) * 3Com 3cSOHO100-TX 10/100Mbps/RJ-45 (Hurricane ASIC) * 3Com 3c450-TX 10/100Mbps/RJ-45 (Tornado ASIC) * 3Com 3c555 10/100Mbps/RJ-45 (MiniPCI, Laptop Hurricane) * 3Com 3c556 10/100Mbps/RJ-45 (MiniPCI, Hurricane ASIC) * 3Com 3c556B 10/100Mbps/RJ-45 (MiniPCI, Hurricane ASIC) * 3Com 3c575TX 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3c575B 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3c575C 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3cxfem656 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3cxfem656b 10/100Mbps/RJ-45 (Cardbus, Hurricane ASIC) * 3Com 3cxfem656c 10/100Mbps/RJ-45 (Cardbus, Tornado ASIC) * Dell Optiplex GX1 on-board 3c918 10/100Mbps/RJ-45 * Dell on-board 3c920 10/100Mbps/RJ-45 * Dell Precision on-board 3c905B 10/100Mbps/RJ-45 * Dell Latitude laptop docking station embedded 3c905-TX * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ /* * The 3c90x series chips use a bus-master DMA interface for transferring * packets to and from the controller chip. Some of the "vortex" cards * (3c59x) also supported a bus master mode, however for those chips * you could only DMA packets to/from a contiguous memory buffer. For * transmission this would mean copying the contents of the queued mbuf * chain into an mbuf cluster and then DMAing the cluster. 
This extra * copy would sort of defeat the purpose of the bus master support for * any packet that doesn't fit into a single mbuf. * * By contrast, the 3c90x cards support a fragment-based bus master * mode where mbuf chains can be encapsulated using TX descriptors. * This is similar to other PCI chips such as the Texas Instruments * ThunderLAN and the Intel 82557/82558. * * The "vortex" driver (if_vx.c) happens to work for the "boomerang" * bus master chips because they maintain the old PIO interface for * backwards compatibility, but starting with the 3c905B and the * "cyclone" chips, the compatibility interface has been dropped. * Since using bus master DMA is a big win, we use this driver to * support the PCI "boomerang" chips even though they work with the * "vortex" driver in order to obtain better performance. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(xl, pci, 1, 1, 1); MODULE_DEPEND(xl, ether, 1, 1, 1); MODULE_DEPEND(xl, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #include /* * TX Checksumming is disabled by default for two reasons: * - TX Checksumming will occasionally produce corrupt packets * - TX Checksumming seems to reduce performance * * Only 905B/C cards were reported to have this problem, it is possible * that later chips _may_ be immune. */ #define XL905B_TXCSUM_BROKEN 1 #ifdef XL905B_TXCSUM_BROKEN #define XL905B_CSUM_FEATURES 0 #else #define XL905B_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) #endif /* * Various supported device vendors/types and their names. 
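 *
 * (Illustrative aside: xl_probe() walks the table below until it
 * hits the { 0, 0, NULL } sentinel.  A standalone model of the
 * idiom with hypothetical names and a trimmed ID list, not driver
 * code:
 *
 *	struct pci_id {
 *		uint16_t vid, did;
 *		const char *name;
 *	};
 *
 *	static const struct pci_id ids[] = {
 *		{ 0x10b7, 0x9055, "3c905B-TX" },	// example IDs
 *		{ 0x10b7, 0x9200, "3c905C-TX" },
 *		{ 0, 0, NULL }		// sentinel ends the scan
 *	};
 *
 *	static const char *
 *	probe(uint16_t vid, uint16_t did)
 *	{
 *		const struct pci_id *t;
 *
 *		for (t = ids; t->name != NULL; t++)
 *			if (t->vid == vid && t->did == did)
 *				return (t->name);
 *		return (NULL);
 *	}
 *
 * Supporting a new board is then a one-line table addition.)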
*/ static const struct xl_type xl_devs[] = { { TC_VENDORID, TC_DEVICEID_BOOMERANG_10BT, "3Com 3c900-TPO Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_BOOMERANG_10BT_COMBO, "3Com 3c900-COMBO Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_BOOMERANG_10_100BT, "3Com 3c905-TX Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_BOOMERANG_100BT4, "3Com 3c905-T4 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_KRAKATOA_10BT, "3Com 3c900B-TPO Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_KRAKATOA_10BT_COMBO, "3Com 3c900B-COMBO Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_KRAKATOA_10BT_TPC, "3Com 3c900B-TPC Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_CYCLONE_10FL, "3Com 3c900B-FL Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_10_100BT, "3Com 3c905B-TX Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_CYCLONE_10_100BT4, "3Com 3c905B-T4 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_CYCLONE_10_100FX, "3Com 3c905B-FX/SC Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_CYCLONE_10_100_COMBO, "3Com 3c905B-COMBO Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_10_100BT, "3Com 3c905C-TX Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_10_100BT_920B, "3Com 3c920B-EMB Integrated Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_10_100BT_920B_WNM, "3Com 3c920B-EMB-WNM Integrated Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_10_100BT_SERV, "3Com 3c980 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_10_100BT_SERV, "3Com 3c980C Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_SOHO100TX, "3Com 3cSOHO100-TX OfficeConnect" }, { TC_VENDORID, TC_DEVICEID_TORNADO_HOMECONNECT, "3Com 3c450-TX HomeConnect" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_555, "3Com 3c555 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_556, "3Com 3c556 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_556B, "3Com 3c556B Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_575A, "3Com 3c575TX Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_575B, "3Com 3c575B Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_575C, "3Com 3c575C Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_656, "3Com 3c656 Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_HURRICANE_656B, "3Com 3c656B Fast Etherlink XL" }, { TC_VENDORID, TC_DEVICEID_TORNADO_656C, "3Com 3c656C Fast Etherlink XL" }, { 0, 0, NULL } }; static int xl_probe(device_t); static int xl_attach(device_t); static int xl_detach(device_t); static int xl_newbuf(struct xl_softc *, struct xl_chain_onefrag *); static void xl_tick(void *); static void xl_stats_update(struct xl_softc *); static int xl_encap(struct xl_softc *, struct xl_chain *, struct mbuf **); static int xl_rxeof(struct xl_softc *); static void xl_rxeof_task(void *, int); static int xl_rx_resync(struct xl_softc *); static void xl_txeof(struct xl_softc *); static void xl_txeof_90xB(struct xl_softc *); static void xl_txeoc(struct xl_softc *); static void xl_intr(void *); static void xl_start(struct ifnet *); static void xl_start_locked(struct ifnet *); static void xl_start_90xB_locked(struct ifnet *); static int xl_ioctl(struct ifnet *, u_long, caddr_t); static void xl_init(void *); static void xl_init_locked(struct xl_softc *); static void xl_stop(struct xl_softc *); static int xl_watchdog(struct xl_softc *); static int xl_shutdown(device_t); static int xl_suspend(device_t); static int xl_resume(device_t); static void xl_setwol(struct xl_softc *); #ifdef DEVICE_POLLING static int xl_poll(struct ifnet *ifp, enum poll_cmd cmd, int count); static int 
xl_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count); #endif static int xl_ifmedia_upd(struct ifnet *); static void xl_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int xl_eeprom_wait(struct xl_softc *); static int xl_read_eeprom(struct xl_softc *, caddr_t, int, int, int); static void xl_rxfilter(struct xl_softc *); static void xl_rxfilter_90x(struct xl_softc *); static void xl_rxfilter_90xB(struct xl_softc *); static void xl_setcfg(struct xl_softc *); static void xl_setmode(struct xl_softc *, int); static void xl_reset(struct xl_softc *); static int xl_list_rx_init(struct xl_softc *); static int xl_list_tx_init(struct xl_softc *); static int xl_list_tx_init_90xB(struct xl_softc *); static void xl_wait(struct xl_softc *); static void xl_mediacheck(struct xl_softc *); static void xl_choose_media(struct xl_softc *sc, int *media); static void xl_choose_xcvr(struct xl_softc *, int); static void xl_dma_map_addr(void *, bus_dma_segment_t *, int, int); #ifdef notdef static void xl_testpacket(struct xl_softc *); #endif static int xl_miibus_readreg(device_t, int, int); static int xl_miibus_writereg(device_t, int, int, int); static void xl_miibus_statchg(device_t); static void xl_miibus_mediainit(device_t); /* * MII bit-bang glue */ static uint32_t xl_mii_bitbang_read(device_t); static void xl_mii_bitbang_write(device_t, uint32_t); static const struct mii_bitbang_ops xl_mii_bitbang_ops = { xl_mii_bitbang_read, xl_mii_bitbang_write, { XL_MII_DATA, /* MII_BIT_MDO */ XL_MII_DATA, /* MII_BIT_MDI */ XL_MII_CLK, /* MII_BIT_MDC */ XL_MII_DIR, /* MII_BIT_DIR_HOST_PHY */ 0, /* MII_BIT_DIR_PHY_HOST */ } }; static device_method_t xl_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xl_probe), DEVMETHOD(device_attach, xl_attach), DEVMETHOD(device_detach, xl_detach), DEVMETHOD(device_shutdown, xl_shutdown), DEVMETHOD(device_suspend, xl_suspend), DEVMETHOD(device_resume, xl_resume), /* MII interface */ DEVMETHOD(miibus_readreg, xl_miibus_readreg), DEVMETHOD(miibus_writereg, xl_miibus_writereg), DEVMETHOD(miibus_statchg, xl_miibus_statchg), DEVMETHOD(miibus_mediainit, xl_miibus_mediainit), DEVMETHOD_END }; static driver_t xl_driver = { "xl", xl_methods, sizeof(struct xl_softc) }; static devclass_t xl_devclass; DRIVER_MODULE_ORDERED(xl, pci, xl_driver, xl_devclass, NULL, NULL, SI_ORDER_ANY); DRIVER_MODULE(miibus, xl, miibus_driver, miibus_devclass, NULL, NULL); static void xl_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { u_int32_t *paddr; paddr = arg; *paddr = segs->ds_addr; } /* * Murphy's law says that it's possible the chip can wedge and * the 'command in progress' bit may never clear. Hence, we wait * only a finite amount of time to avoid getting caught in an * infinite loop. Normally this delay routine would be a macro, * but it isn't called during normal operation so we can afford * to make it a function. Suppress warning when card gone. */ static void xl_wait(struct xl_softc *sc) { - register int i; + int i; for (i = 0; i < XL_TIMEOUT; i++) { if ((CSR_READ_2(sc, XL_STATUS) & XL_STAT_CMDBUSY) == 0) break; } if (i == XL_TIMEOUT && bus_child_present(sc->xl_dev)) device_printf(sc->xl_dev, "command never completed!\n"); } /* * MII access routines are provided for adapters with external * PHYs (3c905-TX, 3c905-T4, 3c905B-T4) and those with built-in * autoneg logic that's faked up to look like a PHY (3c905B-TX). 
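 *
 * (Illustrative aside: what ultimately gets clocked out here is an
 * IEEE 802.3 clause 22 MDIO frame.  A simplified hypothetical model
 * over abstract set_mdc()/set_mdo()/get_mdi()/set_dir() pin helpers,
 * not driver code:
 *
 *	static void
 *	clock_out(uint32_t bits, int nbits)
 *	{
 *		int i;
 *
 *		for (i = nbits - 1; i >= 0; i--) {
 *			set_mdo((bits >> i) & 1);
 *			set_mdc(0);
 *			set_mdc(1);	// PHY samples MDIO on rising MDC
 *		}
 *	}
 *
 *	static uint16_t
 *	mdio_read(int phy, int reg)
 *	{
 *		uint16_t val = 0;
 *		int i;
 *
 *		set_dir(1);			// host drives the data pin
 *		clock_out(0xffffffffu, 32);	// preamble
 *		// start (01), read op (10), 5-bit phy, 5-bit reg
 *		clock_out((0x6u << 10) | (phy << 5) | reg, 14);
 *		set_dir(0);			// turnaround; PHY drives
 *		set_mdc(0); set_mdc(1);
 *		for (i = 0; i < 16; i++) {
 *			set_mdc(0);
 *			set_mdc(1);
 *			val = (val << 1) | (get_mdi() & 1);
 *		}
 *		return (val);
 *	}
 *
 * In this driver the frame construction is done by the shared
 * mii_bitbang_readreg()/mii_bitbang_writereg() code; the routines
 * below only expose the chip's MDIO pins via xl_mii_bitbang_ops.)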
* Note: if you don't perform the MDIO operations just right, * it's possible to end up with code that works correctly with * some chips/CPUs/processor speeds/bus speeds/etc but not * with others. */ /* * Read the MII serial port for the MII bit-bang module. */ static uint32_t xl_mii_bitbang_read(device_t dev) { struct xl_softc *sc; uint32_t val; sc = device_get_softc(dev); /* We're already in window 4. */ val = CSR_READ_2(sc, XL_W4_PHY_MGMT); CSR_BARRIER(sc, XL_W4_PHY_MGMT, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (val); } /* * Write the MII serial port for the MII bit-bang module. */ static void xl_mii_bitbang_write(device_t dev, uint32_t val) { struct xl_softc *sc; sc = device_get_softc(dev); /* We're already in window 4. */ CSR_WRITE_2(sc, XL_W4_PHY_MGMT, val); CSR_BARRIER(sc, XL_W4_PHY_MGMT, 2, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } static int xl_miibus_readreg(device_t dev, int phy, int reg) { struct xl_softc *sc; sc = device_get_softc(dev); /* Select the window 4. */ XL_SEL_WIN(4); return (mii_bitbang_readreg(dev, &xl_mii_bitbang_ops, phy, reg)); } static int xl_miibus_writereg(device_t dev, int phy, int reg, int data) { struct xl_softc *sc; sc = device_get_softc(dev); /* Select the window 4. */ XL_SEL_WIN(4); mii_bitbang_writereg(dev, &xl_mii_bitbang_ops, phy, reg, data); return (0); } static void xl_miibus_statchg(device_t dev) { struct xl_softc *sc; struct mii_data *mii; uint8_t macctl; sc = device_get_softc(dev); mii = device_get_softc(sc->xl_miibus); xl_setcfg(sc); /* Set ASIC's duplex mode to match the PHY. */ XL_SEL_WIN(3); macctl = CSR_READ_1(sc, XL_W3_MAC_CTRL); if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { macctl |= XL_MACCTRL_DUPLEX; if (sc->xl_type == XL_TYPE_905B) { if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) macctl |= XL_MACCTRL_FLOW_CONTROL_ENB; else macctl &= ~XL_MACCTRL_FLOW_CONTROL_ENB; } } else { macctl &= ~XL_MACCTRL_DUPLEX; if (sc->xl_type == XL_TYPE_905B) macctl &= ~XL_MACCTRL_FLOW_CONTROL_ENB; } CSR_WRITE_1(sc, XL_W3_MAC_CTRL, macctl); } /* * Special support for the 3c905B-COMBO. This card has 10/100 support * plus BNC and AUI ports. This means we will have both an miibus attached * plus some non-MII media settings. In order to allow this, we have to * add the extra media to the miibus's ifmedia struct, but we can't do * that during xl_attach() because the miibus hasn't been attached yet. * So instead, we wait until the miibus probe/attach is done, at which * point we will get a callback telling us that it's safe to add our * extra media. */ static void xl_miibus_mediainit(device_t dev) { struct xl_softc *sc; struct mii_data *mii; struct ifmedia *ifm; sc = device_get_softc(dev); mii = device_get_softc(sc->xl_miibus); ifm = &mii->mii_media; if (sc->xl_media & (XL_MEDIAOPT_AUI | XL_MEDIAOPT_10FL)) { /* * Check for a 10baseFL board in disguise.
*/ if (sc->xl_type == XL_TYPE_905B && sc->xl_media == XL_MEDIAOPT_10FL) { if (bootverbose) device_printf(sc->xl_dev, "found 10baseFL\n"); ifmedia_add(ifm, IFM_ETHER | IFM_10_FL, 0, NULL); ifmedia_add(ifm, IFM_ETHER | IFM_10_FL|IFM_HDX, 0, NULL); if (sc->xl_caps & XL_CAPS_FULL_DUPLEX) ifmedia_add(ifm, IFM_ETHER | IFM_10_FL | IFM_FDX, 0, NULL); } else { if (bootverbose) device_printf(sc->xl_dev, "found AUI\n"); ifmedia_add(ifm, IFM_ETHER | IFM_10_5, 0, NULL); } } if (sc->xl_media & XL_MEDIAOPT_BNC) { if (bootverbose) device_printf(sc->xl_dev, "found BNC\n"); ifmedia_add(ifm, IFM_ETHER | IFM_10_2, 0, NULL); } } /* * The EEPROM is slow: give it time to come ready after issuing * it a command. */ static int xl_eeprom_wait(struct xl_softc *sc) { int i; for (i = 0; i < 100; i++) { if (CSR_READ_2(sc, XL_W0_EE_CMD) & XL_EE_BUSY) DELAY(162); else break; } if (i == 100) { device_printf(sc->xl_dev, "eeprom failed to come ready\n"); return (1); } return (0); } /* * Read a sequence of words from the EEPROM. Note that ethernet address * data is stored in the EEPROM in network byte order. */ static int xl_read_eeprom(struct xl_softc *sc, caddr_t dest, int off, int cnt, int swap) { int err = 0, i; u_int16_t word = 0, *ptr; #define EEPROM_5BIT_OFFSET(A) ((((A) << 2) & 0x7F00) | ((A) & 0x003F)) #define EEPROM_8BIT_OFFSET(A) ((A) & 0x003F) /* * XXX: WARNING! DANGER! * It's easy to accidentally overwrite the rom content! * Note: the 3c575 uses 8bit EEPROM offsets. */ XL_SEL_WIN(0); if (xl_eeprom_wait(sc)) return (1); if (sc->xl_flags & XL_FLAG_EEPROM_OFFSET_30) off += 0x30; for (i = 0; i < cnt; i++) { if (sc->xl_flags & XL_FLAG_8BITROM) CSR_WRITE_2(sc, XL_W0_EE_CMD, XL_EE_8BIT_READ | EEPROM_8BIT_OFFSET(off + i)); else CSR_WRITE_2(sc, XL_W0_EE_CMD, XL_EE_READ | EEPROM_5BIT_OFFSET(off + i)); err = xl_eeprom_wait(sc); if (err) break; word = CSR_READ_2(sc, XL_W0_EE_DATA); ptr = (u_int16_t *)(dest + (i * 2)); if (swap) *ptr = ntohs(word); else *ptr = word; } return (err ? 1 : 0); } static void xl_rxfilter(struct xl_softc *sc) { if (sc->xl_type == XL_TYPE_905B) xl_rxfilter_90xB(sc); else xl_rxfilter_90x(sc); } /* * NICs older than the 3c905B have only one multicast option, which * is to enable reception of all multicast frames. */ static void xl_rxfilter_90x(struct xl_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; u_int8_t rxfilt; XL_LOCK_ASSERT(sc); ifp = sc->xl_ifp; XL_SEL_WIN(5); rxfilt = CSR_READ_1(sc, XL_W5_RX_FILTER); rxfilt &= ~(XL_RXFILTER_ALLFRAMES | XL_RXFILTER_ALLMULTI | XL_RXFILTER_BROADCAST | XL_RXFILTER_INDIVIDUAL); /* Set the individual bit to receive frames for this host only. */ rxfilt |= XL_RXFILTER_INDIVIDUAL; /* Set capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) rxfilt |= XL_RXFILTER_BROADCAST; /* If we want promiscuous mode, set the allframes bit. */ if (ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI)) { if (ifp->if_flags & IFF_PROMISC) rxfilt |= XL_RXFILTER_ALLFRAMES; if (ifp->if_flags & IFF_ALLMULTI) rxfilt |= XL_RXFILTER_ALLMULTI; } else { if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; rxfilt |= XL_RXFILTER_ALLMULTI; break; } if_maddr_runlock(ifp); } CSR_WRITE_2(sc, XL_COMMAND, rxfilt | XL_CMD_RX_SET_FILT); XL_SEL_WIN(7); } /* * 3c905B adapters have a hash filter that we can program. 
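 *
 * (Illustrative aside: the hash position comes from a big-endian
 * CRC-32 of the multicast address, as computed by the kernel's
 * ether_crc32_be().  A standalone model of that computation --
 * verify against sys/net/if_ethersubr.c before relying on it:
 *
 *	static uint32_t
 *	crc32_be(const uint8_t *buf, size_t len)
 *	{
 *		uint32_t c, crc = 0xffffffffu, carry;
 *		size_t i, j;
 *
 *		for (i = 0; i < len; i++) {
 *			c = buf[i];
 *			for (j = 0; j < 8; j++) {
 *				carry = ((crc & 0x80000000u) ? 1 : 0) ^
 *				    (c & 0x01);
 *				crc <<= 1;
 *				c >>= 1;
 *				if (carry)
 *					crc = (crc ^ 0x04c11db6u) | carry;
 *			}
 *		}
 *		return (crc);
 *	}
 *
 * xl_rxfilter_90xB() below keeps only the low 8 bits of the CRC,
 * selecting one position in the chip's 256-bit hash table.)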
*/ static void xl_rxfilter_90xB(struct xl_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; int i, mcnt; u_int16_t h; u_int8_t rxfilt; XL_LOCK_ASSERT(sc); ifp = sc->xl_ifp; XL_SEL_WIN(5); rxfilt = CSR_READ_1(sc, XL_W5_RX_FILTER); rxfilt &= ~(XL_RXFILTER_ALLFRAMES | XL_RXFILTER_ALLMULTI | XL_RXFILTER_BROADCAST | XL_RXFILTER_INDIVIDUAL | XL_RXFILTER_MULTIHASH); /* Set the individual bit to receive frames for this host only. */ rxfilt |= XL_RXFILTER_INDIVIDUAL; /* Set capture broadcast bit to capture broadcast frames. */ if (ifp->if_flags & IFF_BROADCAST) rxfilt |= XL_RXFILTER_BROADCAST; /* If we want promiscuous mode, set the allframes bit. */ if (ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI)) { if (ifp->if_flags & IFF_PROMISC) rxfilt |= XL_RXFILTER_ALLFRAMES; if (ifp->if_flags & IFF_ALLMULTI) rxfilt |= XL_RXFILTER_ALLMULTI; } else { /* First, zot all the existing hash bits. */ for (i = 0; i < XL_HASHFILT_SIZE; i++) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_SET_HASH | i); /* Now program new ones. */ mcnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; /* * Note: the 3c905B currently only supports a 64-bit * hash table, which means we really only need 6 bits, * but the manual indicates that future chip revisions * will have a 256-bit hash table, hence the routine * is set up to calculate 8 bits of position info in * case we need it some day. * Note II, The Sequel: _CURRENT_ versions of the * 3c905B have a 256 bit hash table. This means we have * to use all 8 bits regardless. On older cards, the * upper 2 bits will be ignored. Grrrr.... */ h = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) & 0xFF; CSR_WRITE_2(sc, XL_COMMAND, h | XL_CMD_RX_SET_HASH | XL_HASH_SET); mcnt++; } if_maddr_runlock(ifp); if (mcnt > 0) rxfilt |= XL_RXFILTER_MULTIHASH; } CSR_WRITE_2(sc, XL_COMMAND, rxfilt | XL_CMD_RX_SET_FILT); XL_SEL_WIN(7); } static void xl_setcfg(struct xl_softc *sc) { u_int32_t icfg; /*XL_LOCK_ASSERT(sc);*/ XL_SEL_WIN(3); icfg = CSR_READ_4(sc, XL_W3_INTERNAL_CFG); icfg &= ~XL_ICFG_CONNECTOR_MASK; if (sc->xl_media & XL_MEDIAOPT_MII || sc->xl_media & XL_MEDIAOPT_BT4) icfg |= (XL_XCVR_MII << XL_ICFG_CONNECTOR_BITS); if (sc->xl_media & XL_MEDIAOPT_BTX) icfg |= (XL_XCVR_AUTO << XL_ICFG_CONNECTOR_BITS); CSR_WRITE_4(sc, XL_W3_INTERNAL_CFG, icfg); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_STOP); } static void xl_setmode(struct xl_softc *sc, int media) { u_int32_t icfg; u_int16_t mediastat; char *pmsg = "", *dmsg = ""; XL_LOCK_ASSERT(sc); XL_SEL_WIN(4); mediastat = CSR_READ_2(sc, XL_W4_MEDIA_STATUS); XL_SEL_WIN(3); icfg = CSR_READ_4(sc, XL_W3_INTERNAL_CFG); if (sc->xl_media & XL_MEDIAOPT_BT) { if (IFM_SUBTYPE(media) == IFM_10_T) { pmsg = "10baseT transceiver"; sc->xl_xcvr = XL_XCVR_10BT; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_10BT << XL_ICFG_CONNECTOR_BITS); mediastat |= XL_MEDIASTAT_LINKBEAT | XL_MEDIASTAT_JABGUARD; mediastat &= ~XL_MEDIASTAT_SQEENB; } } if (sc->xl_media & XL_MEDIAOPT_BFX) { if (IFM_SUBTYPE(media) == IFM_100_FX) { pmsg = "100baseFX port"; sc->xl_xcvr = XL_XCVR_100BFX; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_100BFX << XL_ICFG_CONNECTOR_BITS); mediastat |= XL_MEDIASTAT_LINKBEAT; mediastat &= ~XL_MEDIASTAT_SQEENB; } } if (sc->xl_media & (XL_MEDIAOPT_AUI|XL_MEDIAOPT_10FL)) { if (IFM_SUBTYPE(media) == IFM_10_5) { pmsg = "AUI port"; sc->xl_xcvr = XL_XCVR_AUI; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_AUI << XL_ICFG_CONNECTOR_BITS); mediastat &= 
~(XL_MEDIASTAT_LINKBEAT | XL_MEDIASTAT_JABGUARD); mediastat |= ~XL_MEDIASTAT_SQEENB; } if (IFM_SUBTYPE(media) == IFM_10_FL) { pmsg = "10baseFL transceiver"; sc->xl_xcvr = XL_XCVR_AUI; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_AUI << XL_ICFG_CONNECTOR_BITS); mediastat &= ~(XL_MEDIASTAT_LINKBEAT | XL_MEDIASTAT_JABGUARD); mediastat |= ~XL_MEDIASTAT_SQEENB; } } if (sc->xl_media & XL_MEDIAOPT_BNC) { if (IFM_SUBTYPE(media) == IFM_10_2) { pmsg = "AUI port"; sc->xl_xcvr = XL_XCVR_COAX; icfg &= ~XL_ICFG_CONNECTOR_MASK; icfg |= (XL_XCVR_COAX << XL_ICFG_CONNECTOR_BITS); mediastat &= ~(XL_MEDIASTAT_LINKBEAT | XL_MEDIASTAT_JABGUARD | XL_MEDIASTAT_SQEENB); } } if ((media & IFM_GMASK) == IFM_FDX || IFM_SUBTYPE(media) == IFM_100_FX) { dmsg = "full"; XL_SEL_WIN(3); CSR_WRITE_1(sc, XL_W3_MAC_CTRL, XL_MACCTRL_DUPLEX); } else { dmsg = "half"; XL_SEL_WIN(3); CSR_WRITE_1(sc, XL_W3_MAC_CTRL, (CSR_READ_1(sc, XL_W3_MAC_CTRL) & ~XL_MACCTRL_DUPLEX)); } if (IFM_SUBTYPE(media) == IFM_10_2) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_START); else CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_STOP); CSR_WRITE_4(sc, XL_W3_INTERNAL_CFG, icfg); XL_SEL_WIN(4); CSR_WRITE_2(sc, XL_W4_MEDIA_STATUS, mediastat); DELAY(800); XL_SEL_WIN(7); device_printf(sc->xl_dev, "selecting %s, %s duplex\n", pmsg, dmsg); } static void xl_reset(struct xl_softc *sc) { - register int i; + int i; XL_LOCK_ASSERT(sc); XL_SEL_WIN(0); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RESET | ((sc->xl_flags & XL_FLAG_WEIRDRESET) ? XL_RESETOPT_DISADVFD:0)); /* * If we're using memory mapped register mode, pause briefly * after issuing the reset command before trying to access any * other registers. With my 3c575C CardBus card, failing to do * this results in the system locking up while trying to poll * the command busy bit in the status register. */ if (sc->xl_flags & XL_FLAG_USE_MMIO) DELAY(100000); for (i = 0; i < XL_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_2(sc, XL_STATUS) & XL_STAT_CMDBUSY)) break; } if (i == XL_TIMEOUT) device_printf(sc->xl_dev, "reset didn't complete\n"); /* Reset TX and RX. */ /* Note: the RX reset takes an absurd amount of time * on newer versions of the Tornado chips such as those * on the 3c905CX and newer 3c908C cards. We wait an * extra amount of time so that xl_wait() doesn't complain * and annoy the users. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_RESET); DELAY(100000); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); if (sc->xl_flags & XL_FLAG_INVERT_LED_PWR || sc->xl_flags & XL_FLAG_INVERT_MII_PWR) { XL_SEL_WIN(2); CSR_WRITE_2(sc, XL_W2_RESET_OPTIONS, CSR_READ_2(sc, XL_W2_RESET_OPTIONS) | ((sc->xl_flags & XL_FLAG_INVERT_LED_PWR) ? XL_RESETOPT_INVERT_LED : 0) | ((sc->xl_flags & XL_FLAG_INVERT_MII_PWR) ? XL_RESETOPT_INVERT_MII : 0)); } /* Wait a little while for the chip to get its brains in order. */ DELAY(100000); } /* * Probe for a 3Com Etherlink XL chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int xl_probe(device_t dev) { const struct xl_type *t; t = xl_devs; while (t->xl_name != NULL) { if ((pci_get_vendor(dev) == t->xl_vid) && (pci_get_device(dev) == t->xl_did)) { device_set_desc(dev, t->xl_name); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } /* * This routine is a kludge to work around possible hardware faults * or manufacturing defects that can cause the media options register * (or reset options register, as it's called for the first generation * 3c90x adapters) to return an incorrect result. 
I have encountered * one Dell Latitude laptop docking station with an integrated 3c905-TX * which doesn't have any of the 'mediaopt' bits set. This screws up * the attach routine pretty badly because it doesn't know what media * to look for. If we find ourselves in this predicament, this routine * will try to guess the media options values and warn the user of a * possible manufacturing defect with his adapter/system/whatever. */ static void xl_mediacheck(struct xl_softc *sc) { /* * If some of the media options bits are set, assume they are * correct. If not, try to figure it out down below. * XXX I should check for 10baseFL, but I don't have an adapter * to test with. */ if (sc->xl_media & (XL_MEDIAOPT_MASK & ~XL_MEDIAOPT_VCO)) { /* * Check the XCVR value. If it's not in the normal range * of values, we need to fake it up here. */ if (sc->xl_xcvr <= XL_XCVR_AUTO) return; else { device_printf(sc->xl_dev, "bogus xcvr value in EEPROM (%x)\n", sc->xl_xcvr); device_printf(sc->xl_dev, "choosing new default based on card type\n"); } } else { if (sc->xl_type == XL_TYPE_905B && sc->xl_media & XL_MEDIAOPT_10FL) return; device_printf(sc->xl_dev, "WARNING: no media options bits set in the media options register!!\n"); device_printf(sc->xl_dev, "this could be a manufacturing defect in your adapter or system\n"); device_printf(sc->xl_dev, "attempting to guess media type; you should probably consult your vendor\n"); } xl_choose_xcvr(sc, 1); } static void xl_choose_xcvr(struct xl_softc *sc, int verbose) { u_int16_t devid; /* * Read the device ID from the EEPROM. * This is what's loaded into the PCI device ID register, so it has * to be correct otherwise we wouldn't have gotten this far. */ xl_read_eeprom(sc, (caddr_t)&devid, XL_EE_PRODID, 1, 0); switch (devid) { case TC_DEVICEID_BOOMERANG_10BT: /* 3c900-TPO */ case TC_DEVICEID_KRAKATOA_10BT: /* 3c900B-TPO */ sc->xl_media = XL_MEDIAOPT_BT; sc->xl_xcvr = XL_XCVR_10BT; if (verbose) device_printf(sc->xl_dev, "guessing 10BaseT transceiver\n"); break; case TC_DEVICEID_BOOMERANG_10BT_COMBO: /* 3c900-COMBO */ case TC_DEVICEID_KRAKATOA_10BT_COMBO: /* 3c900B-COMBO */ sc->xl_media = XL_MEDIAOPT_BT|XL_MEDIAOPT_BNC|XL_MEDIAOPT_AUI; sc->xl_xcvr = XL_XCVR_10BT; if (verbose) device_printf(sc->xl_dev, "guessing COMBO (AUI/BNC/TP)\n"); break; case TC_DEVICEID_KRAKATOA_10BT_TPC: /* 3c900B-TPC */ sc->xl_media = XL_MEDIAOPT_BT|XL_MEDIAOPT_BNC; sc->xl_xcvr = XL_XCVR_10BT; if (verbose) device_printf(sc->xl_dev, "guessing TPC (BNC/TP)\n"); break; case TC_DEVICEID_CYCLONE_10FL: /* 3c900B-FL */ sc->xl_media = XL_MEDIAOPT_10FL; sc->xl_xcvr = XL_XCVR_AUI; if (verbose) device_printf(sc->xl_dev, "guessing 10baseFL\n"); break; case TC_DEVICEID_BOOMERANG_10_100BT: /* 3c905-TX */ case TC_DEVICEID_HURRICANE_555: /* 3c555 */ case TC_DEVICEID_HURRICANE_556: /* 3c556 */ case TC_DEVICEID_HURRICANE_556B: /* 3c556B */ case TC_DEVICEID_HURRICANE_575A: /* 3c575TX */ case TC_DEVICEID_HURRICANE_575B: /* 3c575B */ case TC_DEVICEID_HURRICANE_575C: /* 3c575C */ case TC_DEVICEID_HURRICANE_656: /* 3c656 */ case TC_DEVICEID_HURRICANE_656B: /* 3c656B */ case TC_DEVICEID_TORNADO_656C: /* 3c656C */ case TC_DEVICEID_TORNADO_10_100BT_920B: /* 3c920B-EMB */ case TC_DEVICEID_TORNADO_10_100BT_920B_WNM: /* 3c920B-EMB-WNM */ sc->xl_media = XL_MEDIAOPT_MII; sc->xl_xcvr = XL_XCVR_MII; if (verbose) device_printf(sc->xl_dev, "guessing MII\n"); break; case TC_DEVICEID_BOOMERANG_100BT4: /* 3c905-T4 */ case TC_DEVICEID_CYCLONE_10_100BT4: /* 3c905B-T4 */ sc->xl_media = XL_MEDIAOPT_BT4; sc->xl_xcvr = XL_XCVR_MII; if 
(verbose) device_printf(sc->xl_dev, "guessing 100baseT4/MII\n"); break; case TC_DEVICEID_HURRICANE_10_100BT: /* 3c905B-TX */ case TC_DEVICEID_HURRICANE_10_100BT_SERV:/*3c980-TX */ case TC_DEVICEID_TORNADO_10_100BT_SERV: /* 3c980C-TX */ case TC_DEVICEID_HURRICANE_SOHO100TX: /* 3cSOHO100-TX */ case TC_DEVICEID_TORNADO_10_100BT: /* 3c905C-TX */ case TC_DEVICEID_TORNADO_HOMECONNECT: /* 3c450-TX */ sc->xl_media = XL_MEDIAOPT_BTX; sc->xl_xcvr = XL_XCVR_AUTO; if (verbose) device_printf(sc->xl_dev, "guessing 10/100 internal\n"); break; case TC_DEVICEID_CYCLONE_10_100_COMBO: /* 3c905B-COMBO */ sc->xl_media = XL_MEDIAOPT_BTX|XL_MEDIAOPT_BNC|XL_MEDIAOPT_AUI; sc->xl_xcvr = XL_XCVR_AUTO; if (verbose) device_printf(sc->xl_dev, "guessing 10/100 plus BNC/AUI\n"); break; default: device_printf(sc->xl_dev, "unknown device ID: %x -- defaulting to 10baseT\n", devid); sc->xl_media = XL_MEDIAOPT_BT; break; } } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int xl_attach(device_t dev) { u_char eaddr[ETHER_ADDR_LEN]; u_int16_t sinfo2, xcvr[2]; struct xl_softc *sc; struct ifnet *ifp; int media, pmcap; int error = 0, phy, rid, res, unit; uint16_t did; sc = device_get_softc(dev); sc->xl_dev = dev; unit = device_get_unit(dev); mtx_init(&sc->xl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); ifmedia_init(&sc->ifmedia, 0, xl_ifmedia_upd, xl_ifmedia_sts); did = pci_get_device(dev); sc->xl_flags = 0; if (did == TC_DEVICEID_HURRICANE_555) sc->xl_flags |= XL_FLAG_EEPROM_OFFSET_30 | XL_FLAG_PHYOK; if (did == TC_DEVICEID_HURRICANE_556 || did == TC_DEVICEID_HURRICANE_556B) sc->xl_flags |= XL_FLAG_FUNCREG | XL_FLAG_PHYOK | XL_FLAG_EEPROM_OFFSET_30 | XL_FLAG_WEIRDRESET | XL_FLAG_INVERT_LED_PWR | XL_FLAG_INVERT_MII_PWR; if (did == TC_DEVICEID_HURRICANE_555 || did == TC_DEVICEID_HURRICANE_556) sc->xl_flags |= XL_FLAG_8BITROM; if (did == TC_DEVICEID_HURRICANE_556B) sc->xl_flags |= XL_FLAG_NO_XCVR_PWR; if (did == TC_DEVICEID_HURRICANE_575B || did == TC_DEVICEID_HURRICANE_575C || did == TC_DEVICEID_HURRICANE_656B || did == TC_DEVICEID_TORNADO_656C) sc->xl_flags |= XL_FLAG_FUNCREG; if (did == TC_DEVICEID_HURRICANE_575A || did == TC_DEVICEID_HURRICANE_575B || did == TC_DEVICEID_HURRICANE_575C || did == TC_DEVICEID_HURRICANE_656B || did == TC_DEVICEID_TORNADO_656C) sc->xl_flags |= XL_FLAG_PHYOK | XL_FLAG_EEPROM_OFFSET_30 | XL_FLAG_8BITROM; if (did == TC_DEVICEID_HURRICANE_656) sc->xl_flags |= XL_FLAG_FUNCREG | XL_FLAG_PHYOK; if (did == TC_DEVICEID_HURRICANE_575B) sc->xl_flags |= XL_FLAG_INVERT_LED_PWR; if (did == TC_DEVICEID_HURRICANE_575C) sc->xl_flags |= XL_FLAG_INVERT_MII_PWR; if (did == TC_DEVICEID_TORNADO_656C) sc->xl_flags |= XL_FLAG_INVERT_MII_PWR; if (did == TC_DEVICEID_HURRICANE_656 || did == TC_DEVICEID_HURRICANE_656B) sc->xl_flags |= XL_FLAG_INVERT_MII_PWR | XL_FLAG_INVERT_LED_PWR; if (did == TC_DEVICEID_TORNADO_10_100BT_920B || did == TC_DEVICEID_TORNADO_10_100BT_920B_WNM) sc->xl_flags |= XL_FLAG_PHYOK; switch (did) { case TC_DEVICEID_BOOMERANG_10_100BT: /* 3c905-TX */ case TC_DEVICEID_HURRICANE_575A: case TC_DEVICEID_HURRICANE_575B: case TC_DEVICEID_HURRICANE_575C: sc->xl_flags |= XL_FLAG_NO_MMIO; break; default: break; } /* * Map control/status registers. 
*/ pci_enable_busmaster(dev); if ((sc->xl_flags & XL_FLAG_NO_MMIO) == 0) { rid = XL_PCI_LOMEM; res = SYS_RES_MEMORY; sc->xl_res = bus_alloc_resource_any(dev, res, &rid, RF_ACTIVE); } if (sc->xl_res != NULL) { sc->xl_flags |= XL_FLAG_USE_MMIO; if (bootverbose) device_printf(dev, "using memory mapped I/O\n"); } else { rid = XL_PCI_LOIO; res = SYS_RES_IOPORT; sc->xl_res = bus_alloc_resource_any(dev, res, &rid, RF_ACTIVE); if (sc->xl_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } if (bootverbose) device_printf(dev, "using port I/O\n"); } sc->xl_btag = rman_get_bustag(sc->xl_res); sc->xl_bhandle = rman_get_bushandle(sc->xl_res); if (sc->xl_flags & XL_FLAG_FUNCREG) { rid = XL_PCI_FUNCMEM; sc->xl_fres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->xl_fres == NULL) { device_printf(dev, "couldn't map funcreg memory\n"); error = ENXIO; goto fail; } sc->xl_ftag = rman_get_bustag(sc->xl_fres); sc->xl_fhandle = rman_get_bushandle(sc->xl_fres); } /* Allocate interrupt */ rid = 0; sc->xl_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->xl_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Initialize interface name. */ ifp = sc->xl_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* Reset the adapter. */ XL_LOCK(sc); xl_reset(sc); XL_UNLOCK(sc); /* * Get station address from the EEPROM. */ if (xl_read_eeprom(sc, (caddr_t)&eaddr, XL_EE_OEM_ADR0, 3, 1)) { device_printf(dev, "failed to read station address\n"); error = ENXIO; goto fail; } callout_init_mtx(&sc->xl_tick_callout, &sc->xl_mtx, 0); TASK_INIT(&sc->xl_task, 0, xl_rxeof_task, sc); /* * Now allocate a tag for the DMA descriptor lists and a chunk * of DMA-able memory based on the tag. Also obtain the DMA * addresses of the RX and TX ring, which we'll need later. * All of our lists are allocated as a contiguous block * of memory. 
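 *
 * (Illustrative aside: because each ring tag below is created with
 * nsegments = 1, the load callback only ever sees one segment and
 * can simply capture its bus address.  A hypothetical model of the
 * xl_dma_map_addr() idiom, not driver code:
 *
 *	struct dma_seg { uint64_t ds_addr, ds_len; };	// mock segment
 *
 *	static void
 *	map_addr_cb(void *arg, struct dma_seg *segs, int nseg, int err)
 *	{
 *		uint64_t *paddr = arg;
 *
 *		if (err == 0 && nseg == 1)
 *			*paddr = segs[0].ds_addr;
 *	}
 *
 * The caller hands bus_dmamap_load() the address of the _dmaaddr
 * field as arg, so once the load completes the physical ring address
 * is ready to be programmed into the chip.)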
*/ error = bus_dma_tag_create(bus_get_dma_tag(dev), 8, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, XL_RX_LIST_SZ, 1, XL_RX_LIST_SZ, 0, NULL, NULL, &sc->xl_ldata.xl_rx_tag); if (error) { device_printf(dev, "failed to allocate rx dma tag\n"); goto fail; } error = bus_dmamem_alloc(sc->xl_ldata.xl_rx_tag, (void **)&sc->xl_ldata.xl_rx_list, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->xl_ldata.xl_rx_dmamap); if (error) { device_printf(dev, "no memory for rx list buffers!\n"); bus_dma_tag_destroy(sc->xl_ldata.xl_rx_tag); sc->xl_ldata.xl_rx_tag = NULL; goto fail; } error = bus_dmamap_load(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, sc->xl_ldata.xl_rx_list, XL_RX_LIST_SZ, xl_dma_map_addr, &sc->xl_ldata.xl_rx_dmaaddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "cannot get dma address of the rx ring!\n"); bus_dmamem_free(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_list, sc->xl_ldata.xl_rx_dmamap); bus_dma_tag_destroy(sc->xl_ldata.xl_rx_tag); sc->xl_ldata.xl_rx_tag = NULL; goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(dev), 8, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, XL_TX_LIST_SZ, 1, XL_TX_LIST_SZ, 0, NULL, NULL, &sc->xl_ldata.xl_tx_tag); if (error) { device_printf(dev, "failed to allocate tx dma tag\n"); goto fail; } error = bus_dmamem_alloc(sc->xl_ldata.xl_tx_tag, (void **)&sc->xl_ldata.xl_tx_list, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->xl_ldata.xl_tx_dmamap); if (error) { device_printf(dev, "no memory for list buffers!\n"); bus_dma_tag_destroy(sc->xl_ldata.xl_tx_tag); sc->xl_ldata.xl_tx_tag = NULL; goto fail; } error = bus_dmamap_load(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap, sc->xl_ldata.xl_tx_list, XL_TX_LIST_SZ, xl_dma_map_addr, &sc->xl_ldata.xl_tx_dmaaddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "cannot get dma address of the tx ring!\n"); bus_dmamem_free(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_list, sc->xl_ldata.xl_tx_dmamap); bus_dma_tag_destroy(sc->xl_ldata.xl_tx_tag); sc->xl_ldata.xl_tx_tag = NULL; goto fail; } /* * Allocate a DMA tag for the mapping of mbufs. */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * XL_MAXFRAGS, XL_MAXFRAGS, MCLBYTES, 0, NULL, NULL, &sc->xl_mtag); if (error) { device_printf(dev, "failed to allocate mbuf dma tag\n"); goto fail; } /* We need a spare DMA map for the RX ring. */ error = bus_dmamap_create(sc->xl_mtag, 0, &sc->xl_tmpmap); if (error) goto fail; /* * Figure out the card type. 3c905B adapters have the * 'supportsNoTxLength' bit set in the capabilities * word in the EEPROM. * Note: my 3c575C CardBus card lies. It returns a value * of 0x1578 for its capabilities word, which is somewhat * nonsensical. Another way to distinguish a 3c90x chip * from a 3c90xB/C chip is to check for the 'supportsLargePackets' * bit. This will only be set for 3c90x boomerang chips. */ xl_read_eeprom(sc, (caddr_t)&sc->xl_caps, XL_EE_CAPS, 1, 0); if (sc->xl_caps & XL_CAPS_NO_TXLENGTH || !(sc->xl_caps & XL_CAPS_LARGE_PKTS)) sc->xl_type = XL_TYPE_905B; else sc->xl_type = XL_TYPE_90X; /* Check availability of WOL. */ if ((sc->xl_caps & XL_CAPS_PWRMGMT) != 0 && pci_find_cap(dev, PCIY_PMG, &pmcap) == 0) { sc->xl_pmcap = pmcap; sc->xl_flags |= XL_FLAG_WOL; sinfo2 = 0; xl_read_eeprom(sc, (caddr_t)&sinfo2, XL_EE_SOFTINFO2, 1, 0); if ((sinfo2 & XL_SINFO2_AUX_WOL_CON) == 0 && bootverbose) device_printf(dev, "No auxiliary remote wakeup connector!\n"); } /* Set the TX start threshold for best performance.
*/ sc->xl_tx_thresh = XL_MIN_FRAMELEN; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xl_ioctl; ifp->if_capabilities = IFCAP_VLAN_MTU; if (sc->xl_type == XL_TYPE_905B) { ifp->if_hwassist = XL905B_CSUM_FEATURES; #ifdef XL905B_TXCSUM_BROKEN ifp->if_capabilities |= IFCAP_RXCSUM; #else ifp->if_capabilities |= IFCAP_HWCSUM; #endif } if ((sc->xl_flags & XL_FLAG_WOL) != 0) ifp->if_capabilities |= IFCAP_WOL_MAGIC; ifp->if_capenable = ifp->if_capabilities; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif ifp->if_start = xl_start; ifp->if_init = xl_init; IFQ_SET_MAXLEN(&ifp->if_snd, XL_TX_LIST_CNT - 1); ifp->if_snd.ifq_drv_maxlen = XL_TX_LIST_CNT - 1; IFQ_SET_READY(&ifp->if_snd); /* * Now we have to see what sort of media we have. * This includes probing for an MII interface and a * possible PHY. */ XL_SEL_WIN(3); sc->xl_media = CSR_READ_2(sc, XL_W3_MEDIA_OPT); if (bootverbose) device_printf(dev, "media options word: %x\n", sc->xl_media); xl_read_eeprom(sc, (char *)&xcvr, XL_EE_ICFG_0, 2, 0); sc->xl_xcvr = xcvr[0] | xcvr[1] << 16; sc->xl_xcvr &= XL_ICFG_CONNECTOR_MASK; sc->xl_xcvr >>= XL_ICFG_CONNECTOR_BITS; xl_mediacheck(sc); if (sc->xl_media & XL_MEDIAOPT_MII || sc->xl_media & XL_MEDIAOPT_BTX || sc->xl_media & XL_MEDIAOPT_BT4) { if (bootverbose) device_printf(dev, "found MII/AUTO\n"); xl_setcfg(sc); /* * Attach PHYs only at MII address 24 if !XL_FLAG_PHYOK. * This is to guard against problems with certain 3Com ASIC * revisions that incorrectly map the internal transceiver * control registers at all MII addresses. */ phy = MII_PHY_ANY; if ((sc->xl_flags & XL_FLAG_PHYOK) == 0) phy = 24; error = mii_attach(dev, &sc->xl_miibus, ifp, xl_ifmedia_upd, xl_ifmedia_sts, BMSR_DEFCAPMASK, phy, MII_OFFSET_ANY, sc->xl_type == XL_TYPE_905B ? MIIF_DOPAUSE : 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } goto done; } /* * Sanity check. If the user has selected "auto" and this isn't * a 10/100 card of some kind, we need to force the transceiver * type to something sane. */ if (sc->xl_xcvr == XL_XCVR_AUTO) xl_choose_xcvr(sc, bootverbose); /* * Do ifmedia setup. */ if (sc->xl_media & XL_MEDIAOPT_BT) { if (bootverbose) device_printf(dev, "found 10baseT\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_HDX, 0, NULL); if (sc->xl_caps & XL_CAPS_FULL_DUPLEX) ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); } if (sc->xl_media & (XL_MEDIAOPT_AUI|XL_MEDIAOPT_10FL)) { /* * Check for a 10baseFL board in disguise.
*/ if (sc->xl_type == XL_TYPE_905B && sc->xl_media == XL_MEDIAOPT_10FL) { if (bootverbose) device_printf(dev, "found 10baseFL\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_FL, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_FL|IFM_HDX, 0, NULL); if (sc->xl_caps & XL_CAPS_FULL_DUPLEX) ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_FL|IFM_FDX, 0, NULL); } else { if (bootverbose) device_printf(dev, "found AUI\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_5, 0, NULL); } } if (sc->xl_media & XL_MEDIAOPT_BNC) { if (bootverbose) device_printf(dev, "found BNC\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_2, 0, NULL); } if (sc->xl_media & XL_MEDIAOPT_BFX) { if (bootverbose) device_printf(dev, "found 100baseFX\n"); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_FX, 0, NULL); } media = IFM_ETHER|IFM_100_TX|IFM_FDX; xl_choose_media(sc, &media); if (sc->xl_miibus == NULL) ifmedia_set(&sc->ifmedia, media); done: if (sc->xl_flags & XL_FLAG_NO_XCVR_PWR) { XL_SEL_WIN(0); CSR_WRITE_2(sc, XL_W0_MFG_ID, XL_NO_XCVR_PWR_MAGICBITS); } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); error = bus_setup_intr(dev, sc->xl_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, xl_intr, sc, &sc->xl_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); goto fail; } fail: if (error) xl_detach(dev); return (error); } /* * Choose a default media. * XXX This is a leaf function only called by xl_attach() and * acquires/releases the non-recursible driver mutex to * satisfy lock assertions. */ static void xl_choose_media(struct xl_softc *sc, int *media) { XL_LOCK(sc); switch (sc->xl_xcvr) { case XL_XCVR_10BT: *media = IFM_ETHER|IFM_10_T; xl_setmode(sc, *media); break; case XL_XCVR_AUI: if (sc->xl_type == XL_TYPE_905B && sc->xl_media == XL_MEDIAOPT_10FL) { *media = IFM_ETHER|IFM_10_FL; xl_setmode(sc, *media); } else { *media = IFM_ETHER|IFM_10_5; xl_setmode(sc, *media); } break; case XL_XCVR_COAX: *media = IFM_ETHER|IFM_10_2; xl_setmode(sc, *media); break; case XL_XCVR_AUTO: case XL_XCVR_100BTX: case XL_XCVR_MII: /* Chosen by miibus */ break; case XL_XCVR_100BFX: *media = IFM_ETHER|IFM_100_FX; break; default: device_printf(sc->xl_dev, "unknown XCVR type: %d\n", sc->xl_xcvr); /* * This will probably be wrong, but it prevents * the ifmedia code from panicking. */ *media = IFM_ETHER|IFM_10_T; break; } XL_UNLOCK(sc); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. 
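 *
 * (Illustrative aside; a minimal hypothetical model of the "free
 * only what was allocated" discipline described above, not driver
 * code:
 *
 *	struct res { void *irq, *ring; };	// made-up handles
 *
 *	static void
 *	teardown(struct res *r)
 *	{
 *		if (r->irq != NULL) {
 *			release(r->irq);	// stand-in for bus_release_*
 *			r->irq = NULL;
 *		}
 *		if (r->ring != NULL) {
 *			release(r->ring);
 *			r->ring = NULL;
 *		}
 *	}
 *
 * Testing each handle and resetting it afterwards makes the routine
 * safe from both the attach-failure path and a normal detach.)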
*/ static int xl_detach(device_t dev) { struct xl_softc *sc; struct ifnet *ifp; int rid, res; sc = device_get_softc(dev); ifp = sc->xl_ifp; KASSERT(mtx_initialized(&sc->xl_mtx), ("xl mutex not initialized")); #ifdef DEVICE_POLLING if (ifp && ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (sc->xl_flags & XL_FLAG_USE_MMIO) { rid = XL_PCI_LOMEM; res = SYS_RES_MEMORY; } else { rid = XL_PCI_LOIO; res = SYS_RES_IOPORT; } /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { XL_LOCK(sc); xl_stop(sc); XL_UNLOCK(sc); taskqueue_drain(taskqueue_swi, &sc->xl_task); callout_drain(&sc->xl_tick_callout); ether_ifdetach(ifp); } if (sc->xl_miibus) device_delete_child(dev, sc->xl_miibus); bus_generic_detach(dev); ifmedia_removeall(&sc->ifmedia); if (sc->xl_intrhand) bus_teardown_intr(dev, sc->xl_irq, sc->xl_intrhand); if (sc->xl_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->xl_irq); if (sc->xl_fres != NULL) bus_release_resource(dev, SYS_RES_MEMORY, XL_PCI_FUNCMEM, sc->xl_fres); if (sc->xl_res) bus_release_resource(dev, res, rid, sc->xl_res); if (ifp) if_free(ifp); if (sc->xl_mtag) { bus_dmamap_destroy(sc->xl_mtag, sc->xl_tmpmap); bus_dma_tag_destroy(sc->xl_mtag); } if (sc->xl_ldata.xl_rx_tag) { bus_dmamap_unload(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap); bus_dmamem_free(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_list, sc->xl_ldata.xl_rx_dmamap); bus_dma_tag_destroy(sc->xl_ldata.xl_rx_tag); } if (sc->xl_ldata.xl_tx_tag) { bus_dmamap_unload(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap); bus_dmamem_free(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_list, sc->xl_ldata.xl_tx_dmamap); bus_dma_tag_destroy(sc->xl_ldata.xl_tx_tag); } mtx_destroy(&sc->xl_mtx); return (0); } /* * Initialize the transmit descriptors. */ static int xl_list_tx_init(struct xl_softc *sc) { struct xl_chain_data *cd; struct xl_list_data *ld; int error, i; XL_LOCK_ASSERT(sc); cd = &sc->xl_cdata; ld = &sc->xl_ldata; for (i = 0; i < XL_TX_LIST_CNT; i++) { cd->xl_tx_chain[i].xl_ptr = &ld->xl_tx_list[i]; error = bus_dmamap_create(sc->xl_mtag, 0, &cd->xl_tx_chain[i].xl_map); if (error) return (error); cd->xl_tx_chain[i].xl_phys = ld->xl_tx_dmaaddr + i * sizeof(struct xl_list); if (i == (XL_TX_LIST_CNT - 1)) cd->xl_tx_chain[i].xl_next = NULL; else cd->xl_tx_chain[i].xl_next = &cd->xl_tx_chain[i + 1]; } cd->xl_tx_free = &cd->xl_tx_chain[0]; cd->xl_tx_tail = cd->xl_tx_head = NULL; bus_dmamap_sync(ld->xl_tx_tag, ld->xl_tx_dmamap, BUS_DMASYNC_PREWRITE); return (0); } /* * Initialize the transmit descriptors. 
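 *
 * (Illustrative aside: the 90xB variant below links the descriptor
 * array into a ring that is closed in both directions.  A standalone
 * model of the wiring with hypothetical names, not driver code:
 *
 *	#define CNT 8		// stands in for XL_TX_LIST_CNT
 *	struct desc { struct desc *next, *prev; };
 *
 *	static void
 *	ring_init(struct desc d[CNT])
 *	{
 *		int i;
 *
 *		for (i = 0; i < CNT; i++) {
 *			d[i].next = &d[(i + 1) % CNT];
 *			d[i].prev = &d[(i + CNT - 1) % CNT];
 *		}
 *	}
 *
 * With the ring closed, the producer and consumer indices can chase
 * each other indefinitely without any end-of-list NULL checks.)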
*/ static int xl_list_tx_init_90xB(struct xl_softc *sc) { struct xl_chain_data *cd; struct xl_list_data *ld; int error, i; XL_LOCK_ASSERT(sc); cd = &sc->xl_cdata; ld = &sc->xl_ldata; for (i = 0; i < XL_TX_LIST_CNT; i++) { cd->xl_tx_chain[i].xl_ptr = &ld->xl_tx_list[i]; error = bus_dmamap_create(sc->xl_mtag, 0, &cd->xl_tx_chain[i].xl_map); if (error) return (error); cd->xl_tx_chain[i].xl_phys = ld->xl_tx_dmaaddr + i * sizeof(struct xl_list); if (i == (XL_TX_LIST_CNT - 1)) cd->xl_tx_chain[i].xl_next = &cd->xl_tx_chain[0]; else cd->xl_tx_chain[i].xl_next = &cd->xl_tx_chain[i + 1]; if (i == 0) cd->xl_tx_chain[i].xl_prev = &cd->xl_tx_chain[XL_TX_LIST_CNT - 1]; else cd->xl_tx_chain[i].xl_prev = &cd->xl_tx_chain[i - 1]; } bzero(ld->xl_tx_list, XL_TX_LIST_SZ); ld->xl_tx_list[0].xl_status = htole32(XL_TXSTAT_EMPTY); cd->xl_tx_prod = 1; cd->xl_tx_cons = 1; cd->xl_tx_cnt = 0; bus_dmamap_sync(ld->xl_tx_tag, ld->xl_tx_dmamap, BUS_DMASYNC_PREWRITE); return (0); } /* * Initialize the RX descriptors and allocate mbufs for them. Note that * we arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. */ static int xl_list_rx_init(struct xl_softc *sc) { struct xl_chain_data *cd; struct xl_list_data *ld; int error, i, next; u_int32_t nextptr; XL_LOCK_ASSERT(sc); cd = &sc->xl_cdata; ld = &sc->xl_ldata; for (i = 0; i < XL_RX_LIST_CNT; i++) { cd->xl_rx_chain[i].xl_ptr = &ld->xl_rx_list[i]; error = bus_dmamap_create(sc->xl_mtag, 0, &cd->xl_rx_chain[i].xl_map); if (error) return (error); error = xl_newbuf(sc, &cd->xl_rx_chain[i]); if (error) return (error); if (i == (XL_RX_LIST_CNT - 1)) next = 0; else next = i + 1; nextptr = ld->xl_rx_dmaaddr + next * sizeof(struct xl_list_onefrag); cd->xl_rx_chain[i].xl_next = &cd->xl_rx_chain[next]; ld->xl_rx_list[i].xl_next = htole32(nextptr); } bus_dmamap_sync(ld->xl_rx_tag, ld->xl_rx_dmamap, BUS_DMASYNC_PREWRITE); cd->xl_rx_head = &cd->xl_rx_chain[0]; return (0); } /* * Initialize an RX descriptor and attach an MBUF cluster. * If we fail to do so, we need to leave the old mbuf and * the old DMA map untouched so that it can be reused. */ static int xl_newbuf(struct xl_softc *sc, struct xl_chain_onefrag *c) { struct mbuf *m_new = NULL; bus_dmamap_t map; bus_dma_segment_t segs[1]; int error, nseg; XL_LOCK_ASSERT(sc); m_new = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m_new == NULL) return (ENOBUFS); m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; /* Force longword alignment for packet payload. 
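 *
 * (Note: ETHER_ALIGN is 2 because the Ethernet header is 14 bytes;
 * starting the frame two bytes into the cluster puts the IP header
 * that follows on a longword boundary.  As a standalone C11 sanity
 * check, illustrative only:
 *
 *	_Static_assert((2 + 14) % 4 == 0, "IP header not aligned");
 *
 * which is exactly what the m_adj() below arranges.)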
*/ m_adj(m_new, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->xl_mtag, sc->xl_tmpmap, m_new, segs, &nseg, BUS_DMA_NOWAIT); if (error) { m_freem(m_new); device_printf(sc->xl_dev, "can't map mbuf (error %d)\n", error); return (error); } KASSERT(nseg == 1, ("%s: too many DMA segments (%d)", __func__, nseg)); bus_dmamap_unload(sc->xl_mtag, c->xl_map); map = c->xl_map; c->xl_map = sc->xl_tmpmap; sc->xl_tmpmap = map; c->xl_mbuf = m_new; c->xl_ptr->xl_frag.xl_len = htole32(m_new->m_len | XL_LAST_FRAG); c->xl_ptr->xl_frag.xl_addr = htole32(segs->ds_addr); c->xl_ptr->xl_status = 0; bus_dmamap_sync(sc->xl_mtag, c->xl_map, BUS_DMASYNC_PREREAD); return (0); } static int xl_rx_resync(struct xl_softc *sc) { struct xl_chain_onefrag *pos; int i; XL_LOCK_ASSERT(sc); pos = sc->xl_cdata.xl_rx_head; for (i = 0; i < XL_RX_LIST_CNT; i++) { if (pos->xl_ptr->xl_status) break; pos = pos->xl_next; } if (i == XL_RX_LIST_CNT) return (0); sc->xl_cdata.xl_rx_head = pos; return (EAGAIN); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. */ static int xl_rxeof(struct xl_softc *sc) { struct mbuf *m; struct ifnet *ifp = sc->xl_ifp; struct xl_chain_onefrag *cur_rx; int total_len; int rx_npkts = 0; u_int32_t rxstat; XL_LOCK_ASSERT(sc); again: bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_POSTREAD); while ((rxstat = le32toh(sc->xl_cdata.xl_rx_head->xl_ptr->xl_status))) { #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { if (sc->rxcycles <= 0) break; sc->rxcycles--; } #endif cur_rx = sc->xl_cdata.xl_rx_head; sc->xl_cdata.xl_rx_head = cur_rx->xl_next; total_len = rxstat & XL_RXSTAT_LENMASK; rx_npkts++; /* * Since we have told the chip to allow large frames, * we need to trap giant frame errors in software. We allow * a little more than the normal frame size to account for * frames with VLAN tags. */ if (total_len > XL_MAX_FRAMELEN) rxstat |= (XL_RXSTAT_UP_ERROR|XL_RXSTAT_OVERSIZE); /* * If an error occurs, update stats, clear the * status word and leave the mbuf cluster in place: * it should simply get re-used next time this descriptor * comes up in the ring. */ if (rxstat & XL_RXSTAT_UP_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->xl_ptr->xl_status = 0; bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_PREWRITE); continue; } /* * If the error bit was not set, the upload complete * bit should be set which means we have a valid packet. * If not, something truly strange has happened. */ if (!(rxstat & XL_RXSTAT_UP_CMPLT)) { device_printf(sc->xl_dev, "bad receive status -- packet dropped\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->xl_ptr->xl_status = 0; bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_PREWRITE); continue; } /* No errors; receive the packet. */ bus_dmamap_sync(sc->xl_mtag, cur_rx->xl_map, BUS_DMASYNC_POSTREAD); m = cur_rx->xl_mbuf; /* * Try to conjure up a new mbuf cluster. If that * fails, it means we have an out of memory condition and * should leave the buffer in place and continue. This will * result in a lost packet, but there's little else we * can do in this situation. 
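 *
 * (Illustrative aside; the allocate-first swap can be modeled in a
 * few lines of hypothetical standalone C, not driver code:
 *
 *	static void *
 *	rx_swap(void **slot)		// slot = ring entry buffer
 *	{
 *		void *fresh, *full;
 *
 *		fresh = alloc_buf();	// stand-in for m_getcl()
 *		if (fresh == NULL)
 *			return (NULL);	// drop frame, ring keeps old buffer
 *		full = *slot;
 *		*slot = fresh;
 *		return (full);		// now safe to pass up the stack
 *	}
 *
 * Allocating the replacement before surrendering the old buffer is
 * what keeps the RX ring fully populated under memory pressure.)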
*/ if (xl_newbuf(sc, cur_rx)) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); cur_rx->xl_ptr->xl_status = 0; bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_PREWRITE); continue; } bus_dmamap_sync(sc->xl_ldata.xl_rx_tag, sc->xl_ldata.xl_rx_dmamap, BUS_DMASYNC_PREWRITE); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = total_len; if (ifp->if_capenable & IFCAP_RXCSUM) { /* Do IP checksum checking. */ if (rxstat & XL_RXSTAT_IPCKOK) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & XL_RXSTAT_IPCKERR)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if ((rxstat & XL_RXSTAT_TCPCOK && !(rxstat & XL_RXSTAT_TCPCKERR)) || (rxstat & XL_RXSTAT_UDPCKOK && !(rxstat & XL_RXSTAT_UDPCKERR))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } XL_UNLOCK(sc); (*ifp->if_input)(ifp, m); XL_LOCK(sc); /* * If we are running from the taskqueue, the interface * might have been stopped while we were passing the last * packet up the network stack. */ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (rx_npkts); } /* * Handle the 'end of channel' condition. When the upload * engine hits the end of the RX ring, it will stall. This * is our cue to flush the RX ring, reload the uplist pointer * register and unstall the engine. * XXX This is actually a little goofy. With the ThunderLAN * chip, you get an interrupt when the receiver hits the end * of the receive ring, which tells you exactly when you * need to reload the ring pointer. Here we have to * fake it. I'm mad at myself for not being clever enough * to avoid the use of a goto here. */ if (CSR_READ_4(sc, XL_UPLIST_PTR) == 0 || CSR_READ_4(sc, XL_UPLIST_STATUS) & XL_PKTSTAT_UP_STALLED) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_UP_STALL); xl_wait(sc); CSR_WRITE_4(sc, XL_UPLIST_PTR, sc->xl_ldata.xl_rx_dmaaddr); sc->xl_cdata.xl_rx_head = &sc->xl_cdata.xl_rx_chain[0]; CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_UP_UNSTALL); goto again; } return (rx_npkts); } /* * Taskqueue wrapper for xl_rxeof(). */ static void xl_rxeof_task(void *arg, int pending) { struct xl_softc *sc = (struct xl_softc *)arg; XL_LOCK(sc); if (sc->xl_ifp->if_drv_flags & IFF_DRV_RUNNING) xl_rxeof(sc); XL_UNLOCK(sc); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void xl_txeof(struct xl_softc *sc) { struct xl_chain *cur_tx; struct ifnet *ifp = sc->xl_ifp; XL_LOCK_ASSERT(sc); /* * Go through our tx list and free mbufs for those * frames that have been uploaded. Note: the 3c905B * sets a special bit in the status word to let us * know that a frame has been downloaded, but the * original 3c900/3c905 adapters don't do that. * Consequently, we have to use a different test if * xl_type != XL_TYPE_905B.
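 *
 * (Illustrative aside: the 905B-style test can be modeled as an
 * index ring with a hardware completion bit; hypothetical standalone
 * C, not driver code:
 *
 *	#define CNT 8			// stands in for XL_TX_LIST_CNT
 *	#define DL_COMPLETE 0x10000u	// made-up completion flag
 *
 *	static int
 *	tx_reclaim(uint32_t status[CNT], int *cons, int prod)
 *	{
 *		int n = 0;
 *
 *		while (*cons != prod) {
 *			if ((status[*cons] & DL_COMPLETE) == 0)
 *				break;	// hardware not done yet
 *			*cons = (*cons + 1) % CNT;	// free the mbuf here
 *			n++;
 *		}
 *		return (n);
 *	}
 *
 * That is essentially xl_txeof_90xB(); the loop below instead polls
 * XL_DOWNLIST_PTR because the older chips lack the status bit.)
 */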
*/ while (sc->xl_cdata.xl_tx_head != NULL) { cur_tx = sc->xl_cdata.xl_tx_head; if (CSR_READ_4(sc, XL_DOWNLIST_PTR)) break; sc->xl_cdata.xl_tx_head = cur_tx->xl_next; bus_dmamap_sync(sc->xl_mtag, cur_tx->xl_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->xl_mtag, cur_tx->xl_map); m_freem(cur_tx->xl_mbuf); cur_tx->xl_mbuf = NULL; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; cur_tx->xl_next = sc->xl_cdata.xl_tx_free; sc->xl_cdata.xl_tx_free = cur_tx; } if (sc->xl_cdata.xl_tx_head == NULL) { sc->xl_wdog_timer = 0; sc->xl_cdata.xl_tx_tail = NULL; } else { if (CSR_READ_4(sc, XL_DMACTL) & XL_DMACTL_DOWN_STALLED || !CSR_READ_4(sc, XL_DOWNLIST_PTR)) { CSR_WRITE_4(sc, XL_DOWNLIST_PTR, sc->xl_cdata.xl_tx_head->xl_phys); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); } } } static void xl_txeof_90xB(struct xl_softc *sc) { struct xl_chain *cur_tx = NULL; struct ifnet *ifp = sc->xl_ifp; int idx; XL_LOCK_ASSERT(sc); bus_dmamap_sync(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap, BUS_DMASYNC_POSTREAD); idx = sc->xl_cdata.xl_tx_cons; while (idx != sc->xl_cdata.xl_tx_prod) { cur_tx = &sc->xl_cdata.xl_tx_chain[idx]; if (!(le32toh(cur_tx->xl_ptr->xl_status) & XL_TXSTAT_DL_COMPLETE)) break; if (cur_tx->xl_mbuf != NULL) { bus_dmamap_sync(sc->xl_mtag, cur_tx->xl_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->xl_mtag, cur_tx->xl_map); m_freem(cur_tx->xl_mbuf); cur_tx->xl_mbuf = NULL; } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); sc->xl_cdata.xl_tx_cnt--; XL_INC(idx, XL_TX_LIST_CNT); } if (sc->xl_cdata.xl_tx_cnt == 0) sc->xl_wdog_timer = 0; sc->xl_cdata.xl_tx_cons = idx; if (cur_tx != NULL) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } /* * TX 'end of channel' interrupt handler. Actually, we should * only get a 'TX complete' interrupt if there's a transmit error, * so this is really the TX error handler. */ static void xl_txeoc(struct xl_softc *sc) { u_int8_t txstat; XL_LOCK_ASSERT(sc); while ((txstat = CSR_READ_1(sc, XL_TX_STATUS))) { if (txstat & XL_TXSTATUS_UNDERRUN || txstat & XL_TXSTATUS_JABBER || txstat & XL_TXSTATUS_RECLAIM) { device_printf(sc->xl_dev, "transmission error: 0x%02x\n", txstat); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); if (sc->xl_type == XL_TYPE_905B) { if (sc->xl_cdata.xl_tx_cnt) { int i; struct xl_chain *c; i = sc->xl_cdata.xl_tx_cons; c = &sc->xl_cdata.xl_tx_chain[i]; CSR_WRITE_4(sc, XL_DOWNLIST_PTR, c->xl_phys); CSR_WRITE_1(sc, XL_DOWN_POLL, 64); sc->xl_wdog_timer = 5; } } else { if (sc->xl_cdata.xl_tx_head != NULL) { CSR_WRITE_4(sc, XL_DOWNLIST_PTR, sc->xl_cdata.xl_tx_head->xl_phys); sc->xl_wdog_timer = 5; } } /* * Remember to set this for the * first generation 3c90X chips. */ CSR_WRITE_1(sc, XL_TX_FREETHRESH, XL_PACKET_SIZE >> 8); if (txstat & XL_TXSTATUS_UNDERRUN && sc->xl_tx_thresh < XL_PACKET_SIZE) { sc->xl_tx_thresh += XL_MIN_FRAMELEN; device_printf(sc->xl_dev, "tx underrun, increasing tx start threshold to %d bytes\n", sc->xl_tx_thresh); } CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_SET_START|sc->xl_tx_thresh); if (sc->xl_type == XL_TYPE_905B) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_SET_TX_RECLAIM|(XL_PACKET_SIZE >> 4)); } CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_ENABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); } else { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_ENABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); } /* * Write an arbitrary byte to the TX_STATUS register * to clear this interrupt/error and advance to the next.
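xl_txeof_90xB() above reaps finished transmissions by advancing a consumer index toward the producer index and stopping at the first descriptor the chip has not marked complete. A self-contained sketch of that bookkeeping, with hypothetical constants in place of XL_TX_LIST_CNT and XL_TXSTAT_DL_COMPLETE:

#include <stdint.h>

#define TX_LIST_CNT 256                 /* stands in for XL_TX_LIST_CNT */
#define TX_INC(x)   ((x) = ((x) + 1) % TX_LIST_CNT)     /* like XL_INC() */

#define TXSTAT_DL_COMPLETE 0x00010000   /* hypothetical completion bit */

struct tx_slot {
	volatile uint32_t status;       /* descriptor status, NIC-written */
	void *pkt;                      /* attached packet buffer, if any */
};

/*
 * Reap finished descriptors: walk from the consumer toward the
 * producer, freeing buffers until a descriptor without the completion
 * bit is found.  Returns the number of slots reclaimed.
 */
static int
tx_reap(struct tx_slot ring[TX_LIST_CNT], int *cons, int prod,
    void (*free_pkt)(void *))
{
	int n = 0;

	while (*cons != prod) {
		struct tx_slot *slot = &ring[*cons];

		if (!(slot->status & TXSTAT_DL_COMPLETE))
			break;
		if (slot->pkt != NULL) {
			free_pkt(slot->pkt);
			slot->pkt = NULL;
		}
		TX_INC(*cons);
		n++;
	}
	return (n);
}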
*/ CSR_WRITE_1(sc, XL_TX_STATUS, 0x01); } } static void xl_intr(void *arg) { struct xl_softc *sc = arg; struct ifnet *ifp = sc->xl_ifp; u_int16_t status; XL_LOCK(sc); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { XL_UNLOCK(sc); return; } #endif for (;;) { status = CSR_READ_2(sc, XL_STATUS); if ((status & XL_INTRS) == 0 || status == 0xFFFF) break; CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK|(status & XL_INTRS)); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; if (status & XL_STAT_UP_COMPLETE) { if (xl_rxeof(sc) == 0) { while (xl_rx_resync(sc)) xl_rxeof(sc); } } if (status & XL_STAT_DOWN_COMPLETE) { if (sc->xl_type == XL_TYPE_905B) xl_txeof_90xB(sc); else xl_txeof(sc); } if (status & XL_STAT_TX_COMPLETE) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); xl_txeoc(sc); } if (status & XL_STAT_ADFAIL) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xl_init_locked(sc); break; } if (status & XL_STAT_STATSOFLOW) xl_stats_update(sc); } if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd) && ifp->if_drv_flags & IFF_DRV_RUNNING) { if (sc->xl_type == XL_TYPE_905B) xl_start_90xB_locked(ifp); else xl_start_locked(ifp); } XL_UNLOCK(sc); } #ifdef DEVICE_POLLING static int xl_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct xl_softc *sc = ifp->if_softc; int rx_npkts = 0; XL_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rx_npkts = xl_poll_locked(ifp, cmd, count); XL_UNLOCK(sc); return (rx_npkts); } static int xl_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct xl_softc *sc = ifp->if_softc; int rx_npkts; XL_LOCK_ASSERT(sc); sc->rxcycles = count; rx_npkts = xl_rxeof(sc); if (sc->xl_type == XL_TYPE_905B) xl_txeof_90xB(sc); else xl_txeof(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (sc->xl_type == XL_TYPE_905B) xl_start_90xB_locked(ifp); else xl_start_locked(ifp); } if (cmd == POLL_AND_CHECK_STATUS) { u_int16_t status; status = CSR_READ_2(sc, XL_STATUS); if (status & XL_INTRS && status != 0xFFFF) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK|(status & XL_INTRS)); if (status & XL_STAT_TX_COMPLETE) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); xl_txeoc(sc); } if (status & XL_STAT_ADFAIL) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xl_init_locked(sc); } if (status & XL_STAT_STATSOFLOW) xl_stats_update(sc); } } return (rx_npkts); } #endif /* DEVICE_POLLING */ static void xl_tick(void *xsc) { struct xl_softc *sc = xsc; struct mii_data *mii; XL_LOCK_ASSERT(sc); if (sc->xl_miibus != NULL) { mii = device_get_softc(sc->xl_miibus); mii_tick(mii); } xl_stats_update(sc); if (xl_watchdog(sc) == EJUSTRETURN) return; callout_reset(&sc->xl_tick_callout, hz, xl_tick, sc); } static void xl_stats_update(struct xl_softc *sc) { struct ifnet *ifp = sc->xl_ifp; struct xl_stats xl_stats; u_int8_t *p; int i; XL_LOCK_ASSERT(sc); bzero((char *)&xl_stats, sizeof(struct xl_stats)); p = (u_int8_t *)&xl_stats; /* Read all the stats registers. */ XL_SEL_WIN(6); for (i = 0; i < 16; i++) *p++ = CSR_READ_1(sc, XL_W6_CARRIER_LOST + i); if_inc_counter(ifp, IFCOUNTER_IERRORS, xl_stats.xl_rx_overrun); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, xl_stats.xl_tx_multi_collision + xl_stats.xl_tx_single_collision + xl_stats.xl_tx_late_collision); /* * Boomerang and cyclone chips have an extra stats counter * in window 4 (BadSSD). We have to read this too in order * to clear out all the stats registers and avoid a statsoflow * interrupt. */ XL_SEL_WIN(4); CSR_READ_1(sc, XL_W4_BADSSD); XL_SEL_WIN(7); } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. 
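The interrupt handler above follows a common shape: re-read the status register until no interesting bits remain, ack exactly the bits about to be serviced, and treat an all-ones read as a vanished device. A toy, runnable model of that loop; the register values and the canned device are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define INTR_MASK 0x0014                /* hypothetical, like XL_INTRS */
#define STAT_RX   0x0010
#define STAT_TX   0x0004

/* Toy device: a canned sequence of status-register reads. */
static const uint16_t fake_status[] = { STAT_RX, STAT_TX, 0 };
static int fake_idx;

static uint16_t
csr_read_status(void)
{
	return (fake_status[fake_idx]);
}

static void
csr_ack_intr(uint16_t bits)
{
	(void)bits;
	fake_idx++;                     /* pretend the ack cleared it */
}

int
main(void)
{
	uint16_t status;

	/*
	 * Same shape as xl_intr(): loop on status, ack only the bits
	 * being serviced, and bail out on 0xFFFF, which is what reads
	 * return once the card has been surprise-removed.
	 */
	for (;;) {
		status = csr_read_status();
		if ((status & INTR_MASK) == 0 || status == 0xFFFF)
			break;
		csr_ack_intr(status & INTR_MASK);
		if (status & STAT_RX)
			printf("rx complete\n");
		if (status & STAT_TX)
			printf("tx complete\n");
	}
	return (0);
}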
*/ static int xl_encap(struct xl_softc *sc, struct xl_chain *c, struct mbuf **m_head) { struct mbuf *m_new; struct ifnet *ifp = sc->xl_ifp; int error, i, nseg, total_len; u_int32_t status; XL_LOCK_ASSERT(sc); error = bus_dmamap_load_mbuf_sg(sc->xl_mtag, c->xl_map, *m_head, sc->xl_cdata.xl_tx_segs, &nseg, BUS_DMA_NOWAIT); if (error && error != EFBIG) { if_printf(ifp, "can't map mbuf (error %d)\n", error); return (error); } /* * Handle special case: we used up all 63 fragments, * but we have more mbufs left in the chain. Copy the * data into an mbuf cluster. Note that we don't * bother clearing the values in the other fragment * pointers/counters; it wouldn't gain us anything, * and would waste cycles. */ if (error) { m_new = m_collapse(*m_head, M_NOWAIT, XL_MAXFRAGS); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m_new; error = bus_dmamap_load_mbuf_sg(sc->xl_mtag, c->xl_map, *m_head, sc->xl_cdata.xl_tx_segs, &nseg, BUS_DMA_NOWAIT); if (error) { m_freem(*m_head); *m_head = NULL; if_printf(ifp, "can't map mbuf (error %d)\n", error); return (error); } } KASSERT(nseg <= XL_MAXFRAGS, ("%s: too many DMA segments (%d)", __func__, nseg)); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } bus_dmamap_sync(sc->xl_mtag, c->xl_map, BUS_DMASYNC_PREWRITE); total_len = 0; for (i = 0; i < nseg; i++) { KASSERT(sc->xl_cdata.xl_tx_segs[i].ds_len <= MCLBYTES, ("segment size too large")); c->xl_ptr->xl_frag[i].xl_addr = htole32(sc->xl_cdata.xl_tx_segs[i].ds_addr); c->xl_ptr->xl_frag[i].xl_len = htole32(sc->xl_cdata.xl_tx_segs[i].ds_len); total_len += sc->xl_cdata.xl_tx_segs[i].ds_len; } c->xl_ptr->xl_frag[nseg - 1].xl_len |= htole32(XL_LAST_FRAG); if (sc->xl_type == XL_TYPE_905B) { status = XL_TXSTAT_RND_DEFEAT; #ifndef XL905B_TXCSUM_BROKEN if ((*m_head)->m_pkthdr.csum_flags) { if ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) status |= XL_TXSTAT_IPCKSUM; if ((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) status |= XL_TXSTAT_TCPCKSUM; if ((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) status |= XL_TXSTAT_UDPCKSUM; } #endif } else status = total_len; c->xl_ptr->xl_status = htole32(status); c->xl_ptr->xl_next = 0; c->xl_mbuf = *m_head; return (0); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit lists. We also save a * copy of the pointers since the transmit list fragment pointers are * physical addresses. */ static void xl_start(struct ifnet *ifp) { struct xl_softc *sc = ifp->if_softc; XL_LOCK(sc); if (sc->xl_type == XL_TYPE_905B) xl_start_90xB_locked(ifp); else xl_start_locked(ifp); XL_UNLOCK(sc); } static void xl_start_locked(struct ifnet *ifp) { struct xl_softc *sc = ifp->if_softc; struct mbuf *m_head; struct xl_chain *prev = NULL, *cur_tx = NULL, *start_tx; struct xl_chain *prev_tx; int error; XL_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; /* * Check for an available queue slot. If there are none, * punt. */ if (sc->xl_cdata.xl_tx_free == NULL) { xl_txeoc(sc); xl_txeof(sc); if (sc->xl_cdata.xl_tx_free == NULL) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } } start_tx = sc->xl_cdata.xl_tx_free; for (; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->xl_cdata.xl_tx_free != NULL;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* Pick a descriptor off the free list. */ prev_tx = cur_tx; cur_tx = sc->xl_cdata.xl_tx_free; /* Pack the data into the descriptor. 
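The fragment-filling loop in xl_encap() copies each DMA segment into a little-endian descriptor fragment and tags the final fragment so the chip knows where the packet ends. A sketch of just that step, assuming hypothetical fragment/segment layouts; glibc's <endian.h> supplies htole32(), on FreeBSD it is <sys/endian.h>:

#include <stdint.h>
#include <endian.h>             /* htole32(); <sys/endian.h> on BSD */

#define MAXFRAGS  63            /* like XL_MAXFRAGS */
#define LAST_FRAG 0x80000000u   /* hypothetical, like XL_LAST_FRAG */

struct frag {                   /* one DMA fragment descriptor */
	uint32_t addr;          /* little-endian bus address */
	uint32_t len;           /* little-endian length + flags */
};

struct seg {                    /* stands in for bus_dma_segment_t */
	uint32_t ds_addr;
	uint32_t ds_len;
};

/*
 * Copy a scatter/gather list (nseg >= 1) into descriptor fragments,
 * converting to the NIC's little-endian layout and tagging the final
 * fragment so the chip knows where the packet ends.  Returns the
 * total byte count, as the pre-905B status word wants.
 */
static uint32_t
fill_frags(struct frag frags[MAXFRAGS], const struct seg *segs, int nseg)
{
	uint32_t total = 0;
	int i;

	for (i = 0; i < nseg; i++) {
		frags[i].addr = htole32(segs[i].ds_addr);
		frags[i].len = htole32(segs[i].ds_len);
		total += segs[i].ds_len;
	}
	frags[nseg - 1].len |= htole32(LAST_FRAG);
	return (total);
}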
*/ error = xl_encap(sc, cur_tx, &m_head); if (error) { cur_tx = prev_tx; if (m_head == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } sc->xl_cdata.xl_tx_free = cur_tx->xl_next; cur_tx->xl_next = NULL; /* Chain it together. */ if (prev != NULL) { prev->xl_next = cur_tx; prev->xl_ptr->xl_next = htole32(cur_tx->xl_phys); } prev = cur_tx; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ BPF_MTAP(ifp, cur_tx->xl_mbuf); } /* * If there are no packets queued, bail. */ if (cur_tx == NULL) return; /* * Place the request for the upload interrupt * in the last descriptor in the chain. This way, if * we're chaining several packets at once, we'll only * get an interrupt once for the whole chain rather than * once for each packet. */ cur_tx->xl_ptr->xl_status |= htole32(XL_TXSTAT_DL_INTR); /* * Queue the packets. If the TX channel is clear, update * the downlist pointer register. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_STALL); xl_wait(sc); if (sc->xl_cdata.xl_tx_head != NULL) { sc->xl_cdata.xl_tx_tail->xl_next = start_tx; sc->xl_cdata.xl_tx_tail->xl_ptr->xl_next = htole32(start_tx->xl_phys); sc->xl_cdata.xl_tx_tail->xl_ptr->xl_status &= htole32(~XL_TXSTAT_DL_INTR); sc->xl_cdata.xl_tx_tail = cur_tx; } else { sc->xl_cdata.xl_tx_head = start_tx; sc->xl_cdata.xl_tx_tail = cur_tx; } bus_dmamap_sync(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap, BUS_DMASYNC_PREWRITE); if (!CSR_READ_4(sc, XL_DOWNLIST_PTR)) CSR_WRITE_4(sc, XL_DOWNLIST_PTR, start_tx->xl_phys); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); XL_SEL_WIN(7); /* * Set a timeout in case the chip goes out to lunch. */ sc->xl_wdog_timer = 5; /* * XXX Under certain conditions, usually on slower machines * where interrupts may be dropped, it's possible for the * adapter to chew up all the buffers in the receive ring * and stall, without us being able to do anything about it. * To guard against this, we need to make a pass over the * RX queue to make sure there aren't any packets pending. * Doing it here means we can flush the receive ring at the * same time the chip is DMAing the transmit descriptors we * just gave it. * * 3Com goes to some lengths to emphasize the Parallel Tasking (tm) * nature of their chips in all their marketing literature; * we may as well take advantage of it. :) */ taskqueue_enqueue(taskqueue_swi, &sc->xl_task); } static void xl_start_90xB_locked(struct ifnet *ifp) { struct xl_softc *sc = ifp->if_softc; struct mbuf *m_head; struct xl_chain *prev = NULL, *cur_tx = NULL, *start_tx; struct xl_chain *prev_tx; int error, idx; XL_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; idx = sc->xl_cdata.xl_tx_prod; start_tx = &sc->xl_cdata.xl_tx_chain[idx]; for (; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->xl_cdata.xl_tx_chain[idx].xl_mbuf == NULL;) { if ((XL_TX_LIST_CNT - sc->xl_cdata.xl_tx_cnt) < 3) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; prev_tx = cur_tx; cur_tx = &sc->xl_cdata.xl_tx_chain[idx]; /* Pack the data into the descriptor. */ error = xl_encap(sc, cur_tx, &m_head); if (error) { cur_tx = prev_tx; if (m_head == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } /* Chain it together. */ if (prev != NULL) prev->xl_ptr->xl_next = htole32(cur_tx->xl_phys); prev = cur_tx; /* * If there's a BPF listener, bounce a copy of this frame * to him. 
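Both start routines keep the download-complete interrupt request on the last descriptor of the chain only, so a burst of queued packets costs a single interrupt instead of one per packet. A small sketch of that tail-keeping, with a hypothetical status bit:

#include <stdint.h>
#include <stddef.h>

#define TXSTAT_DL_INTR 0x8000   /* hypothetical interrupt-request bit */

struct txd {
	uint32_t status;
	struct txd *next;
};

/*
 * Append a descriptor to a chain, keeping the interrupt-request bit
 * on the tail only: the new tail asks for the interrupt, the old tail
 * is demoted, mirroring what the start routines above do when they
 * link a freshly encapsulated packet behind the pending chain.
 */
static void
chain_append(struct txd **head, struct txd **tail, struct txd *d)
{
	d->next = NULL;
	d->status |= TXSTAT_DL_INTR;
	if (*tail != NULL) {
		(*tail)->status &= ~TXSTAT_DL_INTR;     /* demote old tail */
		(*tail)->next = d;
	} else
		*head = d;
	*tail = d;
}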
*/ BPF_MTAP(ifp, cur_tx->xl_mbuf); XL_INC(idx, XL_TX_LIST_CNT); sc->xl_cdata.xl_tx_cnt++; } /* * If there are no packets queued, bail. */ if (cur_tx == NULL) return; /* * Place the request for the upload interrupt * in the last descriptor in the chain. This way, if * we're chaining several packets at once, we'll only * get an interrupt once for the whole chain rather than * once for each packet. */ cur_tx->xl_ptr->xl_status |= htole32(XL_TXSTAT_DL_INTR); /* Start transmission */ sc->xl_cdata.xl_tx_prod = idx; start_tx->xl_prev->xl_ptr->xl_next = htole32(start_tx->xl_phys); bus_dmamap_sync(sc->xl_ldata.xl_tx_tag, sc->xl_ldata.xl_tx_dmamap, BUS_DMASYNC_PREWRITE); /* * Set a timeout in case the chip goes out to lunch. */ sc->xl_wdog_timer = 5; } static void xl_init(void *xsc) { struct xl_softc *sc = xsc; XL_LOCK(sc); xl_init_locked(sc); XL_UNLOCK(sc); } static void xl_init_locked(struct xl_softc *sc) { struct ifnet *ifp = sc->xl_ifp; int error, i; struct mii_data *mii = NULL; XL_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; /* * Cancel pending I/O and free all RX/TX buffers. */ xl_stop(sc); /* Reset the chip to a known state. */ xl_reset(sc); if (sc->xl_miibus == NULL) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_RESET); xl_wait(sc); } CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); DELAY(10000); if (sc->xl_miibus != NULL) mii = device_get_softc(sc->xl_miibus); /* * Clear WOL status and disable all WOL features, as WOL * would interfere with Rx operation under normal environments. */ if ((sc->xl_flags & XL_FLAG_WOL) != 0) { XL_SEL_WIN(7); CSR_READ_2(sc, XL_W7_BM_PME); CSR_WRITE_2(sc, XL_W7_BM_PME, 0); } /* Init our MAC address */ XL_SEL_WIN(2); for (i = 0; i < ETHER_ADDR_LEN; i++) { CSR_WRITE_1(sc, XL_W2_STATION_ADDR_LO + i, IF_LLADDR(sc->xl_ifp)[i]); } /* Clear the station mask. */ for (i = 0; i < 3; i++) CSR_WRITE_2(sc, XL_W2_STATION_MASK_LO + (i * 2), 0); #ifdef notdef /* Reset TX and RX. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_RESET); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); #endif /* Init circular RX list. */ error = xl_list_rx_init(sc); if (error) { device_printf(sc->xl_dev, "initialization of the rx ring failed (%d)\n", error); xl_stop(sc); return; } /* Init TX descriptors. */ if (sc->xl_type == XL_TYPE_905B) error = xl_list_tx_init_90xB(sc); else error = xl_list_tx_init(sc); if (error) { device_printf(sc->xl_dev, "initialization of the tx ring failed (%d)\n", error); xl_stop(sc); return; } /* * Set the TX freethresh value. * Note that this has no effect on 3c905B "cyclone" * cards but is required for 3c900/3c905 "boomerang" * cards in order to enable the download engine. */ CSR_WRITE_1(sc, XL_TX_FREETHRESH, XL_PACKET_SIZE >> 8); /* Set the TX start threshold for best performance. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_SET_START|sc->xl_tx_thresh); /* * If this is a 3c905B, also set the tx reclaim threshold. * This helps cut down on the number of tx reclaim errors * that could happen on a busy network. The chip multiplies * the register value by 16 to obtain the actual threshold * in bytes, so we divide by 16 when setting the value here. * The existing threshold value can be examined by reading * the register at offset 9 in window 5. */ if (sc->xl_type == XL_TYPE_905B) { CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_SET_TX_RECLAIM|(XL_PACKET_SIZE >> 4)); } /* Set RX filter bits. */ xl_rxfilter(sc); /* * Load the address of the RX list.
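The reclaim-threshold comment above describes register units of 16 bytes, hence the ">> 4" when the byte count is folded into the command word. A tiny worked example of the conversion; the 1536-byte figure is illustrative, not necessarily the driver's XL_PACKET_SIZE, and any remainder below 16 bytes is truncated:

#include <stdint.h>
#include <assert.h>

/*
 * The reclaim threshold register holds bytes/16, so a byte count is
 * shifted right by four before being OR'd into the command word.
 */
static uint16_t
reclaim_thresh_reg(uint16_t bytes)
{
	return (bytes >> 4);            /* register units of 16 bytes */
}

int
main(void)
{
	/* A 1536-byte threshold -> register value 96 -> 96 * 16 bytes. */
	assert(reclaim_thresh_reg(1536) == 96);
	assert(reclaim_thresh_reg(1536) * 16 == 1536);
	return (0);
}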
We have to * stall the upload engine before we can manipulate * the uplist pointer register, then unstall it when * we're finished. We also have to wait for the * stall command to complete before proceeding. * Note that we have to do this after any RX resets * have completed since the uplist register is cleared * by a reset. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_UP_STALL); xl_wait(sc); CSR_WRITE_4(sc, XL_UPLIST_PTR, sc->xl_ldata.xl_rx_dmaaddr); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_UP_UNSTALL); xl_wait(sc); if (sc->xl_type == XL_TYPE_905B) { /* Set polling interval */ CSR_WRITE_1(sc, XL_DOWN_POLL, 64); /* Load the address of the TX list */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_STALL); xl_wait(sc); CSR_WRITE_4(sc, XL_DOWNLIST_PTR, sc->xl_cdata.xl_tx_chain[0].xl_phys); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_DOWN_UNSTALL); xl_wait(sc); } /* * If the coax transceiver is on, make sure to enable * the DC-DC converter. */ XL_SEL_WIN(3); if (sc->xl_xcvr == XL_XCVR_COAX) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_START); else CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_STOP); /* * increase packet size to allow reception of 802.1q or ISL packets. * For the 3c90x chip, set the 'allow large packets' bit in the MAC * control register. For 3c90xB/C chips, use the RX packet size * register. */ if (sc->xl_type == XL_TYPE_905B) CSR_WRITE_2(sc, XL_W3_MAXPKTSIZE, XL_PACKET_SIZE); else { u_int8_t macctl; macctl = CSR_READ_1(sc, XL_W3_MAC_CTRL); macctl |= XL_MACCTRL_ALLOW_LARGE_PACK; CSR_WRITE_1(sc, XL_W3_MAC_CTRL, macctl); } /* Clear out the stats counters. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STATS_DISABLE); xl_stats_update(sc); XL_SEL_WIN(4); CSR_WRITE_2(sc, XL_W4_NET_DIAG, XL_NETDIAG_UPPER_BYTES_ENABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STATS_ENABLE); /* * Enable interrupts. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK|0xFF); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STAT_ENB|XL_INTRS); #ifdef DEVICE_POLLING /* Disable interrupts if we are polling. */ if (ifp->if_capenable & IFCAP_POLLING) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB|0); else #endif CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB|XL_INTRS); if (sc->xl_flags & XL_FLAG_FUNCREG) bus_space_write_4(sc->xl_ftag, sc->xl_fhandle, 4, 0x8000); /* Set the RX early threshold */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_SET_THRESH|(XL_PACKET_SIZE >>2)); CSR_WRITE_4(sc, XL_DMACTL, XL_DMACTL_UP_RX_EARLY); /* Enable receiver and transmitter. */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_ENABLE); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_ENABLE); xl_wait(sc); /* XXX Downcall to miibus. */ if (mii != NULL) mii_mediachg(mii); /* Select window 7 for normal operations. */ XL_SEL_WIN(7); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->xl_wdog_timer = 0; callout_reset(&sc->xl_tick_callout, hz, xl_tick, sc); } /* * Set media options. */ static int xl_ifmedia_upd(struct ifnet *ifp) { struct xl_softc *sc = ifp->if_softc; struct ifmedia *ifm = NULL; struct mii_data *mii = NULL; XL_LOCK(sc); if (sc->xl_miibus != NULL) mii = device_get_softc(sc->xl_miibus); if (mii == NULL) ifm = &sc->ifmedia; else ifm = &mii->mii_media; switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_100_FX: case IFM_10_FL: case IFM_10_2: case IFM_10_5: xl_setmode(sc, ifm->ifm_media); XL_UNLOCK(sc); return (0); } if (sc->xl_media & XL_MEDIAOPT_MII || sc->xl_media & XL_MEDIAOPT_BTX || sc->xl_media & XL_MEDIAOPT_BT4) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xl_init_locked(sc); } else { xl_setmode(sc, ifm->ifm_media); } XL_UNLOCK(sc); return (0); } /* * Report current media status. 
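Enabling large packets on the pre-B parts above is a textbook read-modify-write of a byte-wide register: fetch, OR in one bit, write back, leaving the neighbouring bits alone. A runnable toy version with an invented register file and bit value:

#include <stdint.h>

#define MACCTRL_ALLOW_LARGE_PACK 0x40   /* hypothetical bit position */

/* Toy register file standing in for CSR_READ_1()/CSR_WRITE_1(). */
static uint8_t regs[256];

static uint8_t
csr_read_1(int reg)
{
	return (regs[reg]);
}

static void
csr_write_1(int reg, uint8_t v)
{
	regs[reg] = v;
}

/*
 * Read-modify-write of a control register: the classic way to flip
 * one feature bit without disturbing its neighbours, as the
 * large-packet enable above does for pre-B 3c90x parts.
 */
static void
mac_allow_large_packets(int reg)
{
	uint8_t macctl;

	macctl = csr_read_1(reg);
	macctl |= MACCTRL_ALLOW_LARGE_PACK;
	csr_write_1(reg, macctl);
}

int
main(void)
{
	csr_write_1(6, 0x03);           /* pretend some bits already set */
	mac_allow_large_packets(6);
	return (csr_read_1(6) == (0x03 | MACCTRL_ALLOW_LARGE_PACK) ? 0 : 1);
}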
*/ static void xl_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct xl_softc *sc = ifp->if_softc; u_int32_t icfg; u_int16_t status = 0; struct mii_data *mii = NULL; XL_LOCK(sc); if (sc->xl_miibus != NULL) mii = device_get_softc(sc->xl_miibus); XL_SEL_WIN(4); status = CSR_READ_2(sc, XL_W4_MEDIA_STATUS); XL_SEL_WIN(3); icfg = CSR_READ_4(sc, XL_W3_INTERNAL_CFG) & XL_ICFG_CONNECTOR_MASK; icfg >>= XL_ICFG_CONNECTOR_BITS; ifmr->ifm_active = IFM_ETHER; ifmr->ifm_status = IFM_AVALID; if ((status & XL_MEDIASTAT_CARRIER) == 0) ifmr->ifm_status |= IFM_ACTIVE; switch (icfg) { case XL_XCVR_10BT: ifmr->ifm_active = IFM_ETHER|IFM_10_T; if (CSR_READ_1(sc, XL_W3_MAC_CTRL) & XL_MACCTRL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; break; case XL_XCVR_AUI: if (sc->xl_type == XL_TYPE_905B && sc->xl_media == XL_MEDIAOPT_10FL) { ifmr->ifm_active = IFM_ETHER|IFM_10_FL; if (CSR_READ_1(sc, XL_W3_MAC_CTRL) & XL_MACCTRL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } else ifmr->ifm_active = IFM_ETHER|IFM_10_5; break; case XL_XCVR_COAX: ifmr->ifm_active = IFM_ETHER|IFM_10_2; break; /* * XXX MII and BTX/AUTO should be separate cases. */ case XL_XCVR_100BTX: case XL_XCVR_AUTO: case XL_XCVR_MII: if (mii != NULL) { mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; } break; case XL_XCVR_100BFX: ifmr->ifm_active = IFM_ETHER|IFM_100_FX; break; default: if_printf(ifp, "unknown XCVR type: %d\n", icfg); break; } XL_UNLOCK(sc); } static int xl_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct xl_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int error = 0, mask; struct mii_data *mii = NULL; switch (command) { case SIOCSIFFLAGS: XL_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING && (ifp->if_flags ^ sc->xl_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) xl_rxfilter(sc); else xl_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) xl_stop(sc); } sc->xl_if_flags = ifp->if_flags; XL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* XXX Downcall from if_addmulti() possibly with locks held. */ XL_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) xl_rxfilter(sc); XL_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: if (sc->xl_miibus != NULL) mii = device_get_softc(sc->xl_miibus); if (mii == NULL) error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); else error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if ((mask & IFCAP_POLLING) != 0 && (ifp->if_capabilities & IFCAP_POLLING) != 0) { ifp->if_capenable ^= IFCAP_POLLING; if ((ifp->if_capenable & IFCAP_POLLING) != 0) { error = ether_poll_register(xl_poll, ifp); if (error) break; XL_LOCK(sc); /* Disable interrupts */ CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB|0); ifp->if_capenable |= IFCAP_POLLING; XL_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); /* Enable interrupts. 
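The SIOCSIFCAP handling above derives a change mask by XORing the requested capability word with the currently enabled one, then toggles each supported bit individually. A minimal sketch with made-up capability bits:

#include <stdint.h>
#include <assert.h>

#define CAP_TXCSUM  0x0001
#define CAP_RXCSUM  0x0002
#define CAP_POLLING 0x0010

/*
 * The SIOCSIFCAP pattern above: XOR the requested capability word
 * with the enabled one to get the set of bits the caller wants
 * changed, then toggle each bit the hardware actually supports.
 */
static uint32_t
update_caps(uint32_t enabled, uint32_t requested, uint32_t supported)
{
	uint32_t mask = requested ^ enabled;

	if ((mask & CAP_TXCSUM) && (supported & CAP_TXCSUM))
		enabled ^= CAP_TXCSUM;
	if ((mask & CAP_RXCSUM) && (supported & CAP_RXCSUM))
		enabled ^= CAP_RXCSUM;
	if ((mask & CAP_POLLING) && (supported & CAP_POLLING))
		enabled ^= CAP_POLLING;
	return (enabled);
}

int
main(void)
{
	/* Asking for the current state changes nothing... */
	assert(update_caps(CAP_TXCSUM, CAP_TXCSUM, ~0u) == CAP_TXCSUM);
	/* ...and an unsupported bit is silently ignored. */
	assert(update_caps(0, CAP_POLLING, CAP_TXCSUM) == 0);
	return (0);
}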
*/ XL_LOCK(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK | 0xFF); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB | XL_INTRS); if (sc->xl_flags & XL_FLAG_FUNCREG) bus_space_write_4(sc->xl_ftag, sc->xl_fhandle, 4, 0x8000); XL_UNLOCK(sc); } } #endif /* DEVICE_POLLING */ XL_LOCK(sc); if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= XL905B_CSUM_FEATURES; else ifp->if_hwassist &= ~XL905B_CSUM_FEATURES; } if ((mask & IFCAP_RXCSUM) != 0 && (ifp->if_capabilities & IFCAP_RXCSUM) != 0) ifp->if_capenable ^= IFCAP_RXCSUM; if ((mask & IFCAP_WOL_MAGIC) != 0 && (ifp->if_capabilities & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; XL_UNLOCK(sc); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int xl_watchdog(struct xl_softc *sc) { struct ifnet *ifp = sc->xl_ifp; u_int16_t status = 0; int misintr; XL_LOCK_ASSERT(sc); if (sc->xl_wdog_timer == 0 || --sc->xl_wdog_timer != 0) return (0); xl_rxeof(sc); xl_txeoc(sc); misintr = 0; if (sc->xl_type == XL_TYPE_905B) { xl_txeof_90xB(sc); if (sc->xl_cdata.xl_tx_cnt == 0) misintr++; } else { xl_txeof(sc); if (sc->xl_cdata.xl_tx_head == NULL) misintr++; } if (misintr != 0) { device_printf(sc->xl_dev, "watchdog timeout (missed Tx interrupts) -- recovering\n"); return (0); } if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); XL_SEL_WIN(4); status = CSR_READ_2(sc, XL_W4_MEDIA_STATUS); device_printf(sc->xl_dev, "watchdog timeout\n"); if (status & XL_MEDIASTAT_CARRIER) device_printf(sc->xl_dev, "no carrier - transceiver cable problem?\n"); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xl_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (sc->xl_type == XL_TYPE_905B) xl_start_90xB_locked(ifp); else xl_start_locked(ifp); } return (EJUSTRETURN); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void xl_stop(struct xl_softc *sc) { - register int i; + int i; struct ifnet *ifp = sc->xl_ifp; XL_LOCK_ASSERT(sc); sc->xl_wdog_timer = 0; CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_DISABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STATS_DISABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_DISCARD); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_DISABLE); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_COAX_STOP); DELAY(800); #ifdef foo CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_RESET); xl_wait(sc); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_TX_RESET); xl_wait(sc); #endif CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ACK|XL_STAT_INTLATCH); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_STAT_ENB|0); CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_INTR_ENB|0); if (sc->xl_flags & XL_FLAG_FUNCREG) bus_space_write_4(sc->xl_ftag, sc->xl_fhandle, 4, 0x8000); /* Stop the stats updater. */ callout_stop(&sc->xl_tick_callout); /* * Free data in the RX lists. */ for (i = 0; i < XL_RX_LIST_CNT; i++) { if (sc->xl_cdata.xl_rx_chain[i].xl_mbuf != NULL) { bus_dmamap_unload(sc->xl_mtag, sc->xl_cdata.xl_rx_chain[i].xl_map); bus_dmamap_destroy(sc->xl_mtag, sc->xl_cdata.xl_rx_chain[i].xl_map); m_freem(sc->xl_cdata.xl_rx_chain[i].xl_mbuf); sc->xl_cdata.xl_rx_chain[i].xl_mbuf = NULL; } } if (sc->xl_ldata.xl_rx_list != NULL) bzero(sc->xl_ldata.xl_rx_list, XL_RX_LIST_SZ); /* * Free the TX list buffers. 
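The watchdog above distinguishes a merely missed interrupt (reaping empties the queue, no reset needed) from a genuine hang (work still pending after reaping). The decision logic in miniature, with hypothetical types:

#include <stdint.h>

/*
 * Shape of xl_watchdog() above: a counter armed when packets are
 * queued and re-checked once a second.  A timeout where the TX queue
 * turns out to be empty after reaping means an interrupt was merely
 * missed; only a timeout with work still pending is a real hang.
 */
struct wdog {
	int timer;                      /* seconds to expiry; 0 = disarmed */
	int tx_pending;                 /* outstanding TX descriptors */
};

enum wdog_action { WDOG_OK, WDOG_MISSED_INTR, WDOG_RESET };

enum wdog_action
wdog_tick(struct wdog *w, int (*reap)(struct wdog *))
{
	if (w->timer == 0 || --w->timer != 0)
		return (WDOG_OK);       /* disarmed, or not yet expired */
	w->tx_pending -= reap(w);       /* collect completions first */
	if (w->tx_pending == 0)
		return (WDOG_MISSED_INTR);  /* recovered without a reset */
	return (WDOG_RESET);            /* genuine timeout: reinitialize */
}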
*/ for (i = 0; i < XL_TX_LIST_CNT; i++) { if (sc->xl_cdata.xl_tx_chain[i].xl_mbuf != NULL) { bus_dmamap_unload(sc->xl_mtag, sc->xl_cdata.xl_tx_chain[i].xl_map); bus_dmamap_destroy(sc->xl_mtag, sc->xl_cdata.xl_tx_chain[i].xl_map); m_freem(sc->xl_cdata.xl_tx_chain[i].xl_mbuf); sc->xl_cdata.xl_tx_chain[i].xl_mbuf = NULL; } } if (sc->xl_ldata.xl_tx_list != NULL) bzero(sc->xl_ldata.xl_tx_list, XL_TX_LIST_SZ); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int xl_shutdown(device_t dev) { return (xl_suspend(dev)); } static int xl_suspend(device_t dev) { struct xl_softc *sc; sc = device_get_softc(dev); XL_LOCK(sc); xl_stop(sc); xl_setwol(sc); XL_UNLOCK(sc); return (0); } static int xl_resume(device_t dev) { struct xl_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->xl_ifp; XL_LOCK(sc); if (ifp->if_flags & IFF_UP) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xl_init_locked(sc); } XL_UNLOCK(sc); return (0); } static void xl_setwol(struct xl_softc *sc) { struct ifnet *ifp; u_int16_t cfg, pmstat; if ((sc->xl_flags & XL_FLAG_WOL) == 0) return; ifp = sc->xl_ifp; XL_SEL_WIN(7); /* Clear any pending PME events. */ CSR_READ_2(sc, XL_W7_BM_PME); cfg = 0; if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) cfg |= XL_BM_PME_MAGIC; CSR_WRITE_2(sc, XL_W7_BM_PME, cfg); /* Enable RX. */ if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) CSR_WRITE_2(sc, XL_COMMAND, XL_CMD_RX_ENABLE); /* Request PME. */ pmstat = pci_read_config(sc->xl_dev, sc->xl_pmcap + PCIR_POWER_STATUS, 2); if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) pmstat |= PCIM_PSTAT_PMEENABLE; else pmstat &= ~PCIM_PSTAT_PMEENABLE; pci_write_config(sc->xl_dev, sc->xl_pmcap + PCIR_POWER_STATUS, pmstat, 2); } Index: stable/11/sys/fs/fifofs/fifo_vnops.c =================================================================== --- stable/11/sys/fs/fifofs/fifo_vnops.c (revision 331642) +++ stable/11/sys/fs/fifofs/fifo_vnops.c (revision 331643) @@ -1,360 +1,360 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1993, 1995 * The Regents of the University of California. * Copyright (c) 2005 Robert N. M. Watson * Copyright (c) 2012 Giovanni Trematerra * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fifo_vnops.c 8.10 (Berkeley) 5/27/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * This structure is associated with the FIFO vnode and stores * the state associated with the FIFO. * Notes about locking: * - fi_pipe is invariant since init time. * - fi_readers and fi_writers are protected by the vnode lock. */ struct fifoinfo { struct pipe *fi_pipe; long fi_readers; long fi_writers; u_int fi_rgen; u_int fi_wgen; }; static vop_print_t fifo_print; static vop_open_t fifo_open; static vop_close_t fifo_close; static vop_advlock_t fifo_advlock; struct vop_vector fifo_specops = { .vop_default = &default_vnodeops, .vop_advlock = fifo_advlock, .vop_close = fifo_close, .vop_create = VOP_PANIC, .vop_getattr = VOP_EBADF, .vop_ioctl = VOP_PANIC, .vop_kqfilter = VOP_PANIC, .vop_link = VOP_PANIC, .vop_mkdir = VOP_PANIC, .vop_mknod = VOP_PANIC, .vop_open = fifo_open, .vop_pathconf = VOP_PANIC, .vop_print = fifo_print, .vop_read = VOP_PANIC, .vop_readdir = VOP_PANIC, .vop_readlink = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_reclaim = VOP_NULL, .vop_remove = VOP_PANIC, .vop_rename = VOP_PANIC, .vop_rmdir = VOP_PANIC, .vop_setattr = VOP_EBADF, .vop_symlink = VOP_PANIC, .vop_write = VOP_PANIC, }; /* * Dispose of fifo resources. */ static void fifo_cleanup(struct vnode *vp) { struct fifoinfo *fip; ASSERT_VOP_ELOCKED(vp, "fifo_cleanup"); fip = vp->v_fifoinfo; if (fip->fi_readers == 0 && fip->fi_writers == 0) { vp->v_fifoinfo = NULL; pipe_dtor(fip->fi_pipe); free(fip, M_VNODE); } } /* * Open called to set up a new instance of a fifo or * to find an active instance of a fifo. */ /* ARGSUSED */ static int fifo_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; struct file *a_fp; } */ *ap; { struct vnode *vp; struct file *fp; struct thread *td; struct fifoinfo *fip; struct pipe *fpipe; u_int gen; int error, stops_deferred; vp = ap->a_vp; fp = ap->a_fp; td = ap->a_td; ASSERT_VOP_ELOCKED(vp, "fifo_open"); if (fp == NULL || (ap->a_mode & FEXEC) != 0) return (EINVAL); if ((fip = vp->v_fifoinfo) == NULL) { pipe_named_ctor(&fpipe, td); fip = malloc(sizeof(*fip), M_VNODE, M_WAITOK); fip->fi_pipe = fpipe; fpipe->pipe_wgen = fip->fi_readers = fip->fi_writers = 0; KASSERT(vp->v_fifoinfo == NULL, ("fifo_open: v_fifoinfo race")); vp->v_fifoinfo = fip; } fpipe = fip->fi_pipe; KASSERT(fpipe != NULL, ("fifo_open: pipe is NULL")); /* * Use the pipe mutex here, in addition to the vnode lock, * in order to allow vnode lock dropping before msleep() calls * and still avoiding missed wakeups. 
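The locking rule in the comment above has a standard userspace analogue: take the fine-grained lock first, drop the outer lock, and only then sleep, so a wakeup cannot slip into the gap. A purely illustrative POSIX-threads rendering (names invented; the caller is assumed to hold vnode_lock; compile with -pthread):

#include <pthread.h>

static pthread_mutex_t pipe_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  writers_cv = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t vnode_lock = PTHREAD_MUTEX_INITIALIZER;
static int writers;

/*
 * Wait for a writer while entered with the "vnode lock" held: take
 * the inner mutex first, then drop the outer lock, then sleep.
 * Because the condition (writers) only changes under pipe_mtx, no
 * wakeup can slip in between unlocking vnode_lock and blocking in
 * cond_wait -- the same reason fifo_open() takes PIPE_LOCK() before
 * VOP_UNLOCK().
 */
static void
wait_for_writer(void)
{
	pthread_mutex_lock(&pipe_mtx);
	pthread_mutex_unlock(&vnode_lock);
	while (writers == 0)
		pthread_cond_wait(&writers_cv, &pipe_mtx);
	pthread_mutex_unlock(&pipe_mtx);
	pthread_mutex_lock(&vnode_lock);        /* re-take outer lock */
}

/* The writer side publishes its arrival under the same inner mutex. */
static void
writer_arrived(void)
{
	pthread_mutex_lock(&pipe_mtx);
	writers++;
	pthread_cond_signal(&writers_cv);
	pthread_mutex_unlock(&pipe_mtx);
}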
*/ PIPE_LOCK(fpipe); if (ap->a_mode & FREAD) { fip->fi_readers++; fip->fi_rgen++; if (fip->fi_readers == 1) { fpipe->pipe_state &= ~PIPE_EOF; if (fip->fi_writers > 0) wakeup(&fip->fi_writers); } fp->f_seqcount = fpipe->pipe_wgen - fip->fi_writers; } if (ap->a_mode & FWRITE) { if ((ap->a_mode & O_NONBLOCK) && fip->fi_readers == 0) { PIPE_UNLOCK(fpipe); if (fip->fi_writers == 0) fifo_cleanup(vp); return (ENXIO); } fip->fi_writers++; fip->fi_wgen++; if (fip->fi_writers == 1) { fpipe->pipe_state &= ~PIPE_EOF; if (fip->fi_readers > 0) wakeup(&fip->fi_readers); } } if ((ap->a_mode & O_NONBLOCK) == 0) { if ((ap->a_mode & FREAD) && fip->fi_writers == 0) { gen = fip->fi_wgen; VOP_UNLOCK(vp, 0); stops_deferred = sigdeferstop(SIGDEFERSTOP_OFF); error = msleep(&fip->fi_readers, PIPE_MTX(fpipe), PDROP | PCATCH | PSOCK, "fifoor", 0); sigallowstop(stops_deferred); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0 && gen == fip->fi_wgen) { fip->fi_readers--; if (fip->fi_readers == 0) { PIPE_LOCK(fpipe); fpipe->pipe_state |= PIPE_EOF; if (fpipe->pipe_state & PIPE_WANTW) wakeup(fpipe); PIPE_UNLOCK(fpipe); fifo_cleanup(vp); } return (error); } PIPE_LOCK(fpipe); /* * We must have got woken up because we had a writer. * That (and not still having one) is the condition * that we must wait for. */ } if ((ap->a_mode & FWRITE) && fip->fi_readers == 0) { gen = fip->fi_rgen; VOP_UNLOCK(vp, 0); stops_deferred = sigdeferstop(SIGDEFERSTOP_OFF); error = msleep(&fip->fi_writers, PIPE_MTX(fpipe), PDROP | PCATCH | PSOCK, "fifoow", 0); sigallowstop(stops_deferred); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0 && gen == fip->fi_rgen) { fip->fi_writers--; if (fip->fi_writers == 0) { PIPE_LOCK(fpipe); fpipe->pipe_state |= PIPE_EOF; if (fpipe->pipe_state & PIPE_WANTR) wakeup(fpipe); fpipe->pipe_wgen++; PIPE_UNLOCK(fpipe); fifo_cleanup(vp); } return (error); } /* * We must have got woken up because we had * a reader. That (and not still having one) * is the condition that we must wait for. */ PIPE_LOCK(fpipe); } } PIPE_UNLOCK(fpipe); KASSERT(fp != NULL, ("can't fifo/vnode bypass")); KASSERT(fp->f_ops == &badfileops, ("not badfileops in fifo_open")); finit(fp, fp->f_flag, DTYPE_FIFO, fpipe, &pipeops); return (0); } /* * Device close routine */ /* ARGSUSED */ static int fifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp; struct fifoinfo *fip; struct pipe *cpipe; vp = ap->a_vp; fip = vp->v_fifoinfo; cpipe = fip->fi_pipe; ASSERT_VOP_ELOCKED(vp, "fifo_close"); if (ap->a_fflag & FREAD) { fip->fi_readers--; if (fip->fi_readers == 0) { PIPE_LOCK(cpipe); cpipe->pipe_state |= PIPE_EOF; if ((cpipe->pipe_state & PIPE_WANTW)) { cpipe->pipe_state &= ~PIPE_WANTW; wakeup(cpipe); } pipeselwakeup(cpipe); PIPE_UNLOCK(cpipe); } } if (ap->a_fflag & FWRITE) { fip->fi_writers--; if (fip->fi_writers == 0) { PIPE_LOCK(cpipe); cpipe->pipe_state |= PIPE_EOF; if ((cpipe->pipe_state & PIPE_WANTR)) { cpipe->pipe_state &= ~PIPE_WANTR; wakeup(cpipe); } cpipe->pipe_wgen++; pipeselwakeup(cpipe); PIPE_UNLOCK(cpipe); } } fifo_cleanup(vp); return (0); } /* * Print out internal contents of a fifo vnode. */ int fifo_printinfo(vp) struct vnode *vp; { - register struct fifoinfo *fip = vp->v_fifoinfo; + struct fifoinfo *fip = vp->v_fifoinfo; if (fip == NULL){ printf(", NULL v_fifoinfo"); return (0); } printf(", fifo with %ld readers and %ld writers", fip->fi_readers, fip->fi_writers); return (0); } /* * Print out the contents of a fifo vnode. 
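The fi_rgen/fi_wgen counters above let an interrupted sleeper decide whether to back out its registration: if the generation is unchanged, no peer arrived while it slept. The idea in miniature, with invented names:

#include <stdint.h>

/*
 * The fi_rgen/fi_wgen trick in miniature: bump a generation counter
 * every time the peer count changes.  A sleeper that wakes up with an
 * error can then tell "nothing happened, undo my registration"
 * (generation unchanged) from "a peer did arrive while I slept, keep
 * it" (generation advanced).
 */
struct endpoint {
	long count;                     /* like fi_writers */
	unsigned gen;                   /* like fi_wgen */
};

static void
peer_arrived(struct endpoint *ep)
{
	ep->count++;
	ep->gen++;
}

/* Returns 1 if the interrupted opener must tear its side down. */
static int
must_back_out(const struct endpoint *ep, unsigned gen_at_sleep, int error)
{
	return (error != 0 && gen_at_sleep == ep->gen);
}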
*/ static int fifo_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf(" "); fifo_printinfo(ap->a_vp); printf("\n"); return (0); } /* * Fifo advisory byte-level locks. */ /* ARGSUSED */ static int fifo_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } Index: stable/11/sys/fs/nandfs/nandfs_vnops.c =================================================================== --- stable/11/sys/fs/nandfs/nandfs_vnops.c (revision 331642) +++ stable/11/sys/fs/nandfs/nandfs_vnops.c (revision 331643) @@ -1,2453 +1,2453 @@ /*- * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * From: NetBSD: nilfs_vnops.c,v 1.2 2009/08/26 03:40:48 elad */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern uma_zone_t nandfs_node_zone; static void nandfs_read_filebuf(struct nandfs_node *, struct buf *); static void nandfs_itimes_locked(struct vnode *); static int nandfs_truncate(struct vnode *, uint64_t); static vop_pathconf_t nandfs_pathconf; #define UPDATE_CLOSE 0 #define UPDATE_WAIT 0 static int nandfs_inactive(struct vop_inactive_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); int error = 0; DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node)); if (node == NULL) { DPRINTF(NODE, ("%s: inactive NULL node\n", __func__)); return (0); } if (node->nn_inode.i_mode != 0 && !(node->nn_inode.i_links_count)) { nandfs_truncate(vp, 0); error = nandfs_node_destroy(node); if (error) nandfs_error("%s: destroy node: %p\n", __func__, node); node->nn_flags = 0; vrecycle(vp); } return (error); } static int nandfs_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *nandfs_node = VTON(vp); struct nandfs_device *fsdev = nandfs_node->nn_nandfsdev; uint64_t ino = nandfs_node->nn_ino; DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, nandfs_node)); /* Invalidate all entries to a particular vnode. 
*/ cache_purge(vp); /* Destroy the vm object and flush associated pages. */ vnode_destroy_vobject(vp); /* Remove from vfs hash if not system vnode */ if (!NANDFS_SYS_NODE(nandfs_node->nn_ino)) vfs_hash_remove(vp); /* Dispose all node knowledge */ nandfs_dispose_node(&nandfs_node); if (!NANDFS_SYS_NODE(ino)) NANDFS_WRITEUNLOCK(fsdev); return (0); } static int nandfs_read(struct vop_read_args *ap) { - register struct vnode *vp = ap->a_vp; - register struct nandfs_node *node = VTON(vp); + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); struct nandfs_device *nandfsdev = node->nn_nandfsdev; struct uio *uio = ap->a_uio; struct buf *bp; uint64_t size; uint32_t blocksize; off_t bytesinfile; ssize_t toread, off; daddr_t lbn; ssize_t resid; int error = 0; if (uio->uio_resid == 0) return (0); size = node->nn_inode.i_size; if (uio->uio_offset >= size) return (0); blocksize = nandfsdev->nd_blocksize; bytesinfile = size - uio->uio_offset; resid = omin(uio->uio_resid, bytesinfile); while (resid) { lbn = uio->uio_offset / blocksize; off = uio->uio_offset & (blocksize - 1); toread = omin(resid, blocksize - off); DPRINTF(READ, ("nandfs_read bn: 0x%jx toread: 0x%zx (0x%x)\n", (uintmax_t)lbn, toread, blocksize)); error = nandfs_bread(node, lbn, NOCRED, 0, &bp); if (error) { brelse(bp); break; } error = uiomove(bp->b_data + off, toread, uio); if (error) { brelse(bp); break; } brelse(bp); resid -= toread; } return (error); } static int nandfs_write(struct vop_write_args *ap) { struct nandfs_device *fsdev; struct nandfs_node *node; struct vnode *vp; struct uio *uio; struct buf *bp; uint64_t file_size, vblk; uint32_t blocksize; ssize_t towrite, off; daddr_t lbn; ssize_t resid; int error, ioflag, modified; vp = ap->a_vp; uio = ap->a_uio; ioflag = ap->a_ioflag; node = VTON(vp); fsdev = node->nn_nandfsdev; if (nandfs_fs_full(fsdev)) return (ENOSPC); DPRINTF(WRITE, ("nandfs_write called %#zx at %#jx\n", uio->uio_resid, (uintmax_t)uio->uio_offset)); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); blocksize = fsdev->nd_blocksize; file_size = node->nn_inode.i_size; switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = file_size; break; case VDIR: return (EISDIR); case VLNK: break; default: panic("%s: bad file type vp: %p", __func__, vp); } /* If explicitly asked to append, uio_offset can be wrong? 
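The read loop above turns a file offset into a (block, offset-in-block, length) triple per pass, using the usual AND-with-(blocksize - 1) trick, which assumes a power-of-two block size. A runnable sketch of the arithmetic:

#include <stdint.h>
#include <assert.h>

/*
 * Offset arithmetic from nandfs_read() above, for a power-of-two
 * block size: the AND with (blocksize - 1) is the cheap substitute
 * for the modulo, and each pass is clamped to the end of one block.
 */
struct span {
	uint64_t lbn;                   /* logical block number */
	uint32_t off;                   /* byte offset within that block */
	uint32_t len;                   /* bytes to copy this pass */
};

static struct span
next_span(uint64_t file_off, uint64_t resid, uint32_t blocksize)
{
	struct span s;

	s.lbn = file_off / blocksize;
	s.off = file_off & (blocksize - 1);
	s.len = (uint32_t)(resid < blocksize - s.off ?
	    resid : blocksize - s.off);
	return (s);
}

int
main(void)
{
	/* 10 KB left to read at offset 0x1234 with 4 KB blocks. */
	struct span s = next_span(0x1234, 10240, 4096);

	assert(s.lbn == 1 && s.off == 0x234 && s.len == 4096 - 0x234);
	return (0);
}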
*/ if (ioflag & IO_APPEND) uio->uio_offset = file_size; resid = uio->uio_resid; modified = error = 0; while (uio->uio_resid) { lbn = uio->uio_offset / blocksize; off = uio->uio_offset & (blocksize - 1); towrite = omin(uio->uio_resid, blocksize - off); DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x)\n", __func__, (uintmax_t)lbn, towrite, blocksize)); error = nandfs_bmap_lookup(node, lbn, &vblk); if (error) break; DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x) " "vblk=%jx\n", __func__, (uintmax_t)lbn, towrite, blocksize, vblk)); if (vblk != 0) error = nandfs_bread(node, lbn, NOCRED, 0, &bp); else error = nandfs_bcreate(node, lbn, NOCRED, 0, &bp); DPRINTF(WRITE, ("%s: vp %p bread bp %p lbn %#jx\n", __func__, vp, bp, (uintmax_t)lbn)); if (error) { if (bp) brelse(bp); break; } error = uiomove((char *)bp->b_data + off, (int)towrite, uio); if (error) break; error = nandfs_dirty_buf(bp, 0); if (error) break; modified++; } /* XXX proper handling when only part of file was properly written */ if (modified) { if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) node->nn_inode.i_mode &= ~(ISUID | ISGID); if (file_size < uio->uio_offset + uio->uio_resid) { node->nn_inode.i_size = uio->uio_offset + uio->uio_resid; node->nn_flags |= IN_CHANGE | IN_UPDATE; vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid); nandfs_itimes(vp); } } DPRINTF(WRITE, ("%s: return:%d\n", __func__, error)); return (error); } static int nandfs_lookup(struct vop_cachedlookup_args *ap) { struct vnode *dvp, **vpp; struct componentname *cnp; struct ucred *cred; struct thread *td; struct nandfs_node *dir_node, *node; struct nandfsmount *nmp; uint64_t ino, off; const char *name; int namelen, nameiop, islastcn, mounted_ro; int error, found; DPRINTF(VNCALL, ("%s\n", __func__)); dvp = ap->a_dvp; vpp = ap->a_vpp; *vpp = NULL; cnp = ap->a_cnp; cred = cnp->cn_cred; td = cnp->cn_thread; dir_node = VTON(dvp); nmp = dir_node->nn_nmp; /* Simplify/clarification flags */ nameiop = cnp->cn_nameiop; islastcn = cnp->cn_flags & ISLASTCN; mounted_ro = dvp->v_mount->mnt_flag & MNT_RDONLY; /* * If requesting a modify on the last path element on a read-only * filingsystem, reject lookup; */ if (islastcn && mounted_ro && (nameiop == DELETE || nameiop == RENAME)) return (EROFS); if (dir_node->nn_inode.i_links_count == 0) return (ENOENT); /* * Obviously, the file is not (anymore) in the namecache, we have to * search for it. There are three basic cases: '.', '..' and others. * * Following the guidelines of VOP_LOOKUP manpage and tmpfs. */ error = 0; if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) { DPRINTF(LOOKUP, ("\tlookup '.'\n")); /* Special case 1 '.' */ VREF(dvp); *vpp = dvp; /* Done */ } else if (cnp->cn_flags & ISDOTDOT) { /* Special case 2 '..' 
*/ DPRINTF(LOOKUP, ("\tlookup '..'\n")); /* Get our node */ name = ".."; namelen = 2; error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino, &found, &off); if (error) goto out; if (!found) error = ENOENT; /* First unlock parent */ VOP_UNLOCK(dvp, 0); if (error == 0) { DPRINTF(LOOKUP, ("\tfound '..'\n")); /* Try to create/reuse the node */ error = nandfs_get_node(nmp, ino, &node); if (!error) { DPRINTF(LOOKUP, ("\tnode retrieved/created OK\n")); *vpp = NTOV(node); } } /* Try to relock parent */ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); } else { DPRINTF(LOOKUP, ("\tlookup file\n")); /* All other files */ /* Look up filename in the directory returning its inode */ name = cnp->cn_nameptr; namelen = cnp->cn_namelen; error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino, &found, &off); if (error) goto out; if (!found) { DPRINTF(LOOKUP, ("\tNOT found\n")); /* * UGH, didn't find name. If we're creating or * renaming on the last name this is OK and we ought * to return EJUSTRETURN if it's allowed to be created. */ error = ENOENT; if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { error = VOP_ACCESS(dvp, VWRITE, cred, td); if (!error) { /* keep the component name */ cnp->cn_flags |= SAVENAME; error = EJUSTRETURN; } } /* Done */ } else { if (ino == NANDFS_WHT_INO) cnp->cn_flags |= ISWHITEOUT; if ((cnp->cn_flags & ISWHITEOUT) && (nameiop == LOOKUP)) return (ENOENT); if ((nameiop == DELETE) && islastcn) { if ((cnp->cn_flags & ISWHITEOUT) && (cnp->cn_flags & DOWHITEOUT)) { cnp->cn_flags |= SAVENAME; dir_node->nn_diroff = off; return (EJUSTRETURN); } error = VOP_ACCESS(dvp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* Try to create/reuse the node */ error = nandfs_get_node(nmp, ino, &node); if (!error) { *vpp = NTOV(node); node->nn_diroff = off; } if ((dir_node->nn_inode.i_mode & ISVTX) && cred->cr_uid != 0 && cred->cr_uid != dir_node->nn_inode.i_uid && node->nn_inode.i_uid != cred->cr_uid) { vput(*vpp); *vpp = NULL; return (EPERM); } } else if ((nameiop == RENAME) && islastcn) { error = VOP_ACCESS(dvp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* Try to create/reuse the node */ error = nandfs_get_node(nmp, ino, &node); if (!error) { *vpp = NTOV(node); node->nn_diroff = off; } } else { /* Try to create/reuse the node */ error = nandfs_get_node(nmp, ino, &node); if (!error) { *vpp = NTOV(node); node->nn_diroff = off; } } } } out: /* * Store result in the cache if requested. If we are creating a file, * the file might not be found and thus putting it into the namecache * might be seen as negative caching.
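For a name that is not found, the lookup above may convert ENOENT into EJUSTRETURN, but only for a CREATE or RENAME of the last path component and only after a directory write-access check. A condensed decision helper; EJUSTRETURN is kernel-private, so it is defined here by hand, and the access check is simplified to a flag:

#include <errno.h>

#ifndef EJUSTRETURN
#define EJUSTRETURN (-2)        /* kernel-private value, not in userspace */
#endif

enum nameiop { LOOKUP_OP, CREATE_OP, DELETE_OP, RENAME_OP };

/*
 * Condensed decision for a missed name, following the VOP_LOOKUP
 * contract used above: only a CREATE or RENAME of the last component
 * may turn ENOENT into EJUSTRETURN, and only when the caller may
 * write the directory (the real code returns VOP_ACCESS's error).
 */
static int
lookup_miss(enum nameiop op, int is_last, int may_write_dir)
{
	if ((op == CREATE_OP || op == RENAME_OP) && is_last)
		return (may_write_dir ? EJUSTRETURN : EACCES);
	return (ENOENT);
}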
*/ if ((cnp->cn_flags & MAKEENTRY) != 0) cache_enter(dvp, *vpp, cnp); return (error); } static int nandfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct nandfs_node *node = VTON(vp); struct nandfs_inode *inode = &node->nn_inode; DPRINTF(VNCALL, ("%s: vp: %p\n", __func__, vp)); nandfs_itimes(vp); /* Basic info */ VATTR_NULL(vap); vap->va_atime.tv_sec = inode->i_mtime; vap->va_atime.tv_nsec = inode->i_mtime_nsec; vap->va_mtime.tv_sec = inode->i_mtime; vap->va_mtime.tv_nsec = inode->i_mtime_nsec; vap->va_ctime.tv_sec = inode->i_ctime; vap->va_ctime.tv_nsec = inode->i_ctime_nsec; vap->va_type = IFTOVT(inode->i_mode); vap->va_mode = inode->i_mode & ~S_IFMT; vap->va_nlink = inode->i_links_count; vap->va_uid = inode->i_uid; vap->va_gid = inode->i_gid; vap->va_rdev = inode->i_special; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_fileid = node->nn_ino; vap->va_size = inode->i_size; vap->va_blocksize = node->nn_nandfsdev->nd_blocksize; vap->va_gen = 0; vap->va_flags = inode->i_flags; vap->va_bytes = inode->i_blocks * vap->va_blocksize; vap->va_filerev = 0; vap->va_vaflags = 0; return (0); } static int nandfs_vtruncbuf(struct vnode *vp, uint64_t nblks) { struct nandfs_device *nffsdev; struct bufobj *bo; struct buf *bp, *nbp; bo = &vp->v_bufobj; nffsdev = VTON(vp)->nn_nandfsdev; ASSERT_VOP_LOCKED(vp, "nandfs_truncate"); restart: BO_LOCK(bo); restart_locked: TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < nblks) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) goto restart_locked; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~(B_ASYNC | B_MANAGED); BO_UNLOCK(bo); brelse(bp); BO_LOCK(bo); } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < nblks) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart; bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~(B_ASYNC | B_MANAGED); brelse(bp); nandfs_dirty_bufs_decrement(nffsdev); BO_LOCK(bo); } BO_UNLOCK(bo); return (0); } static int nandfs_truncate(struct vnode *vp, uint64_t newsize) { struct nandfs_device *nffsdev; struct nandfs_node *node; struct nandfs_inode *inode; struct buf *bp = NULL; uint64_t oblks, nblks, vblk, size, rest; int error; node = VTON(vp); nffsdev = node->nn_nandfsdev; inode = &node->nn_inode; /* Calculate end of file */ size = inode->i_size; if (newsize == size) { node->nn_flags |= IN_CHANGE | IN_UPDATE; nandfs_itimes(vp); return (0); } if (newsize > size) { inode->i_size = newsize; vnode_pager_setsize(vp, newsize); node->nn_flags |= IN_CHANGE | IN_UPDATE; nandfs_itimes(vp); return (0); } nblks = howmany(newsize, nffsdev->nd_blocksize); oblks = howmany(size, nffsdev->nd_blocksize); rest = newsize % nffsdev->nd_blocksize; if (rest) { error = nandfs_bmap_lookup(node, nblks - 1, &vblk); if (error) return (error); if (vblk != 0) error = nandfs_bread(node, nblks - 1, NOCRED, 0, &bp); else error = nandfs_bcreate(node, nblks - 1, NOCRED, 0, &bp); if (error) { if (bp) brelse(bp); return (error); } bzero((char *)bp->b_data + rest, (u_int)(nffsdev->nd_blocksize - rest)); error = nandfs_dirty_buf(bp, 0); if (error) return (error); } DPRINTF(VNCALL, ("%s: vp %p oblks %jx nblks %jx\n", __func__, vp, oblks, nblks)); error = nandfs_bmap_truncate_mapping(node, oblks - 1, nblks - 1); if (error) { if (bp) nandfs_undirty_buf(bp); return (error); } error = nandfs_vtruncbuf(vp, nblks); if (error) { if (bp) nandfs_undirty_buf(bp); return 
(error); } inode->i_size = newsize; vnode_pager_setsize(vp, newsize); node->nn_flags |= IN_CHANGE | IN_UPDATE; nandfs_itimes(vp); return (error); } static void nandfs_itimes_locked(struct vnode *vp) { struct nandfs_node *node; struct nandfs_inode *inode; struct timespec ts; ASSERT_VI_LOCKED(vp, __func__); node = VTON(vp); inode = &node->nn_inode; if ((node->nn_flags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if (((vp->v_mount->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || (node->nn_flags & (IN_CHANGE | IN_UPDATE))) node->nn_flags |= IN_MODIFIED; vfs_timestamp(&ts); if (node->nn_flags & IN_UPDATE) { inode->i_mtime = ts.tv_sec; inode->i_mtime_nsec = ts.tv_nsec; } if (node->nn_flags & IN_CHANGE) { inode->i_ctime = ts.tv_sec; inode->i_ctime_nsec = ts.tv_nsec; } node->nn_flags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } void nandfs_itimes(struct vnode *vp) { VI_LOCK(vp); nandfs_itimes_locked(vp); VI_UNLOCK(vp); } static int nandfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) { struct nandfs_node *node = VTON(vp); struct nandfs_inode *inode = &node->nn_inode; uint16_t nmode; int error = 0; DPRINTF(VNCALL, ("%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp, mode, cred, td)); /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. Both of these are allowed in * jail(8). */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) return (EFTYPE); } if (!groupmember(inode->i_gid, cred) && (mode & ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); if (error) return (error); } /* * Deny setting setuid if we are not the file owner. */ if ((mode & ISUID) && inode->i_uid != cred->cr_uid) { error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0); if (error) return (error); } nmode = inode->i_mode; nmode &= ~ALLPERMS; nmode |= (mode & ALLPERMS); inode->i_mode = nmode; node->nn_flags |= IN_CHANGE; DPRINTF(VNCALL, ("%s: to mode %x\n", __func__, nmode)); return (error); } static int nandfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct thread *td) { struct nandfs_node *node = VTON(vp); struct nandfs_inode *inode = &node->nn_inode; uid_t ouid; gid_t ogid; int error = 0; if (uid == (uid_t)VNOVAL) uid = inode->i_uid; if (gid == (gid_t)VNOVAL) gid = inode->i_gid; /* * To modify the ownership of a file, must possess VADMIN for that * file. */ if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) return (error); /* * To change the owner of a file, or change the group of a file to a * group of which we are not a member, the caller must have * privilege. 
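The mode update in nandfs_chmod() above keeps the file-type field and swaps only the permission bits. That two-mask merge, runnable:

#include <stdint.h>
#include <assert.h>

#define ALLPERMS 07777          /* setuid/setgid/sticky + rwxrwxrwx */
#define S_IFMT_  0170000        /* file-type field, must survive chmod */

/*
 * The bit surgery at the heart of nandfs_chmod() above: replace only
 * the permission field, leaving the file-type bits untouched.
 */
static uint16_t
merge_mode(uint16_t cur, uint16_t req)
{
	return ((cur & ~ALLPERMS) | (req & ALLPERMS));
}

int
main(void)
{
	/* A 0644 regular file chmod'ed to 0600 stays a regular file. */
	uint16_t m = merge_mode(0100644, 0600);

	assert((m & S_IFMT_) == 0100000 && (m & ALLPERMS) == 0600);
	return (0);
}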
*/ if (((uid != inode->i_uid && uid != cred->cr_uid) || (gid != inode->i_gid && !groupmember(gid, cred))) && (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) return (error); ogid = inode->i_gid; ouid = inode->i_uid; inode->i_gid = gid; inode->i_uid = uid; node->nn_flags |= IN_CHANGE; if ((inode->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) inode->i_mode &= ~(ISUID | ISGID); } DPRINTF(VNCALL, ("%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td)); return (0); } static int nandfs_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); struct nandfs_inode *inode = &node->nn_inode; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct thread *td = curthread; uint32_t flags; int error = 0; if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { DPRINTF(VNCALL, ("%s: unsettable attribute\n", __func__)); return (EINVAL); } if (vap->va_flags != VNOVAL) { DPRINTF(VNCALL, ("%s: vp:%p td:%p flags:%lx\n", __func__, vp, td, vap->va_flags)); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * Callers may only modify the file flags on objects they * have VADMIN rights for. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Unprivileged processes are not permitted to unset system * flags, or modify flags if any system flags are set. * Privileged non-jail processes may not modify system flags * if securelevel > 0 and any existing system flags are set. * Privileged jail processes behave like privileged non-jail * processes if the security.jail.chflags_allowed sysctl is * non-zero; otherwise, they behave like unprivileged * processes.
*/ flags = inode->i_flags; if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { error = securelevel_gt(cred, 0); if (error) return (error); } /* Snapshot flag cannot be set or cleared */ if (((vap->va_flags & SF_SNAPSHOT) != 0 && (flags & SF_SNAPSHOT) == 0) || ((vap->va_flags & SF_SNAPSHOT) == 0 && (flags & SF_SNAPSHOT) != 0)) return (EPERM); inode->i_flags = vap->va_flags; } else { if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || (vap->va_flags & UF_SETTABLE) != vap->va_flags) return (EPERM); flags &= SF_SETTABLE; flags |= (vap->va_flags & UF_SETTABLE); inode->i_flags = flags; } node->nn_flags |= IN_CHANGE; if (vap->va_flags & (IMMUTABLE | APPEND)) return (0); } if (inode->i_flags & (IMMUTABLE | APPEND)) return (EPERM); if (vap->va_size != (u_quad_t)VNOVAL) { DPRINTF(VNCALL, ("%s: vp:%p td:%p size:%jx\n", __func__, vp, td, (uintmax_t)vap->va_size)); switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((inode->i_flags & SF_SNAPSHOT) != 0) return (EPERM); break; default: return (0); } if (vap->va_size > node->nn_nandfsdev->nd_maxfilesize) return (EFBIG); KASSERT((vp->v_type == VREG), ("Set size %d", vp->v_type)); nandfs_truncate(vp, vap->va_size); node->nn_flags |= IN_CHANGE; return (0); } if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); DPRINTF(VNCALL, ("%s: vp:%p td:%p uid/gid %x/%x\n", __func__, vp, td, vap->va_uid, vap->va_gid)); error = nandfs_chown(vp, vap->va_uid, vap->va_gid, cred, td); if (error) return (error); } if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); DPRINTF(VNCALL, ("%s: vp:%p td:%p mode %x\n", __func__, vp, td, vap->va_mode)); error = nandfs_chmod(vp, (int)vap->va_mode, cred, td); if (error) return (error); } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_birthtime.tv_sec != VNOVAL) { DPRINTF(VNCALL, ("%s: vp:%p td:%p time a/m/b %jx/%jx/%jx\n", __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec, (uintmax_t)vap->va_mtime.tv_sec, (uintmax_t)vap->va_birthtime.tv_sec)); if (vap->va_atime.tv_sec != VNOVAL) node->nn_flags |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) node->nn_flags |= IN_CHANGE | IN_UPDATE; if (vap->va_birthtime.tv_sec != VNOVAL) node->nn_flags |= IN_MODIFIED; nandfs_itimes(vp); return (0); } return (0); } static int nandfs_open(struct vop_open_args *ap) { struct nandfs_node *node = VTON(ap->a_vp); uint64_t filesize; DPRINTF(VNCALL, ("nandfs_open called ap->a_mode %x\n", ap->a_mode)); if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) return (EOPNOTSUPP); if ((node->nn_inode.i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); filesize = node->nn_inode.i_size; vnode_create_vobject(ap->a_vp, filesize, ap->a_td); return (0); } static int nandfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node)); mtx_lock(&vp->v_interlock); if (vp->v_usecount > 1) nandfs_itimes_locked(vp); mtx_unlock(&vp->v_interlock); return (0); } static int nandfs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode) { /* Check if we are allowed to write */ switch (vap->va_type) { case VDIR: case VLNK: case VREG: /* * Normal nodes: check if we're on a read-only mounted * filingsystem and bomb out if we're trying to write. 
*/ if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); break; case VBLK: case VCHR: case VSOCK: case VFIFO: /* * Special nodes: even on read-only mounted filingsystems * these are allowed to be written to if permissions allow. */ break; default: /* No idea what this is */ return (EINVAL); } /* No one may write immutable files */ if ((mode & VWRITE) && (VTON(vp)->nn_inode.i_flags & IMMUTABLE)) return (EPERM); return (0); } static int nandfs_check_permitted(struct vnode *vp, struct vattr *vap, mode_t mode, struct ucred *cred) { return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, mode, cred, NULL)); } static int nandfs_advlock(struct vop_advlock_args *ap) { struct nandfs_node *nvp; quad_t size; nvp = VTON(ap->a_vp); size = nvp->nn_inode.i_size; return (lf_advlock(ap, &(nvp->nn_lockf), size)); } static int nandfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; struct ucred *cred = ap->a_cred; struct vattr vap; int error; DPRINTF(VNCALL, ("%s: vp:%p mode: %x\n", __func__, vp, accmode)); error = VOP_GETATTR(vp, &vap, NULL); if (error) return (error); error = nandfs_check_possible(vp, &vap, accmode); if (error) return (error); error = nandfs_check_permitted(vp, &vap, accmode, cred); return (error); } static int nandfs_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *nvp = VTON(vp); printf("\tvp=%p, nandfs_node=%p\n", vp, nvp); printf("nandfs inode %#jx\n", (uintmax_t)nvp->nn_ino); printf("flags = 0x%b\n", (u_int)nvp->nn_flags, PRINT_NODE_FLAGS); return (0); } static void nandfs_read_filebuf(struct nandfs_node *node, struct buf *bp) { struct nandfs_device *nandfsdev = node->nn_nandfsdev; struct buf *nbp; nandfs_daddr_t vblk, pblk; nandfs_lbn_t from; uint32_t blocksize; int error = 0; int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE; /* * Translate all the block sectors into a series of buffers to read * asynchronously from the nandfs device. Note that this lookup may * induce readin's too. 
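 *
 * Schematically, the translation pipeline implemented below looks like
 * this (a simplified sketch with the error paths elided):
 *
 *	nandfs_lbn_t lbn = bp->b_blkno;		// file-relative block
 *	nandfs_daddr_t vblk, pblk;
 *	nandfs_bmap_lookup(node, lbn, &vblk);	// file block -> virtual block
 *	nandfs_vtop(node, vblk, &pblk);		// virtual -> physical block
 *	if (vblk == 0)				// 0 marks a hole: zero-fill
 *		vfs_bio_clrbuf(bp);
 *	else					// otherwise go to the device
 *		bp->b_blkno = pblk * (blocksize / DEV_BSIZE);
 *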
*/ blocksize = nandfsdev->nd_blocksize; if (bp->b_bcount / blocksize != 1) panic("invalid b_count in bp %p\n", bp); from = bp->b_blkno; DPRINTF(READ, ("\tread in from inode %#jx blkno %#jx" " count %#lx\n", (uintmax_t)node->nn_ino, from, bp->b_bcount)); /* Get virtual block numbers for the vnode's buffer span */ error = nandfs_bmap_lookup(node, from, &vblk); if (error) { bp->b_error = EINVAL; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } /* Translate virtual block numbers to physical block numbers */ error = nandfs_vtop(node, vblk, &pblk); if (error) { bp->b_error = EINVAL; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } /* Issue translated blocks */ bp->b_resid = bp->b_bcount; /* Note virtual block 0 marks not mapped */ if (vblk == 0) { vfs_bio_clrbuf(bp); bufdone(bp); return; } nbp = bp; nbp->b_blkno = pblk * blk2dev; bp->b_iooffset = dbtob(nbp->b_blkno); MPASS(bp->b_iooffset >= 0); BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, nbp); nandfs_vblk_set(bp, vblk); DPRINTF(READ, ("read_filebuf : ino %#jx blk %#jx -> " "%#jx -> %#jx [bp %p]\n", (uintmax_t)node->nn_ino, (uintmax_t)(from), (uintmax_t)vblk, (uintmax_t)pblk, nbp)); } static void nandfs_write_filebuf(struct nandfs_node *node, struct buf *bp) { struct nandfs_device *nandfsdev = node->nn_nandfsdev; bp->b_iooffset = dbtob(bp->b_blkno); MPASS(bp->b_iooffset >= 0); BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, bp); } static int nandfs_strategy(struct vop_strategy_args *ap) { struct vnode *vp = ap->a_vp; struct buf *bp = ap->a_bp; struct nandfs_node *node = VTON(vp); /* check if we ought to be here */ KASSERT((vp->v_type != VBLK && vp->v_type != VCHR), ("nandfs_strategy on type %d", vp->v_type)); /* Translate if needed and pass on */ if (bp->b_iocmd == BIO_READ) { nandfs_read_filebuf(node, bp); return (0); } /* Send to segment collector */ nandfs_write_filebuf(node, bp); return (0); } static int nandfs_readdir(struct vop_readdir_args *ap) { struct uio *uio = ap->a_uio; struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); struct nandfs_dir_entry *ndirent; struct dirent dirent; struct buf *bp; uint64_t file_size, diroffset, transoffset, blkoff; uint64_t blocknr; uint32_t blocksize = node->nn_nandfsdev->nd_blocksize; uint8_t *pos, name_len; int error; DPRINTF(READDIR, ("nandfs_readdir called\n")); if (vp->v_type != VDIR) return (ENOTDIR); file_size = node->nn_inode.i_size; DPRINTF(READDIR, ("nandfs_readdir filesize %jd resid %zd\n", (uintmax_t)file_size, uio->uio_resid )); /* We are called just as long as we keep on pushing data in */ error = 0; if ((uio->uio_offset < file_size) && (uio->uio_resid >= sizeof(struct dirent))) { diroffset = uio->uio_offset; transoffset = diroffset; blocknr = diroffset / blocksize; blkoff = diroffset % blocksize; error = nandfs_bread(node, blocknr, NOCRED, 0, &bp); if (error) { brelse(bp); return (EIO); } while (diroffset < file_size) { DPRINTF(READDIR, ("readdir : offset = %"PRIu64"\n", diroffset)); if (blkoff >= blocksize) { blkoff = 0; blocknr++; brelse(bp); error = nandfs_bread(node, blocknr, NOCRED, 0, &bp); if (error) { brelse(bp); return (EIO); } } /* Read in one dirent */ pos = (uint8_t *)bp->b_data + blkoff; ndirent = (struct nandfs_dir_entry *)pos; name_len = ndirent->name_len; memset(&dirent, 0, sizeof(struct dirent)); dirent.d_fileno = ndirent->inode; if (dirent.d_fileno) { dirent.d_type = ndirent->file_type; dirent.d_namlen = name_len; strncpy(dirent.d_name, ndirent->name, name_len); dirent.d_reclen = GENERIC_DIRSIZ(&dirent); DPRINTF(READDIR, ("copying `%*.*s`\n", name_len, 
name_len, dirent.d_name)); } /* * If there isn't enough space in the uio to return a * whole dirent, break off read */ if (uio->uio_resid < GENERIC_DIRSIZ(&dirent)) break; /* Transfer */ if (dirent.d_fileno) uiomove(&dirent, GENERIC_DIRSIZ(&dirent), uio); /* Advance */ diroffset += ndirent->rec_len; blkoff += ndirent->rec_len; /* Remember the last entry we transferred */ transoffset = diroffset; } brelse(bp); /* Pass on last transferred offset */ uio->uio_offset = transoffset; } if (ap->a_eofflag) *ap->a_eofflag = (uio->uio_offset >= file_size); return (error); } static int nandfs_dirempty(struct vnode *dvp, uint64_t parentino, struct ucred *cred) { struct nandfs_node *dnode = VTON(dvp); struct nandfs_dir_entry *dirent; uint64_t file_size = dnode->nn_inode.i_size; uint64_t blockcount = dnode->nn_inode.i_blocks; uint64_t blocknr; uint32_t blocksize = dnode->nn_nandfsdev->nd_blocksize; uint32_t limit; uint32_t off; uint8_t *pos; struct buf *bp; int error; DPRINTF(LOOKUP, ("%s: dvp %p parentino %#jx cred %p\n", __func__, dvp, (uintmax_t)parentino, cred)); KASSERT((file_size != 0), ("nandfs_dirempty for NULL dir %p", dvp)); blocknr = 0; while (blocknr < blockcount) { error = nandfs_bread(dnode, blocknr, NOCRED, 0, &bp); if (error) { brelse(bp); return (0); } pos = (uint8_t *)bp->b_data; off = 0; if (blocknr == (blockcount - 1)) limit = file_size % blocksize; else limit = blocksize; while (off < limit) { dirent = (struct nandfs_dir_entry *)(pos + off); off += dirent->rec_len; if (dirent->inode == 0) continue; switch (dirent->name_len) { case 0: break; case 1: if (dirent->name[0] != '.') goto notempty; KASSERT(dirent->inode == dnode->nn_ino, (".'s inode does not match dir")); break; case 2: if (dirent->name[0] != '.' || dirent->name[1] != '.') goto notempty; KASSERT(dirent->inode == parentino, ("..'s inode does not match parent")); break; default: goto notempty; } } brelse(bp); blocknr++; } return (1); notempty: brelse(bp); return (0); } static int nandfs_link(struct vop_link_args *ap) { struct vnode *tdvp = ap->a_tdvp; struct vnode *vp = ap->a_vp; struct componentname *cnp = ap->a_cnp; struct nandfs_node *node = VTON(vp); struct nandfs_inode *inode = &node->nn_inode; int error; if (inode->i_links_count >= LINK_MAX) return (EMLINK); if (inode->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* Update link count */ inode->i_links_count++; /* Add dir entry */ error = nandfs_add_dirent(tdvp, node->nn_ino, cnp->cn_nameptr, cnp->cn_namelen, IFTODT(inode->i_mode)); if (error) { inode->i_links_count--; } node->nn_flags |= IN_CHANGE; nandfs_itimes(vp); DPRINTF(VNCALL, ("%s: tdvp %p vp %p cnp %p\n", __func__, tdvp, vp, cnp)); return (error); } static int nandfs_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); struct nandfs_node *dir_node = VTON(dvp); struct nandfsmount *nmp = dir_node->nn_nmp; struct nandfs_node *node; int error; DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp)); if (nandfs_fs_full(dir_node->nn_nandfsdev)) return (ENOSPC); /* Create new vnode/inode */ error = nandfs_node_create(nmp, &node, mode); if (error) return (error); node->nn_inode.i_gid = dir_node->nn_inode.i_gid; node->nn_inode.i_uid = cnp->cn_cred->cr_uid; /* Add new dir entry */ error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, cnp->cn_namelen, IFTODT(mode)); if (error) { if (nandfs_node_destroy(node)) { nandfs_error("%s: error destroying node %p\n", __func__, node); 
} return (error); } *vpp = NTOV(node); if ((cnp->cn_flags & MAKEENTRY) != 0) cache_enter(dvp, *vpp, cnp); DPRINTF(VNCALL, ("created file vp %p nandnode %p ino %jx\n", *vpp, node, (uintmax_t)node->nn_ino)); return (0); } static int nandfs_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct nandfs_node *node = VTON(vp); struct nandfs_node *dnode = VTON(dvp); struct componentname *cnp = ap->a_cnp; DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx link %d\n", __func__, dvp, vp, node, (uintmax_t)node->nn_ino, node->nn_inode.i_links_count)); if (vp->v_type == VDIR) return (EISDIR); /* Files marked as immutable or append-only cannot be deleted. */ if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) || (dnode->nn_inode.i_flags & APPEND)) return (EPERM); nandfs_remove_dirent(dvp, node, cnp); node->nn_inode.i_links_count--; node->nn_flags |= IN_CHANGE; return (0); } /* * Check if source directory is in the path of the target directory. * Target is supplied locked, source is unlocked. * The target is always vput before returning. */ static int nandfs_checkpath(struct nandfs_node *src, struct nandfs_node *dest, struct ucred *cred) { struct vnode *vp; int error, rootino; struct nandfs_dir_entry dirent; vp = NTOV(dest); if (src->nn_ino == dest->nn_ino) { error = EEXIST; goto out; } rootino = NANDFS_ROOT_INO; error = 0; if (dest->nn_ino == rootino) goto out; for (;;) { if (vp->v_type != VDIR) { error = ENOTDIR; break; } error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirent, NANDFS_DIR_REC_LEN(2), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL, NULL); if (error != 0) break; if (dirent.name_len != 2 || dirent.name[0] != '.' || dirent.name[1] != '.') { error = ENOTDIR; break; } if (dirent.inode == src->nn_ino) { error = EINVAL; break; } if (dirent.inode == rootino) break; vput(vp); if ((error = VFS_VGET(vp->v_mount, dirent.inode, LK_EXCLUSIVE, &vp)) != 0) { vp = NULL; break; } } out: if (error == ENOTDIR) printf("checkpath: .. not a directory\n"); if (vp != NULL) vput(vp); return (error); } static int nandfs_rename(struct vop_rename_args *ap) { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; struct nandfs_node *fdnode, *fnode, *fnode1; struct nandfs_node *tdnode = VTON(tdvp); struct nandfs_node *tnode; uint32_t tdflags, fflags, fdflags; uint16_t mode; DPRINTF(VNCALL, ("%s: fdvp:%p fvp:%p tdvp:%p tvp:%p\n", __func__, fdvp, fvp, tdvp, tvp)); /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); return (error); } tdflags = tdnode->nn_inode.i_flags; if (tvp && ((VTON(tvp)->nn_inode.i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (tdflags & APPEND))) { error = EPERM; goto abortit; } /* * Renaming a file to itself has no effect. The upper layers should * not call us in that case. Temporarily just warn if they do. 
*/ if (fvp == tvp) { printf("nandfs_rename: fvp == tvp (can't happen)\n"); error = 0; goto abortit; } if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) goto abortit; fdnode = VTON(fdvp); fnode = VTON(fvp); if (fnode->nn_inode.i_links_count >= LINK_MAX) { VOP_UNLOCK(fvp, 0); error = EMLINK; goto abortit; } fflags = fnode->nn_inode.i_flags; fdflags = fdnode->nn_inode.i_flags; if ((fflags & (NOUNLINK | IMMUTABLE | APPEND)) || (fdflags & APPEND)) { VOP_UNLOCK(fvp, 0); error = EPERM; goto abortit; } mode = fnode->nn_inode.i_mode; if ((mode & S_IFMT) == S_IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || (fdvp == fvp) || ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) || (fnode->nn_flags & IN_RENAME)) { VOP_UNLOCK(fvp, 0); error = EINVAL; goto abortit; } fnode->nn_flags |= IN_RENAME; doingdirectory = 1; DPRINTF(VNCALL, ("%s: doingdirectory dvp %p\n", __func__, tdvp)); oldparent = fdnode->nn_ino; } vrele(fdvp); tnode = NULL; if (tvp) tnode = VTON(tvp); /* * Bump link count on fvp while we are moving stuff around. If we * crash before completing the work, the link count may be wrong * but correctable. */ fnode->nn_inode.i_links_count++; /* Check for in path moving XXX */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); VOP_UNLOCK(fvp, 0); if (oldparent != tdnode->nn_ino) newparent = tdnode->nn_ino; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (tnode != NULL) vput(tvp); error = nandfs_checkpath(fnode, tdnode, tcnp->cn_cred); if (error) goto out; VREF(tdvp); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; vrele(tdvp); tdnode = VTON(tdvp); tnode = NULL; if (tvp) tnode = VTON(tvp); } /* * If the target doesn't exist, link the target to the source and * unlink the source. Otherwise, rewrite the target directory to * reference the source and remove the original entry. */ if (tvp == NULL) { /* * Account for ".." in new directory. */ if (doingdirectory && fdvp != tdvp) tdnode->nn_inode.i_links_count++; DPRINTF(VNCALL, ("%s: new entry in dvp:%p\n", __func__, tdvp)); /* * Add name in new directory. */ error = nandfs_add_dirent(tdvp, fnode->nn_ino, tcnp->cn_nameptr, tcnp->cn_namelen, IFTODT(fnode->nn_inode.i_mode)); if (error) { if (doingdirectory && fdvp != tdvp) tdnode->nn_inode.i_links_count--; goto bad; } vput(tdvp); } else { /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((tdnode->nn_inode.i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != tdnode->nn_inode.i_uid && tnode->nn_inode.i_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ mode = tnode->nn_inode.i_mode; if ((mode & S_IFMT) == S_IFDIR) { if (!nandfs_dirempty(tvp, tdnode->nn_ino, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } /* * Update name cache since directory is going away. */ cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } DPRINTF(VNCALL, ("%s: update entry dvp:%p\n", __func__, tdvp)); /* * Change name tcnp in tdvp to point at fvp. 
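 *
 * Conceptually (a hedged sketch; nandfs_update_dirent() below is the
 * authoritative implementation), the existing target entry is reused
 * and merely redirected, so no directory block has to grow or shrink:
 *
 *	// Locate the entry in tdvp that currently names tnode ...
 *	dirent->inode = fnode->nn_ino;	// ... and point it at fnode instead
 *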
*/ error = nandfs_update_dirent(tdvp, fnode, tnode); if (error) goto bad; if (doingdirectory && !newparent) tdnode->nn_inode.i_links_count--; vput(tdvp); tnode->nn_inode.i_links_count--; vput(tvp); tnode = NULL; } /* * Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp != NULL) { fnode1 = VTON(fvp); fdnode = VTON(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("nandfs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } DPRINTF(VNCALL, ("%s: unlink source fnode:%p\n", __func__, fnode)); /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; its link * count of three would cause a rmdir to fail with ENOTEMPTY. * The IN_RENAME flag ensures that it cannot be moved by another * rename. */ if (fnode != fnode1) { if (doingdirectory) panic("nandfs: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { DPRINTF(VNCALL, ("%s: new parent %#jx -> %#jx\n", __func__, (uintmax_t) oldparent, (uintmax_t) newparent)); error = nandfs_update_parent_dir(fvp, newparent); if (!error) { fdnode->nn_inode.i_links_count--; fdnode->nn_flags |= IN_CHANGE; } } error = nandfs_remove_dirent(fdvp, fnode, fcnp); if (!error) { fnode->nn_inode.i_links_count--; fnode->nn_flags |= IN_CHANGE; } fnode->nn_flags &= ~IN_RENAME; } if (fdnode) vput(fdvp); if (fnode) vput(fvp); vrele(ap->a_fvp); return (error); bad: DPRINTF(VNCALL, ("%s: error:%d\n", __func__, error)); if (tnode) vput(NTOV(tnode)); vput(NTOV(tdnode)); out: if (doingdirectory) fnode->nn_flags &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { fnode->nn_inode.i_links_count--; fnode->nn_flags |= IN_CHANGE; fnode->nn_flags &= ~IN_RENAME; vput(fvp); } else vrele(fvp); return (error); } static int nandfs_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct nandfs_node *dir_node = VTON(dvp); struct nandfs_inode *dir_inode = &dir_node->nn_inode; struct nandfs_node *node; struct nandfsmount *nmp = dir_node->nn_nmp; uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); int error; DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp)); if (nandfs_fs_full(dir_node->nn_nandfsdev)) return (ENOSPC); if (dir_inode->i_links_count >= LINK_MAX) return (EMLINK); error = nandfs_node_create(nmp, &node, mode); if (error) return (error); node->nn_inode.i_gid = dir_node->nn_inode.i_gid; node->nn_inode.i_uid = cnp->cn_cred->cr_uid; *vpp = NTOV(node); error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, cnp->cn_namelen, IFTODT(mode)); if (error) { vput(*vpp); return (error); } dir_node->nn_inode.i_links_count++; dir_node->nn_flags |= IN_CHANGE; error = nandfs_init_dir(NTOV(node), node->nn_ino, dir_node->nn_ino); if (error) { vput(NTOV(node)); return (error); } DPRINTF(VNCALL, ("created dir vp %p nandnode %p ino %jx\n", *vpp, node, (uintmax_t)node->nn_ino)); return (0); } static int nandfs_mknod(struct vop_mknod_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct vattr *vap = 
ap->a_vap; uint16_t mode = MAKEIMODE(vap->va_type, vap->va_mode); struct componentname *cnp = ap->a_cnp; struct nandfs_node *dir_node = VTON(dvp); struct nandfsmount *nmp = dir_node->nn_nmp; struct nandfs_node *node; int error; if (nandfs_fs_full(dir_node->nn_nandfsdev)) return (ENOSPC); error = nandfs_node_create(nmp, &node, mode); if (error) return (error); node->nn_inode.i_gid = dir_node->nn_inode.i_gid; node->nn_inode.i_uid = cnp->cn_cred->cr_uid; if (vap->va_rdev != VNOVAL) node->nn_inode.i_special = vap->va_rdev; *vpp = NTOV(node); if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, cnp->cn_namelen, IFTODT(mode))) { vput(*vpp); return (ENOTDIR); } node->nn_flags |= IN_ACCESS | IN_CHANGE | IN_UPDATE; return (0); } static int nandfs_symlink(struct vop_symlink_args *ap) { struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); struct componentname *cnp = ap->a_cnp; struct nandfs_node *dir_node = VTON(dvp); struct nandfsmount *nmp = dir_node->nn_nmp; struct nandfs_node *node; int len, error; if (nandfs_fs_full(dir_node->nn_nandfsdev)) return (ENOSPC); error = nandfs_node_create(nmp, &node, S_IFLNK | mode); if (error) return (error); node->nn_inode.i_gid = dir_node->nn_inode.i_gid; node->nn_inode.i_uid = cnp->cn_cred->cr_uid; *vpp = NTOV(node); if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, cnp->cn_namelen, IFTODT(mode))) { vput(*vpp); return (ENOTDIR); } len = strlen(ap->a_target); error = vn_rdwr(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cnp->cn_cred, NOCRED, NULL, NULL); if (error) vput(*vpp); return (error); } static int nandfs_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } static int nandfs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nandfs_node *node, *dnode; uint32_t dflag, flag; int error = 0; node = VTON(vp); dnode = VTON(dvp); /* Files marked as immutable or append-only cannot be deleted. */ if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) || (dnode->nn_inode.i_flags & APPEND)) return (EPERM); DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx\n", __func__, dvp, vp, node, (uintmax_t)node->nn_ino)); if (node->nn_inode.i_links_count < 2) return (EINVAL); if (!nandfs_dirempty(vp, dnode->nn_ino, cnp->cn_cred)) return (ENOTEMPTY); /* Files marked as immutable or append-only cannot be deleted. 
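 *
 * For illustration, a hypothetical userland sketch (placeholder path):
 * once SF_IMMUTABLE is set, even root cannot remove the directory
 * until the flag is cleared again.
 *
 *	#include <sys/types.h>
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	chflags("/mnt/nandfs/d", SF_IMMUTABLE);	// arm the system flag
 *	rmdir("/mnt/nandfs/d");			// fails with EPERM
 *	chflags("/mnt/nandfs/d", 0);		// disarm it
 *	rmdir("/mnt/nandfs/d");			// now succeeds
 *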
*/ dflag = dnode->nn_inode.i_flags; flag = node->nn_inode.i_flags; if ((dflag & APPEND) || (flag & (NOUNLINK | IMMUTABLE | APPEND))) { return (EPERM); } if (vp->v_mountedhere != 0) return (EINVAL); nandfs_remove_dirent(dvp, node, cnp); dnode->nn_inode.i_links_count -= 1; dnode->nn_flags |= IN_CHANGE; cache_purge(dvp); error = nandfs_truncate(vp, (uint64_t)0); if (error) return (error); node->nn_inode.i_links_count -= 2; node->nn_flags |= IN_CHANGE; cache_purge(vp); return (error); } static int nandfs_fsync(struct vop_fsync_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); int locked; DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp, node, (uintmax_t)node->nn_ino)); /* * Start syncing vnode only if inode was modified or * there are some dirty buffers */ if (VTON(vp)->nn_flags & IN_MODIFIED || vp->v_bufobj.bo_dirty.bv_cnt) { locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); nandfs_wakeup_wait_sync(node->nn_nandfsdev, SYNCER_FSYNC); VOP_LOCK(vp, locked | LK_RETRY); } return (0); } static int nandfs_bmap(struct vop_bmap_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *nnode = VTON(vp); struct nandfs_device *nandfsdev = nnode->nn_nandfsdev; nandfs_daddr_t l2vmap, v2pmap; int error; int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE; DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp, nnode, (uintmax_t)nnode->nn_ino)); if (ap->a_bop != NULL) *ap->a_bop = &nandfsdev->nd_devvp->v_bufobj; if (ap->a_bnp == NULL) return (0); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; /* * Translate all the block sectors into a series of buffers to read * asynchronously from the nandfs device. Note that this lookup may * induce readin's too. */ /* Get virtual block numbers for the vnode's buffer span */ error = nandfs_bmap_lookup(nnode, ap->a_bn, &l2vmap); if (error) return (-1); /* Translate virtual block numbers to physical block numbers */ error = nandfs_vtop(nnode, l2vmap, &v2pmap); if (error) return (-1); /* Note virtual block 0 marks not mapped */ if (l2vmap == 0) *ap->a_bnp = -1; else *ap->a_bnp = v2pmap * blk2dev; /* in DEV_BSIZE */ DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx lblk %jx -> blk %jx\n", __func__, vp, nnode, (uintmax_t)nnode->nn_ino, (uintmax_t)ap->a_bn, (uintmax_t)*ap->a_bnp )); return (0); } static void nandfs_force_syncer(struct nandfsmount *nmp) { nmp->nm_flags |= NANDFS_FORCE_SYNCER; nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_FFORCE); } static int nandfs_ioctl(struct vop_ioctl_args *ap) { struct vnode *vp = ap->a_vp; u_long command = ap->a_command; caddr_t data = ap->a_data; struct nandfs_node *node = VTON(vp); struct nandfs_device *nandfsdev = node->nn_nandfsdev; struct nandfsmount *nmp = node->nn_nmp; uint64_t *tab, *cno; struct nandfs_seg_stat *nss; struct nandfs_cpmode *ncpm; struct nandfs_argv *nargv; struct nandfs_cpstat *ncp; int error; DPRINTF(VNCALL, ("%s: %x\n", __func__, (uint32_t)command)); error = priv_check(ap->a_td, PRIV_VFS_MOUNT); if (error) return (error); if (nmp->nm_ronly) { switch (command) { case NANDFS_IOCTL_GET_FSINFO: case NANDFS_IOCTL_GET_SUSTAT: case NANDFS_IOCTL_GET_CPINFO: case NANDFS_IOCTL_GET_CPSTAT: case NANDFS_IOCTL_GET_SUINFO: case NANDFS_IOCTL_GET_VINFO: case NANDFS_IOCTL_GET_BDESCS: break; default: return (EROFS); } } switch (command) { case NANDFS_IOCTL_GET_FSINFO: error = nandfs_get_fsinfo(nmp, (struct nandfs_fsinfo *)data); break; case NANDFS_IOCTL_GET_SUSTAT: nss = (struct nandfs_seg_stat *)data; error = nandfs_get_seg_stat(nandfsdev, 
nss); break; case NANDFS_IOCTL_CHANGE_CPMODE: ncpm = (struct nandfs_cpmode *)data; error = nandfs_chng_cpmode(nandfsdev->nd_cp_node, ncpm); nandfs_force_syncer(nmp); break; case NANDFS_IOCTL_GET_CPINFO: nargv = (struct nandfs_argv *)data; error = nandfs_get_cpinfo_ioctl(nandfsdev->nd_cp_node, nargv); break; case NANDFS_IOCTL_DELETE_CP: tab = (uint64_t *)data; error = nandfs_delete_cp(nandfsdev->nd_cp_node, tab[0], tab[1]); nandfs_force_syncer(nmp); break; case NANDFS_IOCTL_GET_CPSTAT: ncp = (struct nandfs_cpstat *)data; error = nandfs_get_cpstat(nandfsdev->nd_cp_node, ncp); break; case NANDFS_IOCTL_GET_SUINFO: nargv = (struct nandfs_argv *)data; error = nandfs_get_segment_info_ioctl(nandfsdev, nargv); break; case NANDFS_IOCTL_GET_VINFO: nargv = (struct nandfs_argv *)data; error = nandfs_get_dat_vinfo_ioctl(nandfsdev, nargv); break; case NANDFS_IOCTL_GET_BDESCS: nargv = (struct nandfs_argv *)data; error = nandfs_get_dat_bdescs_ioctl(nandfsdev, nargv); break; case NANDFS_IOCTL_SYNC: cno = (uint64_t *)data; nandfs_force_syncer(nmp); *cno = nandfsdev->nd_last_cno; error = 0; break; case NANDFS_IOCTL_MAKE_SNAP: cno = (uint64_t *)data; error = nandfs_make_snap(nandfsdev, cno); nandfs_force_syncer(nmp); break; case NANDFS_IOCTL_DELETE_SNAP: cno = (uint64_t *)data; error = nandfs_delete_snap(nandfsdev, *cno); nandfs_force_syncer(nmp); break; default: error = ENOTTY; break; } return (error); } /* * Whiteout vnode call */ static int nandfs_whiteout(struct vop_whiteout_args *ap) { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; int error = 0; switch (ap->a_flags) { case LOOKUP: return (0); case CREATE: /* Create a new directory whiteout */ #ifdef INVARIANTS if ((cnp->cn_flags & SAVENAME) == 0) panic("nandfs_whiteout: missing name"); #endif error = nandfs_add_dirent(dvp, NANDFS_WHT_INO, cnp->cn_nameptr, cnp->cn_namelen, DT_WHT); break; case DELETE: /* Remove an existing directory whiteout */ cnp->cn_flags &= ~DOWHITEOUT; error = nandfs_remove_dirent(dvp, NULL, cnp); break; default: panic("nandfs_whiteout: unknown op: %d", ap->a_flags); } return (error); } static int nandfs_pathconf(struct vop_pathconf_args *ap) { int error; error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; break; case _PC_NAME_MAX: *ap->a_retval = NANDFS_NAME_LEN; break; case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) *ap->a_retval = PIPE_BUF; else error = EINVAL; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; case _PC_ACL_EXTENDED: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: *ap->a_retval = 64; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; default: error = vop_stdpathconf(ap); break; } return (error); } static int nandfs_vnlock1(struct vop_lock1_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); int error, vi_locked; /* * XXX can vnode go away while we are sleeping? 
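 *
 * The intended pairing, sketched (an interpretation, not new code):
 * a successful vnode lock also holds the filesystem-wide nandfs lock,
 * and nandfs_vnunlock() drops the two in reverse order, which lets the
 * segment syncer quiesce vnode activity through that one lock.
 *
 *	NANDFS_WRITELOCKFLAGS(fsdev, nowait);	// filesystem-wide side
 *	vop_stdlock(ap);			// per-vnode side
 *	...
 *	vop_stdunlock(ap);
 *	NANDFS_WRITEUNLOCK(fsdev);
 *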
*/ vi_locked = mtx_owned(&vp->v_interlock); if (vi_locked) VI_UNLOCK(vp); error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev, ap->a_flags & LK_NOWAIT); if (vi_locked && !error) VI_LOCK(vp); if (error) return (error); error = vop_stdlock(ap); if (error) { NANDFS_WRITEUNLOCK(node->nn_nandfsdev); return (error); } return (0); } static int nandfs_vnunlock(struct vop_unlock_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); int error; error = vop_stdunlock(ap); if (error) return (error); NANDFS_WRITEUNLOCK(node->nn_nandfsdev); return (0); } /* * Global vfs data structures */ struct vop_vector nandfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = nandfs_access, .vop_advlock = nandfs_advlock, .vop_bmap = nandfs_bmap, .vop_close = nandfs_close, .vop_create = nandfs_create, .vop_fsync = nandfs_fsync, .vop_getattr = nandfs_getattr, .vop_inactive = nandfs_inactive, .vop_cachedlookup = nandfs_lookup, .vop_ioctl = nandfs_ioctl, .vop_link = nandfs_link, .vop_lookup = vfs_cache_lookup, .vop_mkdir = nandfs_mkdir, .vop_mknod = nandfs_mknod, .vop_open = nandfs_open, .vop_pathconf = nandfs_pathconf, .vop_print = nandfs_print, .vop_read = nandfs_read, .vop_readdir = nandfs_readdir, .vop_readlink = nandfs_readlink, .vop_reclaim = nandfs_reclaim, .vop_remove = nandfs_remove, .vop_rename = nandfs_rename, .vop_rmdir = nandfs_rmdir, .vop_whiteout = nandfs_whiteout, .vop_write = nandfs_write, .vop_setattr = nandfs_setattr, .vop_strategy = nandfs_strategy, .vop_symlink = nandfs_symlink, .vop_lock1 = nandfs_vnlock1, .vop_unlock = nandfs_vnunlock, }; struct vop_vector nandfs_system_vnodeops = { .vop_default = &default_vnodeops, .vop_close = nandfs_close, .vop_inactive = nandfs_inactive, .vop_reclaim = nandfs_reclaim, .vop_strategy = nandfs_strategy, .vop_fsync = nandfs_fsync, .vop_bmap = nandfs_bmap, .vop_access = VOP_PANIC, .vop_advlock = VOP_PANIC, .vop_create = VOP_PANIC, .vop_getattr = VOP_PANIC, .vop_cachedlookup = VOP_PANIC, .vop_ioctl = VOP_PANIC, .vop_link = VOP_PANIC, .vop_lookup = VOP_PANIC, .vop_mkdir = VOP_PANIC, .vop_mknod = VOP_PANIC, .vop_open = VOP_PANIC, .vop_pathconf = VOP_PANIC, .vop_print = VOP_PANIC, .vop_read = VOP_PANIC, .vop_readdir = VOP_PANIC, .vop_readlink = VOP_PANIC, .vop_remove = VOP_PANIC, .vop_rename = VOP_PANIC, .vop_rmdir = VOP_PANIC, .vop_whiteout = VOP_PANIC, .vop_write = VOP_PANIC, .vop_setattr = VOP_PANIC, .vop_symlink = VOP_PANIC, }; static int nandfsfifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node)); mtx_lock(&vp->v_interlock); if (vp->v_usecount > 1) nandfs_itimes_locked(vp); mtx_unlock(&vp->v_interlock); return (fifo_specops.vop_close(ap)); } struct vop_vector nandfs_fifoops = { .vop_default = &fifo_specops, .vop_fsync = VOP_PANIC, .vop_access = nandfs_access, .vop_close = nandfsfifo_close, .vop_getattr = nandfs_getattr, .vop_inactive = nandfs_inactive, .vop_pathconf = nandfs_pathconf, .vop_print = nandfs_print, .vop_read = VOP_PANIC, .vop_reclaim = nandfs_reclaim, .vop_setattr = nandfs_setattr, .vop_write = VOP_PANIC, .vop_lock1 = nandfs_vnlock1, .vop_unlock = nandfs_vnunlock, }; int nandfs_vinit(struct vnode *vp, uint64_t ino) { struct nandfs_node *node; ASSERT_VOP_LOCKED(vp, __func__); node = VTON(vp); /* Check if we're fetching the root */ if (ino == NANDFS_ROOT_INO) vp->v_vflag |= VV_ROOT; if (ino != NANDFS_GC_INO) vp->v_type = IFTOVT(node->nn_inode.i_mode); else vp->v_type = VREG; if (vp->v_type == VFIFO) vp->v_op = 
&nandfs_fifoops; return (0); } Index: stable/11/sys/i386/i386/in_cksum.c =================================================================== --- stable/11/sys/i386/i386/in_cksum.c (revision 331642) +++ stable/11/sys/i386/i386/in_cksum.c (revision 331643) @@ -1,493 +1,493 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from tahoe: in_cksum.c 1.2 86/01/05 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include /* * Checksum routine for Internet Protocol family headers. * * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. * * This implementation is 386 version. */ #undef ADDCARRY #define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff /* * icc needs to be special cased here, as the asm code below results * in broken code if compiled with icc. 
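 *
 * The REDUCE macros below fold a wide one's-complement accumulator
 * back into 16 bits. A worked example of the final fold (the value is
 * illustrative):
 *
 *	sum = 0x0001fffe;			// 32-bit partial sum
 *	sum = (sum & 0xffff) + (sum >> 16);	// 0xfffe + 0x0001 = 0xffff
 *	ADDCARRY(sum);				// already <= 0xffff: unchanged
 *	// transmitted checksum: ~sum & 0xffff == 0x0000
 *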
*/ #if !defined(__GNUCLIKE_ASM) || defined(__INTEL_COMPILER) /* non gcc parts stolen from sys/alpha/alpha/in_cksum.c */ #define REDUCE32 \ { \ q_util.q = sum; \ sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ } #define REDUCE16 \ { \ q_util.q = sum; \ l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ sum = l_util.s[0] + l_util.s[1]; \ ADDCARRY(sum); \ } #endif #define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} #if !defined(__GNUCLIKE_ASM) || defined(__INTEL_COMPILER) static const u_int32_t in_masks[] = { /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/ 0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */ 0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */ 0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */ 0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */ }; union l_util { u_int16_t s[2]; u_int32_t l; }; union q_util { u_int16_t s[4]; u_int32_t l[2]; u_int64_t q; }; static u_int64_t in_cksumdata(const u_int32_t *lw, int len) { u_int64_t sum = 0; u_int64_t prefilled; int offset; union q_util q_util; if ((3 & (long) lw) == 0 && len == 20) { sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4]; REDUCE32; return sum; } if ((offset = 3 & (long) lw) != 0) { const u_int32_t *masks = in_masks + (offset << 2); lw = (u_int32_t *) (((long) lw) - offset); sum = *lw++ & masks[len >= 3 ? 3 : len]; len -= 4 - offset; if (len <= 0) { REDUCE32; return sum; } } #if 0 /* * Force to cache line boundary. */ offset = 32 - (0x1f & (long) lw); if (offset < 32 && len > offset) { len -= offset; if (4 & offset) { sum += (u_int64_t) lw[0]; lw += 1; } if (8 & offset) { sum += (u_int64_t) lw[0] + lw[1]; lw += 2; } if (16 & offset) { sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; lw += 4; } } #endif /* * Access the prefetched word to start loading the next cache line, * then add up the current cache line; * save the prefetched word for the next loop iteration. 
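 *
 * In outline, the software pipelining used by the loop below
 * (a sketch, not a drop-in replacement):
 *
 *	prefilled = lw[0];			// first word of current line
 *	while (a full 32-byte line remains) {
 *		prefilling = lw[8];		// touch the next cache line
 *		sum += prefilled + lw[1] + ... + lw[7];	// add current line
 *		lw += 8;
 *		prefilled = prefilling;		// carry into the next round
 *	}
 *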
*/ prefilled = lw[0]; while ((len -= 32) >= 4) { u_int64_t prefilling = lw[8]; sum += prefilled + lw[1] + lw[2] + lw[3] + lw[4] + lw[5] + lw[6] + lw[7]; lw += 8; prefilled = prefilling; } if (len >= 0) { sum += prefilled + lw[1] + lw[2] + lw[3] + lw[4] + lw[5] + lw[6] + lw[7]; lw += 8; } else { len += 32; } while ((len -= 16) >= 0) { sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; lw += 4; } len += 16; while ((len -= 4) >= 0) { sum += (u_int64_t) *lw++; } len += 4; if (len > 0) sum += (u_int64_t) (in_masks[len] & *lw); REDUCE32; return sum; } u_short in_addword(u_short a, u_short b) { u_int64_t sum = a + b; ADDCARRY(sum); return (sum); } u_short in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c) { u_int64_t sum; union q_util q_util; union l_util l_util; sum = (u_int64_t) a + b + c; REDUCE16; return (sum); } u_short in_cksum_skip(struct mbuf *m, int len, int skip) { u_int64_t sum = 0; int mlen = 0; int clen = 0; caddr_t addr; union q_util q_util; union l_util l_util; len -= skip; for (; skip && m; m = m->m_next) { if (m->m_len > skip) { mlen = m->m_len - skip; addr = mtod(m, caddr_t) + skip; goto skip_start; } else { skip -= m->m_len; } } for (; m && len; m = m->m_next) { if (m->m_len == 0) continue; mlen = m->m_len; addr = mtod(m, caddr_t); skip_start: if (len < mlen) mlen = len; if ((clen ^ (long) addr) & 1) sum += in_cksumdata((const u_int32_t *)addr, mlen) << 8; else sum += in_cksumdata((const u_int32_t *)addr, mlen); clen += mlen; len -= mlen; } REDUCE16; return (~sum & 0xffff); } u_int in_cksum_hdr(const struct ip *ip) { u_int64_t sum = in_cksumdata((const u_int32_t *)ip, sizeof(struct ip)); union q_util q_util; union l_util l_util; REDUCE16; return (~sum & 0xffff); } #else /* * These asm statements require __volatile because they pass information * via the condition codes. GCC does not currently provide a way to specify * the condition codes as an input or output operand. * * The LOAD macro below is effectively a prefetch into cache. GCC will * load the value into a register but will not use it. Since modern CPUs * reorder operations, this will generally take place in parallel with * other calculations. */ u_short in_cksum_skip(m, len, skip) struct mbuf *m; int len; int skip; { - register u_short *w; - register unsigned sum = 0; - register int mlen = 0; + u_short *w; + unsigned sum = 0; + int mlen = 0; int byte_swapped = 0; union { char c[2]; u_short s; } su; len -= skip; for (; skip && m; m = m->m_next) { if (m->m_len > skip) { mlen = m->m_len - skip; w = (u_short *)(mtod(m, u_char *) + skip); goto skip_start; } else { skip -= m->m_len; } } for (;m && len; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, u_short *); if (mlen == -1) { /* * The first byte of this mbuf is the continuation * of a word spanning between this mbuf and the * last mbuf. */ /* su.c[0] is already saved when scanning previous * mbuf. sum was REDUCEd when we found mlen == -1 */ su.c[1] = *(u_char *)w; sum += su.s; w = (u_short *)((char *)w + 1); mlen = m->m_len - 1; len--; } else mlen = m->m_len; skip_start: if (len < mlen) mlen = len; len -= mlen; /* * Force to long boundary so we do longword aligned * memory operations */ if (3 & (int) w) { REDUCE; if ((1 & (int) w) && (mlen > 0)) { sum <<= 8; su.c[0] = *(char *)w; w = (u_short *)((char *)w + 1); mlen--; byte_swapped = 1; } if ((2 & (int) w) && (mlen >= 2)) { sum += *w++; mlen -= 2; } } /* * Advance to a 486 cache line boundary. 
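 *
 * The "addl; adcl $0" sequences below are plain add-with-carry; one
 * such pair is equivalent to this C fragment (illustrative):
 *
 *	u_int32_t v = ((const u_int32_t *)w)[0];
 *	sum += v;
 *	if (sum < v)		// unsigned wraparound: fold the carry back
 *		sum++;
 *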
*/ if (4 & (int) w && mlen >= 4) { __asm __volatile ( "addl %1, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[0]) ); w += 2; mlen -= 4; } if (8 & (int) w && mlen >= 8) { __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]) ); w += 4; mlen -= 8; } /* * Do as much of the checksum as possible 32 bits at a time. * In fact, this loop is unrolled to make overhead from * branches &c small. */ mlen -= 1; while ((mlen -= 32) >= 0) { /* * Add with carry 16 words and fold in the last * carry by adding a 0 with carry. * * The early ADD(16) and the LOAD(32) are to load * the next 2 cache lines in advance on 486's. The * 486 has a penalty of 2 clock cycles for loading * a cache line, plus whatever time the external * memory takes to load the first word(s) addressed. * These penalties are unavoidable. Subsequent * accesses to a cache line being loaded (and to * other external memory?) are delayed until the * whole load finishes. These penalties are mostly * avoided by not accessing external memory for * 8 cycles after the ADD(16) and 12 cycles after * the LOAD(32). The loop terminates when mlen * is initially 33 (not 32) to guarantee that * the LOAD(32) is within bounds. */ __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl %3, %0\n" "adcl %4, %0\n" "adcl %5, %0\n" "mov %6, %%eax\n" "adcl %7, %0\n" "adcl %8, %0\n" "adcl %9, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[4]), "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]), "g" (((const u_int32_t *)w)[2]), "g" (((const u_int32_t *)w)[3]), "g" (((const u_int32_t *)w)[8]), "g" (((const u_int32_t *)w)[5]), "g" (((const u_int32_t *)w)[6]), "g" (((const u_int32_t *)w)[7]) : "eax" ); w += 16; } mlen += 32 + 1; if (mlen >= 32) { __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl %3, %0\n" "adcl %4, %0\n" "adcl %5, %0\n" "adcl %6, %0\n" "adcl %7, %0\n" "adcl %8, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[4]), "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]), "g" (((const u_int32_t *)w)[2]), "g" (((const u_int32_t *)w)[3]), "g" (((const u_int32_t *)w)[5]), "g" (((const u_int32_t *)w)[6]), "g" (((const u_int32_t *)w)[7]) ); w += 16; mlen -= 32; } if (mlen >= 16) { __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl %3, %0\n" "adcl %4, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]), "g" (((const u_int32_t *)w)[2]), "g" (((const u_int32_t *)w)[3]) ); w += 8; mlen -= 16; } if (mlen >= 8) { __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]) ); w += 4; mlen -= 8; } if (mlen == 0 && byte_swapped == 0) continue; /* worth 1% maybe ?? */ REDUCE; while ((mlen -= 2) >= 0) { sum += *w++; } if (byte_swapped) { sum <<= 8; byte_swapped = 0; if (mlen == -1) { su.c[1] = *(char *)w; sum += su.s; mlen = 0; } else mlen = -1; } else if (mlen == -1) /* * This mbuf has odd number of bytes. * There could be a word split between * this mbuf and the next mbuf. * Save the last byte (to prepend to next mbuf). */ su.c[0] = *(char *)w; } if (len) printf("%s: out of data by %d\n", __func__, len); if (mlen == -1) { /* The last mbuf has odd # of bytes. 
Follow the standard (the odd byte is shifted left by 8 bits) */ su.c[1] = 0; sum += su.s; } REDUCE; return (~sum & 0xffff); } #endif Index: stable/11/sys/i386/i386/k6_mem.c =================================================================== --- stable/11/sys/i386/i386/k6_mem.c (revision 331642) +++ stable/11/sys/i386/i386/k6_mem.c (revision 331643) @@ -1,191 +1,191 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Brian Fundakowski Feldman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* * A K6-2 MTRR is defined as the highest 15 bits having the address, the next * 15 having the mask, the 1st bit being "write-combining" and the 0th bit * being "uncacheable". * * Address Mask WC UC * | XXXXXXXXXXXXXXX | XXXXXXXXXXXXXXX | X | X | * * There are two of these in the 64-bit UWCCR. */ #define UWCCR 0xc0000085 #define K6_REG_GET(reg, addr, mask, wc, uc) do { \ addr = (reg) & 0xfffe0000; \ mask = ((reg) & 0x1fffc) >> 2; \ wc = ((reg) & 0x2) >> 1; \ uc = (reg) & 0x1; \ } while (0) #define K6_REG_MAKE(addr, mask, wc, uc) \ ((addr) | ((mask) << 2) | ((wc) << 1) | uc) static void k6_mrinit(struct mem_range_softc *sc); static int k6_mrset(struct mem_range_softc *, struct mem_range_desc *, int *); static __inline int k6_mrmake(struct mem_range_desc *, u_int32_t *); static void k6_mem_drvinit(void *); static struct mem_range_ops k6_mrops = { k6_mrinit, k6_mrset, NULL, NULL }; static __inline int k6_mrmake(struct mem_range_desc *desc, u_int32_t *mtrr) { u_int32_t len = 0, wc, uc; - register int bit; + int bit; if (desc->mr_base &~ 0xfffe0000) return (EINVAL); if (desc->mr_len < 131072 || !powerof2(desc->mr_len)) return (EINVAL); if (desc->mr_flags &~ (MDF_WRITECOMBINE|MDF_UNCACHEABLE|MDF_FORCE)) return (EOPNOTSUPP); for (bit = ffs(desc->mr_len >> 17) - 1; bit < 15; bit++) len |= 1 << bit; wc = (desc->mr_flags & MDF_WRITECOMBINE) ? 1 : 0; uc = (desc->mr_flags & MDF_UNCACHEABLE) ? 
1 : 0; *mtrr = K6_REG_MAKE(desc->mr_base, len, wc, uc); return (0); } static void k6_mrinit(struct mem_range_softc *sc) { u_int64_t reg; u_int32_t addr, mask, wc, uc; int d; sc->mr_cap = 0; sc->mr_ndesc = 2; /* XXX (BFF) For now, we only have one msr for this */ sc->mr_desc = malloc(sc->mr_ndesc * sizeof(struct mem_range_desc), M_MEMDESC, M_NOWAIT | M_ZERO); if (sc->mr_desc == NULL) panic("k6_mrinit: malloc returns NULL"); reg = rdmsr(UWCCR); for (d = 0; d < sc->mr_ndesc; d++) { u_int32_t one = (reg & (0xffffffff << (32 * d))) >> (32 * d); K6_REG_GET(one, addr, mask, wc, uc); sc->mr_desc[d].mr_base = addr; sc->mr_desc[d].mr_len = ffs(mask) << 17; if (wc) sc->mr_desc[d].mr_flags |= MDF_WRITECOMBINE; if (uc) sc->mr_desc[d].mr_flags |= MDF_UNCACHEABLE; } printf("K6-family MTRR support enabled (%d registers)\n", sc->mr_ndesc); } static int k6_mrset(struct mem_range_softc *sc, struct mem_range_desc *desc, int *arg) { u_int64_t reg; u_int32_t mtrr; int error, d; switch (*arg) { case MEMRANGE_SET_UPDATE: error = k6_mrmake(desc, &mtrr); if (error) return (error); for (d = 0; d < sc->mr_ndesc; d++) { if (!sc->mr_desc[d].mr_len) { sc->mr_desc[d] = *desc; goto out; } if (sc->mr_desc[d].mr_base == desc->mr_base && sc->mr_desc[d].mr_len == desc->mr_len) return (EEXIST); } return (ENOSPC); case MEMRANGE_SET_REMOVE: mtrr = 0; for (d = 0; d < sc->mr_ndesc; d++) if (sc->mr_desc[d].mr_base == desc->mr_base && sc->mr_desc[d].mr_len == desc->mr_len) { bzero(&sc->mr_desc[d], sizeof(sc->mr_desc[d])); goto out; } return (ENOENT); default: return (EOPNOTSUPP); } out: disable_intr(); wbinvd(); reg = rdmsr(UWCCR); reg &= ~(0xffffffff << (32 * d)); reg |= mtrr << (32 * d); wrmsr(UWCCR, reg); wbinvd(); enable_intr(); return (0); } static void k6_mem_drvinit(void *unused) { if (cpu_vendor_id != CPU_VENDOR_AMD) return; if ((cpu_id & 0xf00) != 0x500) return; if ((cpu_id & 0xf0) < 0x80 || ((cpu_id & 0xf0) == 0x80 && (cpu_id & 0xf) <= 0x7)) return; mem_range_softc.mr_op = &k6_mrops; } SYSINIT(k6memdev, SI_SUB_DRIVERS, SI_ORDER_FIRST, k6_mem_drvinit, NULL); Index: stable/11/sys/i386/i386/sys_machdep.c =================================================================== --- stable/11/sys/i386/i386/sys_machdep.c (revision 331642) +++ stable/11/sys/i386/i386/sys_machdep.c (revision 331643) @@ -1,813 +1,811 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for kernel_map */ #define MAX_LD 8192 #define LD_PER_PAGE 512 #define NEW_MAX_LD(num) rounddown2(num + LD_PER_PAGE, LD_PER_PAGE) #define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3) #define NULL_LDT_BASE ((caddr_t)NULL) #ifdef SMP static void set_user_ldt_rv(void *arg); #endif static int i386_set_ldt_data(struct thread *, int start, int num, union descriptor *descs); static int i386_ldt_grow(struct thread *td, int len); void fill_based_sd(struct segment_descriptor *sdp, uint32_t base) { sdp->sd_lobase = base & 0xffffff; sdp->sd_hibase = (base >> 24) & 0xff; sdp->sd_lolimit = 0xffff; /* 4GB limit, wraps around */ sdp->sd_hilimit = 0xf; sdp->sd_type = SDT_MEMRWA; sdp->sd_dpl = SEL_UPL; sdp->sd_p = 1; sdp->sd_xx = 0; sdp->sd_def32 = 1; sdp->sd_gran = 1; } /* * Construct special descriptors for "base" selectors. Store them in * the PCB for later use by cpu_switch(). Store them in the GDT for * more immediate use. The GDT entries are part of the current * context. Callers must load related segment registers to complete * setting up the current context. */ void set_fsbase(struct thread *td, uint32_t base) { struct segment_descriptor sd; fill_based_sd(&sd, base); critical_enter(); td->td_pcb->pcb_fsd = sd; PCPU_GET(fsgs_gdt)[0] = sd; critical_exit(); } void set_gsbase(struct thread *td, uint32_t base) { struct segment_descriptor sd; fill_based_sd(&sd, base); critical_enter(); td->td_pcb->pcb_gsd = sd; PCPU_GET(fsgs_gdt)[1] = sd; critical_exit(); } #ifndef _SYS_SYSPROTO_H_ struct sysarch_args { int op; char *parms; }; #endif int -sysarch(td, uap) - struct thread *td; - register struct sysarch_args *uap; +sysarch(struct thread *td, struct sysarch_args *uap) { int error; union descriptor *lp; union { struct i386_ldt_args largs; struct i386_ioperm_args iargs; struct i386_get_xfpustate xfpu; } kargs; uint32_t base; struct segment_descriptor *sdp; AUDIT_ARG_CMD(uap->op); #ifdef CAPABILITY_MODE /* * When adding new operations, add a new case statement here to * explicitly indicate whether or not the operation is safe to * perform in capability mode. 
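 *
 * Illustrative userland sketch (a hypothetical program): after
 * entering capability mode, the cases whitelisted below keep working
 * while I386_SET_IOPERM is refused with ECAPMODE.
 *
 *	#include <sys/capsicum.h>
 *	#include <machine/sysarch.h>
 *	#include <errno.h>
 *	#include <stdint.h>
 *
 *	uint32_t base;
 *	struct i386_ioperm_args io = { 0x60, 1, 1 };
 *
 *	cap_enter();
 *	sysarch(I386_GET_FSBASE, &base);	// still permitted
 *	if (sysarch(I386_SET_IOPERM, &io) == -1 && errno == ECAPMODE)
 *		;				// denied, as intended
 *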
*/ if (IN_CAPABILITY_MODE(td)) { switch (uap->op) { case I386_GET_LDT: case I386_SET_LDT: case I386_GET_IOPERM: case I386_GET_FSBASE: case I386_SET_FSBASE: case I386_GET_GSBASE: case I386_SET_GSBASE: case I386_GET_XFPUSTATE: break; case I386_SET_IOPERM: default: #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL); #endif return (ECAPMODE); } } #endif switch (uap->op) { case I386_GET_IOPERM: case I386_SET_IOPERM: if ((error = copyin(uap->parms, &kargs.iargs, sizeof(struct i386_ioperm_args))) != 0) return (error); break; case I386_GET_LDT: case I386_SET_LDT: if ((error = copyin(uap->parms, &kargs.largs, sizeof(struct i386_ldt_args))) != 0) return (error); break; case I386_GET_XFPUSTATE: if ((error = copyin(uap->parms, &kargs.xfpu, sizeof(struct i386_get_xfpustate))) != 0) return (error); break; default: break; } switch (uap->op) { case I386_GET_LDT: error = i386_get_ldt(td, &kargs.largs); break; case I386_SET_LDT: if (kargs.largs.descs != NULL) { if (kargs.largs.num > MAX_LD) return (EINVAL); lp = malloc(kargs.largs.num * sizeof(union descriptor), M_TEMP, M_WAITOK); error = copyin(kargs.largs.descs, lp, kargs.largs.num * sizeof(union descriptor)); if (error == 0) error = i386_set_ldt(td, &kargs.largs, lp); free(lp, M_TEMP); } else { error = i386_set_ldt(td, &kargs.largs, NULL); } break; case I386_GET_IOPERM: error = i386_get_ioperm(td, &kargs.iargs); if (error == 0) error = copyout(&kargs.iargs, uap->parms, sizeof(struct i386_ioperm_args)); break; case I386_SET_IOPERM: error = i386_set_ioperm(td, &kargs.iargs); break; case I386_VM86: error = vm86_sysarch(td, uap->parms); break; case I386_GET_FSBASE: sdp = &td->td_pcb->pcb_fsd; base = sdp->sd_hibase << 24 | sdp->sd_lobase; error = copyout(&base, uap->parms, sizeof(base)); break; case I386_SET_FSBASE: error = copyin(uap->parms, &base, sizeof(base)); if (error == 0) { /* * Construct the special descriptor for fsbase * and arrange for doreti to load its selector * soon enough. */ set_fsbase(td, base); td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL); } break; case I386_GET_GSBASE: sdp = &td->td_pcb->pcb_gsd; base = sdp->sd_hibase << 24 | sdp->sd_lobase; error = copyout(&base, uap->parms, sizeof(base)); break; case I386_SET_GSBASE: error = copyin(uap->parms, &base, sizeof(base)); if (error == 0) { /* * Construct the special descriptor for gsbase. * The selector is loaded immediately, since we * normally only reload %gs on context switches. */ set_gsbase(td, base); load_gs(GSEL(GUGS_SEL, SEL_UPL)); } break; case I386_GET_XFPUSTATE: if (kargs.xfpu.len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); npxgetregs(td); error = copyout((char *)(get_pcb_user_save_td(td) + 1), kargs.xfpu.addr, kargs.xfpu.len); break; default: error = EINVAL; break; } return (error); } int i386_extend_pcb(struct thread *td) { int i, offset; u_long *addr; struct pcb_ext *ext; struct soft_segment_descriptor ssd = { 0, /* segment base address (overwritten) */ ctob(IOPAGES + 1) - 1, /* length */ SDT_SYS386TSS, /* segment type */ 0, /* priority level */ 1, /* descriptor present */ 0, 0, 0, /* default 32 size */ 0 /* granularity */ }; ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1), M_WAITOK | M_ZERO); /* -16 is so we can convert a trapframe into vm86trapframe inplace */ ext->ext_tss.tss_esp0 = (vm_offset_t)td->td_pcb - 16; ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); /* * The last byte of the i/o map must be followed by an 0xff byte. 
* We arbitrarily allocate 16 bytes here, to keep the starting * address on a doubleword boundary. */ offset = PAGE_SIZE - 16; ext->ext_tss.tss_ioopt = (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16; ext->ext_iomap = (caddr_t)ext + offset; ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32; addr = (u_long *)ext->ext_vm86.vm86_intmap; for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++) *addr++ = ~0; ssd.ssd_base = (unsigned)&ext->ext_tss; ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext); ssdtosd(&ssd, &ext->ext_tssd); KASSERT(td == curthread, ("giving TSS to !curthread")); KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!")); /* Switch to the new TSS. */ critical_enter(); td->td_pcb->pcb_ext = ext; PCPU_SET(private_tss, 1); *PCPU_GET(tss_gdt) = ext->ext_tssd; ltr(GSEL(GPROC0_SEL, SEL_KPL)); critical_exit(); return 0; } int i386_set_ioperm(td, uap) struct thread *td; struct i386_ioperm_args *uap; { char *iomap; u_int i; int error; if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); /* * XXX * While this is restricted to root, we should probably figure out * whether any other driver is using this i/o address, so as not to * cause confusion. This probably requires a global 'usage registry'. */ if (td->td_pcb->pcb_ext == 0) if ((error = i386_extend_pcb(td)) != 0) return (error); iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; if (uap->start > uap->start + uap->length || uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY) return (EINVAL); for (i = uap->start; i < uap->start + uap->length; i++) { if (uap->enable) iomap[i >> 3] &= ~(1 << (i & 7)); else iomap[i >> 3] |= (1 << (i & 7)); } return (error); } int i386_get_ioperm(td, uap) struct thread *td; struct i386_ioperm_args *uap; { int i, state; char *iomap; if (uap->start >= IOPAGES * PAGE_SIZE * NBBY) return (EINVAL); if (td->td_pcb->pcb_ext == 0) { uap->length = 0; goto done; } iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; i = uap->start; state = (iomap[i >> 3] >> (i & 7)) & 1; uap->enable = !state; uap->length = 1; for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) break; uap->length++; } done: return (0); } /* * Update the GDT entry pointing to the LDT to point to the LDT of the * current process. Manage dt_lock holding/unholding autonomously. */ static void set_user_ldt_locked(struct mdproc *mdp) { struct proc_ldt *pldt; int gdt_idx; mtx_assert(&dt_lock, MA_OWNED); pldt = mdp->md_ldt; gdt_idx = GUSERLDT_SEL; gdt_idx += PCPU_GET(cpuid) * NGDT; /* always 0 on UP */ gdt[gdt_idx].sd = pldt->ldt_sd; lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL)); } void set_user_ldt(struct mdproc *mdp) { mtx_lock_spin(&dt_lock); set_user_ldt_locked(mdp); mtx_unlock_spin(&dt_lock); } #ifdef SMP static void set_user_ldt_rv(void *arg) { struct proc *p; p = curproc; if (arg == p->p_vmspace) set_user_ldt(&p->p_md); } #endif /* * dt_lock must be held. Returns with dt_lock held.
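
i386_set_ioperm() above grants access to a port by clearing its bit in the TSS I/O bitmap (a clear bit permits the port, a set bit traps), and i386_get_ioperm() reports the length of the run of identical bits starting at uap->start. The same bit arithmetic in a self-contained sketch, where NPORTS is a stand-in for IOPAGES * PAGE_SIZE * NBBY:

#include <stdio.h>
#include <string.h>

#define NPORTS 1024	/* stand-in for IOPAGES * PAGE_SIZE * NBBY */

static char iomap[NPORTS / 8];

/* Mirror of the loop in i386_set_ioperm(): clear bit = allow access. */
static void
set_range(unsigned start, unsigned length, int enable)
{
	unsigned i;

	for (i = start; i < start + length; i++) {
		if (enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
}

/* Mirror of i386_get_ioperm(): length of the run of equal bits. */
static unsigned
run_length(unsigned start, int *enable)
{
	unsigned i, length;
	int state;

	state = (iomap[start >> 3] >> (start & 7)) & 1;
	*enable = !state;	/* bit clear means access enabled */
	length = 1;
	for (i = start + 1; i < NPORTS; i++) {
		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
			break;
		length++;
	}
	return (length);
}

int
main(void)
{
	int enable;
	unsigned len;

	memset(iomap, 0xff, sizeof(iomap));	/* default: all ports trap */
	set_range(0x3f8, 8, 1);			/* allow 8 ports at 0x3f8 */
	len = run_length(0x3f8, &enable);
	printf("start 0x3f8: enable=%d length=%u\n", enable, len);
	return (0);
}
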
*/ struct proc_ldt * user_ldt_alloc(struct mdproc *mdp, int len) { struct proc_ldt *pldt, *new_ldt; mtx_assert(&dt_lock, MA_OWNED); mtx_unlock_spin(&dt_lock); new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK); new_ldt->ldt_len = len = NEW_MAX_LD(len); new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena, len * sizeof(union descriptor), M_WAITOK | M_ZERO); new_ldt->ldt_refcnt = 1; new_ldt->ldt_active = 0; mtx_lock_spin(&dt_lock); gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base; gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1; ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd); if ((pldt = mdp->md_ldt) != NULL) { if (len > pldt->ldt_len) len = pldt->ldt_len; bcopy(pldt->ldt_base, new_ldt->ldt_base, len * sizeof(union descriptor)); } else bcopy(ldt, new_ldt->ldt_base, sizeof(ldt)); return (new_ldt); } /* * Must be called with dt_lock held. Returns with dt_lock unheld. */ void user_ldt_free(struct thread *td) { struct mdproc *mdp; struct proc_ldt *pldt; mtx_assert(&dt_lock, MA_OWNED); mdp = &td->td_proc->p_md; if ((pldt = mdp->md_ldt) == NULL) { mtx_unlock_spin(&dt_lock); return; } if (td == curthread) { lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); } mdp->md_ldt = NULL; user_ldt_deref(pldt); } void user_ldt_deref(struct proc_ldt *pldt) { mtx_assert(&dt_lock, MA_OWNED); if (--pldt->ldt_refcnt == 0) { mtx_unlock_spin(&dt_lock); kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base, pldt->ldt_len * sizeof(union descriptor)); free(pldt, M_SUBPROC); } else mtx_unlock_spin(&dt_lock); } /* * Note for the authors of compat layers (linux, etc): copyout() in * the function below is not a problem since it presents data in * arch-specific format (i.e. i386-specific in this case), not in * the OS-specific one. */ int i386_get_ldt(struct thread *td, struct i386_ldt_args *uap) { struct proc_ldt *pldt; char *data; u_int nldt, num; int error; #ifdef DEBUG printf("i386_get_ldt: start=%u num=%u descs=%p\n", uap->start, uap->num, (void *)uap->descs); #endif num = min(uap->num, MAX_LD); data = malloc(num * sizeof(union descriptor), M_TEMP, M_WAITOK); mtx_lock_spin(&dt_lock); pldt = td->td_proc->p_md.md_ldt; nldt = pldt != NULL ? pldt->ldt_len : nitems(ldt); if (uap->start >= nldt) { num = 0; } else { num = min(num, nldt - uap->start); bcopy(pldt != NULL ? &((union descriptor *)(pldt->ldt_base))[uap->start] : &ldt[uap->start], data, num * sizeof(union descriptor)); } mtx_unlock_spin(&dt_lock); error = copyout(data, uap->descs, num * sizeof(union descriptor)); if (error == 0) td->td_retval[0] = num; free(data, M_TEMP); return (error); } int i386_set_ldt(struct thread *td, struct i386_ldt_args *uap, union descriptor *descs) { struct mdproc *mdp; struct proc_ldt *pldt; union descriptor *dp; u_int largest_ld, i; int error; #ifdef DEBUG printf("i386_set_ldt: start=%u num=%u descs=%p\n", uap->start, uap->num, (void *)uap->descs); #endif error = 0; mdp = &td->td_proc->p_md; if (descs == NULL) { /* Free descriptors */ if (uap->start == 0 && uap->num == 0) { /* * Treat this as a special case, so userland needn't * know magic number NLDT. 
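
user_ldt_alloc() above rounds the requested descriptor count up with NEW_MAX_LD() so that the backing allocation is always a whole number of pages (512 eight-byte descriptors per 4 KiB page). Note that an exactly page-aligned request gets one full extra page, because the macro adds LD_PER_PAGE before rounding down. A quick standalone demonstration of the macro's behaviour, with rounddown2() written out as in sys/param.h:

#include <stdio.h>

#define rounddown2(x, y) ((x) & ~((y) - 1))	/* as in sys/param.h */
#define LD_PER_PAGE 512
#define NEW_MAX_LD(num) rounddown2(num + LD_PER_PAGE, LD_PER_PAGE)

int
main(void)
{
	int req[] = { 1, 20, 511, 512, 513, 8191 };
	unsigned i;

	for (i = 0; i < sizeof(req) / sizeof(req[0]); i++)
		printf("request %4d -> allocate %4d descriptors (%d pages)\n",
		    req[i], NEW_MAX_LD(req[i]),
		    NEW_MAX_LD(req[i]) / LD_PER_PAGE);
	return (0);
}
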
*/ uap->start = NLDT; uap->num = MAX_LD - NLDT; } mtx_lock_spin(&dt_lock); if ((pldt = mdp->md_ldt) == NULL || uap->start >= pldt->ldt_len) { mtx_unlock_spin(&dt_lock); return (0); } largest_ld = uap->start + uap->num; if (largest_ld > pldt->ldt_len) largest_ld = pldt->ldt_len; for (i = uap->start; i < largest_ld; i++) atomic_store_rel_64(&((uint64_t *)(pldt->ldt_base))[i], 0); mtx_unlock_spin(&dt_lock); return (0); } if (uap->start != LDT_AUTO_ALLOC || uap->num != 1) { /* verify range of descriptors to modify */ largest_ld = uap->start + uap->num; if (uap->start >= MAX_LD || largest_ld > MAX_LD) return (EINVAL); } /* Check descriptors for access violations */ for (i = 0; i < uap->num; i++) { dp = &descs[i]; switch (dp->sd.sd_type) { case SDT_SYSNULL: /* system null */ dp->sd.sd_p = 0; break; case SDT_SYS286TSS: /* system 286 TSS available */ case SDT_SYSLDT: /* system local descriptor table */ case SDT_SYS286BSY: /* system 286 TSS busy */ case SDT_SYSTASKGT: /* system task gate */ case SDT_SYS286IGT: /* system 286 interrupt gate */ case SDT_SYS286TGT: /* system 286 trap gate */ case SDT_SYSNULL2: /* undefined by Intel */ case SDT_SYS386TSS: /* system 386 TSS available */ case SDT_SYSNULL3: /* undefined by Intel */ case SDT_SYS386BSY: /* system 386 TSS busy */ case SDT_SYSNULL4: /* undefined by Intel */ case SDT_SYS386IGT: /* system 386 interrupt gate */ case SDT_SYS386TGT: /* system 386 trap gate */ case SDT_SYS286CGT: /* system 286 call gate */ case SDT_SYS386CGT: /* system 386 call gate */ return (EACCES); /* memory segment types */ case SDT_MEMEC: /* memory execute only conforming */ case SDT_MEMEAC: /* memory execute only accessed conforming */ case SDT_MEMERC: /* memory execute read conforming */ case SDT_MEMERAC: /* memory execute read accessed conforming */ /* Must be "present" if executable and conforming. */ if (dp->sd.sd_p == 0) return (EACCES); break; case SDT_MEMRO: /* memory read only */ case SDT_MEMROA: /* memory read only accessed */ case SDT_MEMRW: /* memory read write */ case SDT_MEMRWA: /* memory read write accessed */ case SDT_MEMROD: /* memory read only expand dwn limit */ case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ case SDT_MEMRWD: /* memory read write expand dwn limit */ case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */ case SDT_MEME: /* memory execute only */ case SDT_MEMEA: /* memory execute only accessed */ case SDT_MEMER: /* memory execute read */ case SDT_MEMERA: /* memory execute read accessed */ break; default: return (EINVAL); } /* Only user (ring-3) descriptors may be present. */ if (dp->sd.sd_p != 0 && dp->sd.sd_dpl != SEL_UPL) return (EACCES); } if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) { /* Allocate a free slot */ mtx_lock_spin(&dt_lock); if ((pldt = mdp->md_ldt) == NULL) { if ((error = i386_ldt_grow(td, NLDT + 1))) { mtx_unlock_spin(&dt_lock); return (error); } pldt = mdp->md_ldt; } again: /* * start scanning a bit up to leave room for NVidia and * Wine, which still use the "Blat" method of allocation.
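
The access-check switch above reduces to a few range rules once the SDT_* constants are written out numerically: type 0 is forced not-present, system types 1-15 are rejected, conforming code types must be present, and any present descriptor must have DPL 3. A compressed sketch of the same policy; the numeric type values (0 = SDT_SYSNULL, 1..15 = system, 16..27 = data and non-conforming code, 28..31 = SDT_MEMEC..SDT_MEMERAC) are assumed from the i386 machine/segments.h encoding:

#include <errno.h>
#include <stdio.h>

#define SEL_UPL 3	/* user privilege level, ring 3 */

/* Sketch of the policy in i386_set_ldt()'s validation loop. */
static int
check_desc(unsigned type, unsigned dpl, unsigned *present)
{
	if (type == 0)			/* SDT_SYSNULL */
		*present = 0;		/* force not-present, else OK */
	else if (type <= 15)		/* system segments and gates */
		return (EACCES);
	else if (type > 31)		/* not a valid 5-bit type */
		return (EINVAL);
	else if (type >= 28 && *present == 0)
		return (EACCES);	/* conforming code must be present */
	if (*present != 0 && dpl != SEL_UPL)
		return (EACCES);	/* only ring-3 descriptors */
	return (0);
}

int
main(void)
{
	unsigned p;

	p = 1;
	printf("LDT type:     %d\n", check_desc(2, SEL_UPL, &p));
	p = 1;
	printf("ring-0 data:  %d\n", check_desc(19, 0, &p));
	p = 1;
	printf("ring-3 data:  %d\n", check_desc(19, SEL_UPL, &p));
	return (0);
}
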
*/ dp = &((union descriptor *)(pldt->ldt_base))[NLDT]; for (i = NLDT; i < pldt->ldt_len; ++i) { if (dp->sd.sd_type == SDT_SYSNULL) break; dp++; } if (i >= pldt->ldt_len) { if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) { mtx_unlock_spin(&dt_lock); return (error); } goto again; } uap->start = i; error = i386_set_ldt_data(td, i, 1, descs); mtx_unlock_spin(&dt_lock); } else { largest_ld = uap->start + uap->num; mtx_lock_spin(&dt_lock); if (!(error = i386_ldt_grow(td, largest_ld))) { error = i386_set_ldt_data(td, uap->start, uap->num, descs); } mtx_unlock_spin(&dt_lock); } if (error == 0) td->td_retval[0] = uap->start; return (error); } static int i386_set_ldt_data(struct thread *td, int start, int num, union descriptor *descs) { struct mdproc *mdp; struct proc_ldt *pldt; uint64_t *dst, *src; int i; mtx_assert(&dt_lock, MA_OWNED); mdp = &td->td_proc->p_md; pldt = mdp->md_ldt; dst = (uint64_t *)(pldt->ldt_base); src = (uint64_t *)descs; /* * Atomic(9) is used only to get 64bit atomic store with * cmpxchg8b when available. There is no op without release * semantic. */ for (i = 0; i < num; i++) atomic_store_rel_64(&dst[start + i], src[i]); return (0); } static int i386_ldt_grow(struct thread *td, int len) { struct mdproc *mdp; struct proc_ldt *new_ldt, *pldt; caddr_t old_ldt_base; int old_ldt_len; mtx_assert(&dt_lock, MA_OWNED); if (len > MAX_LD) return (ENOMEM); if (len < NLDT + 1) len = NLDT + 1; mdp = &td->td_proc->p_md; old_ldt_base = NULL_LDT_BASE; old_ldt_len = 0; /* Allocate a user ldt. */ if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) { new_ldt = user_ldt_alloc(mdp, len); if (new_ldt == NULL) return (ENOMEM); pldt = mdp->md_ldt; if (pldt != NULL) { if (new_ldt->ldt_len <= pldt->ldt_len) { /* * We just lost the race for allocation, so * free the new object and return. */ mtx_unlock_spin(&dt_lock); kmem_free(kernel_arena, (vm_offset_t)new_ldt->ldt_base, new_ldt->ldt_len * sizeof(union descriptor)); free(new_ldt, M_SUBPROC); mtx_lock_spin(&dt_lock); return (0); } /* * We have to substitute the current LDT entry for * curproc with the new one since its size grew. */ old_ldt_base = pldt->ldt_base; old_ldt_len = pldt->ldt_len; pldt->ldt_sd = new_ldt->ldt_sd; pldt->ldt_base = new_ldt->ldt_base; pldt->ldt_len = new_ldt->ldt_len; } else mdp->md_ldt = pldt = new_ldt; #ifdef SMP /* * Signal other cpus to reload ldt. We need to unlock dt_lock * here because other CPUs will contend for it, since their * curthreads do not hold the lock and will block when trying * to acquire it. */ mtx_unlock_spin(&dt_lock); smp_rendezvous(NULL, set_user_ldt_rv, NULL, td->td_proc->p_vmspace); #else set_user_ldt_locked(&td->td_proc->p_md); mtx_unlock_spin(&dt_lock); #endif if (old_ldt_base != NULL_LDT_BASE) { kmem_free(kernel_arena, (vm_offset_t)old_ldt_base, old_ldt_len * sizeof(union descriptor)); free(new_ldt, M_SUBPROC); } mtx_lock_spin(&dt_lock); } return (0); } Index: stable/11/sys/i386/i386/vm_machdep.c =================================================================== --- stable/11/sys/i386/i386/vm_machdep.c (revision 331642) +++ stable/11/sys/i386/i386/vm_machdep.c (revision 331643) @@ -1,879 +1,879 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz.
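
i386_set_ldt_data() above writes each 8-byte descriptor with a single atomic 64-bit store, so a CPU walking the LDT concurrently can never observe a half-updated entry; as the comment notes, atomic(9) is used only because cmpxchg8b is the sole 64-bit store primitive on i386, and only a release variant exists. The same idea in portable C11 form, purely as an illustration of the single-shot store:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* A descriptor table entry is 8 bytes; update it in one shot. */
static _Atomic uint64_t ldt_entry[4];

static void
set_entry(int slot, uint64_t desc)
{
	/* Analogous to atomic_store_rel_64(&dst[slot], desc):
	 * release ordering, and never a torn pair of 32-bit writes. */
	atomic_store_explicit(&ldt_entry[slot], desc,
	    memory_order_release);
}

int
main(void)
{
	set_entry(1, 0x00cff2000000ffffULL);	/* ring-3 data, 4 GiB */
	printf("slot 1 = %016llx\n",
	    (unsigned long long)atomic_load(&ldt_entry[1]));
	return (0);
}
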
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_npx.h" #include "opt_reset.h" #include "opt_cpu.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CPU_ELAN #include #endif #include #include #include #include #include #include #ifdef PC98 #include #else #include #endif #ifdef XBOX #include #endif #ifndef NSFBUFS #define NSFBUFS (512 + maxusers * 16) #endif _Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread), "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread."); _Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb), "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb."); _Static_assert(__OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "__OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf."); static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif union savefpu * get_pcb_user_save_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area")); return ((union savefpu *)p); } union savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { vm_offset_t p; p = (vm_offset_t)(pcb + 1); return ((union savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) 
- sizeof(struct pcb); return ((struct pcb *)p); } void * alloc_fpusave(int flags) { void *res; struct savefpu_ymm *sf; res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } return (res); } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * is ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { - register struct proc *p1; + struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt, *pldt1; mtx_lock_spin(&dt_lock); if ((pldt1 = mdp1->md_ldt) != NULL && pldt1->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt1->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_deref(pldt1); } else mtx_unlock_spin(&dt_lock); } return; } /* Ensure that td1's pcb is up to date. */ if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); critical_enter(); if (PCPU_GET(fpcurthread) == td1) npxsave(td1->td_pcb->pcb_save); critical_exit(); /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -16 is so we can expand the trapframe if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * If the parent process has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame unless the debugger had set PF_FORK * on the parent. Otherwise, the child will receive a (likely * unexpected) SIGTRAP when it executes the first instruction after * returning to userland. */ if ((p1->p_pfsflags & PF_FORK) == 0) td2->td_frame->tf_eflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ #if defined(PAE) || defined(PAE_TABLES) pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); #else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); #endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl = PSL_KERNEL; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below.
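
cpu_fork() above places the child's trap frame immediately below the PCB, leaving a 16-byte gap so the frame can later be grown in place into the larger vm86 trap frame (the four extra saved vm86 segment registers). The pointer arithmetic, spelled out with an illustrative stand-in type rather than the real machine-dependent layout:

#include <stdio.h>

/* Stand-in: only the placement arithmetic matters, not the size. */
struct trapframe { int regs[19]; };	/* illustrative size */

int
main(void)
{
	char stack[4096];
	char *pcb_top = stack + sizeof(stack);	/* pretend pcb sits here */
	struct trapframe *frame;

	/* As in cpu_fork(): the frame ends 16 bytes below the pcb, so
	 * a vm86 trapframe (16 bytes larger) can overlay it in place. */
	frame = (struct trapframe *)(pcb_top - 16) - 1;

	printf("pcb at %p, frame at %p, gap above frame = %zu bytes\n",
	    (void *)pcb_top, (void *)frame,
	    (size_t)(pcb_top - (char *)(frame + 1)));
	return (0);
}
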
*/ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = 0; /* Copy the LDT, if necessary. */ mtx_lock_spin(&dt_lock); if (mdp2->md_ldt != NULL) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&dt_lock); /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. Reset pc->pcb_gs * and %gs before we free it in case they refer to an LDT entry. */ mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) { td->td_pcb->pcb_gs = _udatasel; load_gs(_udatasel); user_ldt_free(td); } else mtx_unlock_spin(&dt_lock); } void cpu_thread_exit(struct thread *td) { critical_enter(); if (td == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); /* Disable any hardware breakpoints. */ if (td->td_pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); td->td_pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != NULL) { /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_ext, ctob(IOPAGES + 1)); pcb->pcb_ext = NULL; } } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; struct xstate_hdr *xhdr; td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1; pcb->pcb_ext = NULL; pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); } void cpu_set_syscall_retval(struct thread *td, int error) { switch (error) { case 0: td->td_frame->tf_eax = td->td_retval[0]; td->td_frame->tf_edx = td->td_retval[1]; td->td_frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, int * 0x80 is 2 bytes. We saved this in tf_err. */ td->td_frame->tf_eip -= td->td_frame->tf_err; break; case EJUSTRETURN: break; default: td->td_frame->tf_eax = SV_ABI_ERRNO(td->td_proc, error); td->td_frame->tf_eflags |= PSL_C; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. 
Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE | PCB_KERNNPX); pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); /* * Create a new fresh stack for the new thread. */ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); /* If the current thread has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame. Otherwise, the new thread will * receive a (likely unexpected) SIGTRAP when it executes the first * instruction after returning to userland. */ td->td_frame->tf_eflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ pcb2->pcb_gs = rgs(); /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_cr3: cloned above. * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = PSL_KERNEL | PSL_I; } /* * Set the machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the entry * function. */ td->td_frame->tf_ebp = 0; td->td_frame->tf_esp = (((int)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_eip = (int)entry; /* Return address sentinel value to stop stack unwinding. */ suword((void *)td->td_frame->tf_esp, 0); /* Pass the argument to the entry point. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)arg); } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct segment_descriptor sd; uint32_t base; /* * Construct a descriptor and store it in the pcb for * the next context switch. Also store it in the gdt * so that the load of tf_fs into %fs will activate it * at return to userland.
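
cpu_set_upcall() above computes the initial %esp as ((ss_sp + ss_size - 4) & ~0x0f) - 4: round down to a 16-byte boundary inside the stack, then step back one slot for the fake return address. At the entry function's first instruction %esp is therefore congruent to 12 mod 16, exactly as if a call had just pushed a return address, and the argument at 4(%esp) lands on a 16-byte boundary. A standalone check of that arithmetic:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* As in cpu_set_upcall(): carve the initial %esp out of an
	 * arbitrary user stack, whatever its alignment. */
	uint32_t ss_sp = 0xbfa00007;	/* deliberately misaligned */
	uint32_t ss_size = 0x10000;
	uint32_t esp = ((ss_sp + ss_size - 4) & ~(uint32_t)0x0f) - 4;

	/* Entry sees %esp = fake return address slot, arg at 4(%esp). */
	assert(esp % 16 == 12);		/* like just after a call */
	assert((esp + 4) % 16 == 0);	/* argument slot 16-aligned */
	printf("esp = %#x, arg at %#x\n", (unsigned)esp,
	    (unsigned)(esp + 4));
	return (0);
}
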
*/ base = (uint32_t)tls_base; sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; sd.sd_p = 1; sd.sd_xx = 0; sd.sd_def32 = 1; sd.sd_gran = 1; critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; if (td == curthread) { PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); } critical_exit(); return (0); } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } #ifdef SMP static void cpu_reset_proxy() { cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ CPU_SETOF(cpu_reset_proxyid, &tcrp); stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); } #endif void cpu_reset() { #ifdef XBOX if (arch_i386_is_xbox) { /* Kick the PIC16L, it can reboot the box */ pic16l_reboot(); for (;;); } #endif #ifdef SMP cpuset_t map; u_int cnt; if (smp_started) { map = all_cpus; CPU_CLR(PCPU_GET(cpuid), &map); CPU_NAND(&map, &stopped_cpus); if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ /* XXX: restart_cpus(1 << 0); */ CPU_SETOF(0, &started_cpus); wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) cnt++; /* Wait for BSP to announce restart */ if (cpu_reset_proxy_active == 0) printf("cpu_reset: Failed to restart BSP\n"); enable_intr(); cpu_reset_proxy_active = 2; while (1); /* NOTREACHED */ } DELAY(1000000); } #endif cpu_reset_real(); /* NOTREACHED */ } static void cpu_reset_real() { struct region_descriptor null_idt; #ifndef PC98 int b; #endif disable_intr(); #ifdef CPU_ELAN if (elan_mmcr != NULL) elan_mmcr->RESCFG = 1; #endif if (cpu == CPU_GEODE1100) { /* Attempt Geode's own reset */ outl(0xcf8, 0x80009044ul); outl(0xcfc, 0xf); } #ifdef PC98 /* * Attempt to do a CPU reset via CPU reset port. */ if ((inb(0x35) & 0xa0) != 0xa0) { outb(0x37, 0x0f); /* SHUT0 = 0. */ outb(0x37, 0x0b); /* SHUT1 = 0. */ } outb(0xf0, 0x00); /* Reset. */ #else #if !defined(BROKEN_KEYBOARD_RESET) /* * Attempt to do a CPU reset via the keyboard controller, * do not turn off GateA20, as any machine that fails * to do the reset here would then end up in no man's land. */ outb(IO_KBD + 4, 0xFE); DELAY(500000); /* wait 0.5 sec to see if that did it */ #endif /* * Attempt to force a reset via the Reset Control register at * I/O port 0xcf9. Bit 2 forces a system reset when it * transitions from 0 to 1. Bit 1 selects the type of reset * to attempt: 0 selects a "soft" reset, and 1 selects a * "hard" reset. We try a "hard" reset. The first write sets * bit 1 to select a "hard" reset and clears bit 2. The * second write forces a 0 -> 1 transition in bit 2 to trigger * a reset. */ outb(0xcf9, 0x2); outb(0xcf9, 0x6); DELAY(500000); /* wait 0.5 sec to see if that did it */ /* * Attempt to force a reset via the Fast A20 and Init register * at I/O port 0x92. Bit 1 serves as an alternate A20 gate. * Bit 0 asserts INIT# when set to 1. We are careful to only * preserve bit 1 while setting bit 0. We also must clear bit * 0 before setting it if it isn't already clear. 
*/ b = inb(0x92); if (b != 0xff) { if ((b & 0x1) != 0) outb(0x92, b & 0xfe); outb(0x92, b | 0x1); DELAY(500000); /* wait 0.5 sec to see if that did it */ } #endif /* PC98 */ printf("No known reset method worked, attempting CPU shutdown\n"); DELAY(1000000); /* wait 1 sec for printf to complete */ /* Wipe the IDT. */ null_idt.rd_limit = 0; null_idt.rd_base = 0; lidt(&null_idt); /* "good night, sweet prince .... " */ breakpoint(); /* NOTREACHED */ while(1); } /* * Get an sf_buf from the freelist. May block if none are available. */ void sf_buf_map(struct sf_buf *sf, int flags) { pt_entry_t opte, *ptep; /* * Update the sf_buf's virtual-to-physical mapping, flushing the * virtual address from the TLB. Since the reference count for * the sf_buf's old mapping was zero, that mapping is not * currently in use. Consequently, there is no need to exchange * the old and new PTEs atomically, even under PAE. */ ptep = vtopte(sf->kva); opte = *ptep; *ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0); /* * Avoid unnecessary TLB invalidations: If the sf_buf's old * virtual-to-physical mapping was not used, then any processor * that has invalidated the sf_buf's virtual address from its TLB * since the last used mapping need not invalidate again. */ #ifdef SMP if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) CPU_ZERO(&sf->cpumask); sf_buf_shootdown(sf, flags); #else if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) pmap_invalidate_page(kernel_pmap, sf->kva); #endif } #ifdef SMP void sf_buf_shootdown(struct sf_buf *sf, int flags) { cpuset_t other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sf->cpumask)) { CPU_SET(cpuid, &sf->cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_NAND(&other_cpus, &sf->cpumask); if (!CPU_EMPTY(&other_cpus)) { CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap); } } sched_unpin(); } #endif /* * MD part of sf_buf_free(). */ int sf_buf_unmap(struct sf_buf *sf) { return (0); } static void sf_buf_invalidate(struct sf_buf *sf) { vm_page_t m = sf->m; /* * Use pmap_qenter to update the pte for * existing mapping, in particular, the PAT * settings are recalculated. */ pmap_qenter(sf->kva, &m, 1); pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE, FALSE); } /* * Invalidate the cache lines that may belong to the page, if * (possibly old) mapping of the page by sf buffer exists. Returns * TRUE when mapping was found and cache invalidated. */ boolean_t sf_buf_invalidate_cache(vm_page_t m) { return (sf_buf_process_page(m, sf_buf_invalidate)); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. * Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(vm_paddr_t addr) { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) 
* here */ return 1; } Index: stable/11/sys/i386/ibcs2/ibcs2_misc.c =================================================================== --- stable/11/sys/i386/ibcs2/ibcs2_misc.c (revision 331642) +++ stable/11/sys/i386/ibcs2/ibcs2_misc.c (revision 331643) @@ -1,1263 +1,1193 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1995 Steven Wallace * Copyright (c) 1994, 1995 Scott Bartram * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Header: sun_misc.c,v 1.16 93/04/07 02:46:27 torek Exp * * @(#)sun_misc.c 8.1 (Berkeley) 6/18/93 */ #include __FBSDID("$FreeBSD$"); /* * IBCS2 compatibility module. * * IBCS2 system calls that are implemented differently in BSD are * handled here. 
*/ #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int -ibcs2_ulimit(td, uap) - struct thread *td; - struct ibcs2_ulimit_args *uap; +ibcs2_ulimit(struct thread *td, struct ibcs2_ulimit_args *uap) { struct rlimit rl; int error; #define IBCS2_GETFSIZE 1 #define IBCS2_SETFSIZE 2 #define IBCS2_GETPSIZE 3 #define IBCS2_GETDTABLESIZE 4 switch (uap->cmd) { case IBCS2_GETFSIZE: td->td_retval[0] = lim_cur(td, RLIMIT_FSIZE); if (td->td_retval[0] == -1) td->td_retval[0] = 0x7fffffff; return 0; case IBCS2_SETFSIZE: rl.rlim_max = lim_max(td, RLIMIT_FSIZE); rl.rlim_cur = uap->newlimit; error = kern_setrlimit(td, RLIMIT_FSIZE, &rl); if (!error) { td->td_retval[0] = lim_cur(td, RLIMIT_FSIZE); } else { DPRINTF(("failed ")); } return error; case IBCS2_GETPSIZE: td->td_retval[0] = lim_cur(td, RLIMIT_RSS); /* XXX */ return 0; case IBCS2_GETDTABLESIZE: uap->cmd = IBCS2_SC_OPEN_MAX; return ibcs2_sysconf(td, (struct ibcs2_sysconf_args *)uap); default: return ENOSYS; } } #define IBCS2_WSTOPPED 0177 #define IBCS2_STOPCODE(sig) ((sig) << 8 | IBCS2_WSTOPPED) int -ibcs2_wait(td, uap) - struct thread *td; - struct ibcs2_wait_args *uap; +ibcs2_wait(struct thread *td, struct ibcs2_wait_args *uap) { int error, options, status; int *statusp; pid_t pid; struct trapframe *tf = td->td_frame; if ((tf->tf_eflags & (PSL_Z|PSL_PF|PSL_N|PSL_V)) == (PSL_Z|PSL_PF|PSL_N|PSL_V)) { /* waitpid */ pid = uap->a1; statusp = (int *)uap->a2; options = uap->a3; } else { /* wait */ pid = WAIT_ANY; statusp = (int *)uap->a1; options = 0; } error = kern_wait(td, pid, &status, options, NULL); if (error) return error; if (statusp) { /* * Convert status/signal result. 
*/ if (WIFSTOPPED(status)) { if (WSTOPSIG(status) <= 0 || WSTOPSIG(status) > IBCS2_SIGTBLSZ) return (EINVAL); status = IBCS2_STOPCODE(bsd_to_ibcs2_sig[_SIG_IDX(WSTOPSIG(status))]); } else if (WIFSIGNALED(status)) { if (WTERMSIG(status) <= 0 || WTERMSIG(status) > IBCS2_SIGTBLSZ) return (EINVAL); status = bsd_to_ibcs2_sig[_SIG_IDX(WTERMSIG(status))]; } /* else exit status -- identical */ /* record result/status */ td->td_retval[1] = status; return copyout(&status, statusp, sizeof(status)); } return 0; } int -ibcs2_execv(td, uap) - struct thread *td; - struct ibcs2_execv_args *uap; +ibcs2_execv(struct thread *td, struct ibcs2_execv_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = pre_execve(td, &oldvmspace); if (error != 0) { free(path, M_TEMP); return (error); } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, NULL); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); post_execve(td, error, oldvmspace); return (error); } int -ibcs2_execve(td, uap) - struct thread *td; - struct ibcs2_execve_args *uap; +ibcs2_execve(struct thread *td, struct ibcs2_execve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = pre_execve(td, &oldvmspace); if (error != 0) { free(path, M_TEMP); return (error); } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, uap->envp); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); post_execve(td, error, oldvmspace); return (error); } int -ibcs2_umount(td, uap) - struct thread *td; - struct ibcs2_umount_args *uap; +ibcs2_umount(struct thread *td, struct ibcs2_umount_args *uap) { struct unmount_args um; um.path = uap->name; um.flags = 0; return sys_unmount(td, &um); } int -ibcs2_mount(td, uap) - struct thread *td; - struct ibcs2_mount_args *uap; +ibcs2_mount(struct thread *td, struct ibcs2_mount_args *uap) { #ifdef notyet int oflags = uap->flags, nflags, error; char fsname[MFSNAMELEN]; if (oflags & (IBCS2_MS_NOSUB | IBCS2_MS_SYS5)) return (EINVAL); if ((oflags & IBCS2_MS_NEWTYPE) == 0) return (EINVAL); nflags = 0; if (oflags & IBCS2_MS_RDONLY) nflags |= MNT_RDONLY; if (oflags & IBCS2_MS_NOSUID) nflags |= MNT_NOSUID; if (oflags & IBCS2_MS_REMOUNT) nflags |= MNT_UPDATE; uap->flags = nflags; if (error = copyinstr((caddr_t)uap->type, fsname, sizeof fsname, (u_int *)0)) return (error); if (strcmp(fsname, "4.2") == 0) { uap->type = (caddr_t)STACK_ALLOC(); if (error = copyout("ufs", uap->type, sizeof("ufs"))) return (error); } else if (strcmp(fsname, "nfs") == 0) { struct ibcs2_nfs_args sna; struct sockaddr_in sain; struct nfs_args na; struct sockaddr sa; if (error = copyin(uap->data, &sna, sizeof sna)) return (error); if (error = copyin(sna.addr, &sain, sizeof sain)) return (error); bcopy(&sain, &sa, sizeof sa); sa.sa_len = sizeof(sain); uap->data = (caddr_t)STACK_ALLOC(); na.addr = (struct sockaddr *)((int)uap->data + sizeof na); na.sotype = SOCK_DGRAM; na.proto = IPPROTO_UDP; na.fh = (nfsv2fh_t *)sna.fh; na.flags = sna.flags; na.wsize = sna.wsize; na.rsize = sna.rsize; na.timeo = sna.timeo; na.retrans = sna.retrans; na.hostname = sna.hostname; if (error = copyout(&sa, na.addr, sizeof sa)) return (error); if (error = copyout(&na, uap->data, sizeof na)) return (error); } return (mount(td, uap)); #else return EINVAL; #endif } /* * Read iBCS2-style directory entries. 
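
The wait status conversion above preserves the classic SysV status-word layout: a plain exit leaves the word untouched, a fatal signal puts the (translated) signal number in the low byte, and a stop encodes as signal << 8 | 0177. A tiny decoder for that layout, with signal numbers left untranslated rather than run through bsd_to_ibcs2_sig:

#include <stdio.h>

#define IBCS2_WSTOPPED 0177
#define IBCS2_STOPCODE(sig) ((sig) << 8 | IBCS2_WSTOPPED)

static void
decode(int status)
{
	if ((status & 0xff) == IBCS2_WSTOPPED)
		printf("%#o: stopped by signal %d\n", status, status >> 8);
	else if ((status & 0xff) != 0)
		printf("%#o: killed by signal %d\n", status, status & 0x7f);
	else
		printf("%#o: exited with code %d\n", status,
		    (status >> 8) & 0xff);
}

int
main(void)
{
	decode(IBCS2_STOPCODE(17));	/* stopped by signal 17 */
	decode(9);			/* killed by signal 9 */
	decode(3 << 8);			/* exit(3) */
	return (0);
}
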
We suck them into kernel space so * that they can be massaged before being copied out to user code. Like * SunOS, we squish out `empty' entries. * * This is quite ugly, but what do you expect from compatibility code? */ int -ibcs2_getdents(td, uap) - struct thread *td; - register struct ibcs2_getdents_args *uap; +ibcs2_getdents(struct thread *td, struct ibcs2_getdents_args *uap) { - register struct vnode *vp; - register caddr_t inp, buf; /* BSD-format */ - register int len, reclen; /* BSD-format */ - register caddr_t outp; /* iBCS2-format */ - register int resid; /* iBCS2-format */ + struct vnode *vp; + caddr_t inp, buf; /* BSD-format */ + int len, reclen; /* BSD-format */ + caddr_t outp; /* iBCS2-format */ + int resid; /* iBCS2-format */ cap_rights_t rights; struct file *fp; struct uio auio; struct iovec aiov; struct ibcs2_dirent idb; off_t off; /* true file offset */ int buflen, error, eofflag; u_long *cookies = NULL, *cookiep; int ncookies; #define BSD_DIRENT(cp) ((struct dirent *)(cp)) #define IBCS2_RECLEN(reclen) (reclen + sizeof(u_short)) error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { /* XXX vnode readdir op should do this */ fdrop(fp, td); return (EINVAL); } off = fp->f_offset; #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ buflen = max(DIRBLKSIZ, uap->nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_SHARED | LK_RETRY); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error) goto out; #endif /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. */ if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies)) != 0) goto out; inp = buf; outp = uap->buf; resid = uap->nbytes; if ((len = buflen - auio.uio_resid) <= 0) goto eof; cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { len -= BSD_DIRENT(inp)->d_reclen; inp += BSD_DIRENT(inp)->d_reclen; cookiep++; ncookies--; } } for (; len > 0; len -= reclen) { if (cookiep && ncookies == 0) break; reclen = BSD_DIRENT(inp)->d_reclen; if (reclen & 3) { printf("ibcs2_getdents: reclen=%d\n", reclen); error = EFAULT; goto out; } if (BSD_DIRENT(inp)->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; continue; } if (reclen > len || resid < IBCS2_RECLEN(reclen)) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make an iBCS2-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). 
*/ idb.d_ino = (ibcs2_ino_t)BSD_DIRENT(inp)->d_fileno; idb.d_off = (ibcs2_off_t)off; idb.d_reclen = (u_short)IBCS2_RECLEN(reclen); if ((error = copyout((caddr_t)&idb, outp, 10)) != 0 || (error = copyout(BSD_DIRENT(inp)->d_name, outp + 10, BSD_DIRENT(inp)->d_namlen + 1)) != 0) goto out; /* advance past this real entry */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; inp += reclen; /* advance output past iBCS2-shaped entry */ outp += IBCS2_RECLEN(reclen); resid -= IBCS2_RECLEN(reclen); } /* if we squished out the whole block, try again */ if (outp == uap->buf) goto again; fp->f_offset = off; /* update the vnode offset */ eof: td->td_retval[0] = uap->nbytes - resid; out: VOP_UNLOCK(vp, 0); fdrop(fp, td); if (cookies) free(cookies, M_TEMP); free(buf, M_TEMP); return (error); } int -ibcs2_read(td, uap) - struct thread *td; - struct ibcs2_read_args *uap; +ibcs2_read(struct thread *td, struct ibcs2_read_args *uap) { - register struct vnode *vp; - register caddr_t inp, buf; /* BSD-format */ - register int len, reclen; /* BSD-format */ - register caddr_t outp; /* iBCS2-format */ - register int resid; /* iBCS2-format */ + struct vnode *vp; + caddr_t inp, buf; /* BSD-format */ + int len, reclen; /* BSD-format */ + caddr_t outp; /* iBCS2-format */ + int resid; /* iBCS2-format */ cap_rights_t rights; struct file *fp; struct uio auio; struct iovec aiov; struct ibcs2_direct { ibcs2_ino_t ino; char name[14]; } idb; off_t off; /* true file offset */ int buflen, error, eofflag, size; u_long *cookies = NULL, *cookiep; int ncookies; error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) { if (error == EINVAL) return sys_read(td, (struct read_args *)uap); else return error; } if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return sys_read(td, (struct read_args *)uap); } off = fp->f_offset; DPRINTF(("ibcs2_read: read directory\n")); buflen = max(DIRBLKSIZ, uap->nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_SHARED | LK_RETRY); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error) goto out; #endif /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. */ if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies)) != 0) { DPRINTF(("VOP_READDIR failed: %d\n", error)); goto out; } inp = buf; outp = uap->buf; resid = uap->nbytes; if ((len = buflen - auio.uio_resid) <= 0) goto eof; cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). 
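
The read()-on-a-directory path that continues below emits fixed 16-byte entries in the old SysV struct direct shape declared above: a 16-bit inode, clamped to 0xfffe so it fits the field, and a name truncated or zero-padded to exactly 14 bytes. A sketch of the packing, with strnlen()/memcpy() standing in for the kernel's copystr()/bzero() pair:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Old SysV directory entry, as in struct ibcs2_direct above. */
struct ibcs2_direct {
	uint16_t ino;
	char name[14];
};

static void
pack_direct(struct ibcs2_direct *d, uint32_t fileno, const char *name)
{
	size_t len;

	/* Clamp to 16 bits as the code below does (0xfffe at most). */
	d->ino = fileno > 0xfffe ? 0xfffe : (uint16_t)fileno;
	/* copystr()-style: copy at most 14 bytes, zero-fill the rest. */
	len = strnlen(name, sizeof(d->name));
	memcpy(d->name, name, len);
	memset(d->name + len, 0, sizeof(d->name) - len);
}

int
main(void)
{
	struct ibcs2_direct d;

	pack_direct(&d, 123456, "averylongfilename.txt");
	printf("ino=%u name=%.14s (truncated)\n", d.ino, d.name);
	return (0);
}
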
*/ while (len > 0 && ncookies > 0 && *cookiep <= off) { len -= BSD_DIRENT(inp)->d_reclen; inp += BSD_DIRENT(inp)->d_reclen; cookiep++; ncookies--; } } for (; len > 0 && resid > 0; len -= reclen) { if (cookiep && ncookies == 0) break; reclen = BSD_DIRENT(inp)->d_reclen; if (reclen & 3) { printf("ibcs2_read: reclen=%d\n", reclen); error = EFAULT; goto out; } if (BSD_DIRENT(inp)->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; continue; } if (reclen > len || resid < sizeof(struct ibcs2_direct)) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make an iBCS2-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). * * TODO: if length(filename) > 14, then break filename into * multiple entries and set inode = 0xffff except last */ idb.ino = (BSD_DIRENT(inp)->d_fileno > 0xfffe) ? 0xfffe : BSD_DIRENT(inp)->d_fileno; (void)copystr(BSD_DIRENT(inp)->d_name, idb.name, 14, &size); bzero(idb.name + size, 14 - size); if ((error = copyout(&idb, outp, sizeof(struct ibcs2_direct))) != 0) goto out; /* advance past this real entry */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; inp += reclen; /* advance output past iBCS2-shaped entry */ outp += sizeof(struct ibcs2_direct); resid -= sizeof(struct ibcs2_direct); } /* if we squished out the whole block, try again */ if (outp == uap->buf) goto again; fp->f_offset = off; /* update the vnode offset */ eof: td->td_retval[0] = uap->nbytes - resid; out: VOP_UNLOCK(vp, 0); fdrop(fp, td); if (cookies) free(cookies, M_TEMP); free(buf, M_TEMP); return (error); } int -ibcs2_mknod(td, uap) - struct thread *td; - struct ibcs2_mknod_args *uap; +ibcs2_mknod(struct thread *td, struct ibcs2_mknod_args *uap) { char *path; int error; CHECKALTCREAT(td, uap->path, &path); if (S_ISFIFO(uap->mode)) { error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->mode); } else { error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->mode, uap->dev); } free(path, M_TEMP); return (error); } int -ibcs2_getgroups(td, uap) - struct thread *td; - struct ibcs2_getgroups_args *uap; +ibcs2_getgroups(struct thread *td, struct ibcs2_getgroups_args *uap) { struct ucred *cred; ibcs2_gid_t *iset; u_int i, ngrp; int error; cred = td->td_ucred; ngrp = cred->cr_ngroups; if (uap->gidsetsize == 0) { error = 0; goto out; } if (uap->gidsetsize < ngrp) return (EINVAL); iset = malloc(ngrp * sizeof(*iset), M_TEMP, M_WAITOK); for (i = 0; i < ngrp; i++) iset[i] = (ibcs2_gid_t)cred->cr_groups[i]; error = copyout(iset, uap->gidset, ngrp * sizeof(ibcs2_gid_t)); free(iset, M_TEMP); out: td->td_retval[0] = ngrp; return (error); } int -ibcs2_setgroups(td, uap) - struct thread *td; - struct ibcs2_setgroups_args *uap; +ibcs2_setgroups(struct thread *td, struct ibcs2_setgroups_args *uap) { ibcs2_gid_t *iset; gid_t *gp; int error, i; if (uap->gidsetsize < 0 || uap->gidsetsize > ngroups_max + 1) return (EINVAL); if (uap->gidsetsize && uap->gidset == NULL) return (EINVAL); gp = malloc(uap->gidsetsize * sizeof(*gp), M_TEMP, M_WAITOK); if (uap->gidsetsize) { iset = malloc(uap->gidsetsize * sizeof(*iset), M_TEMP, M_WAITOK); error = copyin(uap->gidset, iset, sizeof(ibcs2_gid_t) * uap->gidsetsize); if (error) { free(iset, M_TEMP); goto out; } for (i = 0; i < uap->gidsetsize; i++) gp[i] = (gid_t)iset[i]; } error = kern_setgroups(td, uap->gidsetsize, gp); out: free(gp, M_TEMP); return (error); } int -ibcs2_setuid(td, uap) - struct 
thread *td; - struct ibcs2_setuid_args *uap; +ibcs2_setuid(struct thread *td, struct ibcs2_setuid_args *uap) { struct setuid_args sa; sa.uid = (uid_t)uap->uid; return sys_setuid(td, &sa); } int -ibcs2_setgid(td, uap) - struct thread *td; - struct ibcs2_setgid_args *uap; +ibcs2_setgid(struct thread *td, struct ibcs2_setgid_args *uap) { struct setgid_args sa; sa.gid = (gid_t)uap->gid; return sys_setgid(td, &sa); } int -ibcs2_time(td, uap) - struct thread *td; - struct ibcs2_time_args *uap; +ibcs2_time(struct thread *td, struct ibcs2_time_args *uap) { struct timeval tv; microtime(&tv); td->td_retval[0] = tv.tv_sec; if (uap->tp) return copyout((caddr_t)&tv.tv_sec, (caddr_t)uap->tp, sizeof(ibcs2_time_t)); else return 0; } int -ibcs2_pathconf(td, uap) - struct thread *td; - struct ibcs2_pathconf_args *uap; +ibcs2_pathconf(struct thread *td, struct ibcs2_pathconf_args *uap) { char *path; int error; CHECKALTEXIST(td, uap->path, &path); uap->name++; /* iBCS2 _PC_* defines are offset by one */ error = kern_pathconf(td, path, UIO_SYSSPACE, uap->name, FOLLOW); free(path, M_TEMP); return (error); } int -ibcs2_fpathconf(td, uap) - struct thread *td; - struct ibcs2_fpathconf_args *uap; +ibcs2_fpathconf(struct thread *td, struct ibcs2_fpathconf_args *uap) { uap->name++; /* iBCS2 _PC_* defines are offset by one */ return sys_fpathconf(td, (struct fpathconf_args *)uap); } int -ibcs2_sysconf(td, uap) - struct thread *td; - struct ibcs2_sysconf_args *uap; +ibcs2_sysconf(struct thread *td, struct ibcs2_sysconf_args *uap) { int mib[2], value, len, error; switch(uap->name) { case IBCS2_SC_ARG_MAX: mib[1] = KERN_ARGMAX; break; case IBCS2_SC_CHILD_MAX: td->td_retval[0] = lim_cur(td, RLIMIT_NPROC); return 0; case IBCS2_SC_CLK_TCK: td->td_retval[0] = hz; return 0; case IBCS2_SC_NGROUPS_MAX: mib[1] = KERN_NGROUPS; break; case IBCS2_SC_OPEN_MAX: td->td_retval[0] = lim_cur(td, RLIMIT_NOFILE); return 0; case IBCS2_SC_JOB_CONTROL: mib[1] = KERN_JOB_CONTROL; break; case IBCS2_SC_SAVED_IDS: mib[1] = KERN_SAVED_IDS; break; case IBCS2_SC_VERSION: mib[1] = KERN_POSIX1; break; case IBCS2_SC_PASS_MAX: td->td_retval[0] = 128; /* XXX - should we create PASS_MAX ? */ return 0; case IBCS2_SC_XOPEN_VERSION: td->td_retval[0] = 2; /* XXX: What should that be? 
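
ibcs2_times(), a little further below, folds each struct timeval into clock ticks with CONVTCK(r) = r.tv_sec * hz + r.tv_usec / (1000000 / hz); with the conventional hz = 100 (assumed here, since hz is a kernel tunable) that is seconds times 100 plus microseconds divided by 10,000. A standalone check of the conversion:

#include <stdio.h>
#include <sys/time.h>

static int hz = 100;	/* assumed scheduler clock rate */

/* The CONVTCK() macro from ibcs2_times(), written as a function. */
static long
convtck(struct timeval r)
{
	return (r.tv_sec * hz + r.tv_usec / (1000000 / hz));
}

int
main(void)
{
	struct timeval tv = { 2, 345678 };	/* 2.345678 s */

	/* 2 * 100 + 345678 / 10000 = 200 + 34 = 234 ticks */
	printf("%ld.%06ld s = %ld ticks\n",
	    (long)tv.tv_sec, (long)tv.tv_usec, convtck(tv));
	return (0);
}
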
*/ return 0; default: return EINVAL; } mib[0] = CTL_KERN; len = sizeof(value); error = kernel_sysctl(td, mib, 2, &value, &len, NULL, 0, NULL, 0); if (error) return error; td->td_retval[0] = value; return 0; } int -ibcs2_alarm(td, uap) - struct thread *td; - struct ibcs2_alarm_args *uap; +ibcs2_alarm(struct thread *td, struct ibcs2_alarm_args *uap) { struct itimerval itv, oitv; int error; timevalclear(&itv.it_interval); itv.it_value.tv_sec = uap->sec; itv.it_value.tv_usec = 0; error = kern_setitimer(td, ITIMER_REAL, &itv, &oitv); if (error) return (error); if (oitv.it_value.tv_usec != 0) oitv.it_value.tv_sec++; td->td_retval[0] = oitv.it_value.tv_sec; return (0); } int -ibcs2_times(td, uap) - struct thread *td; - struct ibcs2_times_args *uap; +ibcs2_times(struct thread *td, struct ibcs2_times_args *uap) { struct rusage ru; struct timeval t; struct tms tms; int error; #define CONVTCK(r) (r.tv_sec * hz + r.tv_usec / (1000000 / hz)) error = kern_getrusage(td, RUSAGE_SELF, &ru); if (error) return (error); tms.tms_utime = CONVTCK(ru.ru_utime); tms.tms_stime = CONVTCK(ru.ru_stime); error = kern_getrusage(td, RUSAGE_CHILDREN, &ru); if (error) return (error); tms.tms_cutime = CONVTCK(ru.ru_utime); tms.tms_cstime = CONVTCK(ru.ru_stime); microtime(&t); td->td_retval[0] = CONVTCK(t); return (copyout(&tms, uap->tp, sizeof(struct tms))); } int -ibcs2_stime(td, uap) - struct thread *td; - struct ibcs2_stime_args *uap; +ibcs2_stime(struct thread *td, struct ibcs2_stime_args *uap) { struct timeval tv; long secs; int error; error = copyin(uap->timep, &secs, sizeof(long)); if (error) return (error); tv.tv_sec = secs; tv.tv_usec = 0; error = kern_settimeofday(td, &tv, NULL); if (error) error = EPERM; return (error); } int -ibcs2_utime(td, uap) - struct thread *td; - struct ibcs2_utime_args *uap; +ibcs2_utime(struct thread *td, struct ibcs2_utime_args *uap) { struct ibcs2_utimbuf ubuf; struct timeval tbuf[2], *tp; char *path; int error; if (uap->buf) { error = copyin(uap->buf, &ubuf, sizeof(ubuf)); if (error) return (error); tbuf[0].tv_sec = ubuf.actime; tbuf[0].tv_usec = 0; tbuf[1].tv_sec = ubuf.modtime; tbuf[1].tv_usec = 0; tp = tbuf; } else tp = NULL; CHECKALTEXIST(td, uap->path, &path); error = kern_utimesat(td, AT_FDCWD, path, UIO_SYSSPACE, tp, UIO_SYSSPACE); free(path, M_TEMP); return (error); } int -ibcs2_nice(td, uap) - struct thread *td; - struct ibcs2_nice_args *uap; +ibcs2_nice(struct thread *td, struct ibcs2_nice_args *uap) { int error; struct setpriority_args sa; sa.which = PRIO_PROCESS; sa.who = 0; sa.prio = td->td_proc->p_nice + uap->incr; if ((error = sys_setpriority(td, &sa)) != 0) return EPERM; td->td_retval[0] = td->td_proc->p_nice; return 0; } /* * iBCS2 getpgrp, setpgrp, setsid, and setpgid */ int -ibcs2_pgrpsys(td, uap) - struct thread *td; - struct ibcs2_pgrpsys_args *uap; +ibcs2_pgrpsys(struct thread *td, struct ibcs2_pgrpsys_args *uap) { struct proc *p = td->td_proc; switch (uap->type) { case 0: /* getpgrp */ PROC_LOCK(p); td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; case 1: /* setpgrp */ { struct setpgid_args sa; sa.pid = 0; sa.pgid = 0; sys_setpgid(td, &sa); PROC_LOCK(p); td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; } case 2: /* setpgid */ { struct setpgid_args sa; sa.pid = uap->pid; sa.pgid = uap->pgid; return sys_setpgid(td, &sa); } case 3: /* setsid */ return sys_setsid(td, NULL); default: return EINVAL; } } /* * XXX - need to check for nested calls */ int -ibcs2_plock(td, uap) - struct thread *td; - struct ibcs2_plock_args *uap; 
+ibcs2_plock(struct thread *td, struct ibcs2_plock_args *uap) { int error; #define IBCS2_UNLOCK 0 #define IBCS2_PROCLOCK 1 #define IBCS2_TEXTLOCK 2 #define IBCS2_DATALOCK 4 switch(uap->cmd) { case IBCS2_UNLOCK: error = priv_check(td, PRIV_VM_MUNLOCK); if (error) return (error); /* XXX - TODO */ return (0); case IBCS2_PROCLOCK: case IBCS2_TEXTLOCK: case IBCS2_DATALOCK: error = priv_check(td, PRIV_VM_MLOCK); if (error) return (error); /* XXX - TODO */ return 0; } return EINVAL; } int -ibcs2_uadmin(td, uap) - struct thread *td; - struct ibcs2_uadmin_args *uap; +ibcs2_uadmin(struct thread *td, struct ibcs2_uadmin_args *uap) { #define SCO_A_REBOOT 1 #define SCO_A_SHUTDOWN 2 #define SCO_A_REMOUNT 4 #define SCO_A_CLOCK 8 #define SCO_A_SETCONFIG 128 #define SCO_A_GETDEV 130 #define SCO_AD_HALT 0 #define SCO_AD_BOOT 1 #define SCO_AD_IBOOT 2 #define SCO_AD_PWRDOWN 3 #define SCO_AD_PWRNAP 4 #define SCO_AD_PANICBOOT 1 #define SCO_AD_GETBMAJ 0 #define SCO_AD_GETCMAJ 1 switch(uap->cmd) { case SCO_A_REBOOT: case SCO_A_SHUTDOWN: switch(uap->func) { struct reboot_args r; case SCO_AD_HALT: case SCO_AD_PWRDOWN: case SCO_AD_PWRNAP: r.opt = RB_HALT; return (sys_reboot(td, &r)); case SCO_AD_BOOT: case SCO_AD_IBOOT: r.opt = RB_AUTOBOOT; return (sys_reboot(td, &r)); } return EINVAL; case SCO_A_REMOUNT: case SCO_A_CLOCK: case SCO_A_SETCONFIG: return 0; case SCO_A_GETDEV: return EINVAL; /* XXX - TODO */ } return EINVAL; } int -ibcs2_sysfs(td, uap) - struct thread *td; - struct ibcs2_sysfs_args *uap; +ibcs2_sysfs(struct thread *td, struct ibcs2_sysfs_args *uap) { #define IBCS2_GETFSIND 1 #define IBCS2_GETFSTYP 2 #define IBCS2_GETNFSTYP 3 switch(uap->cmd) { case IBCS2_GETFSIND: case IBCS2_GETFSTYP: case IBCS2_GETNFSTYP: break; } return EINVAL; /* XXX - TODO */ } int -ibcs2_unlink(td, uap) - struct thread *td; - struct ibcs2_unlink_args *uap; +ibcs2_unlink(struct thread *td, struct ibcs2_unlink_args *uap) { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_unlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, 0); free(path, M_TEMP); return (error); } int -ibcs2_chdir(td, uap) - struct thread *td; - struct ibcs2_chdir_args *uap; +ibcs2_chdir(struct thread *td, struct ibcs2_chdir_args *uap) { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_chdir(td, path, UIO_SYSSPACE); free(path, M_TEMP); return (error); } int -ibcs2_chmod(td, uap) - struct thread *td; - struct ibcs2_chmod_args *uap; +ibcs2_chmod(struct thread *td, struct ibcs2_chmod_args *uap) { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->mode, 0); free(path, M_TEMP); return (error); } int -ibcs2_chown(td, uap) - struct thread *td; - struct ibcs2_chown_args *uap; +ibcs2_chown(struct thread *td, struct ibcs2_chown_args *uap) { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->uid, uap->gid, 0); free(path, M_TEMP); return (error); } int -ibcs2_rmdir(td, uap) - struct thread *td; - struct ibcs2_rmdir_args *uap; +ibcs2_rmdir(struct thread *td, struct ibcs2_rmdir_args *uap) { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_rmdirat(td, AT_FDCWD, path, UIO_SYSSPACE); free(path, M_TEMP); return (error); } int -ibcs2_mkdir(td, uap) - struct thread *td; - struct ibcs2_mkdir_args *uap; +ibcs2_mkdir(struct thread *td, struct ibcs2_mkdir_args *uap) { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->mode); 
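	/*
	 * This wrapper, like the other path-taking wrappers in this
	 * file, follows one pattern: CHECKALTEXIST() resolves the user
	 * path against the emulation tree into an M_TEMP kernel buffer
	 * (the macro returns from the caller on failure, which is why
	 * rename() and symlink() below must open-code CHECKALTCREAT()
	 * so their first path can be freed), the kern_*at() service is
	 * then called with UIO_SYSSPACE, and the buffer is freed.
	 * A minimal sketch, using a hypothetical ibcs2_frob() wrapper
	 * and kern_frobat() service purely for illustration:
	 *
	 *	int
	 *	ibcs2_frob(struct thread *td, struct ibcs2_frob_args *uap)
	 *	{
	 *		char *path;
	 *		int error;
	 *
	 *		CHECKALTEXIST(td, uap->path, &path);
	 *		error = kern_frobat(td, AT_FDCWD, path, UIO_SYSSPACE);
	 *		free(path, M_TEMP);
	 *		return (error);
	 *	}
	 */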
free(path, M_TEMP); return (error); } int -ibcs2_symlink(td, uap) - struct thread *td; - struct ibcs2_symlink_args *uap; +ibcs2_symlink(struct thread *td, struct ibcs2_symlink_args *uap) { char *path, *link; int error; CHECKALTEXIST(td, uap->path, &path); /* * Have to expand CHECKALTCREAT() so that 'path' can be freed on * errors. */ error = ibcs2_emul_find(td, uap->link, UIO_USERSPACE, &link, 1); if (link == NULL) { free(path, M_TEMP); return (error); } error = kern_symlinkat(td, path, AT_FDCWD, link, UIO_SYSSPACE); free(path, M_TEMP); free(link, M_TEMP); return (error); } int -ibcs2_rename(td, uap) - struct thread *td; - struct ibcs2_rename_args *uap; +ibcs2_rename(struct thread *td, struct ibcs2_rename_args *uap) { char *from, *to; int error; CHECKALTEXIST(td, uap->from, &from); /* * Have to expand CHECKALTCREAT() so that 'from' can be freed on * errors. */ error = ibcs2_emul_find(td, uap->to, UIO_USERSPACE, &to, 1); if (to == NULL) { free(from, M_TEMP); return (error); } error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE); free(from, M_TEMP); free(to, M_TEMP); return (error); } int -ibcs2_readlink(td, uap) - struct thread *td; - struct ibcs2_readlink_args *uap; +ibcs2_readlink(struct thread *td, struct ibcs2_readlink_args *uap) { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_readlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, uap->buf, UIO_USERSPACE, uap->count); free(path, M_TEMP); return (error); } Index: stable/11/sys/i386/ibcs2/ibcs2_other.c =================================================================== --- stable/11/sys/i386/ibcs2/ibcs2_other.c (revision 331642) +++ stable/11/sys/i386/ibcs2/ibcs2_other.c (revision 331643) @@ -1,121 +1,121 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1995 Steven Wallace * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * IBCS2 compatibility module. 
*/ #include "opt_spx_hack.h" #include #include #include #include #include #include #include #include #include #include #include #include #define IBCS2_SECURE_GETLUID 1 #define IBCS2_SECURE_SETLUID 2 int ibcs2_secure(struct thread *td, struct ibcs2_secure_args *uap) { switch (uap->cmd) { case IBCS2_SECURE_GETLUID: /* get login uid */ td->td_retval[0] = td->td_ucred->cr_uid; return 0; case IBCS2_SECURE_SETLUID: /* set login uid */ return EPERM; default: printf("IBCS2: 'secure' cmd=%d not implemented\n", uap->cmd); } return EINVAL; } int -ibcs2_lseek(struct thread *td, register struct ibcs2_lseek_args *uap) +ibcs2_lseek(struct thread *td, struct ibcs2_lseek_args *uap) { struct lseek_args largs; int error; largs.fd = uap->fd; largs.offset = uap->offset; largs.whence = uap->whence; error = sys_lseek(td, &largs); return (error); } #ifdef SPX_HACK #include #include int spx_open(struct thread *td) { struct socket_args sock; struct sockaddr_un sun; int fd, error; /* obtain a socket. */ DPRINTF(("SPX: open socket\n")); sock.domain = AF_UNIX; sock.type = SOCK_STREAM; sock.protocol = 0; error = sys_socket(td, &sock); if (error) return error; fd = td->td_retval[0]; /* connect the socket to standard X socket */ DPRINTF(("SPX: connect to /tmp/X11-unix/X0\n")); sun.sun_family = AF_UNIX; strcpy(sun.sun_path, "/tmp/.X11-unix/X0"); sun.sun_len = sizeof(struct sockaddr_un) - sizeof(sun.sun_path) + strlen(sun.sun_path) + 1; error = kern_connectat(td, AT_FDCWD, fd, (struct sockaddr *)&sun); if (error) { kern_close(td, fd); return error; } td->td_retval[0] = fd; return 0; } #endif /* SPX_HACK */ Index: stable/11/sys/i386/ibcs2/ibcs2_signal.c =================================================================== --- stable/11/sys/i386/ibcs2/ibcs2_signal.c (revision 331642) +++ stable/11/sys/i386/ibcs2/ibcs2_signal.c (revision 331643) @@ -1,443 +1,429 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1995 Scott Bartram * Copyright (c) 1995 Steven Wallace * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #define sigemptyset(s) SIGEMPTYSET(*(s)) #define sigismember(s, n) SIGISMEMBER(*(s), n) #define sigaddset(s, n) SIGADDSET(*(s), n) #define ibcs2_sigmask(n) (1 << ((n) - 1)) #define ibcs2_sigemptyset(s) bzero((s), sizeof(*(s))) #define ibcs2_sigismember(s, n) (*(s) & ibcs2_sigmask(n)) #define ibcs2_sigaddset(s, n) (*(s) |= ibcs2_sigmask(n)) static void ibcs2_to_bsd_sigset(const ibcs2_sigset_t *, sigset_t *); static void bsd_to_ibcs2_sigset(const sigset_t *, ibcs2_sigset_t *); static void ibcs2_to_bsd_sigaction(struct ibcs2_sigaction *, struct sigaction *); static void bsd_to_ibcs2_sigaction(struct sigaction *, struct ibcs2_sigaction *); int bsd_to_ibcs2_sig[IBCS2_SIGTBLSZ] = { IBCS2_SIGHUP, /* 1 */ IBCS2_SIGINT, /* 2 */ IBCS2_SIGQUIT, /* 3 */ IBCS2_SIGILL, /* 4 */ IBCS2_SIGTRAP, /* 5 */ IBCS2_SIGABRT, /* 6 */ IBCS2_SIGEMT, /* 7 */ IBCS2_SIGFPE, /* 8 */ IBCS2_SIGKILL, /* 9 */ IBCS2_SIGBUS, /* 10 */ IBCS2_SIGSEGV, /* 11 */ IBCS2_SIGSYS, /* 12 */ IBCS2_SIGPIPE, /* 13 */ IBCS2_SIGALRM, /* 14 */ IBCS2_SIGTERM, /* 15 */ 0, /* 16 - SIGURG */ IBCS2_SIGSTOP, /* 17 */ IBCS2_SIGTSTP, /* 18 */ IBCS2_SIGCONT, /* 19 */ IBCS2_SIGCLD, /* 20 */ IBCS2_SIGTTIN, /* 21 */ IBCS2_SIGTTOU, /* 22 */ IBCS2_SIGPOLL, /* 23 */ 0, /* 24 - SIGXCPU */ 0, /* 25 - SIGXFSZ */ IBCS2_SIGVTALRM, /* 26 */ IBCS2_SIGPROF, /* 27 */ IBCS2_SIGWINCH, /* 28 */ 0, /* 29 */ IBCS2_SIGUSR1, /* 30 */ IBCS2_SIGUSR2, /* 31 */ 0 /* 32 */ }; static int ibcs2_to_bsd_sig[IBCS2_SIGTBLSZ] = { SIGHUP, /* 1 */ SIGINT, /* 2 */ SIGQUIT, /* 3 */ SIGILL, /* 4 */ SIGTRAP, /* 5 */ SIGABRT, /* 6 */ SIGEMT, /* 7 */ SIGFPE, /* 8 */ SIGKILL, /* 9 */ SIGBUS, /* 10 */ SIGSEGV, /* 11 */ SIGSYS, /* 12 */ SIGPIPE, /* 13 */ SIGALRM, /* 14 */ SIGTERM, /* 15 */ SIGUSR1, /* 16 */ SIGUSR2, /* 17 */ SIGCHLD, /* 18 */ 0, /* 19 - SIGPWR */ SIGWINCH, /* 20 */ 0, /* 21 */ SIGIO, /* 22 */ SIGSTOP, /* 23 */ SIGTSTP, /* 24 */ SIGCONT, /* 25 */ SIGTTIN, /* 26 */ SIGTTOU, /* 27 */ SIGVTALRM, /* 28 */ SIGPROF, /* 29 */ 0, /* 30 */ 0, /* 31 */ 0 /* 32 */ }; void ibcs2_to_bsd_sigset(iss, bss) const ibcs2_sigset_t *iss; sigset_t *bss; { int i, newsig; sigemptyset(bss); for (i = 1; i <= IBCS2_SIGTBLSZ; i++) { if (ibcs2_sigismember(iss, i)) { newsig = ibcs2_to_bsd_sig[_SIG_IDX(i)]; if (newsig) sigaddset(bss, newsig); } } } static void bsd_to_ibcs2_sigset(bss, iss) const sigset_t *bss; ibcs2_sigset_t *iss; { int i, newsig; ibcs2_sigemptyset(iss); for (i = 1; i <= IBCS2_SIGTBLSZ; i++) { if (sigismember(bss, i)) { newsig = bsd_to_ibcs2_sig[_SIG_IDX(i)]; if (newsig) ibcs2_sigaddset(iss, newsig); } } } static void ibcs2_to_bsd_sigaction(isa, bsa) struct ibcs2_sigaction *isa; struct sigaction *bsa; { bsa->sa_handler = isa->isa_handler; ibcs2_to_bsd_sigset(&isa->isa_mask, &bsa->sa_mask); bsa->sa_flags = 0; /* ??? 
SA_NODEFER */ if ((isa->isa_flags & IBCS2_SA_NOCLDSTOP) != 0) bsa->sa_flags |= SA_NOCLDSTOP; } static void bsd_to_ibcs2_sigaction(bsa, isa) struct sigaction *bsa; struct ibcs2_sigaction *isa; { isa->isa_handler = bsa->sa_handler; bsd_to_ibcs2_sigset(&bsa->sa_mask, &isa->isa_mask); isa->isa_flags = 0; if ((bsa->sa_flags & SA_NOCLDSTOP) != 0) isa->isa_flags |= IBCS2_SA_NOCLDSTOP; } int -ibcs2_sigaction(td, uap) - register struct thread *td; - struct ibcs2_sigaction_args *uap; +ibcs2_sigaction(struct thread *td, struct ibcs2_sigaction_args *uap) { struct ibcs2_sigaction isa; struct sigaction nbsa, obsa; struct sigaction *nbsap; int error; if (uap->act != NULL) { if ((error = copyin(uap->act, &isa, sizeof(isa))) != 0) return (error); ibcs2_to_bsd_sigaction(&isa, &nbsa); nbsap = &nbsa; } else nbsap = NULL; if (uap->sig <= 0 || uap->sig > IBCS2_NSIG) return (EINVAL); error = kern_sigaction(td, ibcs2_to_bsd_sig[_SIG_IDX(uap->sig)], &nbsa, &obsa, 0); if (error == 0 && uap->oact != NULL) { bsd_to_ibcs2_sigaction(&obsa, &isa); error = copyout(&isa, uap->oact, sizeof(isa)); } return (error); } int -ibcs2_sigsys(td, uap) - register struct thread *td; - struct ibcs2_sigsys_args *uap; +ibcs2_sigsys(struct thread *td, struct ibcs2_sigsys_args *uap) { struct proc *p = td->td_proc; struct sigaction sa; int signum = IBCS2_SIGNO(uap->sig); int error; if (signum <= 0 || signum > IBCS2_NSIG) { if (IBCS2_SIGCALL(uap->sig) == IBCS2_SIGNAL_MASK || IBCS2_SIGCALL(uap->sig) == IBCS2_SIGSET_MASK) td->td_retval[0] = (int)IBCS2_SIG_ERR; return EINVAL; } signum = ibcs2_to_bsd_sig[_SIG_IDX(signum)]; switch (IBCS2_SIGCALL(uap->sig)) { case IBCS2_SIGSET_MASK: /* * Check for SIG_HOLD action. * Otherwise, perform signal() except with different sa_flags. */ if (uap->fp != IBCS2_SIG_HOLD) { /* add sig to mask before exececuting signal handler */ sa.sa_flags = 0; goto ibcs2_sigset; } /* else FALLTHROUGH to sighold */ case IBCS2_SIGHOLD_MASK: { sigset_t mask; SIGEMPTYSET(mask); SIGADDSET(mask, signum); return (kern_sigprocmask(td, SIG_BLOCK, &mask, NULL, 0)); } case IBCS2_SIGNAL_MASK: { struct sigaction osa; /* do not automatically block signal */ sa.sa_flags = SA_NODEFER; #ifdef SA_RESETHAND if((signum != IBCS2_SIGILL) && (signum != IBCS2_SIGTRAP) && (signum != IBCS2_SIGPWR)) /* set to SIG_DFL before executing handler */ sa.sa_flags |= SA_RESETHAND; #endif ibcs2_sigset: sa.sa_handler = uap->fp; sigemptyset(&sa.sa_mask); #if 0 if (signum != SIGALRM) sa.sa_flags |= SA_RESTART; #endif error = kern_sigaction(td, signum, &sa, &osa, 0); if (error != 0) { DPRINTF(("signal: sigaction failed: %d\n", error)); td->td_retval[0] = (int)IBCS2_SIG_ERR; return (error); } td->td_retval[0] = (int)osa.sa_handler; /* special sigset() check */ if(IBCS2_SIGCALL(uap->sig) == IBCS2_SIGSET_MASK) { PROC_LOCK(p); /* check to make sure signal is not blocked */ if(sigismember(&td->td_sigmask, signum)) { /* return SIG_HOLD and unblock signal*/ td->td_retval[0] = (int)IBCS2_SIG_HOLD; SIGDELSET(td->td_sigmask, signum); signotify(td); } PROC_UNLOCK(p); } return 0; } case IBCS2_SIGRELSE_MASK: { sigset_t mask; SIGEMPTYSET(mask); SIGADDSET(mask, signum); return (kern_sigprocmask(td, SIG_UNBLOCK, &mask, NULL, 0)); } case IBCS2_SIGIGNORE_MASK: { sa.sa_handler = SIG_IGN; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; error = kern_sigaction(td, signum, &sa, NULL, 0); if (error != 0) DPRINTF(("sigignore: sigaction failed\n")); return (error); } case IBCS2_SIGPAUSE_MASK: { sigset_t mask; PROC_LOCK(p); mask = td->td_sigmask; PROC_UNLOCK(p); SIGDELSET(mask, signum); return 
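	    /*
	     * sigpause(): wait with 'signum' removed from the copied
	     * mask; kern_sigsuspend() reinstalls the caller's original
	     * mask when the wait is interrupted.
	     */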
kern_sigsuspend(td, mask); } default: return ENOSYS; } } int -ibcs2_sigprocmask(td, uap) - register struct thread *td; - struct ibcs2_sigprocmask_args *uap; +ibcs2_sigprocmask(struct thread *td, struct ibcs2_sigprocmask_args *uap) { ibcs2_sigset_t iss; sigset_t oss, nss; sigset_t *nssp; int error, how; switch (uap->how) { case IBCS2_SIG_BLOCK: how = SIG_BLOCK; break; case IBCS2_SIG_UNBLOCK: how = SIG_UNBLOCK; break; case IBCS2_SIG_SETMASK: how = SIG_SETMASK; break; default: return (EINVAL); } if (uap->set != NULL) { if ((error = copyin(uap->set, &iss, sizeof(iss))) != 0) return error; ibcs2_to_bsd_sigset(&iss, &nss); nssp = &nss; } else nssp = NULL; error = kern_sigprocmask(td, how, nssp, &oss, 0); if (error == 0 && uap->oset != NULL) { bsd_to_ibcs2_sigset(&oss, &iss); error = copyout(&iss, uap->oset, sizeof(iss)); } return (error); } int -ibcs2_sigpending(td, uap) - register struct thread *td; - struct ibcs2_sigpending_args *uap; +ibcs2_sigpending(struct thread *td, struct ibcs2_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t bss; ibcs2_sigset_t iss; PROC_LOCK(p); bss = td->td_siglist; SIGSETOR(bss, p->p_siglist); SIGSETAND(bss, td->td_sigmask); PROC_UNLOCK(p); bsd_to_ibcs2_sigset(&bss, &iss); return copyout(&iss, uap->mask, sizeof(iss)); } int -ibcs2_sigsuspend(td, uap) - register struct thread *td; - struct ibcs2_sigsuspend_args *uap; +ibcs2_sigsuspend(struct thread *td, struct ibcs2_sigsuspend_args *uap) { ibcs2_sigset_t sss; sigset_t bss; int error; if ((error = copyin(uap->mask, &sss, sizeof(sss))) != 0) return error; ibcs2_to_bsd_sigset(&sss, &bss); return kern_sigsuspend(td, bss); } int -ibcs2_pause(td, uap) - register struct thread *td; - struct ibcs2_pause_args *uap; +ibcs2_pause(struct thread *td, struct ibcs2_pause_args *uap) { sigset_t mask; PROC_LOCK(td->td_proc); mask = td->td_sigmask; PROC_UNLOCK(td->td_proc); return kern_sigsuspend(td, mask); } int -ibcs2_kill(td, uap) - register struct thread *td; - struct ibcs2_kill_args *uap; +ibcs2_kill(struct thread *td, struct ibcs2_kill_args *uap) { struct kill_args ka; if (uap->signo <= 0 || uap->signo > IBCS2_NSIG) return (EINVAL); ka.pid = uap->pid; ka.signum = ibcs2_to_bsd_sig[_SIG_IDX(uap->signo)]; return sys_kill(td, &ka); } Index: stable/11/sys/i386/ibcs2/ibcs2_socksys.c =================================================================== --- stable/11/sys/i386/ibcs2/ibcs2_socksys.c (revision 331642) +++ stable/11/sys/i386/ibcs2/ibcs2_socksys.c (revision 331643) @@ -1,212 +1,206 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994, 1995 Scott Bartram * Copyright (c) 1994 Arne H Juul * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* Local structures */ struct getipdomainname_args { char *ipdomainname; int len; }; struct setipdomainname_args { char *ipdomainname; int len; }; /* Local prototypes */ static int ibcs2_getipdomainname(struct thread *, struct getipdomainname_args *); static int ibcs2_setipdomainname(struct thread *, struct setipdomainname_args *); /* * iBCS2 socksys calls. */ int -ibcs2_socksys(td, uap) - register struct thread *td; - register struct ibcs2_socksys_args *uap; +ibcs2_socksys(struct thread *td, struct ibcs2_socksys_args *uap) { int error; int realargs[7]; /* 1 for command, 6 for recvfrom */ void *passargs; /* * SOCKET should only be legal on /dev/socksys. * GETIPDOMAINNAME should only be legal on /dev/socksys ? * The others are (and should be) only legal on sockets. */ if ((error = copyin(uap->argsp, (caddr_t)realargs, sizeof(realargs))) != 0) return error; DPRINTF(("ibcs2_socksys: %08x %08x %08x %08x %08x %08x %08x\n", realargs[0], realargs[1], realargs[2], realargs[3], realargs[4], realargs[5], realargs[6])); passargs = (void *)(realargs + 1); switch (realargs[0]) { case SOCKSYS_ACCEPT: return sys_accept(td, passargs); case SOCKSYS_BIND: return sys_bind(td, passargs); case SOCKSYS_CONNECT: return sys_connect(td, passargs); case SOCKSYS_GETPEERNAME: return sys_getpeername(td, passargs); case SOCKSYS_GETSOCKNAME: return sys_getsockname(td, passargs); case SOCKSYS_GETSOCKOPT: return sys_getsockopt(td, passargs); case SOCKSYS_LISTEN: return sys_listen(td, passargs); case SOCKSYS_RECV: realargs[5] = realargs[6] = 0; /* FALLTHROUGH */ case SOCKSYS_RECVFROM: return sys_recvfrom(td, passargs); case SOCKSYS_SEND: realargs[5] = realargs[6] = 0; /* FALLTHROUGH */ case SOCKSYS_SENDTO: return sys_sendto(td, passargs); case SOCKSYS_SETSOCKOPT: return sys_setsockopt(td, passargs); case SOCKSYS_SHUTDOWN: return sys_shutdown(td, passargs); case SOCKSYS_SOCKET: return sys_socket(td, passargs); case SOCKSYS_SELECT: return sys_select(td, passargs); case SOCKSYS_GETIPDOMAIN: return ibcs2_getipdomainname(td, passargs); case SOCKSYS_SETIPDOMAIN: return ibcs2_setipdomainname(td, passargs); case SOCKSYS_ADJTIME: return sys_adjtime(td, passargs); case SOCKSYS_SETREUID: return sys_setreuid(td, passargs); case SOCKSYS_SETREGID: return sys_setregid(td, passargs); case SOCKSYS_GETTIME: return sys_gettimeofday(td, passargs); case SOCKSYS_SETTIME: return sys_settimeofday(td, passargs); case SOCKSYS_GETITIMER: return sys_getitimer(td, passargs); case SOCKSYS_SETITIMER: return sys_setitimer(td, passargs); default: printf("socksys unknown %08x %08x %08x %08x %08x %08x %08x\n", realargs[0], realargs[1], realargs[2], realargs[3], realargs[4], realargs[5], realargs[6]); return EINVAL; } /* NOTREACHED */ } /* ARGSUSED */ static int -ibcs2_getipdomainname(td, uap) - struct thread *td; - struct getipdomainname_args *uap; +ibcs2_getipdomainname(struct thread *td, struct getipdomainname_args *uap) { char hname[MAXHOSTNAMELEN], *dptr; 
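	/*
	 * The IP domain name is everything after the first '.' of the
	 * credential's hostname: e.g. a hostname of "node.example.com"
	 * yields "example.com" (hostname value illustrative only), and
	 * a hostname without a dot yields the empty string.
	 */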
int len; /* Get the domain name. */ getcredhostname(td->td_ucred, hname, sizeof(hname)); dptr = strchr(hname, '.'); if ( dptr ) dptr++; else /* Make it effectively an empty string */ dptr = hname + strlen(hname); len = strlen(dptr) + 1; if ((u_int)uap->len > len + 1) uap->len = len + 1; return (copyout((caddr_t)dptr, (caddr_t)uap->ipdomainname, uap->len)); } /* ARGSUSED */ static int -ibcs2_setipdomainname(td, uap) - struct thread *td; - struct setipdomainname_args *uap; +ibcs2_setipdomainname(struct thread *td, struct setipdomainname_args *uap) { char hname[MAXHOSTNAMELEN], *ptr; int error, sctl[2], hlen; /* Get the domain name */ getcredhostname(td->td_ucred, hname, sizeof(hname)); /* W/out a hostname a domain-name is nonsense */ if ( strlen(hname) == 0 ) return EINVAL; /* Get the host's unqualified name (strip off the domain) */ ptr = strchr(hname, '.'); if ( ptr != NULL ) { ptr++; *ptr = '\0'; } else { if (strlcat(hname, ".", sizeof(hname)) >= sizeof(hname)) return (EINVAL); } /* Set ptr to the end of the string so we can append to it */ hlen = strlen(hname); ptr = hname + hlen; if ((u_int)uap->len > (sizeof (hname) - hlen - 1)) return EINVAL; /* Append the ipdomain to the end */ error = copyinstr((caddr_t)uap->ipdomainname, ptr, uap->len, NULL); if (error) return (error); /* 'sethostname' with the new information */ sctl[0] = CTL_KERN; sctl[1] = KERN_HOSTNAME; hlen = strlen(hname) + 1; return (kernel_sysctl(td, sctl, 2, 0, 0, hname, hlen, 0, 0)); } Index: stable/11/sys/i386/isa/ccbque.h =================================================================== --- stable/11/sys/i386/isa/ccbque.h (revision 331642) +++ stable/11/sys/i386/isa/ccbque.h (revision 331643) @@ -1,124 +1,124 @@ /* $NetBSD$ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * [NetBSD for NEC PC98 series] * Copyright (c) 1994, 1995, 1996 NetBSD/pc98 porting staff. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Common command control queue funcs. * Written by N. Honda. 
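 *
 * The GENERIC_CCB_* macros below generate one set of queue routines per
 * driver prefix: GENERIC_CCB_ASSERT(DEV, CCBTYPE) emits the TAILQ types
 * and prototypes, GENERIC_CCB_STATIC_ALLOC(DEV, CCBTYPE) the static
 * queue head, and GENERIC_CCB(DEV, CCBTYPE, CHAIN) the function bodies,
 * so a hypothetical driver prefix "foo" would obtain foo_init_ccbque(),
 * foo_get_ccb() and foo_free_ccb().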
*/ #ifndef _CCBQUE_H_ #define _CCBQUE_H_ #define CCB_MWANTED 0x01 /* (I) structure and prototype */ #define GENERIC_CCB_ASSERT(DEV, CCBTYPE) \ TAILQ_HEAD(CCBTYPE##tab, CCBTYPE); \ struct CCBTYPE##que { \ struct CCBTYPE##tab CCBTYPE##tab; \ int count; \ int maxccb; \ u_int flags; \ }; \ \ void DEV##_init_ccbque(int); \ struct CCBTYPE *DEV##_get_ccb(void); \ -void DEV##_free_ccb(register struct CCBTYPE *); +void DEV##_free_ccb(struct CCBTYPE *); /* (II) static allocated memory */ #define GENERIC_CCB_STATIC_ALLOC(DEV, CCBTYPE) \ static struct CCBTYPE##que CCBTYPE##que; /* (III) functions */ #define GENERIC_CCB(DEV, CCBTYPE, CHAIN) \ \ void \ DEV##_init_ccbque(count) \ int count; \ { \ if (CCBTYPE##que.maxccb == 0) \ TAILQ_INIT(&CCBTYPE##que.CCBTYPE##tab); \ CCBTYPE##que.maxccb += count; \ } \ \ struct CCBTYPE * \ DEV##_get_ccb() \ { \ - register struct CCBTYPE *cb; \ + struct CCBTYPE *cb; \ int s = splcam(); \ \ if (CCBTYPE##que.count < CCBTYPE##que.maxccb) \ { \ CCBTYPE##que.count ++; \ cb = TAILQ_FIRST(&(CCBTYPE##que.CCBTYPE##tab)); \ if (cb != NULL) \ { \ TAILQ_REMOVE(&CCBTYPE##que.CCBTYPE##tab, cb, CHAIN);\ goto out; \ } \ else \ { \ cb = malloc(sizeof(*cb), M_DEVBUF, M_NOWAIT); \ if (cb != NULL) \ { \ bzero(cb, sizeof(*cb)); \ goto out; \ } \ } \ CCBTYPE##que.count --; \ } \ \ cb = NULL; \ \ out: \ splx(s); \ return cb; \ } \ \ void \ DEV##_free_ccb(cb) \ - register struct CCBTYPE *cb; \ + struct CCBTYPE *cb; \ { \ int s = splcam(); \ \ TAILQ_INSERT_TAIL(&CCBTYPE##que.CCBTYPE##tab, cb, CHAIN); \ CCBTYPE##que.count --; \ \ if (CCBTYPE##que.flags & CCB_MWANTED) \ { \ CCBTYPE##que.flags &= ~CCB_MWANTED; \ wakeup ((caddr_t) &CCBTYPE##que.count); \ } \ splx(s); \ } #endif /* !_CCBQUE_H_ */ Index: stable/11/sys/i386/isa/elink.c =================================================================== --- stable/11/sys/i386/isa/elink.c (revision 331642) +++ stable/11/sys/i386/isa/elink.c (revision 331643) @@ -1,96 +1,96 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1994 Charles Hannum. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles Hannum. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Common code for dealing with 3COM ethernet cards. */ #include #include #include #include #include #include /* * Issue a `global reset' to all cards. We have to be careful to do this only * once during autoconfig, to prevent resetting boards that have already been * configured. */ void elink_reset() { static int x = 0; if (x == 0) { x = 1; outb(ELINK_ID_PORT, ELINK_RESET); } outb(ELINK_ID_PORT, 0); outb(ELINK_ID_PORT, 0); return; } /* * The `ID sequence' is really just snapshots of an 8-bit CRC register as 0 * bits are shifted in. Different board types use different polynomials. */ void elink_idseq(u_char p) { - register int i; - register u_char c; + int i; + u_char c; c = 0xff; for (i = 255; i; i--) { outb(ELINK_ID_PORT, c); if (c & 0x80) { c <<= 1; c ^= p; } else c <<= 1; } } static moduledata_t elink_mod = { "elink",/* module name */ NULL, /* event handler */ 0 /* extra data */ }; DECLARE_MODULE(elink, elink_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(elink, 1); Index: stable/11/sys/kern/inflate.c =================================================================== --- stable/11/sys/kern/inflate.c (revision 331642) +++ stable/11/sys/kern/inflate.c (revision 331643) @@ -1,1077 +1,1077 @@ /* * Most parts of this file are not covered by: * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- */ #include __FBSDID("$FreeBSD$"); #include #include #ifdef _KERNEL #include #include #endif #include #ifdef _KERNEL static MALLOC_DEFINE(M_GZIP, "gzip_trees", "Gzip trees"); #endif /* needed to make inflate() work */ #define uch u_char #define ush u_short #define ulg u_long /* Stuff to make inflate() work */ #ifdef _KERNEL #define memzero(dest,len) bzero(dest,len) #endif #define NOMEMCPY #ifdef _KERNEL #define FPRINTF printf #else extern void putstr (char *); #define FPRINTF putstr #endif #define FLUSH(x,y) { \ int foo = (*x->gz_output)(x->gz_private,x->gz_slide,y); \ if (foo) \ return foo; \ } static const int qflag = 0; #ifndef _KERNEL /* want to use this file in kzip also */ extern unsigned char *kzipmalloc (int); extern void kzipfree (void*); #define malloc(x, y, z) kzipmalloc((x)) #define free(x, y) kzipfree((x)) #endif /* * This came from unzip-5.12. I have changed it the flow to pass * a structure pointer around, thus hopefully making it re-entrant. * Poul-Henning */ /* inflate.c -- put in the public domain by Mark Adler version c14o, 23 August 1994 */ /* You can do whatever you like with this source file, though I would prefer that if you modify it and redistribute it that you include comments to that effect with your name and the date. Thank you. 
History: vers date who what ---- --------- -------------- ------------------------------------ a ~~ Feb 92 M. Adler used full (large, one-step) lookup table b1 21 Mar 92 M. Adler first version with partial lookup tables b2 21 Mar 92 M. Adler fixed bug in fixed-code blocks b3 22 Mar 92 M. Adler sped up match copies, cleaned up some b4 25 Mar 92 M. Adler added prototypes; removed window[] (now is the responsibility of unzip.h--also changed name to slide[]), so needs diffs for unzip.c and unzip.h (this allows compiling in the small model on MSDOS); fixed cast of q in huft_build(); b5 26 Mar 92 M. Adler got rid of unintended macro recursion. b6 27 Mar 92 M. Adler got rid of nextbyte() routine. fixed bug in inflate_fixed(). c1 30 Mar 92 M. Adler removed lbits, dbits environment variables. changed BMAX to 16 for explode. Removed OUTB usage, and replaced it with flush()-- this was a 20% speed improvement! Added an explode.c (to replace unimplod.c) that uses the huft routines here. Removed register union. c2 4 Apr 92 M. Adler fixed bug for file sizes a multiple of 32k. c3 10 Apr 92 M. Adler reduced memory of code tables made by huft_build significantly (factor of two to three). c4 15 Apr 92 M. Adler added NOMEMCPY do kill use of memcpy(). worked around a Turbo C optimization bug. c5 21 Apr 92 M. Adler added the GZ_WSIZE #define to allow reducing the 32K window size for specialized applications. c6 31 May 92 M. Adler added some typecasts to eliminate warnings c7 27 Jun 92 G. Roelofs added some more typecasts (444: MSC bug). c8 5 Oct 92 J-l. Gailly added ifdef'd code to deal with PKZIP bug. c9 9 Oct 92 M. Adler removed a memory error message (~line 416). c10 17 Oct 92 G. Roelofs changed ULONG/UWORD/byte to ulg/ush/uch, removed old inflate, renamed inflate_entry to inflate, added Mark's fix to a comment. c10.5 14 Dec 92 M. Adler fix up error messages for incomplete trees. c11 2 Jan 93 M. Adler fixed bug in detection of incomplete tables, and removed assumption that EOB is the longest code (bad assumption). c12 3 Jan 93 M. Adler make tables for fixed blocks only once. c13 5 Jan 93 M. Adler allow all zero length codes (pkzip 2.04c outputs one zero length code for an empty distance tree). c14 12 Mar 93 M. Adler made inflate.c standalone with the introduction of inflate.h. c14b 16 Jul 93 G. Roelofs added (unsigned) typecast to w at 470. c14c 19 Jul 93 J. Bush changed v[N_MAX], l[288], ll[28x+3x] arrays to static for Amiga. c14d 13 Aug 93 J-l. Gailly de-complicatified Mark's c[*p++]++ thing. c14e 8 Oct 93 G. Roelofs changed memset() to memzero(). c14f 22 Oct 93 G. Roelofs renamed quietflg to qflag; made Trace() conditional; added inflate_free(). c14g 28 Oct 93 G. Roelofs changed l/(lx+1) macro to pointer (Cray bug) c14h 7 Dec 93 C. Ghisler huft_build() optimizations. c14i 9 Jan 94 A. Verheijen set fixed_t{d,l} to NULL after freeing; G. Roelofs check NEXTBYTE macro for GZ_EOF. c14j 23 Jan 94 G. Roelofs removed Ghisler "optimizations"; ifdef'd GZ_EOF check. c14k 27 Feb 94 G. Roelofs added some typecasts to avoid warnings. c14l 9 Apr 94 G. Roelofs fixed split comments on preprocessor lines to avoid bug in Encore compiler. c14m 7 Jul 94 P. Kienitz modified to allow assembler version of inflate_codes() (define ASM_INFLATECODES) c14n 22 Jul 94 G. Roelofs changed fprintf to FPRINTF for DLL versions c14o 23 Aug 94 C. Spieler added a newline to a debug statement; G. Roelofs added another typecast to avoid MSC warning */ /* Inflate deflated (PKZIP's method 8 compressed) data. 
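 (PKZIP method 8 is the DEFLATE format, later standardized as RFC 1951.)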
The compression method searches for as much of the current string of bytes (up to a length of 258) in the previous 32K bytes. If it doesn't find any matches (of at least length 3), it codes the next byte. Otherwise, it codes the length of the matched string and its distance backwards from the current position. There is a single Huffman code that codes both single bytes (called "literals") and match lengths. A second Huffman code codes the distance information, which follows a length code. Each length or distance code actually represents a base value and a number of "extra" (sometimes zero) bits to get to add to the base value. At the end of each deflated block is a special end-of-block (EOB) literal/ length code. The decoding process is basically: get a literal/length code; if EOB then done; if a literal, emit the decoded byte; if a length then get the distance and emit the referred-to bytes from the sliding window of previously emitted data. There are (currently) three kinds of inflate blocks: stored, fixed, and dynamic. The compressor outputs a chunk of data at a time and decides which method to use on a chunk-by-chunk basis. A chunk might typically be 32K to 64K, uncompressed. If the chunk is uncompressible, then the "stored" method is used. In this case, the bytes are simply stored as is, eight bits per byte, with none of the above coding. The bytes are preceded by a count, since there is no longer an EOB code. If the data is compressible, then either the fixed or dynamic methods are used. In the dynamic method, the compressed data is preceded by an encoding of the literal/length and distance Huffman codes that are to be used to decode this block. The representation is itself Huffman coded, and so is preceded by a description of that code. These code descriptions take up a little space, and so for small blocks, there is a predefined set of codes, called the fixed codes. The fixed method is used if the block ends up smaller that way (usually for quite small chunks); otherwise the dynamic method is used. In the latter case, the codes are customized to the probabilities in the current block and so can code it much better than the pre-determined fixed codes can. The Huffman codes themselves are decoded using a mutli-level table lookup, in order to maximize the speed of decoding plus the speed of building the decoding tables. See the comments below that precede the lbits and dbits tuning parameters. */ /* Notes beyond the 1.93a appnote.txt: 1. Distance pointers never point before the beginning of the output stream. 2. Distance pointers can point back across blocks, up to 32k away. 3. There is an implied maximum of 7 bits for the bit length table and 15 bits for the actual data. 4. If only one code exists, then it is encoded using one bit. (Zero would be more efficient, but perhaps a little confusing.) If two codes exist, they are coded using one bit each (0 and 1). 5. There is no way of sending zero distance codes--a dummy must be sent if there are none. (History: a pre 2.0 version of PKZIP would store blocks with no distance codes, but this was discovered to be too harsh a criterion.) Valid only for 1.93a. 2.04c does allow zero distance codes, which is sent as one code of zero bits in length. 6. There are up to 286 literal/length codes. Code 256 represents the end-of-block. Note however that the static length tree defines 288 codes just to fill out the Huffman codes. Codes 286 and 287 cannot be used though, since there is no length base or extra bits defined for them. 
Similarly, there are up to 30 distance codes. However, static trees define 32 codes (all 5 bits) to fill out the Huffman codes, but the last two had better not show up in the data. 7. Unzip can check dynamic Huffman blocks for complete code sets. The exception is that a single code would not be complete (see #4). 8. The five bits following the block type is really the number of literal codes sent minus 257. 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits (1+6+6). Therefore, to output three times the length, you output three codes (1+1+1), whereas to output four times the same length, you only need two codes (1+3). Hmm. 10. In the tree reconstruction algorithm, Code = Code + Increment only if BitLength(i) is not zero. (Pretty obvious.) 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) 12. Note: length code 284 can represent 227-258, but length code 285 really is 258. The last length deserves its own, short code since it gets used a lot in very redundant files. The length 258 is special since 258 - 3 (the min match length) is 255. 13. The literal/length and distance code bit lengths are read as a single stream of lengths. It is possible (and advantageous) for a repeat code (16, 17, or 18) to go across the boundary between the two sets of lengths. */ #define PKZIP_BUG_WORKAROUND /* PKZIP 1.93a problem--live with it */ /* inflate.h must supply the uch slide[GZ_WSIZE] array and the NEXTBYTE, FLUSH() and memzero macros. If the window size is not 32K, it should also define GZ_WSIZE. If INFMOD is defined, it can include compiled functions to support the NEXTBYTE and/or FLUSH() macros. There are defaults for NEXTBYTE and FLUSH() below for use as examples of what those functions need to do. Normally, you would also want FLUSH() to compute a crc on the data. inflate.h also needs to provide these typedefs: typedef unsigned char uch; typedef unsigned short ush; typedef unsigned long ulg; This module uses the external functions malloc() and free() (and probably memset() or bzero() in the memzero() macro). Their prototypes are normally found in and . */ #define INFMOD /* tell inflate.h to include code to be * compiled */ /* Huffman code lookup table entry--this entry is four bytes for machines that have 16-bit pointers (e.g. PC's in the small or medium model). Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16 means that v is a literal, 16 < e < 32 means that v is a pointer to the next table, which codes e - 16 bits, and lastly e == 99 indicates an unused code. If a code with e == 99 is looked up, this implies an error in the data. */ struct huft { uch e; /* number of extra bits or operation */ uch b; /* number of bits in this code or subcode */ union { ush n; /* literal, length base, or distance * base */ struct huft *t; /* pointer to next level of table */ } v; }; /* Function prototypes */ static int huft_build(struct inflate *, unsigned *, unsigned, unsigned, const ush *, const ush *, struct huft **, int *); static int huft_free(struct inflate *, struct huft *); static int inflate_codes(struct inflate *, struct huft *, struct huft *, int, int); static int inflate_stored(struct inflate *); static int xinflate(struct inflate *); static int inflate_fixed(struct inflate *); static int inflate_dynamic(struct inflate *); static int inflate_block(struct inflate *, int *); /* The inflate algorithm uses a sliding 32K byte window on the uncompressed stream to find repeated byte strings. This is implemented here as a circular buffer. 
The index is updated simply by incrementing and then and'ing with 0x7fff (32K-1). */ /* It is left to other modules to supply the 32K area. It is assumed to be usable as if it were declared "uch slide[32768];" or as just "uch *slide;" and then malloc'ed in the latter case. The definition must be in unzip.h, included above. */ /* Tables for deflate from PKZIP's appnote.txt. */ /* Order of the bit length code lengths */ static const unsigned border[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; static const ush cplens[] = { /* Copy lengths for literal codes 257..285 */ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; /* note: see note #13 above about the 258 in this list. */ static const ush cplext[] = { /* Extra bits for literal codes 257..285 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */ static const ush cpdist[] = { /* Copy offsets for distance codes 0..29 */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; static const ush cpdext[] = { /* Extra bits for distance codes */ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; /* And'ing with mask[n] masks the lower n bits */ static const ush mask[] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff }; /* Macros for inflate() bit peeking and grabbing. The usage is: NEEDBITS(glbl,j) x = b & mask[j]; DUMPBITS(j) where NEEDBITS makes sure that b has at least j bits in it, and DUMPBITS removes the bits from b. The macros use the variable k for the number of bits in b. Normally, b and k are register variables for speed, and are initialized at the beginning of a routine that uses these macros from a global bit buffer and count. In order to not ask for more bits than there are in the compressed stream, the Huffman tables are constructed to only ask for just enough bits to make up the end-of-block code (value 256). Then no bytes need to be "returned" to the buffer at the end of the last block. See the huft_build() routine. */ /* * The following 2 were global variables. * They are now fields of the inflate structure. */ #define NEEDBITS(glbl,n) { \ while(k<(n)) { \ int c=(*glbl->gz_input)(glbl->gz_private); \ if(c==GZ_EOF) \ return 1; \ b|=((ulg)c)<<k; \ k+=8; \ } \ } #define DUMPBITS(n) {b>>=(n);k-=(n);} /* Huffman code decoding is performed using a multi-level table lookup. The fastest way to decode is to simply build a lookup table whose size is determined by the longest code. However, the time it takes to build this table can also be a factor if the data being decoded is not very long. The most common codes are necessarily the shortest codes, so those codes dominate the decoding time, and hence the speed. The idea is you can have a shorter table that decodes the shorter, more probable codes, and then point to subsidiary tables for the longer codes. The time it costs to decode the longer codes is then traded against the time it takes to make longer tables. The results of this trade are in the variables lbits and dbits below. lbits is the number of bits the first level table for literal/ length codes can decode in one step, and dbits is the same thing for the distance codes. Subsequent tables are also less than or equal to those sizes.
These values may be adjusted either when all of the codes are shorter than that, in which case the longest code length in bits is used, or when the shortest code is *longer* than the requested table size, in which case the length of the shortest code in bits is used. There are two different values for the two tables, since they code a different number of possibilities each. The literal/length table codes 286 possible values, or in a flat code, a little over eight bits. The distance table codes 30 possible values, or a little less than five bits, flat. The optimum values for speed end up being about one bit more than those, so lbits is 8+1 and dbits is 5+1. The optimum values may differ though from machine to machine, and possibly even between compilers. Your mileage may vary. */ static const int lbits = 9; /* bits in base literal/length lookup table */ static const int dbits = 6; /* bits in base distance lookup table */ /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ #define BMAX 16 /* maximum bit length of any code (16 for * explode) */ #define N_MAX 288 /* maximum number of codes in any set */ /* Given a list of code lengths and a maximum table size, make a set of tables to decode that set of codes. Return zero on success, one if the given code set is incomplete (the tables are still built in this case), two if the input is invalid (all zero length codes or an oversubscribed set of lengths), and three if not enough memory. The code with value 256 is special, and the tables are constructed so that no bits beyond that code are fetched when that code is decoded. */ static int huft_build(glbl, b, n, s, d, e, t, m) struct inflate *glbl; unsigned *b; /* code lengths in bits (all assumed <= BMAX) */ unsigned n; /* number of codes (assumed <= N_MAX) */ unsigned s; /* number of simple-valued codes (0..s-1) */ const ush *d; /* list of base values for non-simple codes */ const ush *e; /* list of extra bits for non-simple codes */ struct huft **t; /* result: starting table */ int *m; /* maximum lookup bits, returns actual */ { unsigned a; /* counter for codes of length k */ unsigned c[BMAX + 1]; /* bit length count table */ unsigned el; /* length of EOB code (value 256) */ unsigned f; /* i repeats in table every f entries */ int g; /* maximum code length */ int h; /* table level */ - register unsigned i; /* counter, current code */ - register unsigned j; /* counter */ - register int k; /* number of bits in current code */ + unsigned i; /* counter, current code */ + unsigned j; /* counter */ + int k; /* number of bits in current code */ int lx[BMAX + 1]; /* memory for l[-1..BMAX-1] */ int *l = lx + 1; /* stack of bits per table */ - register unsigned *p; /* pointer into c[], b[], or v[] */ - register struct huft *q;/* points to current table */ + unsigned *p; /* pointer into c[], b[], or v[] */ + struct huft *q; /* points to current table */ struct huft r; /* table entry for structure assignment */ struct huft *u[BMAX];/* table stack */ unsigned v[N_MAX]; /* values in order of bit length */ - register int w; /* bits before this table == (l * h) */ + int w; /* bits before this table == (l * h) */ unsigned x[BMAX + 1]; /* bit offsets, then code stack */ unsigned *xp; /* pointer into x */ int y; /* number of dummy codes added */ unsigned z; /* number of entries in current table */ /* Generate counts for each bit length */ el = n > 256 ? 
b[256] : BMAX; /* set length of EOB code, if any */ #ifdef _KERNEL memzero((char *) c, sizeof(c)); #else for (i = 0; i < BMAX+1; i++) c [i] = 0; #endif p = b; i = n; do { c[*p]++; p++; /* assume all entries <= BMAX */ } while (--i); if (c[0] == n) { /* null input--all zero length codes */ *t = (struct huft *) NULL; *m = 0; return 0; } /* Find minimum and maximum length, bound *m by those */ for (j = 1; j <= BMAX; j++) if (c[j]) break; k = j; /* minimum code length */ if ((unsigned) *m < j) *m = j; for (i = BMAX; i; i--) if (c[i]) break; g = i; /* maximum code length */ if ((unsigned) *m > i) *m = i; /* Adjust last length count to fill out codes, if needed */ for (y = 1 << j; j < i; j++, y <<= 1) if ((y -= c[j]) < 0) return 2; /* bad input: more codes than bits */ if ((y -= c[i]) < 0) return 2; c[i] += y; /* Generate starting offsets into the value table for each length */ x[1] = j = 0; p = c + 1; xp = x + 2; while (--i) { /* note that i == g from above */ *xp++ = (j += *p++); } /* Make a table of values in order of bit lengths */ p = b; i = 0; do { if ((j = *p++) != 0) v[x[j]++] = i; } while (++i < n); /* Generate the Huffman codes and for each, make the table entries */ x[0] = i = 0; /* first Huffman code is zero */ p = v; /* grab values in bit order */ h = -1; /* no tables yet--level -1 */ w = l[-1] = 0; /* no bits decoded yet */ u[0] = (struct huft *) NULL; /* just to keep compilers happy */ q = (struct huft *) NULL; /* ditto */ z = 0; /* ditto */ /* go through the bit lengths (k already is bits in shortest code) */ for (; k <= g; k++) { a = c[k]; while (a--) { /* * here i is the Huffman code of length k bits for * value *p */ /* make tables up to required level */ while (k > w + l[h]) { w += l[h++]; /* add bits already decoded */ /* * compute minimum size table less than or * equal to *m bits */ z = (z = g - w) > (unsigned) *m ? *m : z; /* upper limit */ if ((f = 1 << (j = k - w)) > a + 1) { /* try a k-w bit table *//* t * oo few codes for k-w * bit table */ f -= a + 1; /* deduct codes from * patterns left */ xp = c + k; while (++j < z) { /* try smaller tables up * to z bits */ if ((f <<= 1) <= *++xp) break; /* enough codes to use * up j bits */ f -= *xp; /* else deduct codes * from patterns */ } } if ((unsigned) w + j > el && (unsigned) w < el) j = el - w; /* make EOB code end at * table */ z = 1 << j; /* table entries for j-bit * table */ l[h] = j; /* set table size in stack */ /* allocate and link in new table */ if ((q = (struct huft *) malloc((z + 1) * sizeof(struct huft), M_GZIP, M_WAITOK)) == (struct huft *) NULL) { if (h) huft_free(glbl, u[0]); return 3; /* not enough memory */ } glbl->gz_hufts += z + 1; /* track memory usage */ *t = q + 1; /* link to list for * huft_free() */ *(t = &(q->v.t)) = (struct huft *) NULL; u[h] = ++q; /* table starts after link */ /* connect to last table, if there is one */ if (h) { x[h] = i; /* save pattern for * backing up */ r.b = (uch) l[h - 1]; /* bits to dump before * this table */ r.e = (uch) (16 + j); /* bits in this table */ r.v.t = q; /* pointer to this table */ j = (i & ((1 << w) - 1)) >> (w - l[h - 1]); u[h - 1][j] = r; /* connect to last table */ } } /* set up table entry in r */ r.b = (uch) (k - w); if (p >= v + n) r.e = 99; /* out of values--invalid * code */ else if (*p < s) { r.e = (uch) (*p < 256 ? 
16 : 15); /* 256 is end-of-block * code */ r.v.n = *p++; /* simple code is just the * value */ } else { r.e = (uch) e[*p - s]; /* non-simple--look up * in lists */ r.v.n = d[*p++ - s]; } /* fill code-like entries with r */ f = 1 << (k - w); for (j = i >> w; j < z; j += f) q[j] = r; /* backwards increment the k-bit code i */ for (j = 1 << (k - 1); i & j; j >>= 1) i ^= j; i ^= j; /* backup over finished tables */ while ((i & ((1 << w) - 1)) != x[h]) w -= l[--h]; /* don't need to update q */ } } /* return actual size of base table */ *m = l[0]; /* Return true (1) if we were given an incomplete table */ return y != 0 && g != 1; } static int huft_free(glbl, t) struct inflate *glbl; struct huft *t; /* table to free */ /* Free the malloc'ed tables built by huft_build(), which makes a linked list of the tables it made, with the links in a dummy first entry of each table. */ { - register struct huft *p, *q; + struct huft *p, *q; /* Go through linked list, freeing from the malloced (t[-1]) address. */ p = t; while (p != (struct huft *) NULL) { q = (--p)->v.t; free(p, M_GZIP); p = q; } return 0; } /* inflate (decompress) the codes in a deflated (compressed) block. Return an error code or zero if it all goes ok. */ static int inflate_codes(glbl, tl, td, bl, bd) struct inflate *glbl; struct huft *tl, *td;/* literal/length and distance decoder tables */ int bl, bd; /* number of bits decoded by tl[] and td[] */ { - register unsigned e; /* table entry flag/number of extra bits */ + unsigned e; /* table entry flag/number of extra bits */ unsigned n, d; /* length and index for copy */ unsigned w; /* current window position */ struct huft *t; /* pointer to table entry */ unsigned ml, md; /* masks for bl and bd bits */ - register ulg b; /* bit buffer */ - register unsigned k; /* number of bits in bit buffer */ + ulg b; /* bit buffer */ + unsigned k; /* number of bits in bit buffer */ /* make local copies of globals */ b = glbl->gz_bb; /* initialize bit buffer */ k = glbl->gz_bk; w = glbl->gz_wp; /* initialize window position */ /* inflate the coded data */ ml = mask[bl]; /* precompute masks for speed */ md = mask[bd]; while (1) { /* do until end of block */ NEEDBITS(glbl, (unsigned) bl) if ((e = (t = tl + ((unsigned) b & ml))->e) > 16) do { if (e == 99) return 1; DUMPBITS(t->b) e -= 16; NEEDBITS(glbl, e) } while ((e = (t = t->v.t + ((unsigned) b & mask[e]))->e) > 16); DUMPBITS(t->b) if (e == 16) { /* then it's a literal */ glbl->gz_slide[w++] = (uch) t->v.n; if (w == GZ_WSIZE) { FLUSH(glbl, w); w = 0; } } else { /* it's an EOB or a length */ /* exit if end of block */ if (e == 15) break; /* get length of block to copy */ NEEDBITS(glbl, e) n = t->v.n + ((unsigned) b & mask[e]); DUMPBITS(e); /* decode distance of block to copy */ NEEDBITS(glbl, (unsigned) bd) if ((e = (t = td + ((unsigned) b & md))->e) > 16) do { if (e == 99) return 1; DUMPBITS(t->b) e -= 16; NEEDBITS(glbl, e) } while ((e = (t = t->v.t + ((unsigned) b & mask[e]))->e) > 16); DUMPBITS(t->b) NEEDBITS(glbl, e) d = w - t->v.n - ((unsigned) b & mask[e]); DUMPBITS(e) /* do the copy */ do { n -= (e = (e = GZ_WSIZE - ((d &= GZ_WSIZE - 1) > w ? d : w)) > n ? 
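				/*
				 * e = number of bytes that can be copied
				 * in one straight run before either the
				 * (wrapped) source index d or the
				 * destination index w reaches the end of
				 * the GZ_WSIZE window, clamped to the n
				 * bytes still to copy.
				 */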
n : e); #ifndef NOMEMCPY if (w - d >= e) { /* (this test assumes * unsigned comparison) */ memcpy(glbl->gz_slide + w, glbl->gz_slide + d, e); w += e; d += e; } else /* do it slow to avoid memcpy() * overlap */ #endif /* !NOMEMCPY */ do { glbl->gz_slide[w++] = glbl->gz_slide[d++]; } while (--e); if (w == GZ_WSIZE) { FLUSH(glbl, w); w = 0; } } while (n); } } /* restore the globals from the locals */ glbl->gz_wp = w; /* restore global window pointer */ glbl->gz_bb = b; /* restore global bit buffer */ glbl->gz_bk = k; /* done */ return 0; } /* "decompress" an inflated type 0 (stored) block. */ static int inflate_stored(glbl) struct inflate *glbl; { unsigned n; /* number of bytes in block */ unsigned w; /* current window position */ - register ulg b; /* bit buffer */ - register unsigned k; /* number of bits in bit buffer */ + ulg b; /* bit buffer */ + unsigned k; /* number of bits in bit buffer */ /* make local copies of globals */ b = glbl->gz_bb; /* initialize bit buffer */ k = glbl->gz_bk; w = glbl->gz_wp; /* initialize window position */ /* go to byte boundary */ n = k & 7; DUMPBITS(n); /* get the length and its complement */ NEEDBITS(glbl, 16) n = ((unsigned) b & 0xffff); DUMPBITS(16) NEEDBITS(glbl, 16) if (n != (unsigned) ((~b) & 0xffff)) return 1; /* error in compressed data */ DUMPBITS(16) /* read and output the compressed data */ while (n--) { NEEDBITS(glbl, 8) glbl->gz_slide[w++] = (uch) b; if (w == GZ_WSIZE) { FLUSH(glbl, w); w = 0; } DUMPBITS(8) } /* restore the globals from the locals */ glbl->gz_wp = w; /* restore global window pointer */ glbl->gz_bb = b; /* restore global bit buffer */ glbl->gz_bk = k; return 0; } /* decompress an inflated type 1 (fixed Huffman codes) block. We should either replace this with a custom decoder, or at least precompute the Huffman tables. */ static int inflate_fixed(glbl) struct inflate *glbl; { /* if first time, set up tables for fixed blocks */ if (glbl->gz_fixed_tl == (struct huft *) NULL) { int i; /* temporary variable */ static unsigned l[288]; /* length list for huft_build */ /* literal table */ for (i = 0; i < 144; i++) l[i] = 8; for (; i < 256; i++) l[i] = 9; for (; i < 280; i++) l[i] = 7; for (; i < 288; i++) /* make a complete, but wrong code * set */ l[i] = 8; glbl->gz_fixed_bl = 7; if ((i = huft_build(glbl, l, 288, 257, cplens, cplext, &glbl->gz_fixed_tl, &glbl->gz_fixed_bl)) != 0) { glbl->gz_fixed_tl = (struct huft *) NULL; return i; } /* distance table */ for (i = 0; i < 30; i++) /* make an incomplete code * set */ l[i] = 5; glbl->gz_fixed_bd = 5; if ((i = huft_build(glbl, l, 30, 0, cpdist, cpdext, &glbl->gz_fixed_td, &glbl->gz_fixed_bd)) > 1) { huft_free(glbl, glbl->gz_fixed_tl); glbl->gz_fixed_tl = (struct huft *) NULL; return i; } } /* decompress until an end-of-block code */ return inflate_codes(glbl, glbl->gz_fixed_tl, glbl->gz_fixed_td, glbl->gz_fixed_bl, glbl->gz_fixed_bd) != 0; } /* decompress an inflated type 2 (dynamic Huffman codes) block. 
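   A dynamic block begins with three counts, read just below: five bits
   give nl = 257 + HLIT literal/length codes, five bits give nd = 1 +
   HDIST distance codes, and four bits give nb = 4 + HCLEN code-length
   codes (RFC 1951).  As a worked example, an all-zero header decodes
   to the minimum sizes nl = 257, nd = 1, nb = 4.  The nb code-length
   codes are themselves Huffman coded, so they are decoded first with a
   single-level, 7 bit lookup table before the literal/length and
   distance tables can be built.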
*/ static int inflate_dynamic(glbl) struct inflate *glbl; { int i; /* temporary variables */ unsigned j; unsigned l; /* last length */ unsigned m; /* mask for bit lengths table */ unsigned n; /* number of lengths to get */ struct huft *tl; /* literal/length code table */ struct huft *td; /* distance code table */ int bl; /* lookup bits for tl */ int bd; /* lookup bits for td */ unsigned nb; /* number of bit length codes */ unsigned nl; /* number of literal/length codes */ unsigned nd; /* number of distance codes */ #ifdef PKZIP_BUG_WORKAROUND unsigned ll[288 + 32]; /* literal/length and distance code * lengths */ #else unsigned ll[286 + 30]; /* literal/length and distance code * lengths */ #endif - register ulg b; /* bit buffer */ - register unsigned k; /* number of bits in bit buffer */ + ulg b; /* bit buffer */ + unsigned k; /* number of bits in bit buffer */ /* make local bit buffer */ b = glbl->gz_bb; k = glbl->gz_bk; /* read in table lengths */ NEEDBITS(glbl, 5) nl = 257 + ((unsigned) b & 0x1f); /* number of * literal/length codes */ DUMPBITS(5) NEEDBITS(glbl, 5) nd = 1 + ((unsigned) b & 0x1f); /* number of distance codes */ DUMPBITS(5) NEEDBITS(glbl, 4) nb = 4 + ((unsigned) b & 0xf); /* number of bit length codes */ DUMPBITS(4) #ifdef PKZIP_BUG_WORKAROUND if (nl > 288 || nd > 32) #else if (nl > 286 || nd > 30) #endif return 1; /* bad lengths */ /* read in bit-length-code lengths */ for (j = 0; j < nb; j++) { NEEDBITS(glbl, 3) ll[border[j]] = (unsigned) b & 7; DUMPBITS(3) } for (; j < 19; j++) ll[border[j]] = 0; /* build decoding table for trees--single level, 7 bit lookup */ bl = 7; if ((i = huft_build(glbl, ll, 19, 19, NULL, NULL, &tl, &bl)) != 0) { if (i == 1) huft_free(glbl, tl); return i; /* incomplete code set */ } /* read in literal and distance code lengths */ n = nl + nd; m = mask[bl]; i = l = 0; while ((unsigned) i < n) { NEEDBITS(glbl, (unsigned) bl) j = (td = tl + ((unsigned) b & m))->b; DUMPBITS(j) j = td->v.n; if (j < 16) /* length of code in bits (0..15) */ ll[i++] = l = j; /* save last length in l */ else if (j == 16) { /* repeat last length 3 to 6 times */ NEEDBITS(glbl, 2) j = 3 + ((unsigned) b & 3); DUMPBITS(2) if ((unsigned) i + j > n) return 1; while (j--) ll[i++] = l; } else if (j == 17) { /* 3 to 10 zero length codes */ NEEDBITS(glbl, 3) j = 3 + ((unsigned) b & 7); DUMPBITS(3) if ((unsigned) i + j > n) return 1; while (j--) ll[i++] = 0; l = 0; } else { /* j == 18: 11 to 138 zero length codes */ NEEDBITS(glbl, 7) j = 11 + ((unsigned) b & 0x7f); DUMPBITS(7) if ((unsigned) i + j > n) return 1; while (j--) ll[i++] = 0; l = 0; } } /* free decoding table for trees */ huft_free(glbl, tl); /* restore the global bit buffer */ glbl->gz_bb = b; glbl->gz_bk = k; /* build the decoding tables for literal/length and distance codes */ bl = lbits; i = huft_build(glbl, ll, nl, 257, cplens, cplext, &tl, &bl); if (i != 0) { if (i == 1 && !qflag) { FPRINTF("(incomplete l-tree) "); huft_free(glbl, tl); } return i; /* incomplete code set */ } bd = dbits; i = huft_build(glbl, ll + nl, nd, 0, cpdist, cpdext, &td, &bd); if (i != 0) { if (i == 1 && !qflag) { FPRINTF("(incomplete d-tree) "); #ifdef PKZIP_BUG_WORKAROUND i = 0; } #else huft_free(glbl, td); } huft_free(glbl, tl); return i; /* incomplete code set */ #endif } /* decompress until an end-of-block code */ if (inflate_codes(glbl, tl, td, bl, bd)) return 1; /* free the decoding tables, return */ huft_free(glbl, tl); huft_free(glbl, td); return 0; } /* decompress an inflated block */ static int inflate_block(glbl, e) struct inflate 
*glbl; int *e; /* last block flag */ { unsigned t; /* block type */ - register ulg b; /* bit buffer */ - register unsigned k; /* number of bits in bit buffer */ + ulg b; /* bit buffer */ + unsigned k; /* number of bits in bit buffer */ /* make local bit buffer */ b = glbl->gz_bb; k = glbl->gz_bk; /* read in last block bit */ NEEDBITS(glbl, 1) * e = (int) b & 1; DUMPBITS(1) /* read in block type */ NEEDBITS(glbl, 2) t = (unsigned) b & 3; DUMPBITS(2) /* restore the global bit buffer */ glbl->gz_bb = b; glbl->gz_bk = k; /* inflate that block type */ if (t == 2) return inflate_dynamic(glbl); if (t == 0) return inflate_stored(glbl); if (t == 1) return inflate_fixed(glbl); /* bad block type */ return 2; } /* decompress an inflated entry */ static int xinflate(glbl) struct inflate *glbl; { int e; /* last block flag */ int r; /* result code */ unsigned h; /* maximum struct huft's malloc'ed */ glbl->gz_fixed_tl = (struct huft *) NULL; /* initialize window, bit buffer */ glbl->gz_wp = 0; glbl->gz_bk = 0; glbl->gz_bb = 0; /* decompress until the last block */ h = 0; do { glbl->gz_hufts = 0; if ((r = inflate_block(glbl, &e)) != 0) return r; if (glbl->gz_hufts > h) h = glbl->gz_hufts; } while (!e); /* flush out slide */ FLUSH(glbl, glbl->gz_wp); /* return success */ return 0; } /* Nobody uses this - why not? */ int inflate(glbl) struct inflate *glbl; { int i; #ifdef _KERNEL u_char *p = NULL; if (!glbl->gz_slide) p = glbl->gz_slide = malloc(GZ_WSIZE, M_GZIP, M_WAITOK); #endif if (!glbl->gz_slide) #ifdef _KERNEL return(ENOMEM); #else return 3; /* kzip expects 3 */ #endif i = xinflate(glbl); if (glbl->gz_fixed_td != (struct huft *) NULL) { huft_free(glbl, glbl->gz_fixed_td); glbl->gz_fixed_td = (struct huft *) NULL; } if (glbl->gz_fixed_tl != (struct huft *) NULL) { huft_free(glbl, glbl->gz_fixed_tl); glbl->gz_fixed_tl = (struct huft *) NULL; } #ifdef _KERNEL if (p == glbl->gz_slide) { free(glbl->gz_slide, M_GZIP); glbl->gz_slide = NULL; } #endif return i; } /* ----------------------- END INFLATE.C */ Index: stable/11/sys/kern/kern_clock.c =================================================================== --- stable/11/sys/kern/kern_clock.c (revision 331642) +++ stable/11/sys/kern/kern_clock.c (revision 331643) @@ -1,897 +1,893 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include "opt_device_polling.h" #include "opt_hwpmc_hooks.h" #include "opt_ntp.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef GPROF #include #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , clock, hard); PMC_SOFT_DEFINE( , , clock, stat); PMC_SOFT_DEFINE_EX( , , clock, prof, \ cpu_startprofclock, cpu_stopprofclock); #endif #ifdef DEVICE_POLLING extern void hardclock_device_poll(void); #endif /* DEVICE_POLLING */ static void initclocks(void *dummy); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL); /* Spin-lock protecting profiling statistics. */ static struct mtx time_lock; SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *"); static int sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) { int error; long cp_time[CPUSTATES]; #ifdef SCTL_MASK32 int i; unsigned int cp_time32[CPUSTATES]; #endif read_cpu_time(cp_time); #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time32)); for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time)); error = SYSCTL_OUT(req, cp_time, sizeof(cp_time)); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics"); static long empty[CPUSTATES]; static int sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) { struct pcpu *pcpu; int error; int c; long *cp_time; #ifdef SCTL_MASK32 unsigned int cp_time32[CPUSTATES]; int i; #endif if (!req->oldptr) { #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1)); else #endif return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1)); } for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) { if (!CPU_ABSENT(c)) { pcpu = pcpu_find(c); cp_time = pcpu->pc_cp_time; } else { cp_time = empty; } #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); #ifdef DEADLKRES static const char *blessed[] = { "getblk", "so_snd_sx", 
"so_rcv_sx", NULL }; static int slptime_threshold = 1800; static int blktime_threshold = 900; static int sleepfreq = 3; static void deadlkres(void) { struct proc *p; struct thread *td; void *wchan; int blkticks, i, slpticks, slptype, tryl, tticks; tryl = 0; for (;;) { blkticks = blktime_threshold * hz; slpticks = slptime_threshold * hz; /* * Avoid to sleep on the sx_lock in order to avoid a possible * priority inversion problem leading to starvation. * If the lock can't be held after 100 tries, panic. */ if (!sx_try_slock(&allproc_lock)) { if (tryl > 100) panic("%s: possible deadlock detected on allproc_lock\n", __func__); tryl++; pause("allproc", sleepfreq * hz); continue; } tryl = 0; FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (TD_ON_LOCK(td)) { /* * The thread should be blocked on a * turnstile, simply check if the * turnstile channel is in good state. */ MPASS(td->td_blocked != NULL); tticks = ticks - td->td_blktick; thread_unlock(td); if (tticks > blkticks) { /* * Accordingly with provided * thresholds, this thread is * stuck for too long on a * turnstile. */ PROC_UNLOCK(p); sx_sunlock(&allproc_lock); panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", __func__, td, tticks); } } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { /* * Check if the thread is sleeping on a * lock, otherwise skip the check. * Drop the thread lock in order to * avoid a LOR with the sleepqueue * spinlock. */ wchan = td->td_wchan; tticks = ticks - td->td_slptick; thread_unlock(td); slptype = sleepq_type(wchan); if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) && tticks > slpticks) { /* * Accordingly with provided * thresholds, this thread is * stuck for too long on a * sleepqueue. * However, being on a * sleepqueue, we might still * check for the blessed * list. */ tryl = 0; for (i = 0; blessed[i] != NULL; i++) { if (!strcmp(blessed[i], td->td_wmesg)) { tryl = 1; break; } } if (tryl != 0) { tryl = 0; continue; } PROC_UNLOCK(p); sx_sunlock(&allproc_lock); panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", __func__, td, tticks); } } else thread_unlock(td); } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); /* Sleep for sleepfreq seconds. */ pause("-", sleepfreq * hz); } } static struct kthread_desc deadlkres_kd = { "deadlkres", deadlkres, (struct thread **)NULL }; SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW, &slptime_threshold, 0, "Number of seconds within is valid to sleep on a sleepqueue"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, &blktime_threshold, 0, "Number of seconds within is valid to block on a turnstile"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, "Number of seconds between any deadlock resolver thread run"); #endif /* DEADLKRES */ void read_cpu_time(long *cp_time) { struct pcpu *pc; int i, j; /* Sum up global cp_time[]. 
*/ bzero(cp_time, sizeof(long) * CPUSTATES); CPU_FOREACH(i) { pc = pcpu_find(i); for (j = 0; j < CPUSTATES; j++) cp_time[j] += pc->pc_cp_time[j]; } } #include static int watchdog_ticks; static int watchdog_enabled; static void watchdog_fire(void); static void watchdog_config(void *, u_int, int *); static void watchdog_attach(void) { EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0); } /* * Clock handling routines. * * This code is written to operate with two timers that run independently of * each other. * * The main timer, running hz times per second, is used to trigger interval * timers, timeouts and rescheduling as needed. * * The second timer handles kernel and user profiling, * and does resource use estimation. If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the cpu * just before its quantum expires. Otherwise, it would never accumulate * cpu ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. * * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. (For profiling, every tick counts.) * * Time-of-day is maintained using a "timecounter", which may or may * not be related to the hardware generating the above mentioned * interrupts. */ int stathz; int profhz; int profprocs; volatile int ticks; int psratio; static DPCPU_DEFINE(int, pcputicks); /* Per-CPU version of ticks. */ #ifdef DEVICE_POLLING static int devpoll_run = 0; #endif /* * Initialize clock frequencies and start both clocks running. */ /* ARGSUSED*/ static void -initclocks(dummy) - void *dummy; +initclocks(void *dummy) { - register int i; + int i; /* * Set divisors to 1 (normal case) and let the machine-specific * code do its bit. */ mtx_init(&time_lock, "time lock", NULL, MTX_DEF); cpu_initclocks(); /* * Compute profhz/stathz, and fix profhz if needed. */ i = stathz ? stathz : hz; if (profhz == 0) profhz = i; psratio = profhz / i; #ifdef SW_WATCHDOG /* Enable hardclock watchdog now, even if a hardware watchdog exists. */ watchdog_attach(); #else /* Volunteer to run a software watchdog. */ if (wdog_software_attach == NULL) wdog_software_attach = watchdog_attach; #endif } /* * Each time the real-time timer fires, this function is called on all CPUs. * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only * the other CPUs in the system need to call this function. */ void hardclock_cpu(int usermode) { struct pstats *pstats; struct thread *td = curthread; struct proc *p = td->td_proc; int flags; /* * Run current process's virtual and profile time, as needed. 
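 *
 * ITIMER_VIRTUAL only ticks while the thread was executing in user
 * mode, while ITIMER_PROF ticks in both user and kernel mode.  When
 * itimerdecr() counts a timer down to zero it returns 0, and the
 * TDF_ALRMPEND/TDF_PROFPEND flags set below cause the AST code to
 * post SIGVTALRM or SIGPROF, e.g. to a process that armed the timer
 * with setitimer(ITIMER_VIRTUAL, &itv, NULL).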
*/ pstats = p->p_stats; flags = 0; if (usermode && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { PROC_ITIMLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) flags |= TDF_ALRMPEND | TDF_ASTPENDING; PROC_ITIMUNLOCK(p); } if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { PROC_ITIMLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) flags |= TDF_PROFPEND | TDF_ASTPENDING; PROC_ITIMUNLOCK(p); } thread_lock(td); td->td_flags |= flags; thread_unlock(td); #ifdef HWPMC_HOOKS if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif callout_process(sbinuptime()); } /* * The real-time timer, interrupting hz times per second. */ void hardclock(int usermode, uintfptr_t pc) { atomic_add_int(&ticks, 1); hardclock_cpu(usermode); tc_ticktock(1); cpu_tick_calibration(); /* * If no separate statistics clock is available, run it from here. * * XXX: this only works for UP */ if (stathz == 0) { profclock(usermode, pc); statclock(usermode); } #ifdef DEVICE_POLLING hardclock_device_poll(); /* this is very short and quick */ #endif /* DEVICE_POLLING */ if (watchdog_enabled > 0 && --watchdog_ticks <= 0) watchdog_fire(); } void hardclock_cnt(int cnt, int usermode) { struct pstats *pstats; struct thread *td = curthread; struct proc *p = td->td_proc; int *t = DPCPU_PTR(pcputicks); int flags, global, newticks; int i; /* * Update per-CPU and possibly global ticks values. */ *t += cnt; do { global = ticks; newticks = *t - global; if (newticks <= 0) { if (newticks < -1) *t = global - 1; newticks = 0; break; } } while (!atomic_cmpset_int(&ticks, global, *t)); /* * Run current process's virtual and profile time, as needed. */ pstats = p->p_stats; flags = 0; if (usermode && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { PROC_ITIMLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick * cnt) == 0) flags |= TDF_ALRMPEND | TDF_ASTPENDING; PROC_ITIMUNLOCK(p); } if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { PROC_ITIMLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick * cnt) == 0) flags |= TDF_PROFPEND | TDF_ASTPENDING; PROC_ITIMUNLOCK(p); } if (flags != 0) { thread_lock(td); td->td_flags |= flags; thread_unlock(td); } #ifdef HWPMC_HOOKS if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif /* We are in charge to handle this tick duty. */ if (newticks > 0) { tc_ticktock(newticks); #ifdef DEVICE_POLLING /* Dangerous and no need to call these things concurrently. */ if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) { /* This is very short and quick. */ hardclock_device_poll(); atomic_store_rel_int(&devpoll_run, 0); } #endif /* DEVICE_POLLING */ if (watchdog_enabled > 0) { i = atomic_fetchadd_int(&watchdog_ticks, -newticks); if (i > 0 && i <= newticks) watchdog_fire(); } } if (curcpu == CPU_FIRST()) cpu_tick_calibration(); } void hardclock_sync(int cpu) { int *t = DPCPU_ID_PTR(cpu, pcputicks); *t = ticks; } /* * Compute number of ticks in the specified amount of time. 
*/ int -tvtohz(tv) - struct timeval *tv; +tvtohz(struct timeval *tv) { - register unsigned long ticks; - register long sec, usec; + unsigned long ticks; + long sec, usec; /* * If the number of usecs in the whole seconds part of the time * difference fits in a long, then the total number of usecs will * fit in an unsigned long. Compute the total and convert it to * ticks, rounding up and adding 1 to allow for the current tick * to expire. Rounding also depends on unsigned long arithmetic * to avoid overflow. * * Otherwise, if the number of ticks in the whole seconds part of * the time difference fits in a long, then convert the parts to * ticks separately and add, using similar rounding methods and * overflow avoidance. This method would work in the previous * case but it is slightly slower and assumes that hz is integral. * * Otherwise, round the time difference down to the maximum * representable value. * * If ints have 32 bits, then the maximum value for any timeout in * 10ms ticks is 248 days. */ sec = tv->tv_sec; usec = tv->tv_usec; if (usec < 0) { sec--; usec += 1000000; } if (sec < 0) { #ifdef DIAGNOSTIC if (usec > 0) { sec++; usec -= 1000000; } printf("tvotohz: negative time difference %ld sec %ld usec\n", sec, usec); #endif ticks = 1; } else if (sec <= LONG_MAX / 1000000) ticks = howmany(sec * 1000000 + (unsigned long)usec, tick) + 1; else if (sec <= LONG_MAX / hz) ticks = sec * hz + howmany((unsigned long)usec, tick) + 1; else ticks = LONG_MAX; if (ticks > INT_MAX) ticks = INT_MAX; return ((int)ticks); } /* * Start profiling on a process. * * Kernel profiling passes proc0 which never exits and hence * keeps the profile clock running constantly. */ void -startprofclock(p) - register struct proc *p; +startprofclock(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_STOPPROF) return; if ((p->p_flag & P_PROFIL) == 0) { p->p_flag |= P_PROFIL; mtx_lock(&time_lock); if (++profprocs == 1) cpu_startprofclock(); mtx_unlock(&time_lock); } } /* * Stop profiling on a process. */ void -stopprofclock(p) - register struct proc *p; +stopprofclock(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_PROFIL) { if (p->p_profthreads != 0) { while (p->p_profthreads != 0) { p->p_flag |= P_STOPPROF; msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, "stopprof", 0); } } if ((p->p_flag & P_PROFIL) == 0) return; p->p_flag &= ~P_PROFIL; mtx_lock(&time_lock); if (--profprocs == 0) cpu_stopprofclock(); mtx_unlock(&time_lock); } } /* * Statistics clock. Updates rusage information and calls the scheduler * to adjust priorities of the active thread. * * This should be called by all active processors. */ void statclock(int usermode) { statclock_cnt(1, usermode); } void statclock_cnt(int cnt, int usermode) { struct rusage *ru; struct vmspace *vm; struct thread *td; struct proc *p; long rss; long *cp_time; td = curthread; p = td->td_proc; cp_time = (long *)PCPU_PTR(cp_time); if (usermode) { /* * Charge the time as appropriate. */ td->td_uticks += cnt; if (p->p_nice > NZERO) cp_time[CP_NICE] += cnt; else cp_time[CP_USER] += cnt; } else { /* * Came from kernel mode, so we were: * - handling an interrupt, * - doing syscall or trap work on behalf of the current * user process, or * - spinning in the idle loop. * Whichever it is, charge the time as appropriate. * Note that we charge interrupts to the current process, * regardless of whether they are ``for'' that process, * so that we know how much of its real time was spent * in ``non-process'' (i.e., interrupt) work. 
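 *
 * The bucket chosen below mirrors this: interrupt threads and nested
 * interrupts are charged to cp_time[CP_INTR], ordinary kernel work to
 * CP_SYS, and the idle thread's time to CP_IDLE; the user-mode branch
 * above already split its time between CP_USER and CP_NICE.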
*/ if ((td->td_pflags & TDP_ITHREAD) || td->td_intr_nesting_level >= 2) { td->td_iticks += cnt; cp_time[CP_INTR] += cnt; } else { td->td_pticks += cnt; td->td_sticks += cnt; if (!TD_IS_IDLETHREAD(td)) cp_time[CP_SYS] += cnt; else cp_time[CP_IDLE] += cnt; } } /* Update resource usage integrals and maximums. */ MPASS(p->p_vmspace != NULL); vm = p->p_vmspace; ru = &td->td_ru; ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt; ru->ru_idrss += pgtok(vm->vm_dsize) * cnt; ru->ru_isrss += pgtok(vm->vm_ssize) * cnt; rss = pgtok(vmspace_resident_count(vm)); if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); SDT_PROBE2(sched, , , tick, td, td->td_proc); thread_lock_flags(td, MTX_QUIET); for ( ; cnt > 0; cnt--) sched_clock(td); thread_unlock(td); #ifdef HWPMC_HOOKS if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame); #endif } void profclock(int usermode, uintfptr_t pc) { profclock_cnt(1, usermode, pc); } void profclock_cnt(int cnt, int usermode, uintfptr_t pc) { struct thread *td; #ifdef GPROF struct gmonparam *g; uintfptr_t i; #endif td = curthread; if (usermode) { /* * Came from user mode; CPU was in user state. * If this process is being profiled, record the tick. * if there is no related user location yet, don't * bother trying to count it. */ if (td->td_proc->p_flag & P_PROFIL) addupc_intr(td, pc, cnt); } #ifdef GPROF else { /* * Kernel statistics are just like addupc_intr, only easier. */ g = &_gmonparam; if (g->state == GMON_PROF_ON && pc >= g->lowpc) { i = PC_TO_I(g, pc); if (i < g->textsize) { KCOUNT(g, i) += cnt; } } } #endif #ifdef HWPMC_HOOKS if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame); #endif } /* * Return information about system clocks. */ static int sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) { struct clockinfo clkinfo; /* * Construct clockinfo structure. */ bzero(&clkinfo, sizeof(clkinfo)); clkinfo.hz = hz; clkinfo.tick = tick; clkinfo.profhz = profhz; clkinfo.stathz = stathz ? stathz : hz; return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); } SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_clockrate, "S,clockinfo", "Rate and period of various kernel clocks"); static void watchdog_config(void *unused __unused, u_int cmd, int *error) { u_int u; u = cmd & WD_INTERVAL; if (u >= WD_TO_1SEC) { watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz; watchdog_enabled = 1; *error = 0; } else { watchdog_enabled = 0; } } /* * Handle a watchdog timeout by dumping interrupt information and * then either dropping to DDB or panicking. 
*/ static void watchdog_fire(void) { int nintr; uint64_t inttotal; u_long *curintr; char *curname; curintr = intrcnt; curname = intrnames; inttotal = 0; nintr = sintrcnt / sizeof(u_long); printf("interrupt total\n"); while (--nintr >= 0) { if (*curintr) printf("%-12s %20lu\n", curname, *curintr); curname += strlen(curname) + 1; inttotal += *curintr++; } printf("Total %20ju\n", (uintmax_t)inttotal); #if defined(KDB) && !defined(KDB_UNATTENDED) kdb_backtrace(); kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout"); #else panic("watchdog timeout"); #endif } Index: stable/11/sys/kern/kern_exec.c =================================================================== --- stable/11/sys/kern/kern_exec.c (revision 331642) +++ stable/11/sys/kern/kern_exec.c (revision 331643) @@ -1,1744 +1,1734 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exec, "char *"); SDT_PROBE_DEFINE1(proc, , , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, , , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); int coredump_pack_fileinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, &coredump_pack_fileinfo, 0, "Enable file path packing in 'procstat -f' coredump notes"); int coredump_pack_vmmapinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, &coredump_pack_vmmapinfo, 0, "Enable file path packing in 'procstat -v' coredump notes"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_ps_strings, "LU", ""); /* XXX This should be vm_size_t. 
*/ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_usrstack, "LU", ""); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_stackprot, "I", ""); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, ""); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_psstrings; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, sizeof(p->p_sysent->sv_psstrings)); return error; } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_usrstack; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, sizeof(p->p_sysent->sv_usrstack)); return error; } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. 
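 *
 * Image activators such as the ELF and shell-script handlers register
 * their execsw entries at boot (via the EXEC_SET() macro from
 * <sys/imgact.h>), and do_execve() walks this table until one of them
 * accepts the image.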
*/ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(struct thread *td, struct execve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL); post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; } #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL); } post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p); post_execve(td, error, oldvmspace); return (error); #else return (ENOSYS); #endif } int pre_execve(struct thread *td, struct vmspace **oldvmspace) { struct proc *p; int error; KASSERT(td == curthread, ("non-current thread %p", td)); error = 0; p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY) != 0) error = ERESTART; PROC_UNLOCK(p); } KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); *oldvmspace = p->p_vmspace; return (error); } void post_execve(struct thread *td, int error, struct vmspace *oldvmspace) { struct proc *p; KASSERT(td == curthread, ("non-current thread %p", td)); p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == 0) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } } /* * XXX: kern_execve has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p) { AUDIT_ARG_ARGV(args->begin_argv, args->argc, args->begin_envv - args->begin_argv); AUDIT_ARG_ENVV(args->begin_envv, args->envc, args->endp - args->begin_envv); return (do_execve(td, args, mac_p)); } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. 
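 *
 * In outline: the binary is looked up (by path or descriptor), its
 * permissions are checked, setid credentials are prepared, the image
 * activators are tried in turn (restarting for interpreters), the old
 * address space is replaced, argument and environment strings are
 * copied out to the new stack, and finally the register state is set
 * to the image entry point.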
*/ static int -do_execve(td, args, mac_p) - struct thread *td; - struct image_args *args; - struct mac *mac_p; +do_execve(struct thread *td, struct image_args *args, struct mac *mac_p) { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *oldcred; struct uidinfo *euip = NULL; register_t *stack_base; int error, i; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct vnode *tracevp = NULL; struct ucred *tracecred = NULL; #endif struct vnode *oldtextvp = NULL, *newtextvp; cap_rights_t rights; int credential_changing; int textset; #ifdef MAC struct label *interpvplabel = NULL; int will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif /* * Translate the file name. namei() returns a vnode pointer * in ni_vp among other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. */ if (args->fname != NULL) { NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, cap_rights_init(&rights, CAP_FEXECVE), &newtextvp); if (error) goto exec_fail; vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); /* * Set VV_TEXT now so no one can write to the executable while we're * activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ textset = VOP_IS_TEXT(imgp->vp); VOP_SET_TEXT(imgp->vp); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; /* * Implement image setuid/setgid. * * Determine new credentials before attempting image activators * so that it can be used by process_exec handlers to determine * credential/setid changes. 
* * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = 0; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp); credential_changing |= will_transition; #endif if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { imgp->credential_setid = true; VOP_UNLOCK(imgp->vp, 0); imgp->newcred = crdup(oldcred); if (attr.va_mode & S_ISUID) { euip = uifind(attr.va_uid); change_euid(imgp->newcred, euip); } vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (attr.va_mode & S_ISGID) change_egid(imgp->newcred, attr.va_gid); /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } else { /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { VOP_UNLOCK(imgp->vp, 0); imgp->newcred = crdup(oldcred); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } } /* The new credentials are installed into the process later. */ /* * Do the best to calculate the full path to the image file. */ if (args->fname != NULL && args->fname[0] == '/') imgp->execpath = args->fname; else { VOP_UNLOCK(imgp->vp, 0); if (vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); } /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. 
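 *
 * A hypothetical activator (the name, magic string and handler below
 * are illustrative only, not code in this tree) has roughly this shape:
 *
 *	static int
 *	exec_foo_imgact(struct image_params *imgp)
 *	{
 *		if (bcmp(imgp->image_header, "FOO!", 4) != 0)
 *			return (-1);	(no match; let the next one try)
 *		(map the image and set imgp->entry_addr here)
 *		return (0);
 *	}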
*/ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) { if (textset == 0) VOP_UNSET_TEXT(imgp->vp); error = ENOEXEC; } goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * VV_TEXT needs to be unset for scripts. There is a short * period before we determine that something is a script where * VV_TEXT will be set. The vnode lock is held over this * entire period so nothing should illegitimately be blocked. */ VOP_UNSET_TEXT(imgp->vp); /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = 0; } vput(newtextvp); vm_object_deallocate(imgp->object); imgp->object = NULL; imgp->credential_setid = false; if (imgp->newcred != NULL) { crfree(imgp->newcred); imgp->newcred = NULL; } imgp->execpath = NULL; free(imgp->freepath, M_TEMP); imgp->freepath = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp, 0); if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* ABI enforces the use of Capsicum. Switch into capabilities mode. */ if (SV_PROC_FLAG(p, SV_CAPSICUM)) sys_cap_enter(td, NULL); /* * Copy out strings (args and env) and initialize stack base */ if (p->p_sysent->sv_copyout_strings) stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); else stack_base = exec_copyout_strings(imgp); /* * If custom stack fixup routine present for this process * let it do the stack setup. * Else stuff argument count as first item on stack */ if (p->p_sysent->sv_fixup != NULL) (*p->p_sysent->sv_fixup)(&stack_base, imgp); else suword(--stack_base, imgp->args->argc); if (args->fdp != NULL) { /* Install a brand new file descriptor table. */ fdinstall_remapped(td, args->fdp); args->fdp = NULL; } else { /* * Keep on using the existing file descriptor table. For * security and other reasons, the file descriptor table * cannot be shared after an exec. */ fdunshare(td); /* close files on exec */ fdcloseexec(td); } /* * Malloc things before we need locks. */ i = imgp->args->begin_envv - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. 
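 *
 * p_sigacts is reference counted, so a private copy is made here only
 * when the handler table is actually shared with another process.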
*/ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); /* STOPs are no longer ignored, arrange for AST */ signotify(td); } /* * Implement image setuid/setgid installation. */ if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE if (p->p_tracecred != NULL && priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED, 0)) ktrprocexec(p, &tracecred, &tracevp); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); fdsetugidsafety(td); error = fdcheckstd(td); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto exec_fail_dealloc; PROC_LOCK(p); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, imgp->newcred, imgp->vp, interpvplabel, imgp); } #endif } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; } /* * Set the new credentials. */ if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); crfree(oldcred); oldcred = NULL; } /* * Store the vp for use in procfs. This vnode was referenced by namei * or fgetvp_exec. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. * * The proc lock needs to be released before taking the PMC * SX. 
*/ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } else PROC_UNLOCK(p); #else /* !HWPMC_HOOKS */ PROC_UNLOCK(p); #endif /* Set values passed into the program in registers. */ if (p->p_sysent->sv_setregs) (*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base); else exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base); vfs_mark_atime(imgp->vp, td->td_ucred); SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (args->fname) NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp, 0); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_EXEC) td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); /* * Stop the process here if its stop event mask has * the S_EXEC bit set. */ STOPEVENT(p, S_EXEC, 0); } else { exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); } if (imgp->newcred != NULL && oldcred != NULL) crfree(imgp->newcred); #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); /* * Handle deferred decrement of ref counts. */ if (oldtextvp != NULL) vrele(oldtextvp); #ifdef KTRACE if (tracevp != NULL) vrele(tracevp); if (tracecred != NULL) crfree(tracecred); #endif pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); if (euip != NULL) uifree(euip); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. 
exit gracefully */ exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif return (error); } int exec_map_first_page(imgp) struct image_params *imgp; { int rv, i, after, initial_pagein; vm_page_t ma[VM_INITIAL_PAGEIN]; vm_object_t object; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); VM_OBJECT_WLOCK(object); #if VM_NRESERVLEVEL > 0 vm_object_color(object, 0); #endif ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); if (ma[0]->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(ma[0]); if (!vm_pager_has_page(object, 0, NULL, &after)) { vm_page_lock(ma[0]); vm_page_free(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); return (EIO); } initial_pagein = min(after, VM_INITIAL_PAGEIN); KASSERT(initial_pagein <= object->size, ("%s: initial_pagein %d object->size %ju", __func__, initial_pagein, (uintmax_t )object->size)); for (i = 1; i < initial_pagein; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) break; if (!vm_page_tryxbusy(ma[i])) break; } else { ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL); if (ma[i] == NULL) break; } } initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { for (i = 0; i < initial_pagein; i++) { vm_page_lock(ma[i]); vm_page_free(ma[i]); vm_page_unlock(ma[i]); } VM_OBJECT_WUNLOCK(object); return (EIO); } vm_page_xunbusy(ma[0]); for (i = 1; i < initial_pagein; i++) vm_page_readahead_finish(ma[i]); } vm_page_lock(ma[0]); vm_page_hold(ma[0]); vm_page_activate(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void -exec_unmap_first_page(imgp) - struct image_params *imgp; +exec_unmap_first_page(struct image_params *imgp) { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } } /* * Destroy old address space, and allocate a new stack. * The new stack is only sgrowsiz large because it is grown * automatically on a page fault. */ int -exec_new_vmspace(imgp, sv) - struct image_params *imgp; - struct sysentvec *sv; +exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; u_long ssiz; imgp->vmspace_destroyed = 1; imgp->sysent = sv; /* May be called with Giant held */ EVENTHANDLER_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser) { shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); /* An exec terminates mlockall(MCL_FUTURE). 
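 * mlockall(MCL_FUTURE) sets MAP_WIREFUTURE on the map so that pages
 * are wired as new mappings are created; clearing the flag here keeps
 * the fresh image from inheriting that behavior.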
*/ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error != KERN_SUCCESS) { vm_object_deallocate(obj); return (vm_mmap_to_errno(error)); } } /* Allocate a new stack */ if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } stack_addr = sv->sv_usrstack - ssiz; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error != KERN_SUCCESS) return (vm_mmap_to_errno(error)); /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long argp, envp; int error; size_t length; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? 
copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ for (;;) { error = fueword(argv++, &argp); if (error == -1) { error = EFAULT; goto err_exit; } if (argp == 0) break; error = copyinstr((void *)(uintptr_t)argp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &envp); if (error == -1) { error = EFAULT; goto err_exit; } if (envp == 0) break; error = copyinstr((void *)(uintptr_t)envp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int exec_copyin_data_fds(struct thread *td, struct image_args *args, const void *data, size_t datalen, const int *fds, size_t fdslen) { struct filedesc *ofdp; const char *p; int *kfds; int error; memset(args, '\0', sizeof(*args)); ofdp = td->td_proc->p_fd; if (datalen >= ARG_MAX || fdslen > ofdp->fd_lastfile + 1) return (E2BIG); error = exec_alloc_args(args); if (error != 0) return (error); args->begin_argv = args->buf; args->stringspace = ARG_MAX; if (datalen > 0) { /* * Argument buffer has been provided. Copy it into the * kernel as a single string and add a terminating null * byte. */ error = copyin(data, args->begin_argv, datalen); if (error != 0) goto err_exit; args->begin_argv[datalen] = '\0'; args->endp = args->begin_argv + datalen + 1; args->stringspace -= datalen + 1; /* * Traditional argument counting. Count the number of * null bytes. */ for (p = args->begin_argv; p < args->endp; ++p) if (*p == '\0') ++args->argc; } else { /* No argument buffer provided. */ args->endp = args->begin_argv; } /* There are no environment variables. */ args->begin_envv = args->endp; /* Create new file descriptor table. 
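 * fdcopy_remapped() builds a table in which new descriptor i refers
 * to whatever the caller passed as kfds[i].  Editor's note: a rough
 * userland analogue of that renumbering, using dup2() (illustrative
 * only; remap_fds() is a hypothetical helper, and real code must
 * cope with overlap between source and target descriptors):
 */
#include <stdio.h>
#include <unistd.h>

/* Make descriptor i a copy of the old descriptor kfds[i]. */
static int
remap_fds(const int *kfds, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (kfds[i] == i)
			continue;
		if (dup2(kfds[i], i) < 0) {
			perror("dup2");
			return (-1);
		}
	}
	return (0);
}

int
main(void)
{
	int kfds[3] = { 1, 1, 1 };	/* point fds 0-2 at current stdout */

	return (remap_fds(kfds, 3) == 0 ? 0 : 1);
}
/*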
*/ kfds = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK); error = copyin(fds, kfds, fdslen * sizeof(int)); if (error != 0) { free(kfds, M_TEMP); goto err_exit; } error = fdcopy_remapped(ofdp, kfds, fdslen, &args->fdp); free(kfds, M_TEMP); if (error != 0) goto err_exit; return (0); err_exit: exec_free_args(args); return (error); } struct exec_args_kva { vm_offset_t addr; u_int gen; SLIST_ENTRY(exec_args_kva) next; }; static DPCPU_DEFINE(struct exec_args_kva *, exec_args_kva); static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist; static struct mtx exec_args_kva_mtx; static u_int exec_args_gen; static void exec_prealloc_args_kva(void *arg __unused) { struct exec_args_kva *argkva; u_int i; SLIST_INIT(&exec_args_kva_freelist); mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF); for (i = 0; i < exec_map_entries; i++) { argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK); argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size); argkva->gen = exec_args_gen; SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); } } SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL); static vm_offset_t exec_alloc_args_kva(void **cookie) { struct exec_args_kva *argkva; argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_PTR(exec_args_kva)); if (argkva == NULL) { mtx_lock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL) (void)mtx_sleep(&exec_args_kva_freelist, &exec_args_kva_mtx, 0, "execkva", 0); SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next); mtx_unlock(&exec_args_kva_mtx); } *(struct exec_args_kva **)cookie = argkva; return (argkva->addr); } static void exec_release_args_kva(struct exec_args_kva *argkva, u_int gen) { vm_offset_t base; base = argkva->addr; if (argkva->gen != gen) { vm_map_madvise(exec_map, base, base + exec_map_entry_size, MADV_FREE); argkva->gen = gen; } if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva), (uintptr_t)NULL, (uintptr_t)argkva)) { mtx_lock(&exec_args_kva_mtx); SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); wakeup_one(&exec_args_kva_freelist); mtx_unlock(&exec_args_kva_mtx); } } static void exec_free_args_kva(void *cookie) { exec_release_args_kva(cookie, exec_args_gen); } static void exec_args_kva_lowmem(void *arg __unused) { SLIST_HEAD(, exec_args_kva) head; struct exec_args_kva *argkva; u_int gen; int i; gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1; /* * Force an madvise of each KVA range. Any currently allocated ranges * will have MADV_FREE applied once they are freed. */ SLIST_INIT(&head); mtx_lock(&exec_args_kva_mtx); SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva); mtx_unlock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&head)) != NULL) { SLIST_REMOVE_HEAD(&head, next); exec_release_args_kva(argkva, gen); } CPU_FOREACH(i) { argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva)); if (argkva != NULL) exec_release_args_kva(argkva, gen); } } EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL, EVENTHANDLER_PRI_ANY); /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. 
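 *
 * Editor's note: exec_alloc_args_kva() above keeps one cached KVA
 * range per CPU and falls back to a sleepable free list.  A minimal
 * hosted-C sketch of the same single-slot atomic cache (illustrative;
 * C11 atomics and a pthread mutex stand in for the kernel
 * primitives, and the fallback returns NULL instead of sleeping):
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct slot {
	struct slot *next;	/* the payload would live here */
};

static _Atomic(struct slot *) cached_slot;	/* one per CPU in the kernel */
static struct slot *freelist;
static pthread_mutex_t freelist_mtx = PTHREAD_MUTEX_INITIALIZER;

static struct slot *
slot_alloc(void)
{
	struct slot *s;

	/* Fast path: atomically claim the cached entry, if present. */
	s = atomic_exchange(&cached_slot, NULL);
	if (s != NULL)
		return (s);
	pthread_mutex_lock(&freelist_mtx);
	s = freelist;
	if (s != NULL)
		freelist = s->next;
	pthread_mutex_unlock(&freelist_mtx);
	return (s);
}

static void
slot_free(struct slot *s)
{
	struct slot *expected = NULL;

	/* Re-park in the fast-path slot if empty, else push on the list. */
	if (atomic_compare_exchange_strong(&cached_slot, &expected, s))
		return;
	pthread_mutex_lock(&freelist_mtx);
	s->next = freelist;
	freelist = s;
	pthread_mutex_unlock(&freelist_mtx);
}

int
main(void)
{
	static struct slot s0;

	slot_free(&s0);
	return (slot_alloc() == &s0 ? 0 : 1);
}
/*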
*/ int exec_alloc_args(struct image_args *args) { args->buf = (char *)exec_alloc_args_kva(&args->bufkva); return (0); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { exec_free_args_kva(args->bufkva); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } if (args->fdp != NULL) fdescfree_remapped(args->fdp); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ register_t * -exec_copyout_strings(imgp) - struct image_params *imgp; +exec_copyout_strings(struct image_params *imgp) { int argc, envc; char **vectp; char *stringp; uintptr_t destp; register_t *stack_base; struct ps_strings *arginfo; struct proc *p; size_t execpath_len; int szsigcode, szps; char canary[sizeof(long) * 8]; szps = sizeof(pagesizes[0]) * MAXPAGESIZES; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; szsigcode = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; if (p->p_sysent->sv_sigcode_base == 0) { if (p->p_sysent->sv_szsigcode != NULL) szsigcode = *(p->p_sysent->sv_szsigcode); } destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ destp -= szps; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = destp; copyout(pagesizes, (void *)destp, szps); imgp->pagesizeslen = szps; destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is kept * for backward compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets, and imgp->auxarg_size is room * for the arguments of the runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. 
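 *
 * Editor's note: exec_copyout_strings() carves the region below the
 * ps_strings structure downward: sigcode, execpath, SSP canary,
 * pagesizes array, the argument/environment strings, and finally the
 * argv/envp vector tables.  A hosted-C sketch of that pointer
 * arithmetic (all sizes are made-up placeholders and the base
 * address assumes LP64; rounddown2 is reproduced inline):
 */
#include <stdint.h>
#include <stdio.h>

#define	ROUNDDOWN2(x, a)	((x) & ~((uintptr_t)(a) - 1))

int
main(void)
{
	uintptr_t destp = 0x7fffffffefffUL;	/* pretend ps_strings */
	size_t szsigcode = 44, execpath_len = 16, canarylen = 64;
	size_t szps = 8 * 3, strings = 1024, vectors = (10 + 20 + 2) * 8;

	destp -= szsigcode;
	destp = ROUNDDOWN2(destp, sizeof(void *));
	printf("sigcode   at %#lx\n", (unsigned long)destp);
	destp -= execpath_len;
	printf("execpath  at %#lx\n", (unsigned long)destp);
	destp -= canarylen;
	printf("canary    at %#lx\n", (unsigned long)destp);
	destp -= szps;
	destp = ROUNDDOWN2(destp, sizeof(void *));
	printf("pagesizes at %#lx\n", (unsigned long)destp);
	destp -= strings;
	destp = ROUNDDOWN2(destp, sizeof(void *));
	printf("strings   at %#lx\n", (unsigned long)destp);
	printf("vectors   at %#lx (initial stack base)\n",
	    (unsigned long)(destp - vectors));
	return (0);
}
/*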
*/ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int -exec_check_permissions(imgp) - struct image_params *imgp; +exec_check_permissions(struct image_params *imgp) { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error, writecount; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) return (error); if (writecount != 0) return (ETXTBSY); /* * Call filesystem specific open routine (which does nothing in the * general case). 
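 *
 * Editor's note: a hedged userland approximation of the checks above
 * (regular file, at least one execute bit, non-zero size, execute
 * access for the caller).  The kernel additionally consults MAC,
 * MNT_NOEXEC and the vnode write count, which have no direct
 * userland equivalent, and access(2) checks against the real rather
 * than the effective uid:
 */
#include <sys/stat.h>
#include <errno.h>
#include <unistd.h>

static int
check_exec_perms(const char *path)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return (errno);
	if (!S_ISREG(st.st_mode) ||
	    (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
		return (EACCES);
	if (st.st_size == 0)	/* zero length files can't be exec'd */
		return (ENOEXEC);
	if (access(path, X_OK) != 0)
		return (errno);
	return (0);
}

int
main(int argc, char *argv[])
{
	return (argc > 1 ? check_exec_perms(argv[1]) : 0);
}
/*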
*/ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = 1; return (error); } /* * Exec handler registration */ int -exec_register(execsw_arg) - const struct execsw *execsw_arg; +exec_register(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int -exec_unregister(execsw_arg) - const struct execsw *execsw_arg; +exec_unregister(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } Index: stable/11/sys/kern/kern_prot.c =================================================================== --- stable/11/sys/kern/kern_prot.c (revision 331642) +++ stable/11/sys/kern/kern_prot.c (revision 331643) @@ -1,2245 +1,2245 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993 * The Regents of the University of California. * (c) UNIX System Laboratories, Inc. * Copyright (c) 2000-2001 Robert N. M. Watson. * All rights reserved. * * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_prot.c 8.6 (Berkeley) 1/21/94 */ /* * System calls related to processes and protection */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef REGRESSION FEATURE(regression, "Kernel support for interfaces necessary for regression testing (SECURITY RISK!)"); #endif #if defined(INET) || defined(INET6) #include #include #endif #include #include static MALLOC_DEFINE(M_CRED, "cred", "credentials"); SYSCTL_NODE(_security, OID_AUTO, bsd, CTLFLAG_RW, 0, "BSD security policy"); static void crsetgroups_locked(struct ucred *cr, int ngrp, gid_t *groups); #ifndef _SYS_SYSPROTO_H_ struct getpid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getpid(struct thread *td, struct getpid_args *uap) { struct proc *p = td->td_proc; td->td_retval[0] = p->p_pid; #if defined(COMPAT_43) td->td_retval[1] = kern_getppid(td); #endif return (0); } #ifndef _SYS_SYSPROTO_H_ struct getppid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getppid(struct thread *td, struct getppid_args *uap) { td->td_retval[0] = kern_getppid(td); return (0); } int kern_getppid(struct thread *td) { struct proc *p = td->td_proc; struct proc *pp; int ppid; PROC_LOCK(p); if (!(p->p_flag & P_TRACED)) { ppid = p->p_pptr->p_pid; PROC_UNLOCK(p); } else { PROC_UNLOCK(p); sx_slock(&proctree_lock); pp = proc_realparent(p); ppid = pp->p_pid; sx_sunlock(&proctree_lock); } return (ppid); } /* * Get process group ID; note that POSIX getpgrp takes no parameter. */ #ifndef _SYS_SYSPROTO_H_ struct getpgrp_args { int dummy; }; #endif int sys_getpgrp(struct thread *td, struct getpgrp_args *uap) { struct proc *p = td->td_proc; PROC_LOCK(p); td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return (0); } /* Get an arbitrary pid's process group id */ #ifndef _SYS_SYSPROTO_H_ struct getpgid_args { pid_t pid; }; #endif int sys_getpgid(struct thread *td, struct getpgid_args *uap) { struct proc *p; int error; if (uap->pid == 0) { p = td->td_proc; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); error = p_cansee(td, p); if (error) { PROC_UNLOCK(p); return (error); } } td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return (0); } /* * Get an arbitrary pid's session id. 
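 *
 * Editor's note: as in getpgid() above, a pid argument of 0 means
 * "the calling process", and getsid() below follows the same
 * fall-through pattern.  Minimal userland usage:
 */
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* pid 0 queries the caller itself in both interfaces. */
	printf("pgid %d sid %d\n", (int)getpgid(0), (int)getsid(0));
	return (0);
}
/*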
*/ #ifndef _SYS_SYSPROTO_H_ struct getsid_args { pid_t pid; }; #endif int sys_getsid(struct thread *td, struct getsid_args *uap) { struct proc *p; int error; if (uap->pid == 0) { p = td->td_proc; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); error = p_cansee(td, p); if (error) { PROC_UNLOCK(p); return (error); } } td->td_retval[0] = p->p_session->s_sid; PROC_UNLOCK(p); return (0); } #ifndef _SYS_SYSPROTO_H_ struct getuid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getuid(struct thread *td, struct getuid_args *uap) { td->td_retval[0] = td->td_ucred->cr_ruid; #if defined(COMPAT_43) td->td_retval[1] = td->td_ucred->cr_uid; #endif return (0); } #ifndef _SYS_SYSPROTO_H_ struct geteuid_args { int dummy; }; #endif /* ARGSUSED */ int sys_geteuid(struct thread *td, struct geteuid_args *uap) { td->td_retval[0] = td->td_ucred->cr_uid; return (0); } #ifndef _SYS_SYSPROTO_H_ struct getgid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getgid(struct thread *td, struct getgid_args *uap) { td->td_retval[0] = td->td_ucred->cr_rgid; #if defined(COMPAT_43) td->td_retval[1] = td->td_ucred->cr_groups[0]; #endif return (0); } /* * Get effective group ID. The "egid" is groups[0], and could be obtained * via getgroups. This syscall exists because it is somewhat painful to do * correctly in a library function. */ #ifndef _SYS_SYSPROTO_H_ struct getegid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getegid(struct thread *td, struct getegid_args *uap) { td->td_retval[0] = td->td_ucred->cr_groups[0]; return (0); } #ifndef _SYS_SYSPROTO_H_ struct getgroups_args { u_int gidsetsize; gid_t *gidset; }; #endif int -sys_getgroups(struct thread *td, register struct getgroups_args *uap) +sys_getgroups(struct thread *td, struct getgroups_args *uap) { struct ucred *cred; u_int ngrp; int error; cred = td->td_ucred; ngrp = cred->cr_ngroups; if (uap->gidsetsize == 0) { error = 0; goto out; } if (uap->gidsetsize < ngrp) return (EINVAL); error = copyout(cred->cr_groups, uap->gidset, ngrp * sizeof(gid_t)); out: td->td_retval[0] = ngrp; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setsid_args { int dummy; }; #endif /* ARGSUSED */ int -sys_setsid(register struct thread *td, struct setsid_args *uap) +sys_setsid(struct thread *td, struct setsid_args *uap) { struct pgrp *pgrp; int error; struct proc *p = td->td_proc; struct pgrp *newpgrp; struct session *newsess; error = 0; pgrp = NULL; newpgrp = malloc(sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO); newsess = malloc(sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO); sx_xlock(&proctree_lock); if (p->p_pgid == p->p_pid || (pgrp = pgfind(p->p_pid)) != NULL) { if (pgrp != NULL) PGRP_UNLOCK(pgrp); error = EPERM; } else { (void)enterpgrp(p, p->p_pid, newpgrp, newsess); td->td_retval[0] = p->p_pid; newpgrp = NULL; newsess = NULL; } sx_xunlock(&proctree_lock); if (newpgrp != NULL) free(newpgrp, M_PGRP); if (newsess != NULL) free(newsess, M_SESSION); return (error); } /* * set process group (setpgid/old setpgrp) * * caller does setpgid(targpid, targpgid) * * pid must be caller or child of caller (ESRCH) * if a child * pid must be in same session (EPERM) * pid can't have done an exec (EACCES) * if pgid != pid * there must exist some pid in same session having pgid (EPERM) * pid must not be session leader (EPERM) */ #ifndef _SYS_SYSPROTO_H_ struct setpgid_args { int pid; /* target process id */ int pgid; /* target pgrp id */ }; #endif /* ARGSUSED */ int -sys_setpgid(struct thread *td, register struct setpgid_args *uap) +sys_setpgid(struct 
thread *td, struct setpgid_args *uap) { struct proc *curp = td->td_proc; - register struct proc *targp; /* target process */ - register struct pgrp *pgrp; /* target pgrp */ + struct proc *targp; /* target process */ + struct pgrp *pgrp; /* target pgrp */ int error; struct pgrp *newpgrp; if (uap->pgid < 0) return (EINVAL); error = 0; newpgrp = malloc(sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO); sx_xlock(&proctree_lock); if (uap->pid != 0 && uap->pid != curp->p_pid) { if ((targp = pfind(uap->pid)) == NULL) { error = ESRCH; goto done; } if (!inferior(targp)) { PROC_UNLOCK(targp); error = ESRCH; goto done; } if ((error = p_cansee(td, targp))) { PROC_UNLOCK(targp); goto done; } if (targp->p_pgrp == NULL || targp->p_session != curp->p_session) { PROC_UNLOCK(targp); error = EPERM; goto done; } if (targp->p_flag & P_EXEC) { PROC_UNLOCK(targp); error = EACCES; goto done; } PROC_UNLOCK(targp); } else targp = curp; if (SESS_LEADER(targp)) { error = EPERM; goto done; } if (uap->pgid == 0) uap->pgid = targp->p_pid; if ((pgrp = pgfind(uap->pgid)) == NULL) { if (uap->pgid == targp->p_pid) { error = enterpgrp(targp, uap->pgid, newpgrp, NULL); if (error == 0) newpgrp = NULL; } else error = EPERM; } else { if (pgrp == targp->p_pgrp) { PGRP_UNLOCK(pgrp); goto done; } if (pgrp->pg_id != targp->p_pid && pgrp->pg_session != curp->p_session) { PGRP_UNLOCK(pgrp); error = EPERM; goto done; } PGRP_UNLOCK(pgrp); error = enterthispgrp(targp, pgrp); } done: sx_xunlock(&proctree_lock); KASSERT((error == 0) || (newpgrp != NULL), ("setpgid failed and newpgrp is NULL")); if (newpgrp != NULL) free(newpgrp, M_PGRP); return (error); } /* * Use the clause in B.4.2.2 that allows setuid/setgid to be 4.2/4.3BSD * compatible. It says that setting the uid/gid to euid/egid is a special * case of "appropriate privilege". Once the rules are expanded out, this * basically means that setuid(nnn) sets all three id's, in all permitted * cases unless _POSIX_SAVED_IDS is enabled. In that case, setuid(getuid()) * does not set the saved id - this is dangerous for traditional BSD * programs. For this reason, we *really* do not want to set * _POSIX_SAVED_IDS and do not want to clear POSIX_APPENDIX_B_4_2_2. */ #define POSIX_APPENDIX_B_4_2_2 #ifndef _SYS_SYSPROTO_H_ struct setuid_args { uid_t uid; }; #endif /* ARGSUSED */ int sys_setuid(struct thread *td, struct setuid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; uid_t uid; struct uidinfo *uip; int error; uid = uap->uid; AUDIT_ARG_UID(uid); newcred = crget(); uip = uifind(uid); PROC_LOCK(p); /* * Copy credentials so other references do not see our changes. */ oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setuid(oldcred, uid); if (error) goto fail; #endif /* * See if we have "permission" by POSIX 1003.1 rules. * * Note that setuid(geteuid()) is a special case of * "appropriate privileges" in appendix B.4.2.2. We need * to use this clause to be compatible with traditional BSD * semantics. Basically, it means that "setuid(xx)" sets all * three id's (assuming you have privs). * * Notes on the logic. We do things in three steps. * 1: We determine if the euid is going to change, and do EPERM * right away. We unconditionally change the euid later if this * test is satisfied, simplifying that part of the logic. * 2: We determine if the real and/or saved uids are going to * change. Determined by compile options. * 3: Change euid last. 
(after tests in #2 for "appropriate privs") */ if (uid != oldcred->cr_ruid && /* allow setuid(getuid()) */ #ifdef _POSIX_SAVED_IDS uid != oldcred->cr_svuid && /* allow setuid(saved uid) */ #endif #ifdef POSIX_APPENDIX_B_4_2_2 /* Use BSD-compat clause from B.4.2.2 */ uid != oldcred->cr_uid && /* allow setuid(geteuid()) */ #endif (error = priv_check_cred(oldcred, PRIV_CRED_SETUID, 0)) != 0) goto fail; #ifdef _POSIX_SAVED_IDS /* * Do we have "appropriate privileges" (are we root or uid == euid)? * If so, we are changing the real uid and/or saved uid. */ if ( #ifdef POSIX_APPENDIX_B_4_2_2 /* Use the clause from B.4.2.2 */ uid == oldcred->cr_uid || #endif /* We are using privs. */ priv_check_cred(oldcred, PRIV_CRED_SETUID, 0) == 0) #endif { /* * Set the real uid and transfer proc count to new user. */ if (uid != oldcred->cr_ruid) { change_ruid(newcred, uip); setsugid(p); } /* * Set saved uid * * XXX always set saved uid even if not _POSIX_SAVED_IDS, as * the security of seteuid() depends on it. B.4.2.2 says it * is important that we should do this. */ if (uid != oldcred->cr_svuid) { change_svuid(newcred, uid); setsugid(p); } } /* * In all permitted cases, we are changing the euid. */ if (uid != oldcred->cr_uid) { change_euid(newcred, uip); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); #endif uifree(uip); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); uifree(uip); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct seteuid_args { uid_t euid; }; #endif /* ARGSUSED */ int sys_seteuid(struct thread *td, struct seteuid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; uid_t euid; struct uidinfo *euip; int error; euid = uap->euid; AUDIT_ARG_EUID(euid); newcred = crget(); euip = uifind(euid); PROC_LOCK(p); /* * Copy credentials so other references do not see our changes. */ oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_seteuid(oldcred, euid); if (error) goto fail; #endif if (euid != oldcred->cr_ruid && /* allow seteuid(getuid()) */ euid != oldcred->cr_svuid && /* allow seteuid(saved uid) */ (error = priv_check_cred(oldcred, PRIV_CRED_SETEUID, 0)) != 0) goto fail; /* * Everything's okay, do it. */ if (oldcred->cr_uid != euid) { change_euid(newcred, euip); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); uifree(euip); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); uifree(euip); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setgid_args { gid_t gid; }; #endif /* ARGSUSED */ int sys_setgid(struct thread *td, struct setgid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; gid_t gid; int error; gid = uap->gid; AUDIT_ARG_GID(gid); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setgid(oldcred, gid); if (error) goto fail; #endif /* * See if we have "permission" by POSIX 1003.1 rules. * * Note that setgid(getegid()) is a special case of * "appropriate privileges" in appendix B.4.2.2. We need * to use this clause to be compatible with traditional BSD * semantics. Basically, it means that "setgid(xx)" sets all * three id's (assuming you have privs). * * For notes on the logic here, see setuid() above. 
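 *
 * Editor's note: the practical upshot of the saved-id rules in
 * setuid()/seteuid() above is the classic drop-and-regain pattern.
 * Illustrative userland sketch (the regain step only succeeds when
 * the process started with euid != ruid, e.g. a setuid binary):
 */
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	uid_t ruid = getuid(), euid = geteuid();

	/* Drop: euid becomes the real uid; the saved uid keeps euid. */
	if (seteuid(ruid) != 0)
		perror("seteuid(ruid)");

	/* ... do unprivileged work ... */

	/* Regain: permitted because euid matches the saved uid. */
	if (seteuid(euid) != 0)
		perror("seteuid(euid)");
	printf("ruid %d euid %d\n", (int)getuid(), (int)geteuid());
	return (0);
}
/*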
*/ if (gid != oldcred->cr_rgid && /* allow setgid(getgid()) */ #ifdef _POSIX_SAVED_IDS gid != oldcred->cr_svgid && /* allow setgid(saved gid) */ #endif #ifdef POSIX_APPENDIX_B_4_2_2 /* Use BSD-compat clause from B.4.2.2 */ gid != oldcred->cr_groups[0] && /* allow setgid(getegid()) */ #endif (error = priv_check_cred(oldcred, PRIV_CRED_SETGID, 0)) != 0) goto fail; #ifdef _POSIX_SAVED_IDS /* * Do we have "appropriate privileges" (are we root or gid == egid)? * If so, we are changing the real gid and saved gid. */ if ( #ifdef POSIX_APPENDIX_B_4_2_2 /* use the clause from B.4.2.2 */ gid == oldcred->cr_groups[0] || #endif /* We are using privs. */ priv_check_cred(oldcred, PRIV_CRED_SETGID, 0) == 0) #endif { /* * Set real gid */ if (oldcred->cr_rgid != gid) { change_rgid(newcred, gid); setsugid(p); } /* * Set saved gid * * XXX always set saved gid even if not _POSIX_SAVED_IDS, as * the security of setegid() depends on it. B.4.2.2 says it * is important that we should do this. */ if (oldcred->cr_svgid != gid) { change_svgid(newcred, gid); setsugid(p); } } /* * In all permitted cases, we are changing the egid. * Copy credentials so other references do not see our changes. */ if (oldcred->cr_groups[0] != gid) { change_egid(newcred, gid); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setegid_args { gid_t egid; }; #endif /* ARGSUSED */ int sys_setegid(struct thread *td, struct setegid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; gid_t egid; int error; egid = uap->egid; AUDIT_ARG_EGID(egid); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setegid(oldcred, egid); if (error) goto fail; #endif if (egid != oldcred->cr_rgid && /* allow setegid(getgid()) */ egid != oldcred->cr_svgid && /* allow setegid(saved gid) */ (error = priv_check_cred(oldcred, PRIV_CRED_SETEGID, 0)) != 0) goto fail; if (oldcred->cr_groups[0] != egid) { change_egid(newcred, egid); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setgroups_args { u_int gidsetsize; gid_t *gidset; }; #endif /* ARGSUSED */ int sys_setgroups(struct thread *td, struct setgroups_args *uap) { gid_t smallgroups[XU_NGROUPS]; gid_t *groups; u_int gidsetsize; int error; gidsetsize = uap->gidsetsize; if (gidsetsize > ngroups_max + 1) return (EINVAL); if (gidsetsize > XU_NGROUPS) groups = malloc(gidsetsize * sizeof(gid_t), M_TEMP, M_WAITOK); else groups = smallgroups; error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t)); if (error == 0) error = kern_setgroups(td, gidsetsize, groups); if (gidsetsize > XU_NGROUPS) free(groups, M_TEMP); return (error); } int kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; int error; MPASS(ngrp <= ngroups_max + 1); AUDIT_ARG_GROUPSET(groups, ngrp); newcred = crget(); crextend(newcred, ngrp); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setgroups(oldcred, ngrp, groups); if (error) goto fail; #endif error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0); if (error) goto fail; if (ngrp == 0) { /* * setgroups(0, NULL) is a legitimate way of clearing the * groups vector on non-BSD systems (which generally do not * have the egid in the groups[0]). 
We risk security holes * when running non-BSD software if we do not do the same. */ newcred->cr_ngroups = 1; } else { crsetgroups_locked(newcred, ngrp, groups); } setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setreuid_args { uid_t ruid; uid_t euid; }; #endif /* ARGSUSED */ int -sys_setreuid(register struct thread *td, struct setreuid_args *uap) +sys_setreuid(struct thread *td, struct setreuid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; uid_t euid, ruid; struct uidinfo *euip, *ruip; int error; euid = uap->euid; ruid = uap->ruid; AUDIT_ARG_EUID(euid); AUDIT_ARG_RUID(ruid); newcred = crget(); euip = uifind(euid); ruip = uifind(ruid); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setreuid(oldcred, ruid, euid); if (error) goto fail; #endif if (((ruid != (uid_t)-1 && ruid != oldcred->cr_ruid && ruid != oldcred->cr_svuid) || (euid != (uid_t)-1 && euid != oldcred->cr_uid && euid != oldcred->cr_ruid && euid != oldcred->cr_svuid)) && (error = priv_check_cred(oldcred, PRIV_CRED_SETREUID, 0)) != 0) goto fail; if (euid != (uid_t)-1 && oldcred->cr_uid != euid) { change_euid(newcred, euip); setsugid(p); } if (ruid != (uid_t)-1 && oldcred->cr_ruid != ruid) { change_ruid(newcred, ruip); setsugid(p); } if ((ruid != (uid_t)-1 || newcred->cr_uid != newcred->cr_ruid) && newcred->cr_svuid != newcred->cr_uid) { change_svuid(newcred, newcred->cr_uid); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); #endif uifree(ruip); uifree(euip); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); uifree(ruip); uifree(euip); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setregid_args { gid_t rgid; gid_t egid; }; #endif /* ARGSUSED */ int -sys_setregid(register struct thread *td, struct setregid_args *uap) +sys_setregid(struct thread *td, struct setregid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; gid_t egid, rgid; int error; egid = uap->egid; rgid = uap->rgid; AUDIT_ARG_EGID(egid); AUDIT_ARG_RGID(rgid); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setregid(oldcred, rgid, egid); if (error) goto fail; #endif if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid && rgid != oldcred->cr_svgid) || (egid != (gid_t)-1 && egid != oldcred->cr_groups[0] && egid != oldcred->cr_rgid && egid != oldcred->cr_svgid)) && (error = priv_check_cred(oldcred, PRIV_CRED_SETREGID, 0)) != 0) goto fail; if (egid != (gid_t)-1 && oldcred->cr_groups[0] != egid) { change_egid(newcred, egid); setsugid(p); } if (rgid != (gid_t)-1 && oldcred->cr_rgid != rgid) { change_rgid(newcred, rgid); setsugid(p); } if ((rgid != (gid_t)-1 || newcred->cr_groups[0] != newcred->cr_rgid) && newcred->cr_svgid != newcred->cr_groups[0]) { change_svgid(newcred, newcred->cr_groups[0]); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } /* * setresuid(ruid, euid, suid) is like setreuid except control over the saved * uid is explicit. 
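 *
 * Editor's note: minimal userland round trip for the explicit
 * three-uid interface; (uid_t)-1 leaves a field unchanged, and the
 * permission rule below requires every new value to already be one
 * of the caller's real, effective or saved uids unless privileged:
 */
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	uid_t r, e, s;

	if (getresuid(&r, &e, &s) != 0)
		perror("getresuid");
	printf("ruid %d euid %d svuid %d\n", (int)r, (int)e, (int)s);

	/* Permanently drop privilege: real uid in all three slots. */
	if (setresuid(r, r, r) != 0)
		perror("setresuid");
	return (0);
}
/*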
*/ #ifndef _SYS_SYSPROTO_H_ struct setresuid_args { uid_t ruid; uid_t euid; uid_t suid; }; #endif /* ARGSUSED */ int -sys_setresuid(register struct thread *td, struct setresuid_args *uap) +sys_setresuid(struct thread *td, struct setresuid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; uid_t euid, ruid, suid; struct uidinfo *euip, *ruip; int error; euid = uap->euid; ruid = uap->ruid; suid = uap->suid; AUDIT_ARG_EUID(euid); AUDIT_ARG_RUID(ruid); AUDIT_ARG_SUID(suid); newcred = crget(); euip = uifind(euid); ruip = uifind(ruid); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setresuid(oldcred, ruid, euid, suid); if (error) goto fail; #endif if (((ruid != (uid_t)-1 && ruid != oldcred->cr_ruid && ruid != oldcred->cr_svuid && ruid != oldcred->cr_uid) || (euid != (uid_t)-1 && euid != oldcred->cr_ruid && euid != oldcred->cr_svuid && euid != oldcred->cr_uid) || (suid != (uid_t)-1 && suid != oldcred->cr_ruid && suid != oldcred->cr_svuid && suid != oldcred->cr_uid)) && (error = priv_check_cred(oldcred, PRIV_CRED_SETRESUID, 0)) != 0) goto fail; if (euid != (uid_t)-1 && oldcred->cr_uid != euid) { change_euid(newcred, euip); setsugid(p); } if (ruid != (uid_t)-1 && oldcred->cr_ruid != ruid) { change_ruid(newcred, ruip); setsugid(p); } if (suid != (uid_t)-1 && oldcred->cr_svuid != suid) { change_svuid(newcred, suid); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); #endif uifree(ruip); uifree(euip); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); uifree(ruip); uifree(euip); crfree(newcred); return (error); } /* * setresgid(rgid, egid, sgid) is like setregid except control over the saved * gid is explicit. */ #ifndef _SYS_SYSPROTO_H_ struct setresgid_args { gid_t rgid; gid_t egid; gid_t sgid; }; #endif /* ARGSUSED */ int -sys_setresgid(register struct thread *td, struct setresgid_args *uap) +sys_setresgid(struct thread *td, struct setresgid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; gid_t egid, rgid, sgid; int error; egid = uap->egid; rgid = uap->rgid; sgid = uap->sgid; AUDIT_ARG_EGID(egid); AUDIT_ARG_RGID(rgid); AUDIT_ARG_SGID(sgid); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setresgid(oldcred, rgid, egid, sgid); if (error) goto fail; #endif if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid && rgid != oldcred->cr_svgid && rgid != oldcred->cr_groups[0]) || (egid != (gid_t)-1 && egid != oldcred->cr_rgid && egid != oldcred->cr_svgid && egid != oldcred->cr_groups[0]) || (sgid != (gid_t)-1 && sgid != oldcred->cr_rgid && sgid != oldcred->cr_svgid && sgid != oldcred->cr_groups[0])) && (error = priv_check_cred(oldcred, PRIV_CRED_SETRESGID, 0)) != 0) goto fail; if (egid != (gid_t)-1 && oldcred->cr_groups[0] != egid) { change_egid(newcred, egid); setsugid(p); } if (rgid != (gid_t)-1 && oldcred->cr_rgid != rgid) { change_rgid(newcred, rgid); setsugid(p); } if (sgid != (gid_t)-1 && oldcred->cr_svgid != sgid) { change_svgid(newcred, sgid); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getresuid_args { uid_t *ruid; uid_t *euid; uid_t *suid; }; #endif /* ARGSUSED */ int -sys_getresuid(register struct thread *td, struct getresuid_args *uap) +sys_getresuid(struct thread *td, struct getresuid_args *uap) { struct ucred *cred; int error1 = 0, error2 = 0, error3 = 0; cred = 
td->td_ucred; if (uap->ruid) error1 = copyout(&cred->cr_ruid, uap->ruid, sizeof(cred->cr_ruid)); if (uap->euid) error2 = copyout(&cred->cr_uid, uap->euid, sizeof(cred->cr_uid)); if (uap->suid) error3 = copyout(&cred->cr_svuid, uap->suid, sizeof(cred->cr_svuid)); return (error1 ? error1 : error2 ? error2 : error3); } #ifndef _SYS_SYSPROTO_H_ struct getresgid_args { gid_t *rgid; gid_t *egid; gid_t *sgid; }; #endif /* ARGSUSED */ int -sys_getresgid(register struct thread *td, struct getresgid_args *uap) +sys_getresgid(struct thread *td, struct getresgid_args *uap) { struct ucred *cred; int error1 = 0, error2 = 0, error3 = 0; cred = td->td_ucred; if (uap->rgid) error1 = copyout(&cred->cr_rgid, uap->rgid, sizeof(cred->cr_rgid)); if (uap->egid) error2 = copyout(&cred->cr_groups[0], uap->egid, sizeof(cred->cr_groups[0])); if (uap->sgid) error3 = copyout(&cred->cr_svgid, uap->sgid, sizeof(cred->cr_svgid)); return (error1 ? error1 : error2 ? error2 : error3); } #ifndef _SYS_SYSPROTO_H_ struct issetugid_args { int dummy; }; #endif /* ARGSUSED */ int -sys_issetugid(register struct thread *td, struct issetugid_args *uap) +sys_issetugid(struct thread *td, struct issetugid_args *uap) { struct proc *p = td->td_proc; /* * Note: OpenBSD sets a P_SUGIDEXEC flag at execve() time; * we use P_SUGID because we consider changing the owners as * "tainting" as well. * This is significant for procs that start as root and "become" * a user without an exec - programs cannot know *everything* * that libc *might* have put in their data segment. */ td->td_retval[0] = (p->p_flag & P_SUGID) ? 1 : 0; return (0); } int sys___setugid(struct thread *td, struct __setugid_args *uap) { #ifdef REGRESSION struct proc *p; p = td->td_proc; switch (uap->flag) { case 0: PROC_LOCK(p); p->p_flag &= ~P_SUGID; PROC_UNLOCK(p); return (0); case 1: PROC_LOCK(p); p->p_flag |= P_SUGID; PROC_UNLOCK(p); return (0); default: return (EINVAL); } #else /* !REGRESSION */ return (ENOSYS); #endif /* REGRESSION */ } /* * Check if gid is a member of the group set. */ int groupmember(gid_t gid, struct ucred *cred) { int l; int h; int m; if (cred->cr_groups[0] == gid) return (1); /* * If gid was not our primary group, perform a binary search * of the supplemental groups. This is possible because we * sort the groups in crsetgroups(). */ l = 1; h = cred->cr_ngroups; while (l < h) { m = l + ((h - l) / 2); if (cred->cr_groups[m] < gid) l = m + 1; else h = m; } if ((l < cred->cr_ngroups) && (cred->cr_groups[l] == gid)) return (1); return (0); } /* * Test the active securelevel against a given level. securelevel_gt() * implements (securelevel > level). securelevel_ge() implements * (securelevel >= level). Note that the logic is inverted -- these * functions return EPERM on "success" and 0 on "failure". * * Due to care taken when setting the securelevel, we know that no jail will * be less secure than its parent (or the physical system), so it is sufficient * to test the current jail only. * * XXXRW: Possibly since this has to do with privilege, it should move to * kern_priv.c. * XXX: data declarations should be together near the beginning of the file. */ int securelevel_gt(struct ucred *cr, int level) { return (cr->cr_prison->pr_securelevel > level ? EPERM : 0); } int securelevel_ge(struct ucred *cr, int level) { return (cr->cr_prison->pr_securelevel >= level ? EPERM : 0); } /* * 'see_other_uids' determines whether or not visibility of processes * and sockets with credentials holding different real uids is possible * using a variety of system MIBs. * XXX: data declarations should be together near the beginning of the file. 
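 *
 * Editor's note on groupmember() above: cr_groups[0] holds the
 * effective gid and is excluded from sorting, so the binary search
 * covers cr_groups[1..cr_ngroups).  Standalone hosted-C sketch of
 * the same lower-bound search (illustrative only):
 */
#include <sys/types.h>
#include <stdio.h>

static int
groupmember_sketch(gid_t gid, const gid_t *groups, int ngroups)
{
	int l, h, m;

	if (groups[0] == gid)		/* effective gid, unsorted slot */
		return (1);
	l = 1;
	h = ngroups;
	while (l < h) {			/* lower-bound binary search */
		m = l + (h - l) / 2;
		if (groups[m] < gid)
			l = m + 1;
		else
			h = m;
	}
	return (l < ngroups && groups[l] == gid);
}

int
main(void)
{
	gid_t g[] = { 20, 0, 5, 12, 80 };	/* [1..n) kept sorted */

	printf("%d %d\n", groupmember_sketch(12, g, 5),
	    groupmember_sketch(13, g, 5));	/* prints "1 0" */
	return (0);
}
/*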
*/ static int see_other_uids = 1; SYSCTL_INT(_security_bsd, OID_AUTO, see_other_uids, CTLFLAG_RW, &see_other_uids, 0, "Unprivileged processes may see subjects/objects with different real uid"); /*- * Determine if u1 "can see" the subject specified by u2, according to the * 'see_other_uids' policy. * Returns: 0 for permitted, ESRCH otherwise * Locks: none * References: *u1 and *u2 must not change during the call * u1 may equal u2, in which case only one reference is required */ static int cr_seeotheruids(struct ucred *u1, struct ucred *u2) { if (!see_other_uids && u1->cr_ruid != u2->cr_ruid) { if (priv_check_cred(u1, PRIV_SEEOTHERUIDS, 0) != 0) return (ESRCH); } return (0); } /* * 'see_other_gids' determines whether or not visibility of processes * and sockets with credentials holding different real gids is possible * using a variety of system MIBs. * XXX: data declarations should be together near the beginning of the file. */ static int see_other_gids = 1; SYSCTL_INT(_security_bsd, OID_AUTO, see_other_gids, CTLFLAG_RW, &see_other_gids, 0, "Unprivileged processes may see subjects/objects with different real gid"); /* * Determine if u1 can "see" the subject specified by u2, according to the * 'see_other_gids' policy. * Returns: 0 for permitted, ESRCH otherwise * Locks: none * References: *u1 and *u2 must not change during the call * u1 may equal u2, in which case only one reference is required */ static int cr_seeothergids(struct ucred *u1, struct ucred *u2) { int i, match; if (!see_other_gids) { match = 0; for (i = 0; i < u1->cr_ngroups; i++) { if (groupmember(u1->cr_groups[i], u2)) match = 1; if (match) break; } if (!match) { if (priv_check_cred(u1, PRIV_SEEOTHERGIDS, 0) != 0) return (ESRCH); } } return (0); } /*- * Determine if u1 "can see" the subject specified by u2. * Returns: 0 for permitted, an errno value otherwise * Locks: none * References: *u1 and *u2 must not change during the call * u1 may equal u2, in which case only one reference is required */ int cr_cansee(struct ucred *u1, struct ucred *u2) { int error; if ((error = prison_check(u1, u2))) return (error); #ifdef MAC if ((error = mac_cred_check_visible(u1, u2))) return (error); #endif if ((error = cr_seeotheruids(u1, u2))) return (error); if ((error = cr_seeothergids(u1, u2))) return (error); return (0); } /*- * Determine if td "can see" the subject specified by p. * Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect p->p_ucred must be held. td really * should be curthread. * References: td and p must be valid for the lifetime of the call */ int p_cansee(struct thread *td, struct proc *p) { /* Wrap cr_cansee() for all functionality. */ KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); return (cr_cansee(td->td_ucred, p->p_ucred)); } /* * 'conservative_signals' prevents the delivery of a broad class of * signals by unprivileged processes to processes that have changed their * credentials since the last invocation of execve(). This can prevent * the leakage of cached information or retained privileges as a result * of a common class of signal-related vulnerabilities. However, this * may interfere with some applications that expect to be able to * deliver these signals to peer processes after having given up * privilege. 
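 *
 * Editor's note: the signal filter that cr_cansignal() below applies
 * to P_SUGID targets, reduced to a standalone predicate
 * (illustrative; sugid_signal_allowed() is a hypothetical name):
 */
#include <signal.h>

static int
sugid_signal_allowed(int signum)
{
	switch (signum) {
	case 0:			/* existence test, as in kill(pid, 0) */
	case SIGKILL: case SIGINT: case SIGTERM: case SIGALRM:
	case SIGSTOP: case SIGTTIN: case SIGTTOU: case SIGTSTP:
	case SIGHUP: case SIGUSR1: case SIGUSR2:
		return (1);	/* job/terminal control: no privilege */
	default:
		return (0);	/* everything else needs PRIV_SIGNAL_SUGID */
	}
}

int
main(void)
{
	return (sugid_signal_allowed(SIGTERM) &&
	    !sugid_signal_allowed(SIGSYS) ? 0 : 1);
}
/*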
*/ static int conservative_signals = 1; SYSCTL_INT(_security_bsd, OID_AUTO, conservative_signals, CTLFLAG_RW, &conservative_signals, 0, "Unprivileged processes prevented from " "sending certain signals to processes whose credentials have changed"); /*- * Determine whether cred may deliver the specified signal to proc. * Returns: 0 for permitted, an errno value otherwise. * Locks: A lock must be held for proc. * References: cred and proc must be valid for the lifetime of the call. */ int cr_cansignal(struct ucred *cred, struct proc *proc, int signum) { int error; PROC_LOCK_ASSERT(proc, MA_OWNED); /* * Jail semantics limit the scope of signalling to proc in the * same jail as cred, if cred is in jail. */ error = prison_check(cred, proc->p_ucred); if (error) return (error); #ifdef MAC if ((error = mac_proc_check_signal(cred, proc, signum))) return (error); #endif if ((error = cr_seeotheruids(cred, proc->p_ucred))) return (error); if ((error = cr_seeothergids(cred, proc->p_ucred))) return (error); /* * UNIX signal semantics depend on the status of the P_SUGID * bit on the target process. If the bit is set, then additional * restrictions are placed on the set of available signals. */ if (conservative_signals && (proc->p_flag & P_SUGID)) { switch (signum) { case 0: case SIGKILL: case SIGINT: case SIGTERM: case SIGALRM: case SIGSTOP: case SIGTTIN: case SIGTTOU: case SIGTSTP: case SIGHUP: case SIGUSR1: case SIGUSR2: /* * Generally, permit job and terminal control * signals. */ break; default: /* Not permitted without privilege. */ error = priv_check_cred(cred, PRIV_SIGNAL_SUGID, 0); if (error) return (error); } } /* * Generally, the target credential's ruid or svuid must match the * subject credential's ruid or euid. */ if (cred->cr_ruid != proc->p_ucred->cr_ruid && cred->cr_ruid != proc->p_ucred->cr_svuid && cred->cr_uid != proc->p_ucred->cr_ruid && cred->cr_uid != proc->p_ucred->cr_svuid) { error = priv_check_cred(cred, PRIV_SIGNAL_DIFFCRED, 0); if (error) return (error); } return (0); } /*- * Determine whether td may deliver the specified signal to p. * Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect various components of td and p * must be held. td must be curthread, and a lock must be * held for p. * References: td and p must be valid for the lifetime of the call */ int p_cansignal(struct thread *td, struct proc *p, int signum) { KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if (td->td_proc == p) return (0); /* * UNIX signalling semantics require that processes in the same * session always be able to deliver SIGCONT to one another, * overriding the remaining protections. */ /* XXX: This will require an additional lock of some sort. */ if (signum == SIGCONT && td->td_proc->p_session == p->p_session) return (0); /* * Some compat layers use SIGTHR and higher signals for * communication between different kernel threads of the same * process, so that they expect that it's always possible to * deliver them, even for suid applications where cr_cansignal() can * deny such ability for security consideration. It should be * pretty safe to do since the only way to create two processes * with the same p_leader is via rfork(2). */ if (td->td_proc->p_leader != NULL && signum >= SIGTHR && signum < SIGTHR + 4 && td->td_proc->p_leader == p->p_leader) return (0); return (cr_cansignal(td->td_ucred, p, signum)); } /*- * Determine whether td may reschedule p. 
* Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect various components of td and p * must be held. td must be curthread, and a lock must * be held for p. * References: td and p must be valid for the lifetime of the call */ int p_cansched(struct thread *td, struct proc *p) { int error; KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if (td->td_proc == p) return (0); if ((error = prison_check(td->td_ucred, p->p_ucred))) return (error); #ifdef MAC if ((error = mac_proc_check_sched(td->td_ucred, p))) return (error); #endif if ((error = cr_seeotheruids(td->td_ucred, p->p_ucred))) return (error); if ((error = cr_seeothergids(td->td_ucred, p->p_ucred))) return (error); if (td->td_ucred->cr_ruid != p->p_ucred->cr_ruid && td->td_ucred->cr_uid != p->p_ucred->cr_ruid) { error = priv_check(td, PRIV_SCHED_DIFFCRED); if (error) return (error); } return (0); } /* * The 'unprivileged_proc_debug' flag may be used to disable a variety of * unprivileged inter-process debugging services, including some procfs * functionality, ptrace(), and ktrace(). In the past, inter-process * debugging has been involved in a variety of security problems, and sites * not requiring the service might choose to disable it when hardening * systems. * * XXX: Should modifying and reading this variable require locking? * XXX: data declarations should be together near the beginning of the file. */ static int unprivileged_proc_debug = 1; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_proc_debug, CTLFLAG_RW, &unprivileged_proc_debug, 0, "Unprivileged processes may use process debugging facilities"); /*- * Determine whether td may debug p. * Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect various components of td and p * must be held. td must be curthread, and a lock must * be held for p. * References: td and p must be valid for the lifetime of the call */ int p_candebug(struct thread *td, struct proc *p) { int credentialchanged, error, grpsubset, i, uidsubset; KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if (!unprivileged_proc_debug) { error = priv_check(td, PRIV_DEBUG_UNPRIV); if (error) return (error); } if (td->td_proc == p) return (0); if ((error = prison_check(td->td_ucred, p->p_ucred))) return (error); #ifdef MAC if ((error = mac_proc_check_debug(td->td_ucred, p))) return (error); #endif if ((error = cr_seeotheruids(td->td_ucred, p->p_ucred))) return (error); if ((error = cr_seeothergids(td->td_ucred, p->p_ucred))) return (error); /* * Is p's group set a subset of td's effective group set? This * includes p's egid, group access list, rgid, and svgid. */ grpsubset = 1; for (i = 0; i < p->p_ucred->cr_ngroups; i++) { if (!groupmember(p->p_ucred->cr_groups[i], td->td_ucred)) { grpsubset = 0; break; } } grpsubset = grpsubset && groupmember(p->p_ucred->cr_rgid, td->td_ucred) && groupmember(p->p_ucred->cr_svgid, td->td_ucred); /* * Are the uids present in p's credential equal to td's * effective uid? This includes p's euid, svuid, and ruid. */ uidsubset = (td->td_ucred->cr_uid == p->p_ucred->cr_uid && td->td_ucred->cr_uid == p->p_ucred->cr_svuid && td->td_ucred->cr_uid == p->p_ucred->cr_ruid); /* * Has the credential of the process changed since the last exec()? */ credentialchanged = (p->p_flag & P_SUGID); /* * If p's gids aren't a subset, or the uids aren't a subset, * or the credential has changed, require appropriate privilege * for td to debug p. 
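 *
 * Editor's note: the group-subset test computed above, written out
 * as a standalone predicate (illustrative hosted C; a quadratic scan
 * rather than the kernel's sorted-set groupmember() lookups):
 */
#include <sys/types.h>

static int
gid_subset(const gid_t *sub, int nsub, const gid_t *super, int nsuper)
{
	int i, j, found;

	for (i = 0; i < nsub; i++) {
		found = 0;
		for (j = 0; j < nsuper; j++) {
			if (sub[i] == super[j]) {
				found = 1;
				break;
			}
		}
		if (!found)
			return (0);
	}
	return (1);
}

int
main(void)
{
	gid_t a[] = { 5, 10 }, b[] = { 1, 5, 10, 20 };

	return (gid_subset(a, 2, b, 4) ? 0 : 1);	/* exits 0 */
}
/*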
*/ if (!grpsubset || !uidsubset) { error = priv_check(td, PRIV_DEBUG_DIFFCRED); if (error) return (error); } if (credentialchanged) { error = priv_check(td, PRIV_DEBUG_SUGID); if (error) return (error); } /* Can't trace init when securelevel > 0. */ if (p == initproc) { error = securelevel_gt(td->td_ucred, 0); if (error) return (error); } /* * Can't trace a process that's currently exec'ing. * * XXX: Note, this is not a security policy decision, it's a * basic correctness/functionality decision. Therefore, this check * should be moved to the callers of p_candebug(). */ if ((p->p_flag & P_INEXEC) != 0) return (EBUSY); /* Denied explicitly */ if ((p->p_flag2 & P2_NOTRACE) != 0) { error = priv_check(td, PRIV_DEBUG_DENIED); if (error != 0) return (error); } return (0); } /*- * Determine whether the subject represented by cred can "see" a socket. * Returns: 0 for permitted, ENOENT otherwise. */ int cr_canseesocket(struct ucred *cred, struct socket *so) { int error; error = prison_check(cred, so->so_cred); if (error) return (ENOENT); #ifdef MAC error = mac_socket_check_visible(cred, so); if (error) return (error); #endif if (cr_seeotheruids(cred, so->so_cred)) return (ENOENT); if (cr_seeothergids(cred, so->so_cred)) return (ENOENT); return (0); } #if defined(INET) || defined(INET6) /*- * Determine whether the subject represented by cred can "see" an inpcb. * Returns: 0 for permitted, ENOENT otherwise. */ int cr_canseeinpcb(struct ucred *cred, struct inpcb *inp) { int error; error = prison_check(cred, inp->inp_cred); if (error) return (ENOENT); #ifdef MAC INP_LOCK_ASSERT(inp); error = mac_inpcb_check_visible(cred, inp); if (error) return (error); #endif if (cr_seeotheruids(cred, inp->inp_cred)) return (ENOENT); if (cr_seeothergids(cred, inp->inp_cred)) return (ENOENT); return (0); } #endif /*- * Determine whether td can wait for the exit of p. * Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect various components of td and p * must be held. td must be curthread, and a lock must * be held for p. * References: td and p must be valid for the lifetime of the call */ int p_canwait(struct thread *td, struct proc *p) { int error; KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = prison_check(td->td_ucred, p->p_ucred))) return (error); #ifdef MAC if ((error = mac_proc_check_wait(td->td_ucred, p))) return (error); #endif #if 0 /* XXXMAC: This could have odd effects on some shells. */ if ((error = cr_seeotheruids(td->td_ucred, p->p_ucred))) return (error); #endif return (0); } /* * Allocate a zeroed cred structure. */ struct ucred * crget(void) { - register struct ucred *cr; + struct ucred *cr; cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO); refcount_init(&cr->cr_ref, 1); #ifdef AUDIT audit_cred_init(cr); #endif #ifdef MAC mac_cred_init(cr); #endif cr->cr_groups = cr->cr_smallgroups; cr->cr_agroups = sizeof(cr->cr_smallgroups) / sizeof(cr->cr_smallgroups[0]); return (cr); } /* * Claim another reference to a ucred structure. */ struct ucred * crhold(struct ucred *cr) { refcount_acquire(&cr->cr_ref); return (cr); } /* * Free a cred structure. Throws away space when ref count gets to 0. 
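 *
 * Editor's note: crhold()/crfree() follow the usual atomic reference
 * count protocol.  Reduced hosted-C sketch with C11 atomics
 * (illustrative; the kernel uses refcount_acquire() and
 * refcount_release(), and crfree() also releases the uidinfo,
 * prison, login class, audit and MAC state shown below):
 */
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_uint ref;
	/* ... payload ... */
};

static struct obj *
obj_hold(struct obj *o)
{
	atomic_fetch_add(&o->ref, 1);
	return (o);
}

static void
obj_free(struct obj *o)
{
	/* The thread that drops the last reference frees the storage. */
	if (atomic_fetch_sub(&o->ref, 1) == 1)
		free(o);
}

int
main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	atomic_init(&o->ref, 1);
	obj_hold(o);
	obj_free(o);	/* ref 2 -> 1 */
	obj_free(o);	/* ref 1 -> 0: freed */
	return (0);
}
/*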
*/ void crfree(struct ucred *cr) { KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref)); KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred")); if (refcount_release(&cr->cr_ref)) { /* * Some callers of crget(), such as nfs_statfs(), * allocate a temporary credential, but don't * allocate a uidinfo structure. */ if (cr->cr_uidinfo != NULL) uifree(cr->cr_uidinfo); if (cr->cr_ruidinfo != NULL) uifree(cr->cr_ruidinfo); /* * Free a prison, if any. */ if (cr->cr_prison != NULL) prison_free(cr->cr_prison); if (cr->cr_loginclass != NULL) loginclass_free(cr->cr_loginclass); #ifdef AUDIT audit_cred_destroy(cr); #endif #ifdef MAC mac_cred_destroy(cr); #endif if (cr->cr_groups != cr->cr_smallgroups) free(cr->cr_groups, M_CRED); free(cr, M_CRED); } } /* * Copy a ucred's contents from a template. Does not block. */ void crcopy(struct ucred *dest, struct ucred *src) { KASSERT(dest->cr_ref == 1, ("crcopy of shared ucred")); bcopy(&src->cr_startcopy, &dest->cr_startcopy, (unsigned)((caddr_t)&src->cr_endcopy - (caddr_t)&src->cr_startcopy)); crsetgroups(dest, src->cr_ngroups, src->cr_groups); uihold(dest->cr_uidinfo); uihold(dest->cr_ruidinfo); prison_hold(dest->cr_prison); loginclass_hold(dest->cr_loginclass); #ifdef AUDIT audit_cred_copy(src, dest); #endif #ifdef MAC mac_cred_copy(src, dest); #endif } /* * Dup cred struct to a new held one. */ struct ucred * crdup(struct ucred *cr) { struct ucred *newcr; newcr = crget(); crcopy(newcr, cr); return (newcr); } /* * Fill in a struct xucred based on a struct ucred. */ void cru2x(struct ucred *cr, struct xucred *xcr) { int ngroups; bzero(xcr, sizeof(*xcr)); xcr->cr_version = XUCRED_VERSION; xcr->cr_uid = cr->cr_uid; ngroups = MIN(cr->cr_ngroups, XU_NGROUPS); xcr->cr_ngroups = ngroups; bcopy(cr->cr_groups, xcr->cr_groups, ngroups * sizeof(*cr->cr_groups)); } /* * Set initial process credentials. * Callers are responsible for providing the reference for provided credentials. */ void proc_set_cred_init(struct proc *p, struct ucred *newcred) { p->p_ucred = newcred; } /* * Change process credentials. * Callers are responsible for providing the reference for passed credentials * and for freeing old ones. * * Process has to be locked except when it does not have credentials (as it * should not be visible just yet) or when newcred is NULL (as this can be * only used when the process is about to be freed, at which point it should * not be visible anymore). */ struct ucred * proc_set_cred(struct proc *p, struct ucred *newcred) { struct ucred *oldcred; MPASS(p->p_ucred != NULL); if (newcred == NULL) MPASS(p->p_state == PRS_ZOMBIE); else PROC_LOCK_ASSERT(p, MA_OWNED); oldcred = p->p_ucred; p->p_ucred = newcred; if (newcred != NULL) PROC_UPDATE_COW(p); return (oldcred); } struct ucred * crcopysafe(struct proc *p, struct ucred *cr) { struct ucred *oldcred; int groups; PROC_LOCK_ASSERT(p, MA_OWNED); oldcred = p->p_ucred; while (cr->cr_agroups < oldcred->cr_agroups) { groups = oldcred->cr_agroups; PROC_UNLOCK(p); crextend(cr, groups); PROC_LOCK(p); oldcred = p->p_ucred; } crcopy(cr, oldcred); return (oldcred); } /* * Extend the passed in credential to hold n items. */ void crextend(struct ucred *cr, int n) { int cnt; /* Truncate? */ if (n <= cr->cr_agroups) return; /* * We extend by 2 each time since we're using a power of two * allocator until we need enough groups to fill a page. * Once we're allocating multiple pages, only allocate as many * as we actually need. 
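 * (Editor's aside: with 4-byte gid_t and 4 KB pages the doubling
 * regime below covers requests under 1024 entries.  An equivalent
 * hosted-C sizing helper, for illustration only; the starting count
 * of 16 is a placeholder for the kernel's MINALLOCSIZE-derived
 * value, and <sys/types.h> is assumed for gid_t:
 *
 *	size_t
 *	groups_alloc_size(size_t n, size_t page)
 *	{
 *		size_t cnt, per_page = page / sizeof(gid_t);
 *
 *		if (n < per_page) {
 *			for (cnt = 16; cnt < n; cnt *= 2)
 *				;
 *			return (cnt);
 *		}
 *		return ((n + per_page - 1) / per_page * per_page);
 *	}
 *
 * groups_alloc_size(100, 4096) is 128, the next power of two;
 * groups_alloc_size(3000, 4096) is 3072, a whole number of pages.)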
The case of processes needing a * non-power of two number of pages seems more likely than * a real world process that adds thousands of groups one at a * time. */ if (n < PAGE_SIZE / sizeof(gid_t)) { if (cr->cr_agroups == 0) cnt = MINALLOCSIZE / sizeof(gid_t); else cnt = cr->cr_agroups * 2; while (cnt < n) cnt *= 2; } else cnt = roundup2(n, PAGE_SIZE / sizeof(gid_t)); /* Free the old array. */ if (cr->cr_groups != cr->cr_smallgroups) free(cr->cr_groups, M_CRED); cr->cr_groups = malloc(cnt * sizeof(gid_t), M_CRED, M_WAITOK | M_ZERO); cr->cr_agroups = cnt; } /* * Copy groups into a credential, preserving any necessary invariants. * Currently this includes the sorting of all supplemental gids. * crextend() must have been called beforehand to ensure sufficient * space is available. */ static void crsetgroups_locked(struct ucred *cr, int ngrp, gid_t *groups) { int i; int j; gid_t g; KASSERT(cr->cr_agroups >= ngrp, ("cr_ngroups is too small")); bcopy(groups, cr->cr_groups, ngrp * sizeof(gid_t)); cr->cr_ngroups = ngrp; /* * Sort all groups except cr_groups[0] to allow groupmember to * perform a binary search. * * XXX: If large numbers of groups become common this should * be replaced with shell sort like linux uses or possibly * heap sort. */ for (i = 2; i < ngrp; i++) { g = cr->cr_groups[i]; for (j = i - 1; j >= 1 && g < cr->cr_groups[j]; j--) cr->cr_groups[j + 1] = cr->cr_groups[j]; cr->cr_groups[j + 1] = g; } } /* * Copy groups into a credential after expanding it if required. * Truncate the list to (ngroups_max + 1) if it is too large. */ void crsetgroups(struct ucred *cr, int ngrp, gid_t *groups) { if (ngrp > ngroups_max + 1) ngrp = ngroups_max + 1; crextend(cr, ngrp); crsetgroups_locked(cr, ngrp, groups); } /* * Get login name, if available. */ #ifndef _SYS_SYSPROTO_H_ struct getlogin_args { char *namebuf; u_int namelen; }; #endif /* ARGSUSED */ int sys_getlogin(struct thread *td, struct getlogin_args *uap) { char login[MAXLOGNAME]; struct proc *p = td->td_proc; size_t len; if (uap->namelen > MAXLOGNAME) uap->namelen = MAXLOGNAME; PROC_LOCK(p); SESS_LOCK(p->p_session); len = strlcpy(login, p->p_session->s_login, uap->namelen) + 1; SESS_UNLOCK(p->p_session); PROC_UNLOCK(p); if (len > uap->namelen) return (ERANGE); return (copyout(login, uap->namebuf, len)); } /* * Set login name. */ #ifndef _SYS_SYSPROTO_H_ struct setlogin_args { char *namebuf; }; #endif /* ARGSUSED */ int sys_setlogin(struct thread *td, struct setlogin_args *uap) { struct proc *p = td->td_proc; int error; char logintmp[MAXLOGNAME]; CTASSERT(sizeof(p->p_session->s_login) >= sizeof(logintmp)); error = priv_check(td, PRIV_PROC_SETLOGIN); if (error) return (error); error = copyinstr(uap->namebuf, logintmp, sizeof(logintmp), NULL); if (error != 0) { if (error == ENAMETOOLONG) error = EINVAL; return (error); } PROC_LOCK(p); SESS_LOCK(p->p_session); strcpy(p->p_session->s_login, logintmp); SESS_UNLOCK(p->p_session); PROC_UNLOCK(p); return (0); } void setsugid(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); p->p_flag |= P_SUGID; if (!(p->p_pfsflags & PF_ISUGID)) p->p_stops = 0; } /*- * Change a process's effective uid. * Side effects: newcred->cr_uid and newcred->cr_uidinfo will be modified. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_euid(struct ucred *newcred, struct uidinfo *euip) { newcred->cr_uid = euip->ui_uid; uihold(euip); uifree(newcred->cr_uidinfo); newcred->cr_uidinfo = euip; } /*- * Change a process's effective gid.
* Side effects: newcred->cr_gid will be modified. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_egid(struct ucred *newcred, gid_t egid) { newcred->cr_groups[0] = egid; } /*- * Change a process's real uid. * Side effects: newcred->cr_ruid will be updated, newcred->cr_ruidinfo * will be updated, and the old and new cr_ruidinfo proc * counts will be updated. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_ruid(struct ucred *newcred, struct uidinfo *ruip) { (void)chgproccnt(newcred->cr_ruidinfo, -1, 0); newcred->cr_ruid = ruip->ui_uid; uihold(ruip); uifree(newcred->cr_ruidinfo); newcred->cr_ruidinfo = ruip; (void)chgproccnt(newcred->cr_ruidinfo, 1, 0); } /*- * Change a process's real gid. * Side effects: newcred->cr_rgid will be updated. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_rgid(struct ucred *newcred, gid_t rgid) { newcred->cr_rgid = rgid; } /*- * Change a process's saved uid. * Side effects: newcred->cr_svuid will be updated. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_svuid(struct ucred *newcred, uid_t svuid) { newcred->cr_svuid = svuid; } /*- * Change a process's saved gid. * Side effects: newcred->cr_svgid will be updated. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_svgid(struct ucred *newcred, gid_t svgid) { newcred->cr_svgid = svgid; } Index: stable/11/sys/kern/kern_resource.c =================================================================== --- stable/11/sys/kern/kern_resource.c (revision 331642) +++ stable/11/sys/kern/kern_resource.c (revision 331643) @@ -1,1443 +1,1443 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static struct rwlock uihashtbl_lock; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); static void ruxagg_locked(struct rusage_ext *rux, struct thread *td); /* * Resource controls and accounting. */ #ifndef _SYS_SYSPROTO_H_ struct getpriority_args { int which; int who; }; #endif int -sys_getpriority(struct thread *td, register struct getpriority_args *uap) +sys_getpriority(struct thread *td, struct getpriority_args *uap) { struct proc *p; struct pgrp *pg; int error, low; error = 0; low = PRIO_MAX + 1; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) low = td->td_proc->p_nice; else { p = pfind(uap->who); if (p == NULL) break; if (p_cansee(td, p) == 0) low = p->p_nice; PROC_UNLOCK(p); } break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0 && p->p_ucred->cr_uid == uap->who) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (low == PRIO_MAX + 1 && error == 0) error = ESRCH; td->td_retval[0] = low; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setpriority_args { int which; int who; int prio; }; #endif int sys_setpriority(struct thread *td, struct setpriority_args *uap) { struct proc *curp, *p; struct pgrp *pg; int found = 0, error = 0; curp = td->td_proc; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) { PROC_LOCK(curp); error = donice(td, curp, uap->prio); PROC_UNLOCK(curp); } else { p = pfind(uap->who); if (p == NULL) break; error = p_cansee(td, p); if (error == 0) error = donice(td, p, uap->prio); PROC_UNLOCK(p); } found++; break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = curp->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == 
NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p->p_ucred->cr_uid == uap->who && p_cansee(td, p) == 0) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (found == 0 && error == 0) error = ESRCH; return (error); } /* * Set "nice" for a (whole) process. */ static int donice(struct thread *td, struct proc *p, int n) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = p_cansched(td, p))) return (error); if (n > PRIO_MAX) n = PRIO_MAX; if (n < PRIO_MIN) n = PRIO_MIN; if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0) return (EACCES); sched_nice(p, n); return (0); } static int unprivileged_idprio; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW, &unprivileged_idprio, 0, "Allow non-root users to set an idle priority"); /* * Set realtime priority for LWP. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_thread_args { int function; lwpid_t lwpid; struct rtprio *rtp; }; #endif int sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) { struct proc *p; struct rtprio rtp; struct thread *td1; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; if (uap->lwpid == 0 || uap->lwpid == td->td_tid) { p = td->td_proc; td1 = td; PROC_LOCK(p); } else { /* Only look up thread in current process */ td1 = tdfind(uap->lwpid, curproc->p_pid); if (td1 == NULL) return (ESRCH); p = td1->td_proc; } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; pri_to_rtp(td1, &rtp); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* Disallow setting rtprio in most cases if not superuser. */ /* * Realtime priority has to be restricted for reasons which * should be obvious. However, for idleprio processes, there is * a potential for system deadlock if an idleprio process gains * a lock on a resource that other processes need (and the * idleprio process can't run due to a CPU-bound normal * process). Fix me! XXX * * This problem is not only related to idleprio process. * A user level program can obtain a file lock and hold it * indefinitely. Additionally, without idleprio processes it is * still conceivable that a program with low priority will never * get to run. In short, allowing this feature might make it * easier to lock a resource indefinitely, but it is not the * only thing that makes it possible. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME || (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE && unprivileged_idprio == 0)) { error = priv_check(td, PRIV_SCHED_RTPRIO); if (error) break; } error = rtp_to_pri(&rtp, td1); break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } /* * Set realtime priority. 
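 *
 * For orientation, rtp_to_pri() below maps the (type, prio) pair onto
 * the scheduler's priority ranges; in outline (a sketch, not an
 * exhaustive table):
 *
 *	RTP_PRIO_REALTIME:	newpri = PRI_MIN_REALTIME + prio
 *	RTP_PRIO_NORMAL:	newpri = PRI_MIN_TIMESHARE + prio
 *	RTP_PRIO_IDLE:		newpri = PRI_MIN_IDLE + prio
 *
 * with prio validated against the range of each class first.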
*/ #ifndef _SYS_SYSPROTO_H_ struct rtprio_args { int function; pid_t pid; struct rtprio *rtp; }; #endif int -sys_rtprio(struct thread *td, register struct rtprio_args *uap) +sys_rtprio(struct thread *td, struct rtprio_args *uap) { struct proc *p; struct thread *tdp; struct rtprio rtp; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; if (uap->pid == 0) { p = td->td_proc; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; /* * Return OUR priority if no pid specified, * or if one is, report the highest priority * in the process. There isn't much more you can do as * there is only room to return a single priority. * Note: specifying our own pid is not the same * as leaving it zero. */ if (uap->pid == 0) { pri_to_rtp(td, &rtp); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; FOREACH_THREAD_IN_PROC(p, tdp) { pri_to_rtp(tdp, &rtp2); if (rtp2.type < rtp.type || (rtp2.type == rtp.type && rtp2.prio < rtp.prio)) { rtp.type = rtp2.type; rtp.prio = rtp2.prio; } } } PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* * Disallow setting rtprio in most cases if not superuser. * See the comment in sys_rtprio_thread about idprio * threads holding a lock. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME || (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE && !unprivileged_idprio)) { error = priv_check(td, PRIV_SCHED_RTPRIO); if (error) break; } /* * If we are setting our own priority, set just our * thread but if we are doing another process, * do all the threads on that process. If we * specify our own pid we do the latter. 
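 *
 * An illustrative userland view of the distinction (values are
 * examples only):
 *
 *	rtprio(RTP_SET, 0, &rtp);		current thread only
 *	rtprio(RTP_SET, getpid(), &rtp);	every thread in the process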
*/ if (uap->pid == 0) { error = rtp_to_pri(&rtp, td); } else { FOREACH_THREAD_IN_PROC(p, td) { if ((error = rtp_to_pri(&rtp, td)) != 0) break; } } break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } int rtp_to_pri(struct rtprio *rtp, struct thread *td) { u_char newpri, oldclass, oldpri; switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); newpri = PRI_MIN_REALTIME + rtp->prio; break; case RTP_PRIO_NORMAL: if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) return (EINVAL); newpri = PRI_MIN_TIMESHARE + rtp->prio; break; case RTP_PRIO_IDLE: if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); newpri = PRI_MIN_IDLE + rtp->prio; break; default: return (EINVAL); } thread_lock(td); oldclass = td->td_pri_class; sched_class(td, rtp->type); /* XXX fix */ oldpri = td->td_user_pri; sched_user_prio(td, newpri); if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL || td->td_pri_class != RTP_PRIO_NORMAL)) sched_prio(td, td->td_user_pri); if (TD_ON_UPILOCK(td) && oldpri != newpri) { critical_enter(); thread_unlock(td); umtx_pi_adjust(td, oldpri); critical_exit(); } else thread_unlock(td); return (0); } void pri_to_rtp(struct thread *td, struct rtprio *rtp) { thread_lock(td); switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME; break; case PRI_TIMESHARE: rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE; break; case PRI_IDLE: rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE; break; default: break; } rtp->type = td->td_pri_class; thread_unlock(td); } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int -osetrlimit(struct thread *td, register struct osetrlimit_args *uap) +osetrlimit(struct thread *td, struct osetrlimit_args *uap) { struct orlimit olim; struct rlimit lim; int error; if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit)))) return (error); lim.rlim_cur = olim.rlim_cur; lim.rlim_max = olim.rlim_max; error = kern_setrlimit(td, uap->which, &lim); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ogetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int -ogetrlimit(struct thread *td, register struct ogetrlimit_args *uap) +ogetrlimit(struct thread *td, struct ogetrlimit_args *uap) { struct orlimit olim; struct rlimit rl; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); lim_rlimit(td, uap->which, &rl); /* * XXX would be more correct to convert only RLIM_INFINITY to the * old RLIM_INFINITY and fail with EOVERFLOW for other larger * values. Most 64->32 and 32->16 conversions, including not * unimportant ones of uids are even more broken than what we * do here (they blindly truncate). We don't do this correctly * here since we have little experience with EOVERFLOW yet. * Elsewhere, getuid() can't fail... */ olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur; olim.rlim_max = rl.rlim_max > 0x7fffffff ? 
0x7fffffff : rl.rlim_max; error = copyout(&olim, uap->rlp, sizeof(olim)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct __setrlimit_args { u_int which; struct rlimit *rlp; }; #endif int -sys_setrlimit(struct thread *td, register struct __setrlimit_args *uap) +sys_setrlimit(struct thread *td, struct __setrlimit_args *uap) { struct rlimit alim; int error; if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit)))) return (error); error = kern_setrlimit(td, uap->which, &alim); return (error); } static void lim_cb(void *arg) { struct rlimit rlim; struct thread *td; struct proc *p; p = arg; PROC_LOCK_ASSERT(p, MA_OWNED); /* * Check if the process exceeds its cpu resource allocation. If * it reaches the max, arrange to kill the process in ast(). */ if (p->p_cpulimit == RLIM_INFINITY) return; PROC_STATLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); } PROC_STATUNLOCK(p); if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) { lim_rlimit_proc(p, RLIMIT_CPU, &rlim); if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) { killproc(p, "exceeded maximum CPU limit"); } else { if (p->p_cpulimit < rlim.rlim_max) p->p_cpulimit += 5; kern_psignal(p, SIGXCPU); } } if ((p->p_flag & P_WEXIT) == 0) callout_reset_sbt(&p->p_limco, SBT_1S, 0, lim_cb, p, C_PREL(1)); } int kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp) { return (kern_proc_setrlimit(td, td->td_proc, which, limp)); } int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp) { struct plimit *newlim, *oldlim; - register struct rlimit *alimp; + struct rlimit *alimp; struct rlimit oldssiz; int error; if (which >= RLIM_NLIMITS) return (EINVAL); /* * Preserve historical bugs by treating negative limits as unsigned. 
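 *
 * For example (illustrative value), a caller passing rlim_cur = -1
 * gets RLIM_INFINITY here instead of an error, as the historical
 * interface treated the field as unsigned.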
*/ if (limp->rlim_cur < 0) limp->rlim_cur = RLIM_INFINITY; if (limp->rlim_max < 0) limp->rlim_max = RLIM_INFINITY; oldssiz.rlim_cur = 0; newlim = lim_alloc(); PROC_LOCK(p); oldlim = p->p_limit; alimp = &oldlim->pl_rlimit[which]; if (limp->rlim_cur > alimp->rlim_max || limp->rlim_max > alimp->rlim_max) if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) { PROC_UNLOCK(p); lim_free(newlim); return (error); } if (limp->rlim_cur > limp->rlim_max) limp->rlim_cur = limp->rlim_max; lim_copy(newlim, oldlim); alimp = &newlim->pl_rlimit[which]; switch (which) { case RLIMIT_CPU: if (limp->rlim_cur != RLIM_INFINITY && p->p_cpulimit == RLIM_INFINITY) callout_reset_sbt(&p->p_limco, SBT_1S, 0, lim_cb, p, C_PREL(1)); p->p_cpulimit = limp->rlim_cur; break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) limp->rlim_cur = maxdsiz; if (limp->rlim_max > maxdsiz) limp->rlim_max = maxdsiz; break; case RLIMIT_STACK: if (limp->rlim_cur > maxssiz) limp->rlim_cur = maxssiz; if (limp->rlim_max > maxssiz) limp->rlim_max = maxssiz; oldssiz = *alimp; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(&oldssiz, RLIMIT_STACK); break; case RLIMIT_NOFILE: if (limp->rlim_cur > maxfilesperproc) limp->rlim_cur = maxfilesperproc; if (limp->rlim_max > maxfilesperproc) limp->rlim_max = maxfilesperproc; break; case RLIMIT_NPROC: if (limp->rlim_cur > maxprocperuid) limp->rlim_cur = maxprocperuid; if (limp->rlim_max > maxprocperuid) limp->rlim_max = maxprocperuid; if (limp->rlim_cur < 1) limp->rlim_cur = 1; if (limp->rlim_max < 1) limp->rlim_max = 1; break; } if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(limp, which); *alimp = *limp; p->p_limit = newlim; PROC_UPDATE_COW(p); PROC_UNLOCK(p); lim_free(oldlim); if (which == RLIMIT_STACK && /* * Skip calls from exec_new_vmspace(), done when stack is * not mapped yet. */ (td != curthread || (p->p_flag & P_INEXEC) == 0)) { /* * Stack is allocated to the max at exec time with only * "rlim_cur" bytes accessible. If stack limit is going * up make more accessible, if going down make inaccessible. */ if (limp->rlim_cur != oldssiz.rlim_cur) { vm_offset_t addr; vm_size_t size; vm_prot_t prot; if (limp->rlim_cur > oldssiz.rlim_cur) { prot = p->p_sysent->sv_stackprot; size = limp->rlim_cur - oldssiz.rlim_cur; addr = p->p_sysent->sv_usrstack - limp->rlim_cur; } else { prot = VM_PROT_NONE; size = oldssiz.rlim_cur - limp->rlim_cur; addr = p->p_sysent->sv_usrstack - oldssiz.rlim_cur; } addr = trunc_page(addr); size = round_page(size); (void)vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, FALSE); } } return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* ARGSUSED */ int -sys_getrlimit(struct thread *td, register struct __getrlimit_args *uap) +sys_getrlimit(struct thread *td, struct __getrlimit_args *uap) { struct rlimit rlim; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); lim_rlimit(td, uap->which, &rlim); error = copyout(&rlim, uap->rlp, sizeof(struct rlimit)); return (error); } /* * Transform the running time and tick information for children of proc p * into user and system time usage. */ void calccru(struct proc *p, struct timeval *up, struct timeval *sp) { PROC_LOCK_ASSERT(p, MA_OWNED); calcru1(p, &p->p_crux, up, sp); } /* * Transform the running time and tick information in proc p into user * and system time usage. If appropriate, include the current time slice * on this CPU. 
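 *
 * In outline (a sketch of the code below), for the current process the
 * pending slice is folded in first:
 *
 *	runtime = cpu_ticks() - PCPU_GET(switchtime);
 *	td->td_runtime += runtime;
 *
 * after which the per-thread tick counts are aggregated with ruxagg()
 * and converted to timevals by calcru1().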
*/ void calcru(struct proc *p, struct timeval *up, struct timeval *sp) { struct thread *td; uint64_t runtime, u; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_STATLOCK_ASSERT(p, MA_OWNED); /* * If we are getting stats for the current process, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ td = curthread; if (td->td_proc == p) { u = cpu_ticks(); runtime = u - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, u); } /* Make sure the per-thread stats are current. */ FOREACH_THREAD_IN_PROC(p, td) { if (td->td_incruntime == 0) continue; ruxagg(p, td); } calcru1(p, &p->p_rux, up, sp); } /* Collect resource usage for a single thread. */ void rufetchtd(struct thread *td, struct rusage *ru) { struct proc *p; uint64_t runtime, u; p = td->td_proc; PROC_STATLOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If we are getting stats for the current thread, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ if (td == curthread) { u = cpu_ticks(); runtime = u - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, u); } ruxagg(p, td); *ru = td->td_ru; calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime); } static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp) { /* {user, system, interrupt, total} {ticks, usec}: */ uint64_t ut, uu, st, su, it, tt, tu; ut = ruxp->rux_uticks; st = ruxp->rux_sticks; it = ruxp->rux_iticks; tt = ut + st + it; if (tt == 0) { /* Avoid divide by zero */ st = 1; tt = 1; } tu = cputick2usec(ruxp->rux_runtime); if ((int64_t)tu < 0) { /* XXX: this should be an assert /phk */ printf("calcru: negative runtime of %jd usec for pid %d (%s)\n", (intmax_t)tu, p->p_pid, p->p_comm); tu = ruxp->rux_tu; } if (tu >= ruxp->rux_tu) { /* * The normal case, time increased. * Enforce monotonicity of bucketed numbers. */ uu = (tu * ut) / tt; if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; su = (tu * st) / tt; if (su < ruxp->rux_su) su = ruxp->rux_su; } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) { /* * When we calibrate the cputicker, it is not uncommon to * see the presumably fixed frequency increase slightly over * time as a result of thermal stabilization and NTP * discipline (of the reference clock). We therefore ignore * a bit of backwards slop because we expect to catch up * shortly. We use a 3 microsecond limit to catch low * counts and a 1% limit for high counts. */ uu = ruxp->rux_uu; su = ruxp->rux_su; tu = ruxp->rux_tu; } else { /* tu < ruxp->rux_tu */ /* * What happened here was likely that a laptop, which ran at * a reduced clock frequency at boot, kicked into high gear. * The wisdom of spamming this message in that case is * dubious, but it might also be indicative of something * serious, so lets keep it and hope laptops can be made * more truthful about their CPU speed via ACPI. 
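 *
 * (Illustrative numbers: with a recorded rux_tu of 2000000 usec and a
 * new tu of 1500000 usec, neither the 3 usec nor the 1% slop test
 * above accepts the sample, so we land here and rescale the buckets
 * to the smaller tu.)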
*/ printf("calcru: runtime went backwards from %ju usec " "to %ju usec for pid %d (%s)\n", (uintmax_t)ruxp->rux_tu, (uintmax_t)tu, p->p_pid, p->p_comm); uu = (tu * ut) / tt; su = (tu * st) / tt; } ruxp->rux_uu = uu; ruxp->rux_su = su; ruxp->rux_tu = tu; up->tv_sec = uu / 1000000; up->tv_usec = uu % 1000000; sp->tv_sec = su / 1000000; sp->tv_usec = su % 1000000; } #ifndef _SYS_SYSPROTO_H_ struct getrusage_args { int who; struct rusage *rusage; }; #endif int -sys_getrusage(register struct thread *td, register struct getrusage_args *uap) +sys_getrusage(struct thread *td, struct getrusage_args *uap) { struct rusage ru; int error; error = kern_getrusage(td, uap->who, &ru); if (error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int kern_getrusage(struct thread *td, int who, struct rusage *rup) { struct proc *p; int error; error = 0; p = td->td_proc; PROC_LOCK(p); switch (who) { case RUSAGE_SELF: rufetchcalc(p, rup, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_CHILDREN: *rup = p->p_stats->p_cru; calccru(p, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_THREAD: PROC_STATLOCK(p); thread_lock(td); rufetchtd(td, rup); thread_unlock(td); PROC_STATUNLOCK(p); break; default: error = EINVAL; } PROC_UNLOCK(p); return (error); } void rucollect(struct rusage *ru, struct rusage *ru2) { long *ip, *ip2; int i; if (ru->ru_maxrss < ru2->ru_maxrss) ru->ru_maxrss = ru2->ru_maxrss; ip = &ru->ru_first; ip2 = &ru2->ru_first; for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) *ip++ += *ip2++; } void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, struct rusage_ext *rux2) { rux->rux_runtime += rux2->rux_runtime; rux->rux_uticks += rux2->rux_uticks; rux->rux_sticks += rux2->rux_sticks; rux->rux_iticks += rux2->rux_iticks; rux->rux_uu += rux2->rux_uu; rux->rux_su += rux2->rux_su; rux->rux_tu += rux2->rux_tu; rucollect(ru, ru2); } /* * Aggregate tick counts into the proc's rusage_ext. */ static void ruxagg_locked(struct rusage_ext *rux, struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED); rux->rux_runtime += td->td_incruntime; rux->rux_uticks += td->td_uticks; rux->rux_sticks += td->td_sticks; rux->rux_iticks += td->td_iticks; } void ruxagg(struct proc *p, struct thread *td) { thread_lock(td); ruxagg_locked(&p->p_rux, td); ruxagg_locked(&td->td_rux, td); td->td_incruntime = 0; td->td_uticks = 0; td->td_iticks = 0; td->td_sticks = 0; thread_unlock(td); } /* * Update the rusage_ext structure and fetch a valid aggregate rusage * for proc p if storage for one is supplied. */ void rufetch(struct proc *p, struct rusage *ru) { struct thread *td; PROC_STATLOCK_ASSERT(p, MA_OWNED); *ru = p->p_ru; if (p->p_numthreads > 0) { FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); rucollect(ru, &td->td_ru); } } } /* * Atomically perform a rufetch and a calcru together. * Consumers can safely assume that calcru is executed only once * rufetch is completed. */ void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up, struct timeval *sp) { PROC_STATLOCK(p); rufetch(p, ru); calcru(p, up, sp); PROC_STATUNLOCK(p); } /* * Allocate a new resource limits structure and initialize its * reference count and mutex pointer.
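 *
 * The structures are shared copy-on-write across fork(); a sketch of
 * the usual update sequence, as in kern_proc_setrlimit() above:
 *
 *	newlim = lim_alloc();
 *	lim_copy(newlim, p->p_limit);
 *	p->p_limit = newlim;
 *	lim_free(oldlim);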
*/ struct plimit * lim_alloc() { struct plimit *limp; limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK); refcount_init(&limp->pl_refcnt, 1); return (limp); } struct plimit * lim_hold(struct plimit *limp) { refcount_acquire(&limp->pl_refcnt); return (limp); } void lim_fork(struct proc *p1, struct proc *p2) { PROC_LOCK_ASSERT(p1, MA_OWNED); PROC_LOCK_ASSERT(p2, MA_OWNED); p2->p_limit = lim_hold(p1->p_limit); callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0); if (p1->p_cpulimit != RLIM_INFINITY) callout_reset_sbt(&p2->p_limco, SBT_1S, 0, lim_cb, p2, C_PREL(1)); } void lim_free(struct plimit *limp) { if (refcount_release(&limp->pl_refcnt)) free((void *)limp, M_PLIMIT); } /* * Make a copy of the plimit structure. * We share these structures copy-on-write after fork. */ void lim_copy(struct plimit *dst, struct plimit *src) { KASSERT(dst->pl_refcnt <= 1, ("lim_copy to shared limit")); bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit)); } /* * Return the hard limit for a particular system resource. The * which parameter specifies the index into the rlimit array. */ rlim_t lim_max(struct thread *td, int which) { struct rlimit rl; lim_rlimit(td, which, &rl); return (rl.rlim_max); } rlim_t lim_max_proc(struct proc *p, int which) { struct rlimit rl; lim_rlimit_proc(p, which, &rl); return (rl.rlim_max); } /* * Return the current (soft) limit for a particular system resource. * The which parameter which specifies the index into the rlimit array */ rlim_t lim_cur(struct thread *td, int which) { struct rlimit rl; lim_rlimit(td, which, &rl); return (rl.rlim_cur); } rlim_t lim_cur_proc(struct proc *p, int which) { struct rlimit rl; lim_rlimit_proc(p, which, &rl); return (rl.rlim_cur); } /* * Return a copy of the entire rlimit structure for the system limit * specified by 'which' in the rlimit structure pointed to by 'rlp'. */ void lim_rlimit(struct thread *td, int which, struct rlimit *rlp) { struct proc *p = td->td_proc; MPASS(td == curthread); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = td->td_limit->pl_rlimit[which]; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(rlp, which); } void lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp) { PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = p->p_limit->pl_rlimit[which]; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(rlp, which); } void uihashinit() { uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash); rw_init(&uihashtbl_lock, "uidinfo hash"); } /* * Look up a uidinfo struct for the parameter uid. * uihashtbl_lock must be locked. * Increase refcount on uidinfo struct returned. */ static struct uidinfo * uilookup(uid_t uid) { struct uihashhead *uipp; struct uidinfo *uip; rw_assert(&uihashtbl_lock, RA_LOCKED); uipp = UIHASH(uid); LIST_FOREACH(uip, uipp, ui_hash) if (uip->ui_uid == uid) { uihold(uip); break; } return (uip); } /* * Find or allocate a struct uidinfo for a particular uid. * Returns with uidinfo struct referenced. * uifree() should be called on a struct uidinfo when released. 
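 *
 * Typical usage is a hold/account/release pattern (a sketch, with an
 * illustrative limit):
 *
 *	uip = uifind(uid);
 *	if (!chgproccnt(uip, 1, maxprocperuid))
 *		... over the limit ...
 *	...
 *	uifree(uip);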
*/ struct uidinfo * uifind(uid_t uid) { struct uidinfo *new_uip, *uip; rw_rlock(&uihashtbl_lock); uip = uilookup(uid); rw_runlock(&uihashtbl_lock); if (uip != NULL) return (uip); new_uip = malloc(sizeof(*new_uip), M_UIDINFO, M_WAITOK | M_ZERO); racct_create(&new_uip->ui_racct); refcount_init(&new_uip->ui_ref, 1); new_uip->ui_uid = uid; mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF); rw_wlock(&uihashtbl_lock); /* * There's a chance someone created our uidinfo while we * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate uidinfo. */ if ((uip = uilookup(uid)) == NULL) { LIST_INSERT_HEAD(UIHASH(uid), new_uip, ui_hash); rw_wunlock(&uihashtbl_lock); uip = new_uip; } else { rw_wunlock(&uihashtbl_lock); racct_destroy(&new_uip->ui_racct); mtx_destroy(&new_uip->ui_vmsize_mtx); free(new_uip, M_UIDINFO); } return (uip); } /* * Place another refcount on a uidinfo struct. */ void uihold(struct uidinfo *uip) { refcount_acquire(&uip->ui_ref); } /*- * Since uidinfo structs have a long lifetime, we use an * opportunistic refcounting scheme to avoid locking the lookup hash * for each release. * * If the refcount hits 0, we need to free the structure, * which means we need to lock the hash. * Optimal case: * After locking the struct and lowering the refcount, if we find * that we don't need to free, simply unlock and return. * Suboptimal case: * If refcount lowering results in need to free, bump the count * back up, lose the lock and acquire the locks in the proper * order to try again. */ void uifree(struct uidinfo *uip) { int old; /* Prepare for optimal case. */ old = uip->ui_ref; if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1)) return; /* Prepare for suboptimal case. */ rw_wlock(&uihashtbl_lock); if (refcount_release(&uip->ui_ref) == 0) { rw_wunlock(&uihashtbl_lock); return; } racct_destroy(&uip->ui_racct); LIST_REMOVE(uip, ui_hash); rw_wunlock(&uihashtbl_lock); if (uip->ui_sbsize != 0) printf("freeing uidinfo: uid = %d, sbsize = %ld\n", uip->ui_uid, uip->ui_sbsize); if (uip->ui_proccnt != 0) printf("freeing uidinfo: uid = %d, proccnt = %ld\n", uip->ui_uid, uip->ui_proccnt); if (uip->ui_vmsize != 0) printf("freeing uidinfo: uid = %d, swapuse = %lld\n", uip->ui_uid, (unsigned long long)uip->ui_vmsize); mtx_destroy(&uip->ui_vmsize_mtx); free(uip, M_UIDINFO); } #ifdef RACCT void ui_racct_foreach(void (*callback)(struct racct *racct, void *arg2, void *arg3), void (*pre)(void), void (*post)(void), void *arg2, void *arg3) { struct uidinfo *uip; struct uihashhead *uih; rw_rlock(&uihashtbl_lock); if (pre != NULL) (pre)(); for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) { LIST_FOREACH(uip, uih, ui_hash) { (callback)(uip->ui_racct, arg2, arg3); } } if (post != NULL) (post)(); rw_runlock(&uihashtbl_lock); } #endif static inline int chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name) { /* Don't allow them to exceed max, but allow subtraction. */ if (diff > 0 && max != 0) { if (atomic_fetchadd_long(limit, (long)diff) + diff > max) { atomic_subtract_long(limit, (long)diff); return (0); } } else { atomic_add_long(limit, (long)diff); if (*limit < 0) printf("negative %s for uid = %d\n", name, uip->ui_uid); } return (1); } /* * Change the count associated with number of processes * a given user is using. 
When 'max' is 0, don't enforce a limit */ int chgproccnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt")); } /* * Change the total socket buffer size a user has used. */ int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max) { int diff, rv; diff = to - *hiwat; if (diff > 0 && max == 0) { rv = 0; } else { rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize"); if (rv != 0) *hiwat = to; } return (rv); } /* * Change the count associated with number of pseudo-terminals * a given user is using. When 'max' is 0, don't enforce a limit */ int chgptscnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt")); } int chgkqcnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt")); } int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_umtxcnt, diff, max, "umtxcnt")); } Index: stable/11/sys/kern/kern_sig.c =================================================================== --- stable/11/sys/kern/kern_sig.c (revision 331642) +++ stable/11/sys/kern/kern_sig.c (revision 331643) @@ -1,3777 +1,3736 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_gzio.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ONSIG 32 /* NSIG for osig* syscalls. XXX. */ SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE3(proc, , , signal__send, "struct thread *", "struct proc *", "int"); SDT_PROBE_DEFINE2(proc, , , signal__clear, "int", "ksiginfo_t *"); SDT_PROBE_DEFINE3(proc, , , signal__discard, "struct thread *", "struct proc *", "int"); static int coredump(struct thread *); static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi); static int issignal(struct thread *td); static int sigprop(int sig); static void tdsigwakeup(struct thread *, int, sig_t, int); static int sig_suspend_threads(struct thread *, struct proc *, int); static int filt_sigattach(struct knote *kn); static void filt_sigdetach(struct knote *kn); static int filt_signal(struct knote *kn, long hint); static struct thread *sigtd(struct proc *p, int sig, int prop); static void sigqueue_start(void); static uma_zone_t ksiginfo_zone = NULL; struct filterops sig_filtops = { .f_isfd = 0, .f_attach = filt_sigattach, .f_detach = filt_sigdetach, .f_event = filt_signal, }; static int kern_logsigexit = 1; SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, &kern_logsigexit, 0, "Log processes quitting on abnormal signals to syslog(3)"); static int kern_forcesigexit = 1; SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW, &kern_forcesigexit, 0, "Force trap signal to be handled"); static SYSCTL_NODE(_kern, OID_AUTO, sigqueue, CTLFLAG_RW, 0, "POSIX real time signal"); static int max_pending_per_proc = 128; SYSCTL_INT(_kern_sigqueue, OID_AUTO, max_pending_per_proc, CTLFLAG_RW, &max_pending_per_proc, 0, "Max pending signals per proc"); static int preallocate_siginfo = 1024; SYSCTL_INT(_kern_sigqueue, OID_AUTO, preallocate, CTLFLAG_RDTUN, &preallocate_siginfo, 0, "Preallocated signal memory size"); static int signal_overflow = 0; SYSCTL_INT(_kern_sigqueue, OID_AUTO, overflow, CTLFLAG_RD, &signal_overflow, 0, "Number of signals overflew"); static int signal_alloc_fail = 0; SYSCTL_INT(_kern_sigqueue, OID_AUTO, alloc_fail, CTLFLAG_RD, &signal_alloc_fail, 0, "signals failed to be allocated"); static int kern_lognosys = 0; SYSCTL_INT(_kern, OID_AUTO, lognosys, CTLFLAG_RWTUN, &kern_lognosys, 0, "Log invalid syscalls"); SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL); /* * Policy -- Can ucred cr1 send SIGIO to process cr2? * Should use cr_cansignal() once cr_cansignal() allows SIGIO and SIGURG * in the right situations. 
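 *
 * In effect (a restatement of the macro below): root may always send,
 * and otherwise the sender's real or effective uid must match the
 * receiver's real or effective uid; e.g. an unprivileged sender with
 * ruid 1001 may deliver SIGIO to a receiver whose uid or ruid is 1001.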
*/ #define CANSIGIO(cr1, cr2) \ ((cr1)->cr_uid == 0 || \ (cr1)->cr_ruid == (cr2)->cr_ruid || \ (cr1)->cr_uid == (cr2)->cr_ruid || \ (cr1)->cr_ruid == (cr2)->cr_uid || \ (cr1)->cr_uid == (cr2)->cr_uid) static int sugid_coredump; SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); static int capmode_coredump; SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, &capmode_coredump, 0, "Allow processes in capability mode to dump core"); static int do_coredump = 1; SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, &do_coredump, 0, "Enable/Disable coredumps"); static int set_core_nodump_flag = 0; SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, 0, "Enable setting the NODUMP flag on coredump files"); static int coredump_devctl = 0; SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, 0, "Generate a devctl notification when processes coredump"); /* * Signal properties and actions. * The array below categorizes the signals and their default actions * according to the following properties: */ #define SA_KILL 0x01 /* terminates process by default */ #define SA_CORE 0x02 /* ditto and coredumps */ #define SA_STOP 0x04 /* suspend process */ #define SA_TTYSTOP 0x08 /* ditto, from tty */ #define SA_IGNORE 0x10 /* ignore by default */ #define SA_CONT 0x20 /* continue if suspended */ #define SA_CANTMASK 0x40 /* non-maskable, catchable */ static int sigproptbl[NSIG] = { SA_KILL, /* SIGHUP */ SA_KILL, /* SIGINT */ SA_KILL|SA_CORE, /* SIGQUIT */ SA_KILL|SA_CORE, /* SIGILL */ SA_KILL|SA_CORE, /* SIGTRAP */ SA_KILL|SA_CORE, /* SIGABRT */ SA_KILL|SA_CORE, /* SIGEMT */ SA_KILL|SA_CORE, /* SIGFPE */ SA_KILL, /* SIGKILL */ SA_KILL|SA_CORE, /* SIGBUS */ SA_KILL|SA_CORE, /* SIGSEGV */ SA_KILL|SA_CORE, /* SIGSYS */ SA_KILL, /* SIGPIPE */ SA_KILL, /* SIGALRM */ SA_KILL, /* SIGTERM */ SA_IGNORE, /* SIGURG */ SA_STOP, /* SIGSTOP */ SA_STOP|SA_TTYSTOP, /* SIGTSTP */ SA_IGNORE|SA_CONT, /* SIGCONT */ SA_IGNORE, /* SIGCHLD */ SA_STOP|SA_TTYSTOP, /* SIGTTIN */ SA_STOP|SA_TTYSTOP, /* SIGTTOU */ SA_IGNORE, /* SIGIO */ SA_KILL, /* SIGXCPU */ SA_KILL, /* SIGXFSZ */ SA_KILL, /* SIGVTALRM */ SA_KILL, /* SIGPROF */ SA_IGNORE, /* SIGWINCH */ SA_IGNORE, /* SIGINFO */ SA_KILL, /* SIGUSR1 */ SA_KILL, /* SIGUSR2 */ }; static void reschedule_signals(struct proc *p, sigset_t block, int flags); static void sigqueue_start(void) { ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_prealloc(ksiginfo_zone, preallocate_siginfo); p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS); p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1); p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc); } ksiginfo_t * ksiginfo_alloc(int wait) { int flags; flags = M_ZERO; if (! wait) flags |= M_NOWAIT; if (ksiginfo_zone != NULL) return ((ksiginfo_t *)uma_zalloc(ksiginfo_zone, flags)); return (NULL); } void ksiginfo_free(ksiginfo_t *ksi) { uma_zfree(ksiginfo_zone, ksi); } static __inline int ksiginfo_tryfree(ksiginfo_t *ksi) { if (!(ksi->ksi_flags & KSI_EXT)) { uma_zfree(ksiginfo_zone, ksi); return (1); } return (0); } void sigqueue_init(sigqueue_t *list, struct proc *p) { SIGEMPTYSET(list->sq_signals); SIGEMPTYSET(list->sq_kill); SIGEMPTYSET(list->sq_ptrace); TAILQ_INIT(&list->sq_list); list->sq_proc = p; list->sq_flags = SQ_INIT; } /* * Get a signal's ksiginfo. 
* Return: * 0 - signal not found * others - signal number */ static int sigqueue_get(sigqueue_t *sq, int signo, ksiginfo_t *si) { struct proc *p = sq->sq_proc; struct ksiginfo *ksi, *next; int count = 0; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); if (!SIGISMEMBER(sq->sq_signals, signo)) return (0); if (SIGISMEMBER(sq->sq_ptrace, signo)) { count++; SIGDELSET(sq->sq_ptrace, signo); si->ksi_flags |= KSI_PTRACE; } if (SIGISMEMBER(sq->sq_kill, signo)) { count++; if (count == 1) SIGDELSET(sq->sq_kill, signo); } TAILQ_FOREACH_SAFE(ksi, &sq->sq_list, ksi_link, next) { if (ksi->ksi_signo == signo) { if (count == 0) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; ksiginfo_copy(ksi, si); if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } if (++count > 1) break; } } if (count <= 1) SIGDELSET(sq->sq_signals, signo); si->ksi_signo = signo; return (signo); } void sigqueue_take(ksiginfo_t *ksi) { struct ksiginfo *kp; struct proc *p; sigqueue_t *sq; if (ksi == NULL || (sq = ksi->ksi_sigq) == NULL) return; p = sq->sq_proc; TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (!(ksi->ksi_flags & KSI_EXT) && p != NULL) p->p_pendingcnt--; for (kp = TAILQ_FIRST(&sq->sq_list); kp != NULL; kp = TAILQ_NEXT(kp, ksi_link)) { if (kp->ksi_signo == ksi->ksi_signo) break; } if (kp == NULL && !SIGISMEMBER(sq->sq_kill, ksi->ksi_signo) && !SIGISMEMBER(sq->sq_ptrace, ksi->ksi_signo)) SIGDELSET(sq->sq_signals, ksi->ksi_signo); } static int sigqueue_add(sigqueue_t *sq, int signo, ksiginfo_t *si) { struct proc *p = sq->sq_proc; struct ksiginfo *ksi; int ret = 0; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); /* * SIGKILL/SIGSTOP cannot be caught or masked, so take the fast path * for these signals. */ if (signo == SIGKILL || signo == SIGSTOP || si == NULL) { SIGADDSET(sq->sq_kill, signo); goto out_set_bit; } /* directly insert the ksi, don't copy it */ if (si->ksi_flags & KSI_INS) { if (si->ksi_flags & KSI_HEAD) TAILQ_INSERT_HEAD(&sq->sq_list, si, ksi_link); else TAILQ_INSERT_TAIL(&sq->sq_list, si, ksi_link); si->ksi_sigq = sq; goto out_set_bit; } if (__predict_false(ksiginfo_zone == NULL)) { SIGADDSET(sq->sq_kill, signo); goto out_set_bit; } if (p != NULL && p->p_pendingcnt >= max_pending_per_proc) { signal_overflow++; ret = EAGAIN; } else if ((ksi = ksiginfo_alloc(0)) == NULL) { signal_alloc_fail++; ret = EAGAIN; } else { if (p != NULL) p->p_pendingcnt++; ksiginfo_copy(si, ksi); ksi->ksi_signo = signo; if (si->ksi_flags & KSI_HEAD) TAILQ_INSERT_HEAD(&sq->sq_list, ksi, ksi_link); else TAILQ_INSERT_TAIL(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = sq; } if (ret != 0) { if ((si->ksi_flags & KSI_PTRACE) != 0) { SIGADDSET(sq->sq_ptrace, signo); ret = 0; goto out_set_bit; } else if ((si->ksi_flags & KSI_TRAP) != 0 || (si->ksi_flags & KSI_SIGQ) == 0) { SIGADDSET(sq->sq_kill, signo); ret = 0; goto out_set_bit; } return (ret); } out_set_bit: SIGADDSET(sq->sq_signals, signo); return (ret); } void sigqueue_flush(sigqueue_t *sq) { struct proc *p = sq->sq_proc; ksiginfo_t *ksi; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); if (p != NULL) PROC_LOCK_ASSERT(p, MA_OWNED); while ((ksi = TAILQ_FIRST(&sq->sq_list)) != NULL) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } SIGEMPTYSET(sq->sq_signals); SIGEMPTYSET(sq->sq_kill); SIGEMPTYSET(sq->sq_ptrace); } static void sigqueue_move_set(sigqueue_t *src, sigqueue_t *dst, const sigset_t *set) { sigset_t tmp; struct proc *p1, *p2; 
ksiginfo_t *ksi, *next; KASSERT(src->sq_flags & SQ_INIT, ("src sigqueue not inited")); KASSERT(dst->sq_flags & SQ_INIT, ("dst sigqueue not inited")); p1 = src->sq_proc; p2 = dst->sq_proc; /* Move siginfo to target list */ TAILQ_FOREACH_SAFE(ksi, &src->sq_list, ksi_link, next) { if (SIGISMEMBER(*set, ksi->ksi_signo)) { TAILQ_REMOVE(&src->sq_list, ksi, ksi_link); if (p1 != NULL) p1->p_pendingcnt--; TAILQ_INSERT_TAIL(&dst->sq_list, ksi, ksi_link); ksi->ksi_sigq = dst; if (p2 != NULL) p2->p_pendingcnt++; } } /* Move pending bits to target list */ tmp = src->sq_kill; SIGSETAND(tmp, *set); SIGSETOR(dst->sq_kill, tmp); SIGSETNAND(src->sq_kill, tmp); tmp = src->sq_ptrace; SIGSETAND(tmp, *set); SIGSETOR(dst->sq_ptrace, tmp); SIGSETNAND(src->sq_ptrace, tmp); tmp = src->sq_signals; SIGSETAND(tmp, *set); SIGSETOR(dst->sq_signals, tmp); SIGSETNAND(src->sq_signals, tmp); } #if 0 static void sigqueue_move(sigqueue_t *src, sigqueue_t *dst, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_move_set(src, dst, &set); } #endif static void sigqueue_delete_set(sigqueue_t *sq, const sigset_t *set) { struct proc *p = sq->sq_proc; ksiginfo_t *ksi, *next; KASSERT(sq->sq_flags & SQ_INIT, ("src sigqueue not inited")); /* Remove siginfo queue */ TAILQ_FOREACH_SAFE(ksi, &sq->sq_list, ksi_link, next) { if (SIGISMEMBER(*set, ksi->ksi_signo)) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } } SIGSETNAND(sq->sq_kill, *set); SIGSETNAND(sq->sq_ptrace, *set); SIGSETNAND(sq->sq_signals, *set); } void sigqueue_delete(sigqueue_t *sq, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_delete_set(sq, &set); } /* Remove a set of signals for a process */ static void sigqueue_delete_set_proc(struct proc *p, const sigset_t *set) { sigqueue_t worklist; struct thread *td0; PROC_LOCK_ASSERT(p, MA_OWNED); sigqueue_init(&worklist, NULL); sigqueue_move_set(&p->p_sigqueue, &worklist, set); FOREACH_THREAD_IN_PROC(p, td0) sigqueue_move_set(&td0->td_sigqueue, &worklist, set); sigqueue_flush(&worklist); } void sigqueue_delete_proc(struct proc *p, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_delete_set_proc(p, &set); } static void sigqueue_delete_stopmask_proc(struct proc *p) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, SIGSTOP); SIGADDSET(set, SIGTSTP); SIGADDSET(set, SIGTTIN); SIGADDSET(set, SIGTTOU); sigqueue_delete_set_proc(p, &set); } /* * Determine signal that should be delivered to thread td, the current * thread, 0 if none. If there is a pending stop signal with default * action, the process stops in issignal(). */ int cursig(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); mtx_assert(&td->td_proc->p_sigacts->ps_mtx, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_NOTOWNED); return (SIGPENDING(td) ? issignal(td) : 0); } /* * Arrange for ast() to handle unmasked pending signals on return to user * mode. This must be called whenever a signal is added to td_sigqueue or * unmasked in td_sigmask. */ void signotify(struct thread *td) { struct proc *p; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); if (SIGPENDING(td)) { thread_lock(td); td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING; thread_unlock(td); } } int sigonstack(size_t sp) { struct thread *td = curthread; return ((td->td_pflags & TDP_ALTSTACK) ? #if defined(COMPAT_43) ((td->td_sigstk.ss_size == 0) ? 
(td->td_sigstk.ss_flags & SS_ONSTACK) : ((sp - (size_t)td->td_sigstk.ss_sp) < td->td_sigstk.ss_size)) #else ((sp - (size_t)td->td_sigstk.ss_sp) < td->td_sigstk.ss_size) #endif : 0); } static __inline int sigprop(int sig) { if (sig > 0 && sig < NSIG) return (sigproptbl[_SIG_IDX(sig)]); return (0); } int sig_ffs(sigset_t *set) { int i; for (i = 0; i < _SIG_WORDS; i++) if (set->__bits[i]) return (ffs(set->__bits[i]) + (i * 32)); return (0); } static bool sigact_flag_test(const struct sigaction *act, int flag) { /* * SA_SIGINFO is reset when signal disposition is set to * ignore or default. Other flags are kept according to user * settings. */ return ((act->sa_flags & flag) != 0 && (flag != SA_SIGINFO || ((__sighandler_t *)act->sa_sigaction != SIG_IGN && (__sighandler_t *)act->sa_sigaction != SIG_DFL))); } /* * kern_sigaction * sigaction * freebsd4_sigaction * osigaction */ int kern_sigaction(struct thread *td, int sig, const struct sigaction *act, struct sigaction *oact, int flags) { struct sigacts *ps; struct proc *p = td->td_proc; if (!_SIG_VALID(sig)) return (EINVAL); if (act != NULL && act->sa_handler != SIG_DFL && act->sa_handler != SIG_IGN && (act->sa_flags & ~(SA_ONSTACK | SA_RESTART | SA_RESETHAND | SA_NOCLDSTOP | SA_NODEFER | SA_NOCLDWAIT | SA_SIGINFO)) != 0) return (EINVAL); PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); if (oact) { oact->sa_mask = ps->ps_catchmask[_SIG_IDX(sig)]; oact->sa_flags = 0; if (SIGISMEMBER(ps->ps_sigonstack, sig)) oact->sa_flags |= SA_ONSTACK; if (!SIGISMEMBER(ps->ps_sigintr, sig)) oact->sa_flags |= SA_RESTART; if (SIGISMEMBER(ps->ps_sigreset, sig)) oact->sa_flags |= SA_RESETHAND; if (SIGISMEMBER(ps->ps_signodefer, sig)) oact->sa_flags |= SA_NODEFER; if (SIGISMEMBER(ps->ps_siginfo, sig)) { oact->sa_flags |= SA_SIGINFO; oact->sa_sigaction = (__siginfohandler_t *)ps->ps_sigact[_SIG_IDX(sig)]; } else oact->sa_handler = ps->ps_sigact[_SIG_IDX(sig)]; if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDSTOP) oact->sa_flags |= SA_NOCLDSTOP; if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDWAIT) oact->sa_flags |= SA_NOCLDWAIT; } if (act) { if ((sig == SIGKILL || sig == SIGSTOP) && act->sa_handler != SIG_DFL) { mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); return (EINVAL); } /* * Change setting atomically. */ ps->ps_catchmask[_SIG_IDX(sig)] = act->sa_mask; SIG_CANTMASK(ps->ps_catchmask[_SIG_IDX(sig)]); if (sigact_flag_test(act, SA_SIGINFO)) { ps->ps_sigact[_SIG_IDX(sig)] = (__sighandler_t *)act->sa_sigaction; SIGADDSET(ps->ps_siginfo, sig); } else { ps->ps_sigact[_SIG_IDX(sig)] = act->sa_handler; SIGDELSET(ps->ps_siginfo, sig); } if (!sigact_flag_test(act, SA_RESTART)) SIGADDSET(ps->ps_sigintr, sig); else SIGDELSET(ps->ps_sigintr, sig); if (sigact_flag_test(act, SA_ONSTACK)) SIGADDSET(ps->ps_sigonstack, sig); else SIGDELSET(ps->ps_sigonstack, sig); if (sigact_flag_test(act, SA_RESETHAND)) SIGADDSET(ps->ps_sigreset, sig); else SIGDELSET(ps->ps_sigreset, sig); if (sigact_flag_test(act, SA_NODEFER)) SIGADDSET(ps->ps_signodefer, sig); else SIGDELSET(ps->ps_signodefer, sig); if (sig == SIGCHLD) { if (act->sa_flags & SA_NOCLDSTOP) ps->ps_flag |= PS_NOCLDSTOP; else ps->ps_flag &= ~PS_NOCLDSTOP; if (act->sa_flags & SA_NOCLDWAIT) { /* * Paranoia: since SA_NOCLDWAIT is implemented * by reparenting the dying child to PID 1 (and * trust it to reap the zombie), PID 1 itself * is forbidden to set SA_NOCLDWAIT. 
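 *
 * (Userland view, for illustration: a parent that sets
 *
 *	act.sa_handler = SIG_DFL;
 *	act.sa_flags = SA_NOCLDWAIT;
 *	sigaction(SIGCHLD, &act, NULL);
 *
 * does not accumulate zombies; dying children are reparented to PID 1
 * as described above.)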
*/ if (p->p_pid == 1) ps->ps_flag &= ~PS_NOCLDWAIT; else ps->ps_flag |= PS_NOCLDWAIT; } else ps->ps_flag &= ~PS_NOCLDWAIT; if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN) ps->ps_flag |= PS_CLDSIGIGN; else ps->ps_flag &= ~PS_CLDSIGIGN; } /* * Set bit in ps_sigignore for signals that are set to SIG_IGN, * and for signals set to SIG_DFL where the default is to * ignore. However, don't put SIGCONT in ps_sigignore, as we * have to restart the process. */ if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || (sigprop(sig) & SA_IGNORE && ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)) { /* never to be seen again */ sigqueue_delete_proc(p, sig); if (sig != SIGCONT) /* easier in psignal */ SIGADDSET(ps->ps_sigignore, sig); SIGDELSET(ps->ps_sigcatch, sig); } else { SIGDELSET(ps->ps_sigignore, sig); if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL) SIGDELSET(ps->ps_sigcatch, sig); else SIGADDSET(ps->ps_sigcatch, sig); } #ifdef COMPAT_FREEBSD4 if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL || (flags & KSA_FREEBSD4) == 0) SIGDELSET(ps->ps_freebsd4, sig); else SIGADDSET(ps->ps_freebsd4, sig); #endif #ifdef COMPAT_43 if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL || (flags & KSA_OSIGSET) == 0) SIGDELSET(ps->ps_osigset, sig); else SIGADDSET(ps->ps_osigset, sig); #endif } mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); return (0); } #ifndef _SYS_SYSPROTO_H_ struct sigaction_args { int sig; struct sigaction *act; struct sigaction *oact; }; #endif int -sys_sigaction(td, uap) - struct thread *td; - register struct sigaction_args *uap; +sys_sigaction(struct thread *td, struct sigaction_args *uap) { struct sigaction act, oact; - register struct sigaction *actp, *oactp; + struct sigaction *actp, *oactp; int error; actp = (uap->act != NULL) ? &act : NULL; oactp = (uap->oact != NULL) ? &oact : NULL; if (actp) { error = copyin(uap->act, actp, sizeof(act)); if (error) return (error); } error = kern_sigaction(td, uap->sig, actp, oactp, 0); if (oactp && !error) error = copyout(oactp, uap->oact, sizeof(oact)); return (error); } #ifdef COMPAT_FREEBSD4 #ifndef _SYS_SYSPROTO_H_ struct freebsd4_sigaction_args { int sig; struct sigaction *act; struct sigaction *oact; }; #endif int -freebsd4_sigaction(td, uap) - struct thread *td; - register struct freebsd4_sigaction_args *uap; +freebsd4_sigaction(struct thread *td, struct freebsd4_sigaction_args *uap) { struct sigaction act, oact; - register struct sigaction *actp, *oactp; + struct sigaction *actp, *oactp; int error; actp = (uap->act != NULL) ? &act : NULL; oactp = (uap->oact != NULL) ? &oact : NULL; if (actp) { error = copyin(uap->act, actp, sizeof(act)); if (error) return (error); } error = kern_sigaction(td, uap->sig, actp, oactp, KSA_FREEBSD4); if (oactp && !error) error = copyout(oactp, uap->oact, sizeof(oact)); return (error); } #endif /* COMAPT_FREEBSD4 */ #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigaction_args { int signum; struct osigaction *nsa; struct osigaction *osa; }; #endif int -osigaction(td, uap) - struct thread *td; - register struct osigaction_args *uap; +osigaction(struct thread *td, struct osigaction_args *uap) { struct osigaction sa; struct sigaction nsa, osa; - register struct sigaction *nsap, *osap; + struct sigaction *nsap, *osap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); nsap = (uap->nsa != NULL) ? &nsa : NULL; osap = (uap->osa != NULL) ? 
&osa : NULL; if (nsap) { error = copyin(uap->nsa, &sa, sizeof(sa)); if (error) return (error); nsap->sa_handler = sa.sa_handler; nsap->sa_flags = sa.sa_flags; OSIG2SIG(sa.sa_mask, nsap->sa_mask); } error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET); if (osap && !error) { sa.sa_handler = osap->sa_handler; sa.sa_flags = osap->sa_flags; SIG2OSIG(osap->sa_mask, sa.sa_mask); error = copyout(&sa, uap->osa, sizeof(sa)); } return (error); } #if !defined(__i386__) /* Avoid replicating the same stub everywhere */ int -osigreturn(td, uap) - struct thread *td; - struct osigreturn_args *uap; +osigreturn(struct thread *td, struct osigreturn_args *uap) { return (nosys(td, (struct nosys_args *)uap)); } #endif #endif /* COMPAT_43 */ /* * Initialize signal state for process 0; * set to ignore signals that are ignored by default. */ void -siginit(p) - struct proc *p; +siginit(struct proc *p) { - register int i; + int i; struct sigacts *ps; PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); for (i = 1; i <= NSIG; i++) { if (sigprop(i) & SA_IGNORE && i != SIGCONT) { SIGADDSET(ps->ps_sigignore, i); } } mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); } /* * Reset specified signal to the default disposition. */ static void sigdflt(struct sigacts *ps, int sig) { mtx_assert(&ps->ps_mtx, MA_OWNED); SIGDELSET(ps->ps_sigcatch, sig); if ((sigprop(sig) & SA_IGNORE) != 0 && sig != SIGCONT) SIGADDSET(ps->ps_sigignore, sig); ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL; SIGDELSET(ps->ps_siginfo, sig); } /* * Reset signals for an exec of the specified process. */ void execsigs(struct proc *p) { sigset_t osigignore; struct sigacts *ps; int sig; struct thread *td; /* * Reset caught signals. Held signals remain held * through td_sigmask (unless they were caught, * and are now ignored by default). */ PROC_LOCK_ASSERT(p, MA_OWNED); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); while (SIGNOTEMPTY(ps->ps_sigcatch)) { sig = sig_ffs(&ps->ps_sigcatch); sigdflt(ps, sig); if ((sigprop(sig) & SA_IGNORE) != 0) sigqueue_delete_proc(p, sig); } /* * As CloudABI processes cannot modify signal handlers, fully * reset all signals to their default behavior. Do ignore * SIGPIPE, as it would otherwise be impossible to recover from * writes to broken pipes and sockets. */ if (SV_PROC_ABI(p) == SV_ABI_CLOUDABI) { osigignore = ps->ps_sigignore; while (SIGNOTEMPTY(osigignore)) { sig = sig_ffs(&osigignore); SIGDELSET(osigignore, sig); if (sig != SIGPIPE) sigdflt(ps, sig); } SIGADDSET(ps->ps_sigignore, SIGPIPE); } /* * Reset stack state to the user stack. * Clear set of signals caught on the signal stack. */ td = curthread; MPASS(td->td_proc == p); td->td_sigstk.ss_flags = SS_DISABLE; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_sp = 0; td->td_pflags &= ~TDP_ALTSTACK; /* * Reset the "no zombies if child dies" flag, as Solaris does. */ ps->ps_flag &= ~(PS_NOCLDWAIT | PS_CLDSIGIGN); if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN) ps->ps_sigact[_SIG_IDX(SIGCHLD)] = SIG_DFL; mtx_unlock(&ps->ps_mtx); } /* * kern_sigprocmask() * * Manipulate signal mask. */ int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int flags) { sigset_t new_block, oset1; struct proc *p; int error; p = td->td_proc; if ((flags & SIGPROCMASK_PROC_LOCKED) != 0) PROC_LOCK_ASSERT(p, MA_OWNED); else PROC_LOCK(p); mtx_assert(&p->p_sigacts->ps_mtx, (flags & SIGPROCMASK_PS_LOCKED) != 0 ? 
MA_OWNED : MA_NOTOWNED); if (oset != NULL) *oset = td->td_sigmask; error = 0; if (set != NULL) { switch (how) { case SIG_BLOCK: SIG_CANTMASK(*set); oset1 = td->td_sigmask; SIGSETOR(td->td_sigmask, *set); new_block = td->td_sigmask; SIGSETNAND(new_block, oset1); break; case SIG_UNBLOCK: SIGSETNAND(td->td_sigmask, *set); signotify(td); goto out; case SIG_SETMASK: SIG_CANTMASK(*set); oset1 = td->td_sigmask; if (flags & SIGPROCMASK_OLD) SIGSETLO(td->td_sigmask, *set); else td->td_sigmask = *set; new_block = td->td_sigmask; SIGSETNAND(new_block, oset1); signotify(td); break; default: error = EINVAL; goto out; } /* * The new_block set contains signals that were not previously * blocked, but are blocked now. * * In case we block any signal that was not previously blocked * for td, and process has the signal pending, try to schedule * signal delivery to some thread that does not block the * signal, possibly waking it up. */ if (p->p_numthreads != 1) reschedule_signals(p, new_block, flags); } out: if (!(flags & SIGPROCMASK_PROC_LOCKED)) PROC_UNLOCK(p); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sigprocmask_args { int how; const sigset_t *set; sigset_t *oset; }; #endif int -sys_sigprocmask(td, uap) - register struct thread *td; - struct sigprocmask_args *uap; +sys_sigprocmask(struct thread *td, struct sigprocmask_args *uap) { sigset_t set, oset; sigset_t *setp, *osetp; int error; setp = (uap->set != NULL) ? &set : NULL; osetp = (uap->oset != NULL) ? &oset : NULL; if (setp) { error = copyin(uap->set, setp, sizeof(set)); if (error) return (error); } error = kern_sigprocmask(td, uap->how, setp, osetp, 0); if (osetp && !error) { error = copyout(osetp, uap->oset, sizeof(oset)); } return (error); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigprocmask_args { int how; osigset_t mask; }; #endif int -osigprocmask(td, uap) - register struct thread *td; - struct osigprocmask_args *uap; +osigprocmask(struct thread *td, struct osigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, 1); SIG2OSIG(oset, td->td_retval[0]); return (error); } #endif /* COMPAT_43 */ int sys_sigwait(struct thread *td, struct sigwait_args *uap) { ksiginfo_t ksi; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) { td->td_retval[0] = error; return (0); } error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) { if (error == EINTR && td->td_proc->p_osrel < P_OSREL_SIGWAIT) error = ERESTART; if (error == ERESTART) return (error); td->td_retval[0] = error; return (0); } error = copyout(&ksi.ksi_signo, uap->sig, sizeof(ksi.ksi_signo)); td->td_retval[0] = error; return (0); } int sys_sigtimedwait(struct thread *td, struct sigtimedwait_args *uap) { struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t)); if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int sys_sigwaitinfo(struct thread *td, struct sigwaitinfo_args *uap) { ksiginfo_t ksi; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) 
return (error); if (uap->info) error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t)); if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } static void proc_td_siginfo_capture(struct thread *td, siginfo_t *si) { struct thread *thr; FOREACH_THREAD_IN_PROC(td->td_proc, thr) { if (thr == td) thr->td_si = *si; else thr->td_si.si_signo = 0; } } int kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi, struct timespec *timeout) { struct sigacts *ps; sigset_t saved_mask, new_block; struct proc *p; int error, sig, timo, timevalid = 0; struct timespec rts, ets, ts; struct timeval tv; p = td->td_proc; error = 0; ets.tv_sec = 0; ets.tv_nsec = 0; if (timeout != NULL) { if (timeout->tv_nsec >= 0 && timeout->tv_nsec < 1000000000) { timevalid = 1; getnanouptime(&rts); ets = rts; timespecadd(&ets, timeout); } } ksiginfo_init(ksi); /* Some signals cannot be waited for. */ SIG_CANTMASK(waitset); ps = p->p_sigacts; PROC_LOCK(p); saved_mask = td->td_sigmask; SIGSETNAND(td->td_sigmask, waitset); for (;;) { mtx_lock(&ps->ps_mtx); sig = cursig(td); mtx_unlock(&ps->ps_mtx); KASSERT(sig >= 0, ("sig %d", sig)); if (sig != 0 && SIGISMEMBER(waitset, sig)) { if (sigqueue_get(&td->td_sigqueue, sig, ksi) != 0 || sigqueue_get(&p->p_sigqueue, sig, ksi) != 0) { error = 0; break; } } if (error != 0) break; /* * POSIX says this must be checked after looking for pending * signals. */ if (timeout != NULL) { if (!timevalid) { error = EINVAL; break; } getnanouptime(&rts); if (timespeccmp(&rts, &ets, >=)) { error = EAGAIN; break; } ts = ets; timespecsub(&ts, &rts); TIMESPEC_TO_TIMEVAL(&tv, &ts); timo = tvtohz(&tv); } else { timo = 0; } error = msleep(ps, &p->p_mtx, PPAUSE|PCATCH, "sigwait", timo); if (timeout != NULL) { if (error == ERESTART) { /* A timeout cannot be restarted. */ error = EINTR; } else if (error == EAGAIN) { /* We will recalculate the timeout ourselves. */ error = 0; } } } new_block = saved_mask; SIGSETNAND(new_block, td->td_sigmask); td->td_sigmask = saved_mask; /* * Fewer signals can be delivered to us, reschedule signal * notification. 
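 * (Restoring saved_mask re-blocks the signals in waitset that the
 * caller had blocked on entry; if any of those are still pending,
 * another thread may have to take over their delivery, which is
 * what the reschedule below arranges.)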
*/ if (p->p_numthreads != 1) reschedule_signals(p, new_block, 0); if (error == 0) { SDT_PROBE2(proc, , , signal__clear, sig, ksi); if (ksi->ksi_code == SI_TIMER) itimer_accept(p, ksi->ksi_timerid, ksi); #ifdef KTRACE if (KTRPOINT(td, KTR_PSIG)) { sig_t action; mtx_lock(&ps->ps_mtx); action = ps->ps_sigact[_SIG_IDX(sig)]; mtx_unlock(&ps->ps_mtx); ktrpsig(sig, action, &td->td_sigmask, ksi->ksi_code); } #endif if (sig == SIGKILL) { proc_td_siginfo_capture(td, &ksi->ksi_info); sigexit(td, sig); } } PROC_UNLOCK(p); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sigpending_args { sigset_t *set; }; #endif int -sys_sigpending(td, uap) - struct thread *td; - struct sigpending_args *uap; +sys_sigpending(struct thread *td, struct sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t pending; PROC_LOCK(p); pending = p->p_sigqueue.sq_signals; SIGSETOR(pending, td->td_sigqueue.sq_signals); PROC_UNLOCK(p); return (copyout(&pending, uap->set, sizeof(sigset_t))); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigpending_args { int dummy; }; #endif int -osigpending(td, uap) - struct thread *td; - struct osigpending_args *uap; +osigpending(struct thread *td, struct osigpending_args *uap) { struct proc *p = td->td_proc; sigset_t pending; PROC_LOCK(p); pending = p->p_sigqueue.sq_signals; SIGSETOR(pending, td->td_sigqueue.sq_signals); PROC_UNLOCK(p); SIG2OSIG(pending, td->td_retval[0]); return (0); } #endif /* COMPAT_43 */ #if defined(COMPAT_43) /* * Generalized interface signal handler, 4.3-compatible. */ #ifndef _SYS_SYSPROTO_H_ struct osigvec_args { int signum; struct sigvec *nsv; struct sigvec *osv; }; #endif /* ARGSUSED */ int -osigvec(td, uap) - struct thread *td; - register struct osigvec_args *uap; +osigvec(struct thread *td, struct osigvec_args *uap) { struct sigvec vec; struct sigaction nsa, osa; - register struct sigaction *nsap, *osap; + struct sigaction *nsap, *osap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); nsap = (uap->nsv != NULL) ? &nsa : NULL; osap = (uap->osv != NULL) ? &osa : NULL; if (nsap) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); nsap->sa_handler = vec.sv_handler; OSIG2SIG(vec.sv_mask, nsap->sa_mask); nsap->sa_flags = vec.sv_flags; nsap->sa_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */ } error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET); if (osap && !error) { vec.sv_handler = osap->sa_handler; SIG2OSIG(osap->sa_mask, vec.sv_mask); vec.sv_flags = osap->sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct osigblock_args { int mask; }; #endif int -osigblock(td, uap) - register struct thread *td; - struct osigblock_args *uap; +osigblock(struct thread *td, struct osigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } #ifndef _SYS_SYSPROTO_H_ struct osigsetmask_args { int mask; }; #endif int -osigsetmask(td, uap) - struct thread *td; - struct osigsetmask_args *uap; +osigsetmask(struct thread *td, struct osigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } #endif /* COMPAT_43 */ /* * Suspend calling thread until signal, providing mask to be set in the * meantime. 
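 *
 * The classic race-free usage from userland looks roughly like the
 * following sketch (illustrative only; got_signal stands for a
 * volatile sig_atomic_t flag set by the signal handler):
 *
 *	sigset_t block, waitmask;
 *
 *	sigemptyset(&block);
 *	sigaddset(&block, SIGUSR1);
 *	sigprocmask(SIG_BLOCK, &block, &waitmask);
 *	while (!got_signal)
 *		sigsuspend(&waitmask);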
*/ #ifndef _SYS_SYSPROTO_H_ struct sigsuspend_args { const sigset_t *sigmask; }; #endif /* ARGSUSED */ int -sys_sigsuspend(td, uap) - struct thread *td; - struct sigsuspend_args *uap; +sys_sigsuspend(struct thread *td, struct sigsuspend_args *uap) { sigset_t mask; int error; error = copyin(uap->sigmask, &mask, sizeof(mask)); if (error) return (error); return (kern_sigsuspend(td, mask)); } int kern_sigsuspend(struct thread *td, sigset_t mask) { struct proc *p = td->td_proc; int has_sig, sig; /* * When returning from sigsuspend, we want * the old mask to be restored after the * signal handler has finished. Thus, we * save it here and mark the sigacts structure * to indicate this. */ PROC_LOCK(p); kern_sigprocmask(td, SIG_SETMASK, &mask, &td->td_oldsigmask, SIGPROCMASK_PROC_LOCKED); td->td_pflags |= TDP_OLDMASK; /* * Process signals now. Otherwise, we can get spurious wakeup * due to signal entered process queue, but delivered to other * thread. But sigsuspend should return only on signal * delivery. */ (p->p_sysent->sv_set_syscall_retval)(td, EINTR); for (has_sig = 0; !has_sig;) { while (msleep(&p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH, "pause", 0) == 0) /* void */; thread_suspend_check(0); mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) { KASSERT(sig >= 0, ("sig %d", sig)); has_sig += postsig(sig); } mtx_unlock(&p->p_sigacts->ps_mtx); } PROC_UNLOCK(p); td->td_errno = EINTR; td->td_pflags |= TDP_NERRNO; return (EJUSTRETURN); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ /* * Compatibility sigsuspend call for old binaries. Note nonstandard calling * convention: libc stub passes mask, not pointer, to save a copyin. */ #ifndef _SYS_SYSPROTO_H_ struct osigsuspend_args { osigset_t mask; }; #endif /* ARGSUSED */ int -osigsuspend(td, uap) - struct thread *td; - struct osigsuspend_args *uap; +osigsuspend(struct thread *td, struct osigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } #endif /* COMPAT_43 */ #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osigstack_args { struct sigstack *nss; struct sigstack *oss; }; #endif /* ARGSUSED */ int -osigstack(td, uap) - struct thread *td; - register struct osigstack_args *uap; +osigstack(struct thread *td, struct osigstack_args *uap) { struct sigstack nss, oss; int error = 0; if (uap->nss != NULL) { error = copyin(uap->nss, &nss, sizeof(nss)); if (error) return (error); } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (uap->nss != NULL) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= nss.ss_onstack & SS_ONSTACK; td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) error = copyout(&oss, uap->oss, sizeof(oss)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct sigaltstack_args { stack_t *ss; stack_t *oss; }; #endif /* ARGSUSED */ int -sys_sigaltstack(td, uap) - struct thread *td; - register struct sigaltstack_args *uap; +sys_sigaltstack(struct thread *td, struct sigaltstack_args *uap) { stack_t ss, oss; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &ss, sizeof(ss)); if (error) return (error); } error = kern_sigaltstack(td, (uap->ss != NULL) ? &ss : NULL, (uap->oss != NULL) ? 
&oss : NULL); if (error) return (error); if (uap->oss != NULL) error = copyout(&oss, uap->oss, sizeof(stack_t)); return (error); } int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss) { struct proc *p = td->td_proc; int oonstack; oonstack = sigonstack(cpu_getstack(td)); if (oss != NULL) { *oss = td->td_sigstk; oss->ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; } if (ss != NULL) { if (oonstack) return (EPERM); if ((ss->ss_flags & ~SS_DISABLE) != 0) return (EINVAL); if (!(ss->ss_flags & SS_DISABLE)) { if (ss->ss_size < p->p_sysent->sv_minsigstksz) return (ENOMEM); td->td_sigstk = *ss; td->td_pflags |= TDP_ALTSTACK; } else { td->td_pflags &= ~TDP_ALTSTACK; } } return (0); } /* * Common code for kill process group/broadcast kill. * cp is the calling process. */ static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi) { struct proc *p; struct pgrp *pgrp; int err; int ret; ret = ESRCH; if (all) { /* * broadcast */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p == td->td_proc || p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } err = p_cansignal(td, p, sig); if (err == 0) { if (sig) pksignal(p, sig, ksi); ret = err; } else if (ret == ESRCH) ret = err; PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); } else { sx_slock(&proctree_lock); if (pgid == 0) { /* * zero pgid means send to my process group. */ pgrp = td->td_proc->p_pgrp; PGRP_LOCK(pgrp); } else { pgrp = pgfind(pgid); if (pgrp == NULL) { sx_sunlock(&proctree_lock); return (ESRCH); } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } err = p_cansignal(td, p, sig); if (err == 0) { if (sig) pksignal(p, sig, ksi); ret = err; } else if (ret == ESRCH) ret = err; PROC_UNLOCK(p); } PGRP_UNLOCK(pgrp); } return (ret); } #ifndef _SYS_SYSPROTO_H_ struct kill_args { int pid; int signum; }; #endif /* ARGSUSED */ int sys_kill(struct thread *td, struct kill_args *uap) { ksiginfo_t ksi; struct proc *p; int error; /* * A process in capability mode can send signals only to itself. * The main rationale behind this is that abort(3) is implemented as * kill(getpid(), SIGABRT). 
*/ if (IN_CAPABILITY_MODE(td) && uap->pid != td->td_proc->p_pid) return (ECAPMODE); AUDIT_ARG_SIGNUM(uap->signum); AUDIT_ARG_PID(uap->pid); if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); ksiginfo_init(&ksi); ksi.ksi_signo = uap->signum; ksi.ksi_code = SI_USER; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; if (uap->pid > 0) { /* kill single process */ if ((p = pfind(uap->pid)) == NULL) { if ((p = zpfind(uap->pid)) == NULL) return (ESRCH); } AUDIT_ARG_PROCESS(p); error = p_cansignal(td, p, uap->signum); if (error == 0 && uap->signum) pksignal(p, uap->signum, &ksi); PROC_UNLOCK(p); return (error); } switch (uap->pid) { case -1: /* broadcast signal */ return (killpg1(td, uap->signum, 0, 1, &ksi)); case 0: /* signal own process group */ return (killpg1(td, uap->signum, 0, 0, &ksi)); default: /* negative explicit process group */ return (killpg1(td, uap->signum, -uap->pid, 0, &ksi)); } /* NOTREACHED */ } int -sys_pdkill(td, uap) - struct thread *td; - struct pdkill_args *uap; +sys_pdkill(struct thread *td, struct pdkill_args *uap) { struct proc *p; cap_rights_t rights; int error; AUDIT_ARG_SIGNUM(uap->signum); AUDIT_ARG_FD(uap->fd); if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); error = procdesc_find(td, uap->fd, cap_rights_init(&rights, CAP_PDKILL), &p); if (error) return (error); AUDIT_ARG_PROCESS(p); error = p_cansignal(td, p, uap->signum); if (error == 0 && uap->signum) kern_psignal(p, uap->signum); PROC_UNLOCK(p); return (error); } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct okillpg_args { int pgid; int signum; }; #endif /* ARGSUSED */ int okillpg(struct thread *td, struct okillpg_args *uap) { ksiginfo_t ksi; AUDIT_ARG_SIGNUM(uap->signum); AUDIT_ARG_PID(uap->pgid); if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); ksiginfo_init(&ksi); ksi.ksi_signo = uap->signum; ksi.ksi_code = SI_USER; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; return (killpg1(td, uap->signum, uap->pgid, 0, &ksi)); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct sigqueue_args { pid_t pid; int signum; /* union sigval */ void *value; }; #endif int sys_sigqueue(struct thread *td, struct sigqueue_args *uap) { union sigval sv; sv.sival_ptr = uap->value; return (kern_sigqueue(td, uap->pid, uap->signum, &sv)); } int kern_sigqueue(struct thread *td, pid_t pid, int signum, union sigval *value) { ksiginfo_t ksi; struct proc *p; int error; if ((u_int)signum > _SIG_MAXSIG) return (EINVAL); /* * Specification says sigqueue can only send signal to * single process. */ if (pid <= 0) return (EINVAL); if ((p = pfind(pid)) == NULL) { if ((p = zpfind(pid)) == NULL) return (ESRCH); } error = p_cansignal(td, p, signum); if (error == 0 && signum != 0) { ksiginfo_init(&ksi); ksi.ksi_flags = KSI_SIGQ; ksi.ksi_signo = signum; ksi.ksi_code = SI_QUEUE; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; ksi.ksi_value = *value; error = pksignal(p, ksi.ksi_signo, &ksi); } PROC_UNLOCK(p); return (error); } /* * Send a signal to a process group. */ void gsignal(int pgid, int sig, ksiginfo_t *ksi) { struct pgrp *pgrp; if (pgid != 0) { sx_slock(&proctree_lock); pgrp = pgfind(pgid); sx_sunlock(&proctree_lock); if (pgrp != NULL) { pgsignal(pgrp, sig, 0, ksi); PGRP_UNLOCK(pgrp); } } } /* * Send a signal to a process group. If checktty is 1, * limit to members which have a controlling terminal. 
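 * The tty layer is a typical caller with checkctty set to 1, e.g.
 * when delivering keyboard-generated signals such as SIGINT to the
 * foreground process group.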
*/ void pgsignal(struct pgrp *pgrp, int sig, int checkctty, ksiginfo_t *ksi) { struct proc *p; if (pgrp) { PGRP_LOCK_ASSERT(pgrp, MA_OWNED); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && (checkctty == 0 || p->p_flag & P_CONTROLT)) pksignal(p, sig, ksi); PROC_UNLOCK(p); } } } /* * Recalculate the signal mask and reset the signal disposition after * the usermode frame for delivery is formed. Should be called after * the machine-specific routine, because sysent->sv_sendsig() needs the * correct ps_siginfo and signal mask. */ static void postsig_done(int sig, struct thread *td, struct sigacts *ps) { sigset_t mask; mtx_assert(&ps->ps_mtx, MA_OWNED); td->td_ru.ru_nsignals++; mask = ps->ps_catchmask[_SIG_IDX(sig)]; if (!SIGISMEMBER(ps->ps_signodefer, sig)) SIGADDSET(mask, sig); kern_sigprocmask(td, SIG_BLOCK, &mask, NULL, SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED); if (SIGISMEMBER(ps->ps_sigreset, sig)) sigdflt(ps, sig); } /* * Send a signal caused by a trap to the current thread. If it will be * caught immediately, deliver it with the correct code. Otherwise, post it * normally. */ void trapsignal(struct thread *td, ksiginfo_t *ksi) { struct sigacts *ps; struct proc *p; int sig; int code; p = td->td_proc; sig = ksi->ksi_signo; code = ksi->ksi_code; KASSERT(_SIG_VALID(sig), ("invalid signal")); PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) && !SIGISMEMBER(td->td_sigmask, sig)) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_PSIG)) ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)], &td->td_sigmask, code); #endif (*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)], ksi, &td->td_sigmask); postsig_done(sig, td, ps); mtx_unlock(&ps->ps_mtx); } else { /* * Avoid a possible infinite loop if the thread is * masking the signal or the process is ignoring it. */ if (kern_forcesigexit && (SIGISMEMBER(td->td_sigmask, sig) || ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN)) { SIGDELSET(td->td_sigmask, sig); SIGDELSET(ps->ps_sigcatch, sig); SIGDELSET(ps->ps_sigignore, sig); ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL; } mtx_unlock(&ps->ps_mtx); p->p_code = code; /* XXX for core dump/debugger */ p->p_sig = sig; /* XXX to verify code */ tdsendsignal(p, td, sig, ksi); } PROC_UNLOCK(p); } static struct thread * sigtd(struct proc *p, int sig, int prop) { struct thread *td, *signal_td; PROC_LOCK_ASSERT(p, MA_OWNED); /* * Check if the current thread can handle the signal without * switching context to another thread. */ if (curproc == p && !SIGISMEMBER(curthread->td_sigmask, sig)) return (curthread); signal_td = NULL; FOREACH_THREAD_IN_PROC(p, td) { if (!SIGISMEMBER(td->td_sigmask, sig)) { signal_td = td; break; } } if (signal_td == NULL) signal_td = FIRST_THREAD_IN_PROC(p); return (signal_td); } /* * Send the signal to the process. If the signal has an action, the action * is usually performed by the target process rather than the caller; we add * the signal to the set of pending signals for the process. * * Exceptions: * o When a stop signal is sent to a sleeping process that takes the * default action, the process is stopped without awakening it. * o SIGCONT restarts stopped processes (or puts them back to sleep) * regardless of the signal action (eg, blocked or ignored). * * Other ignored signals are discarded immediately. * * NB: This function may be entered from the debugger via the "kill" DDB * command. There is little that can be done to mitigate the possibly messy * side effects of this unwise possibility. 
*/ void kern_psignal(struct proc *p, int sig) { ksiginfo_t ksi; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; (void) tdsendsignal(p, NULL, sig, &ksi); } int pksignal(struct proc *p, int sig, ksiginfo_t *ksi) { return (tdsendsignal(p, NULL, sig, ksi)); } /* Utility function for finding a thread to send a signal event to. */ int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **ttd) { struct thread *td; if (sigev->sigev_notify == SIGEV_THREAD_ID) { td = tdfind(sigev->sigev_notify_thread_id, p->p_pid); if (td == NULL) return (ESRCH); *ttd = td; } else { *ttd = NULL; PROC_LOCK(p); } return (0); } void tdsignal(struct thread *td, int sig) { ksiginfo_t ksi; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; (void) tdsendsignal(td->td_proc, td, sig, &ksi); } void tdksignal(struct thread *td, int sig, ksiginfo_t *ksi) { (void) tdsendsignal(td->td_proc, td, sig, ksi); } int tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi) { sig_t action; sigqueue_t *sigqueue; int prop; struct sigacts *ps; int intrval; int ret = 0; int wakeup_swapper; MPASS(td == NULL || p == td->td_proc); PROC_LOCK_ASSERT(p, MA_OWNED); if (!_SIG_VALID(sig)) panic("%s(): invalid signal %d", __func__, sig); KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("%s: ksi on queue", __func__)); /* * IEEE Std 1003.1-2001: return success when killing a zombie. */ if (p->p_state == PRS_ZOMBIE) { if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } ps = p->p_sigacts; KNOTE_LOCKED(p->p_klist, NOTE_SIGNAL | sig); prop = sigprop(sig); if (td == NULL) { td = sigtd(p, sig, prop); sigqueue = &p->p_sigqueue; } else sigqueue = &td->td_sigqueue; SDT_PROBE3(proc, , , signal__send, td, p, sig); /* * If the signal is being ignored, * then we forget about it immediately. * (Note: we don't set SIGCONT in ps_sigignore, * and if it is set to SIG_IGN, * action will be SIG_DFL here.) */ mtx_lock(&ps->ps_mtx); if (SIGISMEMBER(ps->ps_sigignore, sig)) { SDT_PROBE3(proc, , , signal__discard, td, p, sig); mtx_unlock(&ps->ps_mtx); if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } if (SIGISMEMBER(td->td_sigmask, sig)) action = SIG_HOLD; else if (SIGISMEMBER(ps->ps_sigcatch, sig)) action = SIG_CATCH; else action = SIG_DFL; if (SIGISMEMBER(ps->ps_sigintr, sig)) intrval = EINTR; else intrval = ERESTART; mtx_unlock(&ps->ps_mtx); if (prop & SA_CONT) sigqueue_delete_stopmask_proc(p); else if (prop & SA_STOP) { /* * If sending a tty stop signal to a member of an orphaned * process group, discard the signal here if the action * is default; don't stop the process below if sleeping, * and don't clear any pending SIGCONT. */ if ((prop & SA_TTYSTOP) && (p->p_pgrp->pg_jobc == 0) && (action == SIG_DFL)) { if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } sigqueue_delete_proc(p, SIGCONT); if (p->p_flag & P_CONTINUED) { p->p_flag &= ~P_CONTINUED; PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); } } ret = sigqueue_add(sigqueue, sig, ksi); if (ret != 0) return (ret); signotify(td); /* * Defer further processing for signals which are held, * except that stopped processes must be continued by SIGCONT. */ if (action == SIG_HOLD && !((prop & SA_CONT) && (p->p_flag & P_STOPPED_SIG))) return (ret); /* SIGKILL: Remove procfs STOPEVENTs. 
*/ if (sig == SIGKILL) { /* from procfs_ioctl.c: PIOCBIC */ p->p_stops = 0; /* from procfs_ioctl.c: PIOCCONT */ p->p_step = 0; wakeup(&p->p_step); } /* * Some signals have a process-wide effect and a per-thread * component. Most processing occurs when the process next * tries to cross the user boundary, however there are some * times when processing needs to be done immediately, such as * waking up threads so that they can cross the user boundary. * We try to do the per-process part here. */ if (P_SHOULDSTOP(p)) { KASSERT(!(p->p_flag & P_WEXIT), ("signal to stopped but exiting process")); if (sig == SIGKILL) { /* * If the traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * SIGKILL sets the process running. * It will die elsewhere. * All threads must be restarted. */ p->p_flag &= ~P_STOPPED_SIG; goto runfast; } if (prop & SA_CONT) { /* * If the traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * If SIGCONT is default (or ignored), we continue the * process but don't leave the signal in sigqueue as * it has no further action. If SIGCONT is held, we * continue the process and leave the signal in * sigqueue. If the process catches SIGCONT, let it * handle the signal itself. If it isn't waiting on * an event, it goes back to the run state. * Otherwise, the process goes back to the sleep state. */ p->p_flag &= ~P_STOPPED_SIG; PROC_SLOCK(p); if (p->p_numthreads == p->p_suspcount) { PROC_SUNLOCK(p); p->p_flag |= P_CONTINUED; p->p_xsig = SIGCONT; PROC_LOCK(p->p_pptr); childproc_continued(p); PROC_UNLOCK(p->p_pptr); PROC_SLOCK(p); } if (action == SIG_DFL) { thread_unsuspend(p); PROC_SUNLOCK(p); sigqueue_delete(sigqueue, sig); goto out; } if (action == SIG_CATCH) { /* * The process wants to catch it so it needs * to run at least one thread, but which one? */ PROC_SUNLOCK(p); goto runfast; } /* * The signal is not ignored or caught. */ thread_unsuspend(p); PROC_SUNLOCK(p); goto out; } if (prop & SA_STOP) { /* * If the traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * Already stopped, don't need to stop again * (If we did, the shell could get confused.) * Just make sure the signal STOP bit is set. */ p->p_flag |= P_STOPPED_SIG; sigqueue_delete(sigqueue, sig); goto out; } /* * All other kinds of signals: * If a thread is sleeping interruptibly, simulate a * wakeup so that when it is continued it will be made * runnable and can look at the signal. However, don't make * the PROCESS runnable, leave it stopped. * It may run a bit until it hits a thread_suspend_check(). */ wakeup_swapper = 0; PROC_SLOCK(p); thread_lock(td); if (TD_ON_SLEEPQ(td) && (td->td_flags & TDF_SINTR)) wakeup_swapper = sleepq_abort(td, intrval); thread_unlock(td); PROC_SUNLOCK(p); if (wakeup_swapper) kick_proc0(); goto out; /* * Mutexes are short-lived. Threads waiting on them will * hit thread_suspend_check() soon. 
*/ } else if (p->p_state == PRS_NORMAL) { if (p->p_flag & P_TRACED || action == SIG_CATCH) { tdsigwakeup(td, sig, action, intrval); goto out; } MPASS(action == SIG_DFL); if (prop & SA_STOP) { if (p->p_flag & (P_PPWAIT|P_WEXIT)) goto out; p->p_flag |= P_STOPPED_SIG; p->p_xsig = sig; PROC_SLOCK(p); wakeup_swapper = sig_suspend_threads(td, p, 1); if (p->p_numthreads == p->p_suspcount) { /* * Only a thread sending a signal to another * process can reach here; a thread sending a * signal to its own process does not suspend * itself here, so p_numthreads should never * be equal to p_suspcount. */ thread_stopped(p); PROC_SUNLOCK(p); sigqueue_delete_proc(p, p->p_xsig); } else PROC_SUNLOCK(p); if (wakeup_swapper) kick_proc0(); goto out; } } else { /* Not in "NORMAL" state. Discard the signal. */ sigqueue_delete(sigqueue, sig); goto out; } /* * The process is not stopped so we need to apply the signal to all the * running threads. */ runfast: tdsigwakeup(td, sig, action, intrval); PROC_SLOCK(p); thread_unsuspend(p); PROC_SUNLOCK(p); out: /* If we jump here, proc slock should not be owned. */ PROC_SLOCK_ASSERT(p, MA_NOTOWNED); return (ret); } /* * The force of a signal has been directed against a single * thread. We need to see what we can do about knocking it * out of any sleep it may be in, etc. */ static void tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval) { struct proc *p = td->td_proc; - register int prop; + int prop; int wakeup_swapper; wakeup_swapper = 0; PROC_LOCK_ASSERT(p, MA_OWNED); prop = sigprop(sig); PROC_SLOCK(p); thread_lock(td); /* * Bring the priority of a thread up if we want it to get * killed in this lifetime. Be careful to avoid bumping the * priority of the idle thread, since we still allow signalling * kernel processes. */ if (action == SIG_DFL && (prop & SA_KILL) != 0 && td->td_priority > PUSER && !TD_IS_IDLETHREAD(td)) sched_prio(td, PUSER); if (TD_ON_SLEEPQ(td)) { /* * If the thread is sleeping uninterruptibly * we can't interrupt the sleep... the signal will * be noticed when the process returns through * trap() or syscall(). */ if ((td->td_flags & TDF_SINTR) == 0) goto out; /* * If SIGCONT is default (or ignored) and the process is * asleep, we are finished; the process should not * be awakened. */ if ((prop & SA_CONT) && action == SIG_DFL) { thread_unlock(td); PROC_SUNLOCK(p); sigqueue_delete(&p->p_sigqueue, sig); /* * It may be on either list in this state. * Remove from both for now. */ sigqueue_delete(&td->td_sigqueue, sig); return; } /* * Don't awaken a sleeping thread for SIGSTOP if the * STOP signal is deferred. */ if ((prop & SA_STOP) != 0 && (td->td_flags & (TDF_SBDRY | TDF_SERESTART | TDF_SEINTR)) == TDF_SBDRY) goto out; /* * Give low-priority threads a better chance to run. */ if (td->td_priority > PUSER && !TD_IS_IDLETHREAD(td)) sched_prio(td, PUSER); wakeup_swapper = sleepq_abort(td, intrval); } else { /* * Other states do nothing with the signal immediately, * other than kicking ourselves if we are running. * It will either never be noticed, or noticed very soon. 
*/ #ifdef SMP if (TD_IS_RUNNING(td) && td != curthread) forward_signal(td); #endif } out: PROC_SUNLOCK(p); thread_unlock(td); if (wakeup_swapper) kick_proc0(); } static int sig_suspend_threads(struct thread *td, struct proc *p, int sending) { struct thread *td2; int wakeup_swapper; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); MPASS(sending || td == curthread); wakeup_swapper = 0; FOREACH_THREAD_IN_PROC(p, td2) { thread_lock(td2); td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) && (td2->td_flags & TDF_SINTR)) { if (td2->td_flags & TDF_SBDRY) { /* * Once a thread is asleep with * TDF_SBDRY and without TDF_SERESTART * or TDF_SEINTR set, it should never * become suspended due to this check. */ KASSERT(!TD_IS_SUSPENDED(td2), ("thread with deferred stops suspended")); if (TD_SBDRY_INTR(td2)) wakeup_swapper |= sleepq_abort(td2, TD_SBDRY_ERRNO(td2)); } else if (!TD_IS_SUSPENDED(td2)) { thread_suspend_one(td2); } } else if (!TD_IS_SUSPENDED(td2)) { if (sending || td != td2) td2->td_flags |= TDF_ASTPENDING; #ifdef SMP if (TD_IS_RUNNING(td2) && td2 != td) forward_signal(td2); #endif } thread_unlock(td2); } return (wakeup_swapper); } /* * Stop the process for an event deemed interesting to the debugger. If si is * non-NULL, this is a signal exchange; the new signal requested by the * debugger will be returned for handling. If si is NULL, this is some other * type of interesting event. The debugger may request a signal be delivered in * that case as well, however it will be deferred until it can be handled. */ int ptracestop(struct thread *td, int sig, ksiginfo_t *si) { struct proc *p = td->td_proc; struct thread *td2; ksiginfo_t ksi; int prop; PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(!(p->p_flag & P_WEXIT), ("Stopping exiting process")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, "Stopping for traced signal"); td->td_xsig = sig; if (si == NULL || (si->ksi_flags & KSI_PTRACE) == 0) { td->td_dbgflags |= TDB_XSIG; CTR4(KTR_PTRACE, "ptracestop: tid %d (pid %d) flags %#x sig %d", td->td_tid, p->p_pid, td->td_dbgflags, sig); PROC_SLOCK(p); while ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_XSIG)) { if (P_KILLED(p)) { /* * Ensure that, if we've been PT_KILLed, the * exit status reflects that. Another thread * may also be in ptracestop(), having just * received the SIGKILL, but this thread was * unsuspended first. */ td->td_dbgflags &= ~TDB_XSIG; td->td_xsig = SIGKILL; p->p_ptevents = 0; break; } if (p->p_flag & P_SINGLE_EXIT && !(td->td_dbgflags & TDB_EXIT)) { /* * Ignore ptrace stops except for thread exit * events when the process exits. */ td->td_dbgflags &= ~TDB_XSIG; PROC_SUNLOCK(p); return (0); } /* * Make wait(2) work. Ensure that right after the * attach, the thread which was decided to become the * leader of attach gets reported to the waiter. * Otherwise, just avoid overwriting another thread's * assignment to p_xthread. If another thread has * already set p_xthread, the current thread will get * a chance to report itself upon the next iteration. 
*/ if ((td->td_dbgflags & TDB_FSTP) != 0 || ((p->p_flag2 & P2_PTRACE_FSTP) == 0 && p->p_xthread == NULL)) { p->p_xsig = sig; p->p_xthread = td; td->td_dbgflags &= ~TDB_FSTP; p->p_flag2 &= ~P2_PTRACE_FSTP; p->p_flag |= P_STOPPED_SIG | P_STOPPED_TRACE; sig_suspend_threads(td, p, 0); } if ((td->td_dbgflags & TDB_STOPATFORK) != 0) { td->td_dbgflags &= ~TDB_STOPATFORK; cv_broadcast(&p->p_dbgwait); } stopme: thread_suspend_switch(td, p); if (p->p_xthread == td) p->p_xthread = NULL; if (!(p->p_flag & P_TRACED)) break; if (td->td_dbgflags & TDB_SUSPEND) { if (p->p_flag & P_SINGLE_EXIT) break; goto stopme; } } PROC_SUNLOCK(p); } if (si != NULL && sig == td->td_xsig) { /* Parent wants us to take the original signal unchanged. */ si->ksi_flags |= KSI_HEAD; if (sigqueue_add(&td->td_sigqueue, sig, si) != 0) si->ksi_signo = 0; } else if (td->td_xsig != 0) { /* * If parent wants us to take a new signal, then it will leave * it in td->td_xsig; otherwise we just look for signals again. */ ksiginfo_init(&ksi); ksi.ksi_signo = td->td_xsig; ksi.ksi_flags |= KSI_PTRACE; prop = sigprop(td->td_xsig); td2 = sigtd(p, td->td_xsig, prop); tdsendsignal(p, td2, td->td_xsig, &ksi); if (td != td2) return (0); } return (td->td_xsig); } static void reschedule_signals(struct proc *p, sigset_t block, int flags) { struct sigacts *ps; struct thread *td; int sig; PROC_LOCK_ASSERT(p, MA_OWNED); ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, (flags & SIGPROCMASK_PS_LOCKED) != 0 ? MA_OWNED : MA_NOTOWNED); if (SIGISEMPTY(p->p_siglist)) return; SIGSETAND(block, p->p_siglist); while ((sig = sig_ffs(&block)) != 0) { SIGDELSET(block, sig); td = sigtd(p, sig, 0); signotify(td); if (!(flags & SIGPROCMASK_PS_LOCKED)) mtx_lock(&ps->ps_mtx); if (p->p_flag & P_TRACED || (SIGISMEMBER(ps->ps_sigcatch, sig) && !SIGISMEMBER(td->td_sigmask, sig))) tdsigwakeup(td, sig, SIG_CATCH, (SIGISMEMBER(ps->ps_sigintr, sig) ? EINTR : ERESTART)); if (!(flags & SIGPROCMASK_PS_LOCKED)) mtx_unlock(&ps->ps_mtx); } } void tdsigcleanup(struct thread *td) { struct proc *p; sigset_t unblocked; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sigqueue_flush(&td->td_sigqueue); if (p->p_numthreads == 1) return; /* * Since we cannot handle signals, notify signal post code * about this by filling the sigmask. * * Also, if needed, wake up thread(s) that do not block the * same signals as the exiting thread, since the thread might * have been selected for delivery and woken up. */ SIGFILLSET(unblocked); SIGSETNAND(unblocked, td->td_sigmask); SIGFILLSET(td->td_sigmask); reschedule_signals(p, unblocked, 0); } static int sigdeferstop_curr_flags(int cflags) { MPASS((cflags & (TDF_SEINTR | TDF_SERESTART)) == 0 || (cflags & TDF_SBDRY) != 0); return (cflags & (TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)); } /* * Defer the delivery of SIGSTOP for the current thread, according to * the requested mode. Returns previous flags, which must be restored * by sigallowstop(). * * TDF_SBDRY, TDF_SEINTR, and TDF_SERESTART flags are only set and * cleared by the current thread, which allow the lock-less read-only * accesses below. 
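 *
 * A typical in-kernel pairing is sketched below, assuming the
 * sigdeferstop()/sigallowstop() wrappers around these _impl
 * functions; do_sleepable_work() is a placeholder:
 *
 *	int ostop;
 *
 *	ostop = sigdeferstop(SIGDEFERSTOP_SILENT);
 *	error = do_sleepable_work();
 *	sigallowstop(ostop);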
*/ int sigdeferstop_impl(int mode) { struct thread *td; int cflags, nflags; td = curthread; cflags = sigdeferstop_curr_flags(td->td_flags); switch (mode) { case SIGDEFERSTOP_NOP: nflags = cflags; break; case SIGDEFERSTOP_OFF: nflags = 0; break; case SIGDEFERSTOP_SILENT: nflags = (cflags | TDF_SBDRY) & ~(TDF_SEINTR | TDF_SERESTART); break; case SIGDEFERSTOP_EINTR: nflags = (cflags | TDF_SBDRY | TDF_SEINTR) & ~TDF_SERESTART; break; case SIGDEFERSTOP_ERESTART: nflags = (cflags | TDF_SBDRY | TDF_SERESTART) & ~TDF_SEINTR; break; default: panic("sigdeferstop: invalid mode %x", mode); break; } if (cflags == nflags) return (SIGDEFERSTOP_VAL_NCHG); thread_lock(td); td->td_flags = (td->td_flags & ~cflags) | nflags; thread_unlock(td); return (cflags); } /* * Restores the STOP handling mode, typically permitting the delivery * of SIGSTOP for the current thread. This does not immediately * suspend if a stop was posted. Instead, the thread will suspend * either via ast() or a subsequent interruptible sleep. */ void sigallowstop_impl(int prev) { struct thread *td; int cflags; KASSERT(prev != SIGDEFERSTOP_VAL_NCHG, ("failed sigallowstop")); KASSERT((prev & ~(TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)) == 0, ("sigallowstop: incorrect previous mode %x", prev)); td = curthread; cflags = sigdeferstop_curr_flags(td->td_flags); if (cflags != prev) { thread_lock(td); td->td_flags = (td->td_flags & ~cflags) | prev; thread_unlock(td); } } /* * If the current process has received a signal (should be caught or cause * termination, should interrupt current syscall), return the signal number. * Stop signals with default action are processed immediately, then cleared; * they aren't returned. This is checked after each entry to the system for * a syscall or trap (though this can usually be done without calling issignal * by checking the pending signal masks in cursig.) The normal call * sequence is * * while (sig = cursig(curthread)) * postsig(sig); */ static int issignal(struct thread *td) { struct proc *p; struct sigacts *ps; struct sigqueue *queue; sigset_t sigpending; int prop, sig, traced; ksiginfo_t ksi; p = td->td_proc; ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, MA_OWNED); PROC_LOCK_ASSERT(p, MA_OWNED); for (;;) { traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG); sigpending = td->td_sigqueue.sq_signals; SIGSETOR(sigpending, p->p_sigqueue.sq_signals); SIGSETNAND(sigpending, td->td_sigmask); if ((p->p_flag & P_PPWAIT) != 0 || (td->td_flags & (TDF_SBDRY | TDF_SERESTART | TDF_SEINTR)) == TDF_SBDRY) SIG_STOPSIGMASK(sigpending); if (SIGISEMPTY(sigpending)) /* no signal to send */ return (0); if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED && (p->p_flag2 & P2_PTRACE_FSTP) != 0 && SIGISMEMBER(sigpending, SIGSTOP)) { /* * If debugger just attached, always consume * SIGSTOP from ptrace(PT_ATTACH) first, to * execute the debugger attach ritual in * order. */ sig = SIGSTOP; td->td_dbgflags |= TDB_FSTP; } else { sig = sig_ffs(&sigpending); } if (p->p_stops & S_SIG) { mtx_unlock(&ps->ps_mtx); stopevent(p, S_SIG, sig); mtx_lock(&ps->ps_mtx); } /* * We should see pending but ignored signals * only if P_TRACED was on when they were posted. */ if (SIGISMEMBER(ps->ps_sigignore, sig) && (traced == 0)) { sigqueue_delete(&td->td_sigqueue, sig); sigqueue_delete(&p->p_sigqueue, sig); continue; } if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED) { /* * If traced, always stop. * Remove old signal from queue before the stop. * XXX shrug off debugger, it causes siginfo to * be thrown away. 
*/ queue = &td->td_sigqueue; ksiginfo_init(&ksi); if (sigqueue_get(queue, sig, &ksi) == 0) { queue = &p->p_sigqueue; sigqueue_get(queue, sig, &ksi); } td->td_si = ksi.ksi_info; mtx_unlock(&ps->ps_mtx); sig = ptracestop(td, sig, &ksi); mtx_lock(&ps->ps_mtx); /* * Keep looking if the debugger discarded or * replaced the signal. */ if (sig == 0) continue; /* * If the signal became masked, re-queue it. */ if (SIGISMEMBER(td->td_sigmask, sig)) { ksi.ksi_flags |= KSI_HEAD; sigqueue_add(&p->p_sigqueue, sig, &ksi); continue; } /* * If the traced bit got turned off, requeue * the signal and go back up to the top to * rescan signals. This ensures that p_sig* * and p_sigact are consistent. */ if ((p->p_flag & P_TRACED) == 0) { ksi.ksi_flags |= KSI_HEAD; sigqueue_add(queue, sig, &ksi); continue; } } prop = sigprop(sig); /* * Decide whether the signal should be returned. * Return the signal's number, or fall through * to clear it from the pending mask. */ switch ((intptr_t)p->p_sigacts->ps_sigact[_SIG_IDX(sig)]) { case (intptr_t)SIG_DFL: /* * Don't take default actions on system processes. */ if (p->p_pid <= 1) { #ifdef DIAGNOSTIC /* * Are you sure you want to ignore SIGSEGV * in init? XXX */ printf("Process (pid %lu) got signal %d\n", (u_long)p->p_pid, sig); #endif break; /* == ignore */ } /* * If there is a pending stop signal to process with * default action, stop here, then clear the signal. * Traced or exiting processes should ignore stops. * Additionally, a member of an orphaned process group * should ignore tty stops. */ if (prop & SA_STOP) { if (p->p_flag & (P_TRACED | P_WEXIT | P_SINGLE_EXIT) || (p->p_pgrp->pg_jobc == 0 && prop & SA_TTYSTOP)) break; /* == ignore */ if (TD_SBDRY_INTR(td)) { KASSERT((td->td_flags & TDF_SBDRY) != 0, ("lost TDF_SBDRY")); return (-1); } mtx_unlock(&ps->ps_mtx); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, "Catching SIGSTOP"); sigqueue_delete(&td->td_sigqueue, sig); sigqueue_delete(&p->p_sigqueue, sig); p->p_flag |= P_STOPPED_SIG; p->p_xsig = sig; PROC_SLOCK(p); sig_suspend_threads(td, p, 0); thread_suspend_switch(td, p); PROC_SUNLOCK(p); mtx_lock(&ps->ps_mtx); goto next; } else if (prop & SA_IGNORE) { /* * Except for SIGCONT, shouldn't get here. * Default action is to ignore; drop it. */ break; /* == ignore */ } else return (sig); /*NOTREACHED*/ case (intptr_t)SIG_IGN: /* * Masking above should prevent us ever trying * to take action on an ignored signal other * than SIGCONT, unless process is traced. */ if ((prop & SA_CONT) == 0 && (p->p_flag & P_TRACED) == 0) printf("issignal\n"); break; /* == ignore */ default: /* * This signal has an action, let * postsig() process it. */ return (sig); } sigqueue_delete(&td->td_sigqueue, sig); /* take the signal! */ sigqueue_delete(&p->p_sigqueue, sig); next:; } /* NOTREACHED */ } void thread_stopped(struct proc *p) { int n; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); n = p->p_suspcount; if (p == curproc) n++; if ((p->p_flag & P_STOPPED_SIG) && (n == p->p_numthreads)) { PROC_SUNLOCK(p); p->p_flag &= ~P_WAITED; PROC_LOCK(p->p_pptr); childproc_stopped(p, (p->p_flag & P_TRACED) ? CLD_TRAPPED : CLD_STOPPED); PROC_UNLOCK(p->p_pptr); PROC_SLOCK(p); } } /* * Take the action for the specified signal * from the current set of pending signals. 
*/ int postsig(int sig) { struct thread *td; struct proc *p; struct sigacts *ps; sig_t action; ksiginfo_t ksi; sigset_t returnmask; KASSERT(sig != 0, ("postsig")); td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, MA_OWNED); ksiginfo_init(&ksi); if (sigqueue_get(&td->td_sigqueue, sig, &ksi) == 0 && sigqueue_get(&p->p_sigqueue, sig, &ksi) == 0) return (0); ksi.ksi_signo = sig; if (ksi.ksi_code == SI_TIMER) itimer_accept(p, ksi.ksi_timerid, &ksi); action = ps->ps_sigact[_SIG_IDX(sig)]; #ifdef KTRACE if (KTRPOINT(td, KTR_PSIG)) ktrpsig(sig, action, td->td_pflags & TDP_OLDMASK ? &td->td_oldsigmask : &td->td_sigmask, ksi.ksi_code); #endif if ((p->p_stops & S_SIG) != 0) { mtx_unlock(&ps->ps_mtx); stopevent(p, S_SIG, sig); mtx_lock(&ps->ps_mtx); } if (action == SIG_DFL) { /* * Default action, where the default is to kill * the process. (Other cases were ignored above.) */ mtx_unlock(&ps->ps_mtx); proc_td_siginfo_capture(td, &ksi.ksi_info); sigexit(td, sig); /* NOTREACHED */ } else { /* * If we get here, the signal must be caught. */ KASSERT(action != SIG_IGN, ("postsig action %p", action)); KASSERT(!SIGISMEMBER(td->td_sigmask, sig), ("postsig action: blocked sig %d", sig)); /* * Set the new mask value and also defer further * occurrences of this signal. * * Special case: user has done a sigsuspend. Here the * current mask is not of interest, but rather the * mask from before the sigsuspend is what we want * restored after the signal processing is completed. */ if (td->td_pflags & TDP_OLDMASK) { returnmask = td->td_oldsigmask; td->td_pflags &= ~TDP_OLDMASK; } else returnmask = td->td_sigmask; if (p->p_sig == sig) { p->p_code = 0; p->p_sig = 0; } (*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask); postsig_done(sig, td, ps); } return (1); } /* * Kill the current process for stated reason. */ void -killproc(p, why) - struct proc *p; - char *why; +killproc(struct proc *p, char *why) { PROC_LOCK_ASSERT(p, MA_OWNED); CTR3(KTR_PROC, "killproc: proc %p (pid %d, %s)", p, p->p_pid, p->p_comm); log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, why); p->p_flag |= P_WKILLED; kern_psignal(p, SIGKILL); } /* * Force the current process to exit with the specified signal, dumping core * if appropriate. We bypass the normal tests for masked and caught signals, * allowing unrecoverable failures to terminate the process without changing * signal state. Mark the accounting record with the signal termination. * If dumping core, save the signal number for the debugger. Calls exit and * does not return. */ void -sigexit(td, sig) - struct thread *td; - int sig; +sigexit(struct thread *td, int sig) { struct proc *p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); p->p_acflag |= AXSIG; /* * We must be single-threading to generate a core dump. This * ensures that the registers in the core file are up-to-date. * Also, the ELF dump handler assumes that the thread list doesn't * change out from under it. * * XXX If another thread attempts to single-thread before us * (e.g. via fork()), we won't get a dump at all. */ if ((sigprop(sig) & SA_CORE) && thread_single(p, SINGLE_NO_EXIT) == 0) { p->p_sig = sig; /* * Log signals which would cause core dumps * (Log as LOG_INFO to appease those who don't want * these messages.) * XXX : Todo, as well as euid, write out ruid too * Note that coredump() drops proc lock. 
*/ if (coredump(td) == 0) sig |= WCOREFLAG; if (kern_logsigexit) log(LOG_INFO, "pid %d (%s), uid %d: exited on signal %d%s\n", p->p_pid, p->p_comm, td->td_ucred ? td->td_ucred->cr_uid : -1, sig &~ WCOREFLAG, sig & WCOREFLAG ? " (core dumped)" : ""); } else PROC_UNLOCK(p); exit1(td, 0, sig); /* NOTREACHED */ } /* * Send the queued SIGCHLD to the parent when the child process's * state changes. */ static void sigparent(struct proc *p, int reason, int status) { PROC_LOCK_ASSERT(p, MA_OWNED); PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED); if (p->p_ksi != NULL) { p->p_ksi->ksi_signo = SIGCHLD; p->p_ksi->ksi_code = reason; p->p_ksi->ksi_status = status; p->p_ksi->ksi_pid = p->p_pid; p->p_ksi->ksi_uid = p->p_ucred->cr_ruid; if (KSI_ONQ(p->p_ksi)) return; } pksignal(p->p_pptr, SIGCHLD, p->p_ksi); } static void childproc_jobstate(struct proc *p, int reason, int sig) { struct sigacts *ps; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED); /* * Wake up the parent sleeping in kern_wait(), and also send * SIGCHLD to it, but note that SIGCHLD does not guarantee * that the parent will awake, because the parent may have * masked the signal. */ p->p_pptr->p_flag |= P_STATCHILD; wakeup(p->p_pptr); ps = p->p_pptr->p_sigacts; mtx_lock(&ps->ps_mtx); if ((ps->ps_flag & PS_NOCLDSTOP) == 0) { mtx_unlock(&ps->ps_mtx); sigparent(p, reason, sig); } else mtx_unlock(&ps->ps_mtx); } void childproc_stopped(struct proc *p, int reason) { childproc_jobstate(p, reason, p->p_xsig); } void childproc_continued(struct proc *p) { childproc_jobstate(p, CLD_CONTINUED, SIGCONT); } void childproc_exited(struct proc *p) { int reason, status; if (WCOREDUMP(p->p_xsig)) { reason = CLD_DUMPED; status = WTERMSIG(p->p_xsig); } else if (WIFSIGNALED(p->p_xsig)) { reason = CLD_KILLED; status = WTERMSIG(p->p_xsig); } else { reason = CLD_EXITED; status = p->p_xexit; } /* * XXX avoid calling wakeup(p->p_pptr), the work is * done in exit1(). */ sigparent(p, reason, status); } /* * We only have 1 character for the core count in the format * string, so the range will be 0-9. */ #define MAX_NUM_CORES 10 static int num_cores = 5; static int sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) { int error; int new_val; new_val = num_cores; error = sysctl_handle_int(oidp, &new_val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (new_val > MAX_NUM_CORES) new_val = MAX_NUM_CORES; if (new_val < 0) new_val = 0; num_cores = new_val; return (0); } SYSCTL_PROC(_debug, OID_AUTO, ncores, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof(int), sysctl_debug_num_cores_check, "I", ""); #define GZ_SUFFIX ".gz" #ifdef GZIO static int compress_user_cores = 1; SYSCTL_INT(_kern, OID_AUTO, compress_user_cores, CTLFLAG_RWTUN, &compress_user_cores, 0, "Compression of user corefiles"); int compress_user_cores_gzlevel = 6; SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_gzlevel, CTLFLAG_RWTUN, &compress_user_cores_gzlevel, 0, "Corefile gzip compression level"); #else static int compress_user_cores = 0; #endif /* * Protect the access to corefilename[] by allproc_lock. 
*/ #define corefilename_lock allproc_lock static char corefilename[MAXPATHLEN] = {"%N.core"}; TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); static int sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) { int error; sx_xlock(&corefilename_lock); error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), req); sx_xunlock(&corefilename_lock); return (error); } SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", "Process corefile name format string"); /* * corefile_open(comm, uid, pid, td, compress, vpp, namep) * Expand the name described in corefilename, using name, uid, and pid * and open/create core file. * corefilename is a printf-like string, with three format specifiers: * %N name of process ("name") * %P process id (pid) * %U user id (uid) * For example, "%N.core" is the default; they can be disabled completely * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". * This is controlled by the sysctl variable kern.corefile (see above). */ static int corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, int compress, struct vnode **vpp, char **namep) { struct nameidata nd; struct sbuf sb; const char *format; char *hostname, *name; int indexpos, i, error, cmode, flags, oflags; hostname = NULL; format = corefilename; name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); indexpos = -1; (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); sx_slock(&corefilename_lock); for (i = 0; format[i] != '\0'; i++) { switch (format[i]) { case '%': /* Format character */ i++; switch (format[i]) { case '%': sbuf_putc(&sb, '%'); break; case 'H': /* hostname */ if (hostname == NULL) { hostname = malloc(MAXHOSTNAMELEN, M_TEMP, M_WAITOK); } getcredhostname(td->td_ucred, hostname, MAXHOSTNAMELEN); sbuf_printf(&sb, "%s", hostname); break; case 'I': /* autoincrementing index */ sbuf_printf(&sb, "0"); indexpos = sbuf_len(&sb) - 1; break; case 'N': /* process name */ sbuf_printf(&sb, "%s", comm); break; case 'P': /* process id */ sbuf_printf(&sb, "%u", pid); break; case 'U': /* user id */ sbuf_printf(&sb, "%u", uid); break; default: log(LOG_ERR, "Unknown format character %c in " "corename `%s'\n", format[i], format); break; } break; default: sbuf_putc(&sb, format[i]); break; } } sx_sunlock(&corefilename_lock); free(hostname, M_TEMP); if (compress) sbuf_printf(&sb, GZ_SUFFIX); if (sbuf_error(&sb) != 0) { log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " "long\n", (long)pid, comm, (u_long)uid); sbuf_delete(&sb); free(name, M_TEMP); return (ENOMEM); } sbuf_finish(&sb); sbuf_delete(&sb); cmode = S_IRUSR | S_IWUSR; oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); /* * If the core format has a %I in it, then we need to check * for existing corefiles before returning a name. * To do this we iterate over 0..num_cores to find a * non-existing core file name to use. 
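 * For example, with kern.corefile set to "%N.%I.core" and the
 * default debug.ncores of 5, dumps of "sh" are tried as
 * "sh.0.core" through "sh.4.core", and the first name that does
 * not already exist is used.
 */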
*/ if (indexpos != -1) { for (i = 0; i < num_cores; i++) { flags = O_CREAT | O_EXCL | FWRITE | O_NOFOLLOW; name[indexpos] = '0' + i; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td); error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, NULL); if (error) { if (error == EEXIST) continue; log(LOG_ERR, "pid %d (%s), uid (%u): Path `%s' failed " "on initial open test, error = %d\n", pid, comm, uid, name, error); } goto out; } } flags = O_CREAT | FWRITE | O_NOFOLLOW; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td); error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, NULL); out: if (error) { #ifdef AUDIT audit_proc_coredump(td, name, error); #endif free(name, M_TEMP); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); *vpp = nd.ni_vp; *namep = name; return (0); } static int coredump_sanitise_path(const char *path) { size_t i; /* * Only send a subset of ASCII to devd(8) because it * might pass these strings to sh -c. */ for (i = 0; path[i]; i++) if (!(isalpha(path[i]) || isdigit(path[i])) && path[i] != '/' && path[i] != '.' && path[i] != '-') return (0); return (1); } /* * Dump a process' core. The main routine does some * policy checking, and creates the name of the coredump; * then it passes on a vnode and a size limit to the process-specific * coredump routine if there is one; if there _is not_ one, it returns * ENOSYS; otherwise it returns the error from the process-specific routine. */ static int coredump(struct thread *td) { struct proc *p = td->td_proc; struct ucred *cred = td->td_ucred; struct vnode *vp; struct flock lf; struct vattr vattr; int error, error1, locked; char *name; /* name of corefile */ void *rl_cookie; off_t limit; char *data = NULL; char *fullpath, *freepath = NULL; size_t len; static const char comm_name[] = "comm="; static const char core_name[] = "core="; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); _STOPEVENT(p, S_CORE, 0); if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) || (p->p_flag2 & P2_NOTRACE) != 0) { PROC_UNLOCK(p); return (EFAULT); } /* * Note that the bulk of limit checking is done after * the corefile is created. The exception is if the limit * for corefiles is 0, in which case we don't bother * creating the corefile at all. This layout means that * a corefile is truncated instead of not being created, * if it is larger than the limit. */ limit = (off_t)lim_cur(td, RLIMIT_CORE); if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { PROC_UNLOCK(p); return (EFBIG); } PROC_UNLOCK(p); error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, compress_user_cores, &vp, &name); if (error != 0) return (error); /* * Don't dump to non-regular files or files with links. * Do not dump into system files. */ if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0) { VOP_UNLOCK(vp, 0); error = EFAULT; goto out; } VOP_UNLOCK(vp, 0); /* Postpone other writers, including core dumps of other processes. 
*/ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_WRLCK; locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); VATTR_NULL(&vattr); vattr.va_size = 0; if (set_core_nodump_flag) vattr.va_flags = UF_NODUMP; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); PROC_LOCK(p); p->p_acflag |= ACORE; PROC_UNLOCK(p); if (p->p_sysent->sv_coredump != NULL) { error = p->p_sysent->sv_coredump(td, vp, limit, compress_user_cores ? IMGACT_CORE_COMPRESS : 0); } else { error = ENOSYS; } if (locked) { lf.l_type = F_UNLCK; VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); } vn_rangelock_unlock(vp, rl_cookie); /* * Notify the userland helper that a process triggered a core dump. * This allows the helper to run an automated debugging session. */ if (error != 0 || coredump_devctl == 0) goto out; len = MAXPATHLEN * 2 + sizeof(comm_name) - 1 + sizeof(' ') + sizeof(core_name) - 1; data = malloc(len, M_TEMP, M_WAITOK); if (vn_fullpath_global(td, p->p_textvp, &fullpath, &freepath) != 0) goto out; if (!coredump_sanitise_path(fullpath)) goto out; snprintf(data, len, "%s%s ", comm_name, fullpath); free(freepath, M_TEMP); freepath = NULL; if (vn_fullpath_global(td, vp, &fullpath, &freepath) != 0) goto out; if (!coredump_sanitise_path(fullpath)) goto out; strlcat(data, core_name, len); strlcat(data, fullpath, len); devctl_notify("kernel", "signal", "coredump", data); out: error1 = vn_close(vp, FWRITE, cred, td); if (error == 0) error = error1; #ifdef AUDIT audit_proc_coredump(td, name, error); #endif free(freepath, M_TEMP); free(data, M_TEMP); free(name, M_TEMP); return (error); } /* * Nonexistent system call-- signal process (may want to handle it). Flag * error in case process won't see signal immediately (blocked or ignored). */ #ifndef _SYS_SYSPROTO_H_ struct nosys_args { int dummy; }; #endif /* ARGSUSED */ int -nosys(td, args) - struct thread *td; - struct nosys_args *args; +nosys(struct thread *td, struct nosys_args *args) { struct proc *p; p = td->td_proc; PROC_LOCK(p); tdsignal(td, SIGSYS); PROC_UNLOCK(p); if (kern_lognosys == 1 || kern_lognosys == 3) { uprintf("pid %d comm %s: nosys %d\n", p->p_pid, p->p_comm, td->td_sa.code); } if (kern_lognosys == 2 || kern_lognosys == 3) { printf("pid %d comm %s: nosys %d\n", p->p_pid, p->p_comm, td->td_sa.code); } return (ENOSYS); } /* * Send a SIGIO or SIGURG signal to a process or process group using stored * credentials rather than those of the current process. 
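 *
 * A minimal userland sketch of what arms this path; fd is assumed to
 * be a descriptor that supports O_ASYNC (a socket or tty), and the
 * helper names are hypothetical:
 *
 *	#include <fcntl.h>
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	static void
 *	on_sigio(int sig)
 *	{
 *		(void)sig;		// async-signal-safe work only
 *	}
 *
 *	static int
 *	enable_sigio(int fd)
 *	{
 *		int fl;
 *
 *		if (signal(SIGIO, on_sigio) == SIG_ERR)
 *			return (-1);
 *		if (fcntl(fd, F_SETOWN, getpid()) == -1)
 *			return (-1);
 *		if ((fl = fcntl(fd, F_GETFL)) == -1)
 *			return (-1);
 *		return (fcntl(fd, F_SETFL, fl | O_ASYNC));
 *	}
 *
 * F_SETOWN is where the kernel records the struct sigio and the
 * caller's credentials that CANSIGIO() checks below.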
*/ void -pgsigio(sigiop, sig, checkctty) - struct sigio **sigiop; - int sig, checkctty; +pgsigio(struct sigio **sigiop, int sig, int checkctty) { ksiginfo_t ksi; struct sigio *sigio; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; SIGIO_LOCK(); sigio = *sigiop; if (sigio == NULL) { SIGIO_UNLOCK(); return; } if (sigio->sio_pgid > 0) { PROC_LOCK(sigio->sio_proc); if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc->p_ucred)) kern_psignal(sigio->sio_proc, sig); PROC_UNLOCK(sigio->sio_proc); } else if (sigio->sio_pgid < 0) { struct proc *p; PGRP_LOCK(sigio->sio_pgrp); LIST_FOREACH(p, &sigio->sio_pgrp->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && CANSIGIO(sigio->sio_ucred, p->p_ucred) && (checkctty == 0 || (p->p_flag & P_CONTROLT))) kern_psignal(p, sig); PROC_UNLOCK(p); } PGRP_UNLOCK(sigio->sio_pgrp); } SIGIO_UNLOCK(); } static int filt_sigattach(struct knote *kn) { struct proc *p = curproc; kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ knlist_add(p->p_klist, kn, 0); return (0); } static void filt_sigdetach(struct knote *kn) { struct proc *p = kn->kn_ptr.p_proc; knlist_remove(p->p_klist, kn, 0); } /* * signal knotes are shared with proc knotes, so we apply a mask to * the hint in order to differentiate them from process hints. This * could be avoided by using a signal-specific knote list, but probably * isn't worth the trouble. */ static int filt_signal(struct knote *kn, long hint) { if (hint & NOTE_SIGNAL) { hint &= ~NOTE_SIGNAL; if (kn->kn_id == hint) kn->kn_data++; } return (kn->kn_data != 0); } struct sigacts * sigacts_alloc(void) { struct sigacts *ps; ps = malloc(sizeof(struct sigacts), M_SUBPROC, M_WAITOK | M_ZERO); refcount_init(&ps->ps_refcnt, 1); mtx_init(&ps->ps_mtx, "sigacts", NULL, MTX_DEF); return (ps); } void sigacts_free(struct sigacts *ps) { if (refcount_release(&ps->ps_refcnt) == 0) return; mtx_destroy(&ps->ps_mtx); free(ps, M_SUBPROC); } struct sigacts * sigacts_hold(struct sigacts *ps) { refcount_acquire(&ps->ps_refcnt); return (ps); } void sigacts_copy(struct sigacts *dest, struct sigacts *src) { KASSERT(dest->ps_refcnt == 1, ("sigacts_copy to shared dest")); mtx_lock(&src->ps_mtx); bcopy(src, dest, offsetof(struct sigacts, ps_refcnt)); mtx_unlock(&src->ps_mtx); } int sigacts_shared(struct sigacts *ps) { return (ps->ps_refcnt > 1); } Index: stable/11/sys/kern/kern_timeout.c =================================================================== --- stable/11/sys/kern/kern_timeout.c (revision 331642) +++ stable/11/sys/kern/kern_timeout.c (revision 331643) @@ -1,1673 +1,1673 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_callout_profiling.h" #include "opt_ddb.h" #if defined(__arm__) #include "opt_timer.h" #endif #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef SMP #include #endif #ifndef NO_EVENTTIMERS DPCPU_DECLARE(sbintime_t, hardclocktime); #endif SDT_PROVIDER_DEFINE(callout_execute); SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); #ifdef CALLOUT_PROFILING static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, "Average number of items examined per softclock call. Units = 1/1000"); static int avg_gcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); static int avg_lockcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); static int avg_depth_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, "Average number of direct callouts examined per callout_process call. " "Units = 1/1000"); static int avg_lockcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " "callout_process call. Units = 1/1000"); static int avg_mpcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, 0, "Average number of MP direct callouts made per callout_process call. 
" "Units = 1/1000"); #endif static int ncallout; SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, "Number of entries in callwheel and size of timeout() preallocation"); #ifdef RSS static int pin_default_swi = 1; static int pin_pcpu_swi = 1; #else static int pin_default_swi = 0; static int pin_pcpu_swi = 0; #endif SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, 0, "Pin the per-CPU swis (except PCPU 0, which is also default"); /* * TODO: * allocate more timeout table slots when table overflows. */ u_int callwheelsize, callwheelmask; /* * The callout cpu exec entities represent informations necessary for * describing the state of callouts currently running on the CPU and the ones * necessary for migrating callouts to the new callout cpu. In particular, * the first entry of the array cc_exec_entity holds informations for callout * running in SWI thread context, while the second one holds informations * for callout running directly from hardware interrupt context. * The cached informations are very important for deferring migration when * the migrating callout is already running. */ struct cc_exec { struct callout *cc_curr; void (*cc_drain)(void *); #ifdef SMP void (*ce_migration_func)(void *); void *ce_migration_arg; int ce_migration_cpu; sbintime_t ce_migration_time; sbintime_t ce_migration_prec; #endif bool cc_cancel; bool cc_waiting; }; /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. */ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; struct callout *cc_next; struct callout *cc_callout; struct callout_list *cc_callwheel; struct callout_tailq cc_expireq; struct callout_slist cc_callfree; sbintime_t cc_firstevent; sbintime_t cc_lastscan; void *cc_cookie; u_int cc_bucket; u_int cc_inited; char cc_ktr_event_name[20]; }; #define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION) #define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr #define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain #define cc_exec_next(cc) cc->cc_next #define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel #define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting #ifdef SMP #define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func #define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg #define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu #define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time #define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) #else struct callout_cpu cc_cpu; #define CC_CPU(cpu) &cc_cpu #define CC_SELF() &cc_cpu #endif #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int timeout_cpu; static void callout_cpu_init(struct callout_cpu *cc, int cpu); static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, #ifdef CALLOUT_PROFILING int *mpcalls, int *lockcalls, int *gcalls, #endif int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", 
"Callout datastructures"); /** * Locked by cc_lock: * cc_curr - If a callout is in progress, it is cc_curr. * If cc_curr is non-NULL, threads waiting in * callout_drain() will be woken up as soon as the * relevant callout completes. * cc_cancel - Changing to 1 with both callout_lock and cc_lock held * guarantees that the current callout will not run. * The softclock() function sets this to 0 before it * drops callout_lock to acquire c_lock, and it calls * the handler only if curr_cancelled is still 0 after * cc_lock is successfully acquired. * cc_waiting - If a thread is waiting in callout_drain(), then * callout_wait is nonzero. Set only when * cc_curr is non-NULL. */ /* * Resets the execution entity tied to a specific callout cpu. */ static void cc_cce_cleanup(struct callout_cpu *cc, int direct) { cc_exec_curr(cc, direct) = NULL; cc_exec_cancel(cc, direct) = false; cc_exec_waiting(cc, direct) = false; #ifdef SMP cc_migration_cpu(cc, direct) = CPUBLOCK; cc_migration_time(cc, direct) = 0; cc_migration_prec(cc, direct) = 0; cc_migration_func(cc, direct) = NULL; cc_migration_arg(cc, direct) = NULL; #endif } /* * Checks if migration is requested by a specific callout cpu. */ static int cc_cce_migrating(struct callout_cpu *cc, int direct) { #ifdef SMP return (cc_migration_cpu(cc, direct) != CPUBLOCK); #else return (0); #endif } /* * Kernel low level callwheel initialization * called on cpu0 during kernel startup. */ static void callout_callwheel_init(void *dummy) { struct callout_cpu *cc; /* * Calculate the size of the callout wheel and the preallocated * timeout() structures. * XXX: Clip callout to result of previous function of maxusers * maximum 384. This is still huge, but acceptable. */ memset(CC_CPU(0), 0, sizeof(cc_cpu)); ncallout = imin(16 + maxproc + maxfiles, 18508); TUNABLE_INT_FETCH("kern.ncallout", &ncallout); /* * Calculate callout wheel size, should be next power of two higher * than 'ncallout'. */ callwheelsize = 1 << fls(ncallout); callwheelmask = callwheelsize - 1; /* * Fetch whether we're pinning the swi's or not. */ TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi); TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi); /* * Only cpu0 handles timeout(9) and receives a preallocation. * * XXX: Once all timeout(9) consumers are converted this can * be removed. */ timeout_cpu = PCPU_GET(cpuid); cc = CC_CPU(timeout_cpu); cc->cc_callout = malloc(ncallout * sizeof(struct callout), M_CALLOUT, M_WAITOK); callout_cpu_init(cc, timeout_cpu); } SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); /* * Initialize the per-cpu callout structures. */ static void callout_cpu_init(struct callout_cpu *cc, int cpu) { struct callout *c; int i; mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); cc->cc_inited = 1; cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, M_CALLOUT, M_WAITOK); for (i = 0; i < callwheelsize; i++) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); cc->cc_firstevent = SBT_MAX; for (i = 0; i < 2; i++) cc_cce_cleanup(cc, i); snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ return; for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); c->c_iflags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } #ifdef SMP /* * Switches the cpu tied to a specific callout. 
* The function expects a locked incoming callout cpu and returns with * the outgoing callout cpu locked. */ static struct callout_cpu * callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) { struct callout_cpu *new_cc; MPASS(c != NULL && cc != NULL); CC_LOCK_ASSERT(cc); /* * Avoid interrupts and preemption firing after the callout cpu * is blocked in order to avoid deadlocks as the new thread * may try to acquire the callout cpu lock. */ c->c_cpu = CPUBLOCK; spinlock_enter(); CC_UNLOCK(cc); new_cc = CC_CPU(new_cpu); CC_LOCK(new_cc); spinlock_exit(); c->c_cpu = new_cpu; return (new_cc); } #endif /* * Start standard softclock thread. */ static void start_softclock(void *dummy) { struct callout_cpu *cc; char name[MAXCOMLEN]; #ifdef SMP int cpu; struct intr_event *ie; #endif cc = CC_CPU(timeout_cpu); snprintf(name, sizeof(name), "clock (%d)", timeout_cpu); if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); if (pin_default_swi && (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) { printf("%s: timeout clock couldn't be pinned to cpu %d\n", __func__, timeout_cpu); } #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ callout_cpu_init(cc, cpu); snprintf(name, sizeof(name), "clock (%d)", cpu); ie = NULL; if (swi_add(&ie, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) { printf("%s: per-cpu clock couldn't be pinned to " "cpu %d\n", __func__, cpu); } } #endif } SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); #define CC_HASH_SHIFT 8 static inline u_int callout_hash(sbintime_t sbt) { return (sbt >> (32 - CC_HASH_SHIFT)); } static inline u_int callout_get_bucket(sbintime_t sbt) { return (callout_hash(sbt) & callwheelmask); } void callout_process(sbintime_t now) { struct callout *tmp, *tmpn; struct callout_cpu *cc; struct callout_list *sc; sbintime_t first, last, max, tmp_max; uint32_t lookahead; u_int firstb, lastb, nowb; #ifdef CALLOUT_PROFILING int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; #endif cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); /* Compute the buckets of the last scan and present times. */ firstb = callout_hash(cc->cc_lastscan); cc->cc_lastscan = now; nowb = callout_hash(now); /* Compute the last bucket and minimum time of the bucket after it. */ if (nowb == firstb) lookahead = (SBT_1S / 16); else if (nowb - firstb == 1) lookahead = (SBT_1S / 8); else lookahead = (SBT_1S / 2); first = last = now; first += (lookahead / 2); last += lookahead; last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); lastb = callout_hash(last) - 1; max = last; /* * Check if we wrapped around the entire wheel since the last scan. * In that case, we need to scan the entire wheel for pending callouts. */ if (lastb - firstb >= callwheelsize) { lastb = firstb + callwheelsize - 1; if (nowb - firstb >= callwheelsize) nowb = lastb; } /* Iterate callwheel from firstb to nowb and then up to lastb. */ do { sc = &cc->cc_callwheel[firstb & callwheelmask]; tmp = LIST_FIRST(sc); while (tmp != NULL) { /* Run the callout if the present time is within its allowed window. */ if (tmp->c_time <= now) { /* * Consumer told us the callout may be run * directly from hardware interrupt context.
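 *
 * (The bucket arithmetic used throughout callout_process() is compact
 * enough to sketch on its own: sbintime_t carries seconds in its top
 * 32 bits, so with CC_HASH_SHIFT == 8 one bucket spans 1/256 s.  A
 * standalone sketch with a hypothetical 16384-entry wheel:
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	#define CC_HASH_SHIFT	8
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t sbt_1s = (uint64_t)1 << 32;	// SBT_1S
 *		unsigned int mask = (1U << 14) - 1;	// callwheelmask
 *		unsigned int hash = sbt_1s >> (32 - CC_HASH_SHIFT);
 *
 *		printf("t=1s -> hash %u, bucket %u\n",
 *		    hash, hash & mask);			// 256, 256
 *		return (0);
 *	}
 *
 * Consecutive buckets are therefore 1/256 s apart and the mask simply
 * wraps the hash onto the wheel.)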
*/ if (tmp->c_iflags & CALLOUT_DIRECT) { #ifdef CALLOUT_PROFILING ++depth_dir; #endif cc_exec_next(cc) = LIST_NEXT(tmp, c_links.le); cc->cc_bucket = firstb & callwheelmask; LIST_REMOVE(tmp, c_links.le); softclock_call_cc(tmp, cc, #ifdef CALLOUT_PROFILING &mpcalls_dir, &lockcalls_dir, NULL, #endif 1); tmp = cc_exec_next(cc); cc_exec_next(cc) = NULL; } else { tmpn = LIST_NEXT(tmp, c_links.le); LIST_REMOVE(tmp, c_links.le); TAILQ_INSERT_TAIL(&cc->cc_expireq, tmp, c_links.tqe); tmp->c_iflags |= CALLOUT_PROCESSED; tmp = tmpn; } continue; } /* Skip events from distant future. */ if (tmp->c_time >= max) goto next; /* * Event minimal time is bigger than present maximal * time, so it cannot be aggregated. */ if (tmp->c_time > last) { lastb = nowb; goto next; } /* Update first and last time, respecting this event. */ if (tmp->c_time < first) first = tmp->c_time; tmp_max = tmp->c_time + tmp->c_precision; if (tmp_max < last) last = tmp_max; next: tmp = LIST_NEXT(tmp, c_links.le); } /* Proceed with the next bucket. */ firstb++; /* * Stop if we looked after present time and found * some event we can't execute at now. * Stop if we looked far enough into the future. */ } while (((int)(firstb - lastb)) <= 0); cc->cc_firstevent = last; #ifndef NO_EVENTTIMERS cpu_new_callout(curcpu, last, first); #endif #ifdef CALLOUT_PROFILING avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; #endif mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); /* * swi_sched acquires the thread lock, so we don't want to call it * with cc_lock held; incorrect locking order. */ if (!TAILQ_EMPTY(&cc->cc_expireq)) swi_sched(cc->cc_cookie, 0); } static struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; int cpu; for (;;) { cpu = c->c_cpu; #ifdef SMP if (cpu == CPUBLOCK) { while (c->c_cpu == CPUBLOCK) cpu_spinwait(); continue; } #endif cc = CC_CPU(cpu); CC_LOCK(cc); if (cpu == c->c_cpu) break; CC_UNLOCK(cc); } return (cc); } static void callout_cc_add(struct callout *c, struct callout_cpu *cc, sbintime_t sbt, sbintime_t precision, void (*func)(void *), void *arg, int cpu, int flags) { int bucket; CC_LOCK_ASSERT(cc); if (sbt < cc->cc_lastscan) sbt = cc->cc_lastscan; c->c_arg = arg; c->c_iflags |= CALLOUT_PENDING; c->c_iflags &= ~CALLOUT_PROCESSED; c->c_flags |= CALLOUT_ACTIVE; if (flags & C_DIRECT_EXEC) c->c_iflags |= CALLOUT_DIRECT; c->c_func = func; c->c_time = sbt; c->c_precision = precision; bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); if (cc->cc_bucket == bucket) cc_exec_next(cc) = c; #ifndef NO_EVENTTIMERS /* * Inform the eventtimers(4) subsystem there's a new callout * that has been inserted, but only if really required. 
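 *
 * (An illustrative consumer-side sketch, not code from this file: a
 * caller that can tolerate slack passes a precision so that nearby
 * callouts can share one eventtimer interrupt.  my_co and my_fn are
 * hypothetical:
 *
 *	static struct callout my_co;
 *	static void my_fn(void *arg);
 *
 *	static void
 *	schedule_it(void)
 *	{
 *		// ~500ms out, tolerating up to 1/8 of that as slack
 *		callout_reset_sbt(&my_co, 500 * SBT_1MS, 0, my_fn,
 *		    NULL, C_PREL(3));
 *	}
 *
 * The slack becomes c_precision above, which widens the
 * [c_time, c_time + c_precision] window that callout_process() uses
 * when aggregating wakeups.)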
*/ if (SBT_MAX - c->c_time < c->c_precision) c->c_precision = SBT_MAX - c->c_time; sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; cpu_new_callout(cpu, sbt, c->c_time); } #endif } static void callout_cc_del(struct callout *c, struct callout_cpu *cc) { if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0) return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, #ifdef CALLOUT_PROFILING int *mpcalls, int *lockcalls, int *gcalls, #endif int direct) { struct rm_priotracker tracker; void (*c_func)(void *); void *c_arg; struct lock_class *class; struct lock_object *c_lock; uintptr_t lock_status; int c_iflags; #ifdef SMP struct callout_cpu *new_cc; void (*new_func)(void *); void *new_arg; int flags, new_cpu; sbintime_t new_prec, new_time; #endif #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ static timeout_t *lastfunc; #endif KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING, ("softclock_call_cc: pend %p %x", c, c->c_iflags)); KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE, ("softclock_call_cc: act %p %x", c, c->c_flags)); class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; lock_status = 0; if (c->c_flags & CALLOUT_SHAREDLOCK) { if (class == &lock_class_rm) lock_status = (uintptr_t)&tracker; else lock_status = 1; } c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; c_iflags = c->c_iflags; if (c->c_iflags & CALLOUT_LOCAL_ALLOC) c->c_iflags = CALLOUT_LOCAL_ALLOC; else c->c_iflags &= ~CALLOUT_PENDING; cc_exec_curr(cc, direct) = c; cc_exec_cancel(cc, direct) = false; cc_exec_drain(cc, direct) = NULL; CC_UNLOCK(cc); if (c_lock != NULL) { class->lc_lock(c_lock, lock_status); /* * The callout may have been cancelled * while we switched locks. */ if (cc_exec_cancel(cc, direct)) { class->lc_unlock(c_lock); goto skip; } /* The callout cannot be stopped now. 
*/ cc_exec_cancel(cc, direct) = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING (*gcalls)++; #endif CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { #ifdef CALLOUT_PROFILING (*lockcalls)++; #endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { #ifdef CALLOUT_PROFILING (*mpcalls)++; #endif CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt1 = sbinuptime(); #endif THREAD_NO_SLEEPING(); SDT_PROBE1(callout_execute, , , callout__start, c); c_func(c_arg); SDT_PROBE1(callout_execute, , , callout__end, c); THREAD_SLEEPING_OK(); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt2 = sbinuptime(); sbt2 -= sbt1; if (sbt2 > maxdt) { if (lastfunc != c_func || sbt2 > maxdt * 2) { ts2 = sbttots(sbt2); printf( "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); } maxdt = sbt2; lastfunc = c_func; } #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0) class->lc_unlock(c_lock); skip: CC_LOCK(cc); KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); cc_exec_curr(cc, direct) = NULL; if (cc_exec_drain(cc, direct)) { void (*drain)(void *); drain = cc_exec_drain(cc, direct); cc_exec_drain(cc, direct) = NULL; CC_UNLOCK(cc); drain(c_arg); CC_LOCK(cc); } if (cc_exec_waiting(cc, direct)) { /* * There is someone waiting for the * callout to complete. * If the callout was scheduled for * migration just cancel it. */ if (cc_cce_migrating(cc, direct)) { cc_cce_cleanup(cc, direct); /* * It would be nice to assert here that the callout * is not destroyed, but that is not easy. */ c->c_iflags &= ~CALLOUT_DFRMIGRATION; } cc_exec_waiting(cc, direct) = false; CC_UNLOCK(cc); wakeup(&cc_exec_waiting(cc, direct)); CC_LOCK(cc); } else if (cc_cce_migrating(cc, direct)) { KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0, ("Migrating legacy callout %p", c)); #ifdef SMP /* * If the callout was scheduled for * migration just perform it now. */ new_cpu = cc_migration_cpu(cc, direct); new_time = cc_migration_time(cc, direct); new_prec = cc_migration_prec(cc, direct); new_func = cc_migration_func(cc, direct); new_arg = cc_migration_arg(cc, direct); cc_cce_cleanup(cc, direct); /* * It would be nice to assert here that the callout is not * destroyed, but that is not easy. * * First, handle deferred callout stops. */ if (!callout_migrating(c)) { CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); callout_cc_del(c, cc); return; } c->c_iflags &= ~CALLOUT_DFRMIGRATION; new_cc = callout_cpu_switch(c, cc, new_cpu); flags = (direct) ? C_DIRECT_EXEC : 0; callout_cc_add(c, new_cc, new_time, new_prec, new_func, new_arg, new_cpu, flags); CC_UNLOCK(new_cc); CC_LOCK(cc); #else panic("migration should not happen"); #endif } /* * If the current callout is locally allocated (from * timeout(9)) then put it on the freelist. * * Note: we need to check the cached copy of c_iflags because * if it was not local, then it's not safe to deref the * callout pointer.
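 *
 * (The avg_* statistics maintained in softclock() just below rely on
 * a small fixed-point trick worth spelling out: values are scaled by
 * 1000 and each sample moves the average by 1/256 of the difference,
 * i.e. an exponential moving average.  A standalone sketch:
 *
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int avg = 0;			// scaled by 1000
 *		int sample[] = { 4, 4, 5, 3, 4 };
 *		int i;
 *
 *		for (i = 0; i < 5; i++)
 *			avg += (sample[i] * 1000 - avg) >> 8;
 *		printf("avg ~ %d.%03d\n", avg / 1000, avg % 1000);
 *		return (0);
 *	}
 *
 * After only five samples of ~4 the average has crept to just 0.075;
 * the small 1/256 gain deliberately smooths over hundreds of calls.)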
*/ KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 || c->c_iflags == CALLOUT_LOCAL_ALLOC, ("corrupted callout")); if (c_iflags & CALLOUT_LOCAL_ALLOC) callout_cc_del(c, cc); } /* * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures * used in this implementation was published by G. Varghese and T. Lauck in * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for * the Efficient Implementation of a Timer Facility" in the Proceedings of * the 11th ACM Annual Symposium on Operating Systems Principles, * Austin, Texas Nov 1987. */ /* * Software (low priority) clock interrupt. * Run periodic events from timeout queue. */ void softclock(void *arg) { struct callout_cpu *cc; struct callout *c; #ifdef CALLOUT_PROFILING int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; #endif cc = (struct callout_cpu *)arg; CC_LOCK(cc); while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); softclock_call_cc(c, cc, #ifdef CALLOUT_PROFILING &mpcalls, &lockcalls, &gcalls, #endif 0); #ifdef CALLOUT_PROFILING ++depth; #endif } #ifdef CALLOUT_PROFILING avg_depth += (depth * 1000 - avg_depth) >> 8; avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; #endif CC_UNLOCK(cc); } /* * timeout -- * Execute a function after a specified length of time. * * untimeout -- * Cancel previous timeout function call. * * callout_handle_init -- * Initialize a handle so that using it with untimeout is benign. * * See AT&T BCI Driver Reference Manual for specification. This * implementation differs from that one in that although an * identification value is returned from timeout, the original * arguments to timeout as well as the identifier are used to * identify entries for untimeout. */ struct callout_handle timeout(timeout_t *ftn, void *arg, int to_ticks) { struct callout_cpu *cc; struct callout *new; struct callout_handle handle; cc = CC_CPU(timeout_cpu); CC_LOCK(cc); /* Fill in the next free callout structure. */ new = SLIST_FIRST(&cc->cc_callfree); if (new == NULL) /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); return (handle); } void untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; /* * Check for a handle that was initialized * by callout_handle_init, but never used * for a real timeout. */ if (handle.callout == NULL) return; cc = callout_lock(handle.callout); if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) callout_stop(handle.callout); CC_UNLOCK(cc); } void callout_handle_init(struct callout_handle *handle) { handle->callout = NULL; } void callout_when(sbintime_t sbt, sbintime_t precision, int flags, sbintime_t *res, sbintime_t *prec_res) { sbintime_t to_sbt, to_pr; if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) { *res = sbt; *prec_res = precision; return; } if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt) sbt = tick_sbt; if ((flags & C_HARDCLOCK) != 0 || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { to_sbt = getsbinuptime(); /* Add safety belt for the case of hz > 1000. 
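 *
 * (An illustrative use of the legacy interface documented above; the
 * names my_handle and my_tick are hypothetical:
 *
 *	static struct callout_handle my_handle;
 *
 *	static void
 *	my_tick(void *arg)
 *	{
 *		// runs once, roughly hz ticks after timeout()
 *	}
 *
 *	static void
 *	my_start(void)
 *	{
 *		callout_handle_init(&my_handle);
 *		my_handle = timeout(my_tick, NULL, hz);
 *	}
 *
 *	static void
 *	my_stop(void)
 *	{
 *		untimeout(my_tick, NULL, my_handle);
 *	}
 *
 * Note how untimeout() takes the original function and argument as
 * well as the handle, matching the identification rule above.)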
*/ to_sbt += tc_tick_sbt - tick_sbt; #else sbt >= sbt_tickthreshold) { /* * Obtain the time of the last hardclock() call on * this CPU directly from the kern_clocksource.c. * This value is per-CPU, but it is equal for all * active ones. */ #ifdef __LP64__ to_sbt = DPCPU_GET(hardclocktime); #else spinlock_enter(); to_sbt = DPCPU_GET(hardclocktime); spinlock_exit(); #endif #endif if (cold && to_sbt == 0) to_sbt = sbinuptime(); if ((flags & C_HARDCLOCK) == 0) to_sbt += tick_sbt; } else to_sbt = sbinuptime(); if (SBT_MAX - to_sbt < sbt) to_sbt = SBT_MAX; else to_sbt += sbt; *res = to_sbt; to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : sbt >> C_PRELGET(flags)); *prec_res = to_pr > precision ? to_pr : precision; } /* * New interface; clients allocate their own callout structures. * * callout_reset() - establish or change a timeout * callout_stop() - disestablish a timeout * callout_init() - initialize a callout structure so that it can * safely be passed to callout_reset() and callout_stop() * * defines three convenience macros: * * callout_active() - returns truth if callout has not been stopped, * drained, or deactivated since the last time the callout was * reset. * callout_pending() - returns truth if callout is still waiting for timeout * callout_deactivate() - marks the callout as having been serviced */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec, void (*ftn)(void *), void *arg, int cpu, int flags) { sbintime_t to_sbt, precision; struct callout_cpu *cc; int cancelled, direct; int ignore_cpu=0; cancelled = 0; if (cpu == -1) { ignore_cpu = 1; } else if ((cpu >= MAXCPU) || ((CC_CPU(cpu))->cc_inited == 0)) { /* Invalid CPU spec */ panic("Invalid CPU in callout %d", cpu); } callout_when(sbt, prec, flags, &to_sbt, &precision); /* * This flag used to be added by callout_cc_add, but the * first time you call this we could end up with the * wrong direct flag if we don't do it before we add. */ if (flags & C_DIRECT_EXEC) { direct = 1; } else { direct = 0; } KASSERT(!direct || c->c_lock == NULL, ("%s: direct callout %p has lock", __func__, c)); cc = callout_lock(c); /* * Don't allow migration of pre-allocated callouts lest they * become unbalanced or handle the case where the user does * not care. */ if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) || ignore_cpu) { cpu = c->c_cpu; } if (cc_exec_curr(cc, direct) == c) { /* * We're being asked to reschedule a callout which is * currently in progress. If there is a lock then we * can cancel the callout if it has not really started. */ if (c->c_lock != NULL && !cc_exec_cancel(cc, direct)) cancelled = cc_exec_cancel(cc, direct) = true; if (cc_exec_waiting(cc, direct)) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. */ CTR4(KTR_CALLOUT, "%s %p func %p arg %p", cancelled ? "cancelled" : "failed to cancel", c, c->c_func, c->c_arg); CC_UNLOCK(cc); return (cancelled); } #ifdef SMP if (callout_migrating(c)) { /* * This only occurs when a second callout_reset_sbt_on * is made after a previous one moved it into * deferred migration (below). Note we do *not* change * the prev_cpu even though the previous target may * be different. 
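 *
 * (For reference, the consumer-side call that triggers this deferral
 * is simply a reset naming a different CPU while the handler happens
 * to be running; my_co and my_fn are hypothetical:
 *
 *	static struct callout my_co;
 *	static void my_fn(void *arg);
 *
 *	static void
 *	arm_on(int cpu)
 *	{
 *		callout_reset_sbt_on(&my_co, SBT_1S, 0, my_fn, NULL,
 *		    cpu, 0);
 *	}
 *
 * In that case the code below only records the new target; the actual
 * switch is performed by softclock_call_cc() once the handler returns.)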
*/ cc_migration_cpu(cc, direct) = cpu; cc_migration_time(cc, direct) = to_sbt; cc_migration_prec(cc, direct) = precision; cc_migration_func(cc, direct) = ftn; cc_migration_arg(cc, direct) = arg; cancelled = 1; CC_UNLOCK(cc); return (cancelled); } #endif } if (c->c_iflags & CALLOUT_PENDING) { if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { if (cc_exec_next(cc) == c) cc_exec_next(cc) = LIST_NEXT(c, c_links.le); LIST_REMOVE(c, c_links.le); } else { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); } cancelled = 1; c->c_iflags &= ~ CALLOUT_PENDING; c->c_flags &= ~ CALLOUT_ACTIVE; } #ifdef SMP /* * If the callout must migrate try to perform it immediately. * If the callout is currently running, just defer the migration * to a more appropriate moment. */ if (c->c_cpu != cpu) { if (cc_exec_curr(cc, direct) == c) { /* * Pending will have been removed since we are * actually executing the callout on another * CPU. That callout should be waiting on the * lock the caller holds. If we set both * active/and/pending after we return and the * lock on the executing callout proceeds, it * will then see pending is true and return. * At the return from the actual callout execution * the migration will occur in softclock_call_cc * and this new callout will be placed on the * new CPU via a call to callout_cpu_switch() which * will get the lock on the right CPU followed * by a call callout_cc_add() which will add it there. * (see above in softclock_call_cc()). */ cc_migration_cpu(cc, direct) = cpu; cc_migration_time(cc, direct) = to_sbt; cc_migration_prec(cc, direct) = precision; cc_migration_func(cc, direct) = ftn; cc_migration_arg(cc, direct) = arg; c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING); c->c_flags |= CALLOUT_ACTIVE; CTR6(KTR_CALLOUT, "migration of %p func %p arg %p in %d.%08x to %u deferred", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), (u_int)(to_sbt & 0xffffffff), cpu); CC_UNLOCK(cc); return (cancelled); } cc = callout_cpu_switch(c, cc, cpu); } #endif callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), (u_int)(to_sbt & 0xffffffff)); CC_UNLOCK(cc); return (cancelled); } /* * Common idioms that can be optimized in the future. */ int callout_schedule_on(struct callout *c, int to_ticks, int cpu) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu); } int callout_schedule(struct callout *c, int to_ticks) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu); } int _callout_stop_safe(struct callout *c, int flags, void (*drain)(void *)) { struct callout_cpu *cc, *old_cc; struct lock_class *class; int direct, sq_locked, use_lock; int cancelled, not_on_a_list; if ((flags & CS_DRAIN) != 0) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, "calling %s", __func__); /* * Some old subsystems don't hold Giant while running a callout_stop(), * so just discard this check for the moment. 
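 *
 * (The WITNESS_WARN above enforces a familiar rule: a CS_DRAIN stop,
 * i.e. callout_drain(), may sleep, so callers must not hold
 * non-sleepable locks across it.  A typical detach-time sketch, with
 * sc, sc_mtx and sc_co hypothetical:
 *
 *	callout_stop(&sc->sc_co);	// under sc_mtx: no new runs
 *	mtx_unlock(&sc->sc_mtx);
 *	callout_drain(&sc->sc_co);	// may sleep on a running handler
 *
 * Only after callout_drain() returns is it safe to free the memory
 * holding sc_co.)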
*/ if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) { if (c->c_lock == &Giant.lock_object) use_lock = mtx_owned(&Giant); else { use_lock = 1; class = LOCK_CLASS(c->c_lock); class->lc_assert(c->c_lock, LA_XLOCKED); } } else use_lock = 0; if (c->c_iflags & CALLOUT_DIRECT) { direct = 1; } else { direct = 0; } sq_locked = 0; old_cc = NULL; again: cc = callout_lock(c); if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) == (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) && ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) { /* * Special case where this slipped in while we * were migrating *as* the callout is about to * execute. The caller probably holds the lock * the callout wants. * * Get rid of the migration first. Then set * the flag that tells this code *not* to * try to remove it from any lists (it's not * on one yet). When the callout wheel runs, * it will ignore this callout. */ c->c_iflags &= ~CALLOUT_PENDING; c->c_flags &= ~CALLOUT_ACTIVE; not_on_a_list = 1; } else { not_on_a_list = 0; } /* * If the callout was migrating while the callout cpu lock was * dropped, just drop the sleepqueue lock and check the states * again. */ if (sq_locked != 0 && cc != old_cc) { #ifdef SMP CC_UNLOCK(cc); sleepq_release(&cc_exec_waiting(old_cc, direct)); sq_locked = 0; old_cc = NULL; goto again; #else panic("migration should not happen"); #endif } /* * If the callout is running, try to stop it or drain it. */ if (cc_exec_curr(cc, direct) == c) { /* * Whether we succeed in stopping it or not, we must clear the * active flag - this is what API users expect. If we're * draining and the callout is currently executing, first wait * until it finishes. */ if ((flags & CS_DRAIN) == 0) c->c_flags &= ~CALLOUT_ACTIVE; if ((flags & CS_DRAIN) != 0) { /* * The current callout is running (or just * about to run) and blocking is allowed, so * just wait for the current invocation to * finish. */ while (cc_exec_curr(cc, direct) == c) { /* * Use direct calls to sleepqueue interface * instead of cv/msleep in order to avoid * a LOR between cc_lock and sleepqueue * chain spinlocks. This piece of code * actually emulates an msleep_spin() call. * * If we already have the sleepqueue chain * locked, then we can safely block. If we * don't already have it locked, however, * we have to drop the cc_lock to lock * it. This opens several races, so we * restart at the beginning once we have * both locks. If nothing has changed, then * we will end up back here with sq_locked * set. */ if (!sq_locked) { CC_UNLOCK(cc); sleepq_lock( &cc_exec_waiting(cc, direct)); sq_locked = 1; old_cc = cc; goto again; } /* * Migration could be cancelled here, but * as long as it is still not certain when it * will be wrapped up, just let softclock() * take care of it. */ cc_exec_waiting(cc, direct) = true; DROP_GIANT(); CC_UNLOCK(cc); sleepq_add( &cc_exec_waiting(cc, direct), &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0); sleepq_wait( &cc_exec_waiting(cc, direct), 0); sq_locked = 0; old_cc = NULL; /* Reacquire locks previously released. */ PICKUP_GIANT(); CC_LOCK(cc); } c->c_flags &= ~CALLOUT_ACTIVE; } else if (use_lock && !cc_exec_cancel(cc, direct) && (drain == NULL)) { /* * The current callout is waiting for its * lock which we hold. Cancel the callout * and return. After our caller drops the * lock, the callout will be skipped in * softclock(). This *only* works with a * callout_stop() *not* callout_drain() or * callout_async_drain(). */ cc_exec_cancel(cc, direct) = true; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); KASSERT(!cc_cce_migrating(cc, direct), ("callout wrongly scheduled for migration")); if (callout_migrating(c)) { c->c_iflags &= ~CALLOUT_DFRMIGRATION; #ifdef SMP cc_migration_cpu(cc, direct) = CPUBLOCK; cc_migration_time(cc, direct) = 0; cc_migration_prec(cc, direct) = 0; cc_migration_func(cc, direct) = NULL; cc_migration_arg(cc, direct) = NULL; #endif } CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain locked")); return (1); } else if (callout_migrating(c)) { /* * The callout is currently being serviced * and the "next" callout is scheduled at * its completion with a migration. We remove * the migration flag so it *won't* get rescheduled, * but we can't stop the one that's running so * we return 0. */ c->c_iflags &= ~CALLOUT_DFRMIGRATION; #ifdef SMP /* * We can't call cc_cce_cleanup here since * if we do it will remove .ce_curr and * it's still running. This will prevent a * reschedule of the callout when the * execution completes. */ cc_migration_cpu(cc, direct) = CPUBLOCK; cc_migration_time(cc, direct) = 0; cc_migration_prec(cc, direct) = 0; cc_migration_func(cc, direct) = NULL; cc_migration_arg(cc, direct) = NULL; #endif CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", c, c->c_func, c->c_arg); if (drain) { cc_exec_drain(cc, direct) = drain; } CC_UNLOCK(cc); return ((flags & CS_EXECUTING) != 0); } CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); if (drain) { cc_exec_drain(cc, direct) = drain; } KASSERT(!sq_locked, ("sleepqueue chain still locked")); cancelled = ((flags & CS_EXECUTING) != 0); } else cancelled = 1; if (sq_locked) sleepq_release(&cc_exec_waiting(cc, direct)); if ((c->c_iflags & CALLOUT_PENDING) == 0) { CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); /* * For a callout that is neither scheduled nor executing, * return a negative value. */ if (cc_exec_curr(cc, direct) != c) cancelled = -1; CC_UNLOCK(cc); return (cancelled); } c->c_iflags &= ~CALLOUT_PENDING; c->c_flags &= ~CALLOUT_ACTIVE; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); if (not_on_a_list == 0) { if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { if (cc_exec_next(cc) == c) cc_exec_next(cc) = LIST_NEXT(c, c_links.le); LIST_REMOVE(c, c_links.le); } else { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); } } callout_cc_del(c, cc); CC_UNLOCK(cc); return (cancelled); } void callout_init(struct callout *c, int mpsafe) { bzero(c, sizeof *c); if (mpsafe) { c->c_lock = NULL; c->c_iflags = CALLOUT_RETURNUNLOCKED; } else { c->c_lock = &Giant.lock_object; c->c_iflags = 0; } c->c_cpu = timeout_cpu; } void _callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); c->c_lock = lock; KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, ("callout_init_lock: bad flags %d", flags)); KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", __func__)); c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); c->c_cpu = timeout_cpu; } #ifdef APM_FIXUP_CALLTODO /* * Adjust the kernel calltodo timeout list. This routine is used after * an APM resume to recalculate the calltodo timer list values with the * number of hz's we have been sleeping.
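 *
 * (Summarizing the return-value contract implemented above for
 * callout_stop(9); sc is a hypothetical caller-side softc:
 *
 *	switch (callout_stop(&sc->sc_co)) {
 *	case 1:		// cancelled while still pending
 *		break;
 *	case 0:		// too late: the handler is running (or ran)
 *		break;
 *	case -1:	// was neither scheduled nor executing
 *		break;
 *	}
 *
 * callout_drain() additionally waits for any in-flight handler to
 * finish before returning.)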
The next hardclock() will detect * that there are fired timers and run softclock() to execute them. * * Please note, I have not done an exhaustive analysis of what code this * might break. I am motivated to have my select()'s and alarm()'s that * have expired during suspend firing upon resume so that the applications * which set the timer can do the maintenance the timer was for as close * as possible to the originally intended time. Testing this code for a * week showed that resuming from a suspend resulted in 22 to 25 timers * firing, which seemed independent of whether the suspend was 2 hours or * 2 days. Your mileage may vary. - Ken Key */ void adjust_timeout_calltodo(struct timeval *time_change) { - register struct callout *p; + struct callout *p; unsigned long delta_ticks; /* * How many ticks were we asleep? * (stolen from tvtohz()). */ /* Don't do anything */ if (time_change->tv_sec < 0) return; else if (time_change->tv_sec <= LONG_MAX / 1000000) delta_ticks = howmany(time_change->tv_sec * 1000000 + time_change->tv_usec, tick) + 1; else if (time_change->tv_sec <= LONG_MAX / hz) delta_ticks = time_change->tv_sec * hz + howmany(time_change->tv_usec, tick) + 1; else delta_ticks = LONG_MAX; if (delta_ticks > INT_MAX) delta_ticks = INT_MAX; /* * Now rip through the timer calltodo list looking for timers * to expire. */ /* don't collide with softclock() */ CC_LOCK(cc); for (p = calltodo.c_next; p != NULL; p = p->c_next) { p->c_time -= delta_ticks; /* Break if the timer had more time on it than delta_ticks */ if (p->c_time > 0) break; /* take back the ticks the timer didn't use (p->c_time <= 0) */ delta_ticks = -p->c_time; } CC_UNLOCK(cc); return; } #endif /* APM_FIXUP_CALLTODO */ static int flssbt(sbintime_t sbt) { sbt += (uint64_t)sbt >> 1; if (sizeof(long) >= sizeof(sbintime_t)) return (flsl(sbt)); if (sbt >= SBT_1S) return (flsl(((uint64_t)sbt) >> 32) + 32); return (flsl(sbt)); } /* * Dump immediate statistic snapshot of the scheduled callouts. */ static int sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) { struct callout *tmp; struct callout_cpu *cc; struct callout_list *sc; sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; int ct[64], cpr[64], ccpbk[32]; int error, val, i, count, tcum, pcum, maxc, c, medc; #ifdef SMP int cpu; #endif val = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); count = maxc = 0; st = spr = maxt = maxpr = 0; bzero(ccpbk, sizeof(ccpbk)); bzero(ct, sizeof(ct)); bzero(cpr, sizeof(cpr)); now = sbinuptime(); #ifdef SMP CPU_FOREACH(cpu) { cc = CC_CPU(cpu); #else cc = CC_CPU(timeout_cpu); #endif CC_LOCK(cc); for (i = 0; i < callwheelsize; i++) { sc = &cc->cc_callwheel[i]; c = 0; LIST_FOREACH(tmp, sc, c_links.le) { c++; t = tmp->c_time - now; if (t < 0) t = 0; st += t / SBT_1US; spr += tmp->c_precision / SBT_1US; if (t > maxt) maxt = t; if (tmp->c_precision > maxpr) maxpr = tmp->c_precision; ct[flssbt(t)]++; cpr[flssbt(tmp->c_precision)]++; } if (c > maxc) maxc = c; ccpbk[fls(c + c / 2)]++; count += c; } CC_UNLOCK(cc); #ifdef SMP } #endif for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) tcum += ct[i]; medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) pcum += cpr[i]; medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; for (i = 0, c = 0; i < 32 && c < count / 2; i++) c += ccpbk[i]; medc = (i >= 2) ?
(1 << (i - 2)) : 0; printf("Scheduled callouts statistic snapshot:\n"); printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", medc, count / callwheelsize / mp_ncpus, (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, maxc); printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, (st / count) / 1000000, (st / count) % 1000000, maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, (spr / count) / 1000000, (spr / count) % 1000000, maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); printf(" Distribution: \tbuckets\t time\t tcum\t" " prec\t pcum\n"); for (i = 0, tcum = pcum = 0; i < 64; i++) { if (ct[i] == 0 && cpr[i] == 0) continue; t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0; tcum += ct[i]; pcum += cpr[i]; printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, i - 1 - (32 - CC_HASH_SHIFT), ct[i], tcum, cpr[i], pcum); } return (error); } SYSCTL_PROC(_kern, OID_AUTO, callout_stat, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_callout_stat, "I", "Dump immediate statistic snapshot of the scheduled callouts"); #ifdef DDB static void _show_callout(struct callout *c) { db_printf("callout %p\n", c); #define C_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, c->e); db_printf(" &c_links = %p\n", &(c->c_links)); C_DB_PRINTF("%" PRId64, c_time); C_DB_PRINTF("%" PRId64, c_precision); C_DB_PRINTF("%p", c_arg); C_DB_PRINTF("%p", c_func); C_DB_PRINTF("%p", c_lock); C_DB_PRINTF("%#x", c_flags); C_DB_PRINTF("%#x", c_iflags); C_DB_PRINTF("%d", c_cpu); #undef C_DB_PRINTF } DB_SHOW_COMMAND(callout, db_show_callout) { if (!have_addr) { db_printf("usage: show callout \n"); return; } _show_callout((struct callout *)addr); } #endif /* DDB */ Index: stable/11/sys/kern/kern_xxx.c =================================================================== --- stable/11/sys/kern/kern_xxx.c (revision 331642) +++ stable/11/sys/kern/kern_xxx.c (revision 331643) @@ -1,473 +1,463 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_xxx.c 8.2 (Berkeley) 11/14/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct gethostname_args { char *hostname; u_int len; }; #endif /* ARGSUSED */ int -ogethostname(td, uap) - struct thread *td; - struct gethostname_args *uap; +ogethostname(struct thread *td, struct gethostname_args *uap) { int name[2]; size_t len = uap->len; name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, uap->hostname, &len, 1, 0, 0, 0, 0)); } #ifndef _SYS_SYSPROTO_H_ struct sethostname_args { char *hostname; u_int len; }; #endif /* ARGSUSED */ int -osethostname(td, uap) - struct thread *td; - register struct sethostname_args *uap; +osethostname(struct thread *td, struct sethostname_args *uap) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, uap->hostname, uap->len, 0, 0)); } #ifndef _SYS_SYSPROTO_H_ struct ogethostid_args { int dummy; }; #endif /* ARGSUSED */ int -ogethostid(td, uap) - struct thread *td; - struct ogethostid_args *uap; +ogethostid(struct thread *td, struct ogethostid_args *uap) { size_t len = sizeof(long); int name[2]; name[0] = CTL_KERN; name[1] = KERN_HOSTID; return (kernel_sysctl(td, name, 2, (long *)td->td_retval, &len, NULL, 0, NULL, 0)); } #endif /* COMPAT_43 */ #ifdef COMPAT_43 #ifndef _SYS_SYSPROTO_H_ struct osethostid_args { long hostid; }; #endif /* ARGSUSED */ int -osethostid(td, uap) - struct thread *td; - struct osethostid_args *uap; +osethostid(struct thread *td, struct osethostid_args *uap) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_HOSTID; return (kernel_sysctl(td, name, 2, NULL, NULL, &uap->hostid, sizeof(uap->hostid), NULL, 0)); } int -oquota(td, uap) - struct thread *td; - struct oquota_args *uap; +oquota(struct thread *td, struct oquota_args *uap) { return (ENOSYS); } #define KINFO_PROC (0<<8) #define KINFO_RT (1<<8) #define KINFO_VNODE (2<<8) #define KINFO_FILE (3<<8) #define KINFO_METER (4<<8) #define KINFO_LOADAVG (5<<8) #define KINFO_CLOCKRATE (6<<8) /* Non-standard BSDI extension - only present on their 4.3 net-2 releases */ #define KINFO_BSDI_SYSINFO (101<<8) /* * XXX this is bloat, but I hope it's better here than on the potentially * limited kernel stack... 
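 *
 * A userland aside: the COMPAT_43 wrappers above boil down to the
 * same MIB pairs that sysctl(3) exposes today, e.g.:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int name[2] = { CTL_KERN, KERN_HOSTNAME };
 *		char buf[256];
 *		size_t len = sizeof(buf);
 *
 *		if (sysctl(name, 2, buf, &len, NULL, 0) == -1)
 *			return (1);
 *		printf("%s\n", buf);
 *		return (0);
 *	}
 *
 * ogethostname() is exactly this request funnelled through
 * userland_sysctl() in the kernel.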
-Peter */ static struct { int bsdi_machine; /* "i386" on BSD/386 */ /* ^^^ this is an offset to the string, relative to the struct start */ char *pad0; long pad1; long pad2; long pad3; u_long pad4; u_long pad5; u_long pad6; int bsdi_ostype; /* "BSD/386" on BSD/386 */ int bsdi_osrelease; /* "1.1" on BSD/386 */ long pad7; long pad8; char *pad9; long pad10; long pad11; int pad12; long pad13; quad_t pad14; long pad15; struct timeval pad16; /* we dont set this, because BSDI's uname used gethostname() instead */ int bsdi_hostname; /* hostname on BSD/386 */ /* the actual string data is appended here */ } bsdi_si; /* * this data is appended to the end of the bsdi_si structure during copyout. * The "char *" offsets are relative to the base of the bsdi_si struct. * This contains "FreeBSD\02.0-BUILT-nnnnnn\0i386\0", and these strings * should not exceed the length of the buffer here... (or else!! :-) */ static char bsdi_strings[80]; /* It had better be less than this! */ #ifndef _SYS_SYSPROTO_H_ struct getkerninfo_args { int op; char *where; size_t *size; int arg; }; #endif int ogetkerninfo(struct thread *td, struct getkerninfo_args *uap) { int error, name[6]; size_t size; u_int needed = 0; switch (uap->op & 0xff00) { case KINFO_RT: name[0] = CTL_NET; name[1] = PF_ROUTE; name[2] = 0; name[3] = (uap->op & 0xff0000) >> 16; name[4] = uap->op & 0xff; name[5] = uap->arg; error = userland_sysctl(td, name, 6, uap->where, uap->size, 0, 0, 0, &size, 0); break; case KINFO_VNODE: name[0] = CTL_KERN; name[1] = KERN_VNODE; error = userland_sysctl(td, name, 2, uap->where, uap->size, 0, 0, 0, &size, 0); break; case KINFO_PROC: name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = uap->op & 0xff; name[3] = uap->arg; error = userland_sysctl(td, name, 4, uap->where, uap->size, 0, 0, 0, &size, 0); break; case KINFO_FILE: name[0] = CTL_KERN; name[1] = KERN_FILE; error = userland_sysctl(td, name, 2, uap->where, uap->size, 0, 0, 0, &size, 0); break; case KINFO_METER: name[0] = CTL_VM; name[1] = VM_TOTAL; error = userland_sysctl(td, name, 2, uap->where, uap->size, 0, 0, 0, &size, 0); break; case KINFO_LOADAVG: name[0] = CTL_VM; name[1] = VM_LOADAVG; error = userland_sysctl(td, name, 2, uap->where, uap->size, 0, 0, 0, &size, 0); break; case KINFO_CLOCKRATE: name[0] = CTL_KERN; name[1] = KERN_CLOCKRATE; error = userland_sysctl(td, name, 2, uap->where, uap->size, 0, 0, 0, &size, 0); break; case KINFO_BSDI_SYSINFO: { /* * this is pretty crude, but it's just enough for uname() * from BSDI's 1.x libc to work. * * *size gives the size of the buffer before the call, and * the amount of data copied after a successful call. * If successful, the return value is the amount of data * available, which can be larger than *size. * * BSDI's 2.x product apparently fails with ENOMEM if *size * is too small. */ u_int left; char *s; bzero((char *)&bsdi_si, sizeof(bsdi_si)); bzero(bsdi_strings, sizeof(bsdi_strings)); s = bsdi_strings; bsdi_si.bsdi_ostype = (s - bsdi_strings) + sizeof(bsdi_si); strcpy(s, ostype); s += strlen(s) + 1; bsdi_si.bsdi_osrelease = (s - bsdi_strings) + sizeof(bsdi_si); strcpy(s, osrelease); s += strlen(s) + 1; bsdi_si.bsdi_machine = (s - bsdi_strings) + sizeof(bsdi_si); strcpy(s, machine); s += strlen(s) + 1; needed = sizeof(bsdi_si) + (s - bsdi_strings); if ((uap->where == NULL) || (uap->size == NULL)) { /* process is asking how much buffer to supply.. 
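 *
 * (The same probe-then-fetch convention survives in sysctl(3) today;
 * a userland sketch using the kern.version string:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	int
 *	main(void)
 *	{
 *		size_t needed = 0;
 *		char *buf;
 *
 *		if (sysctlbyname("kern.version", NULL, &needed,
 *		    NULL, 0) == -1)
 *			return (1);
 *		buf = malloc(needed);
 *		if (buf != NULL && sysctlbyname("kern.version", buf,
 *		    &needed, NULL, 0) == 0)
 *			printf("%s", buf);
 *		free(buf);
 *		return (0);
 *	}
 *
 * Passing a NULL buffer asks "how much?", then the second call fills
 * the caller-supplied space, just as the BSDI emulation does above.)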
*/ size = needed; error = 0; break; } if ((error = copyin(uap->size, &size, sizeof(size))) != 0) break; /* if too much buffer supplied, trim it down */ if (size > needed) size = needed; /* how much of the buffer is remaining */ left = size; if ((error = copyout((char *)&bsdi_si, uap->where, left)) != 0) break; /* is there any point in continuing? */ if (left > sizeof(bsdi_si)) { left -= sizeof(bsdi_si); error = copyout(&bsdi_strings, uap->where + sizeof(bsdi_si), left); } break; } default: error = EOPNOTSUPP; break; } if (error == 0) { td->td_retval[0] = needed ? needed : size; if (uap->size) { error = copyout(&size, uap->size, sizeof(size)); } } return (error); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * This is the FreeBSD-1.1 compatible uname(2) interface. These days it is * done in libc as a wrapper around a bunch of sysctl's. This must maintain * the old 1.1 binary ABI. */ #if SYS_NMLN != 32 #error "FreeBSD-1.1 uname syscall has been broken" #endif #ifndef _SYS_SYSPROTO_H_ struct uname_args { struct utsname *name; }; #endif /* ARGSUSED */ int freebsd4_uname(struct thread *td, struct freebsd4_uname_args *uap) { int name[2], error; size_t len; char *s, *us; name[0] = CTL_KERN; name[1] = KERN_OSTYPE; len = sizeof (uap->name->sysname); error = userland_sysctl(td, name, 2, uap->name->sysname, &len, 1, 0, 0, 0, 0); if (error) return (error); subyte( uap->name->sysname + sizeof(uap->name->sysname) - 1, 0); name[1] = KERN_HOSTNAME; len = sizeof uap->name->nodename; error = userland_sysctl(td, name, 2, uap->name->nodename, &len, 1, 0, 0, 0, 0); if (error) return (error); subyte( uap->name->nodename + sizeof(uap->name->nodename) - 1, 0); name[1] = KERN_OSRELEASE; len = sizeof uap->name->release; error = userland_sysctl(td, name, 2, uap->name->release, &len, 1, 0, 0, 0, 0); if (error) return (error); subyte( uap->name->release + sizeof(uap->name->release) - 1, 0); /* name = KERN_VERSION; len = sizeof uap->name->version; error = userland_sysctl(td, name, 2, uap->name->version, &len, 1, 0, 0, 0, 0); if (error) return (error); subyte( uap->name->version + sizeof(uap->name->version) - 1, 0); */ /* * this stupid hackery to make the version field look like FreeBSD 1.1 */ for(s = version; *s && *s != '#'; s++); for(us = uap->name->version; *s && *s != ':'; s++) { error = subyte( us++, *s); if (error) return (error); } error = subyte( us++, 0); if (error) return (error); name[0] = CTL_HW; name[1] = HW_MACHINE; len = sizeof uap->name->machine; error = userland_sysctl(td, name, 2, uap->name->machine, &len, 1, 0, 0, 0, 0); if (error) return (error); subyte( uap->name->machine + sizeof(uap->name->machine) - 1, 0); return (0); } #ifndef _SYS_SYSPROTO_H_ struct getdomainname_args { char *domainname; int len; }; #endif /* ARGSUSED */ int freebsd4_getdomainname(struct thread *td, struct freebsd4_getdomainname_args *uap) { int name[2]; size_t len = uap->len; name[0] = CTL_KERN; name[1] = KERN_NISDOMAINNAME; return (userland_sysctl(td, name, 2, uap->domainname, &len, 1, 0, 0, 0, 0)); } #ifndef _SYS_SYSPROTO_H_ struct setdomainname_args { char *domainname; int len; }; #endif /* ARGSUSED */ int freebsd4_setdomainname(struct thread *td, struct freebsd4_setdomainname_args *uap) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_NISDOMAINNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, uap->domainname, uap->len, 0, 0)); } #endif /* COMPAT_FREEBSD4 */ Index: stable/11/sys/kern/sched_4bsd.c =================================================================== --- stable/11/sys/kern/sched_4bsd.c (revision 
331642) +++ stable/11/sys/kern/sched_4bsd.c (revision 331643) @@ -1,1779 +1,1779 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #ifdef KDTRACE_HOOKS #include int dtrace_vtime_active; dtrace_vtime_switch_func_t dtrace_vtime_switch_func; #endif /* * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in * the range 100-256 Hz (approximately). */ #define ESTCPULIM(e) \ min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \ RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1) #ifdef SMP #define INVERSE_ESTCPU_WEIGHT (8 * smp_cpus) #else #define INVERSE_ESTCPU_WEIGHT 8 /* 1 / (priorities per estcpu level). */ #endif #define NICE_WEIGHT 1 /* Priorities per nice level. */ #define TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX))) /* * The schedulable entity that runs a context. * This is an extension to the thread structure and is tailored to * the requirements of this scheduler. * All fields are protected by the scheduler lock. */ struct td_sched { fixpt_t ts_pctcpu; /* %cpu during p_swtime. */ u_int ts_estcpu; /* Estimated cpu utilization. */ int ts_cpticks; /* Ticks of cpu time. */ int ts_slptime; /* Seconds !RUNNING. */ int ts_slice; /* Remaining part of time slice. 
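(in stathz ticks; counted down in sched_clock() and refilled from sched_slice)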
*/ int ts_flags; struct runq *ts_runq; /* runq the thread is currently on */ #ifdef KTR char ts_name[TS_NAME_LEN]; #endif }; /* flags kept in td_flags */ #define TDF_DIDRUN TDF_SCHED0 /* thread actually ran. */ #define TDF_BOUND TDF_SCHED1 /* Bound to one CPU. */ #define TDF_SLICEEND TDF_SCHED2 /* Thread time slice is over. */ /* flags kept in ts_flags */ #define TSF_AFFINITY 0x0001 /* Has a non-"full" CPU set. */ #define SKE_RUNQ_PCPU(ts) \ ((ts)->ts_runq != 0 && (ts)->ts_runq != &runq) #define THREAD_CAN_SCHED(td, cpu) \ CPU_ISSET((cpu), &(td)->td_cpuset->cs_mask) _Static_assert(sizeof(struct thread) + sizeof(struct td_sched) <= sizeof(struct thread0_storage), "increase struct thread0_storage.t0st_sched size"); static struct mtx sched_lock; static int realstathz = 127; /* stathz is sometimes 0 and run off of hz. */ static int sched_tdcnt; /* Total runnable threads in the system. */ static int sched_slice = 12; /* Thread run time before rescheduling. */ static void setup_runqs(void); static void schedcpu(void); static void schedcpu_thread(void); static void sched_priority(struct thread *td, u_char prio); static void sched_setup(void *dummy); static void maybe_resched(struct thread *td); static void updatepri(struct thread *td); static void resetpriority(struct thread *td); static void resetpriority_thread(struct thread *td); #ifdef SMP static int sched_pickcpu(struct thread *td); static int forward_wakeup(int cpunum); static void kick_other_cpu(int pri, int cpuid); #endif static struct kproc_desc sched_kp = { "schedcpu", schedcpu_thread, NULL }; SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, kproc_start, &sched_kp); SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL); static void sched_initticks(void *dummy); SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL); /* * Global run queue. 
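* (shared by every CPU; pinned, bound, or affinity-restricted threads go on the per-CPU run queues below instead, see sched_add())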
*/ static struct runq runq; #ifdef SMP /* * Per-CPU run queues */ static struct runq runq_pcpu[MAXCPU]; long runq_length[MAXCPU]; static cpuset_t idle_cpus_mask; #endif struct pcpuidlestat { u_int idlecalls; u_int oldidlecalls; }; static DPCPU_DEFINE(struct pcpuidlestat, idlestat); static void setup_runqs(void) { #ifdef SMP int i; for (i = 0; i < MAXCPU; ++i) runq_init(&runq_pcpu[i]); #endif runq_init(&runq); } static int sysctl_kern_quantum(SYSCTL_HANDLER_ARGS) { int error, new_val, period; period = 1000000 / realstathz; new_val = period * sched_slice; error = sysctl_handle_int(oidp, &new_val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (new_val <= 0) return (EINVAL); sched_slice = imax(1, (new_val + period / 2) / period); hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) / realstathz); return (0); } SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD, 0, "Scheduler"); SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0, "Scheduler name"); SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_kern_quantum, "I", "Quantum for timeshare threads in microseconds"); SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, "Quantum for timeshare threads in stathz ticks"); #ifdef SMP /* Enable forwarding of wakeups to all other cpus */ static SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP"); static int runq_fuzz = 1; SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, ""); static int forward_wakeup_enabled = 1; SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW, &forward_wakeup_enabled, 0, "Forwarding of wakeup to idle CPUs"); static int forward_wakeups_requested = 0; SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD, &forward_wakeups_requested, 0, "Requests for Forwarding of wakeup to idle CPUs"); static int forward_wakeups_delivered = 0; SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD, &forward_wakeups_delivered, 0, "Completed Forwarding of wakeup to idle CPUs"); static int forward_wakeup_use_mask = 1; SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW, &forward_wakeup_use_mask, 0, "Use the mask of idle cpus"); static int forward_wakeup_use_loop = 0; SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW, &forward_wakeup_use_loop, 0, "Use a loop to find idle cpus"); #endif #if 0 static int sched_followon = 0; SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW, &sched_followon, 0, "allow threads to share a quantum"); #endif SDT_PROVIDER_DEFINE(sched); SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *", "struct proc *", "uint8_t"); SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *", "struct proc *", "void *"); SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *", "struct proc *", "void *", "int"); SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *", "struct proc *", "uint8_t", "struct thread *"); SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int"); SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *", "struct proc *"); SDT_PROBE_DEFINE(sched, , , on__cpu); SDT_PROBE_DEFINE(sched, , , remain__cpu); SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *", "struct proc *"); static __inline void sched_load_add(void) { sched_tdcnt++; KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt); SDT_PROBE2(sched, , , load__change, NOCPU, sched_tdcnt); } static __inline void sched_load_rem(void) { sched_tdcnt--; KTR_COUNTER0(KTR_SCHED, "load", "global load", 
sched_tdcnt); SDT_PROBE2(sched, , , load__change, NOCPU, sched_tdcnt); } /* * Arrange to reschedule if necessary, taking the priorities and * schedulers into account. */ static void maybe_resched(struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_priority < curthread->td_priority) curthread->td_flags |= TDF_NEEDRESCHED; } /* * This function is called when a thread is about to be put on run queue * because it has been made runnable or its priority has been adjusted. It * determines if the new thread should preempt the current thread. If so, * it sets td_owepreempt to request a preemption. */ int maybe_preempt(struct thread *td) { #ifdef PREEMPTION struct thread *ctd; int cpri, pri; /* * The new thread should not preempt the current thread if any of the * following conditions are true: * * - The kernel is in the throes of crashing (panicstr). * - The current thread has a higher (numerically lower) or * equivalent priority. Note that this prevents curthread from * trying to preempt to itself. * - The current thread has an inhibitor set or is in the process of * exiting. In this case, the current thread is about to switch * out anyways, so there's no point in preempting. If we did, * the current thread would not be properly resumed as well, so * just avoid that whole landmine. * - If the new thread's priority is not a realtime priority and * the current thread's priority is not an idle priority and * FULL_PREEMPTION is disabled. * * If all of these conditions are false, but the current thread is in * a nested critical section, then we have to defer the preemption * until we exit the critical section. Otherwise, switch immediately * to the new thread. */ ctd = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT((td->td_inhibitors == 0), ("maybe_preempt: trying to run inhibited thread")); pri = td->td_priority; cpri = ctd->td_priority; if (panicstr != NULL || pri >= cpri /* || dumping */ || TD_IS_INHIBITED(ctd)) return (0); #ifndef FULL_PREEMPTION if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE) return (0); #endif CTR0(KTR_PROC, "maybe_preempt: scheduling preemption"); ctd->td_owepreempt = 1; return (1); #else return (0); #endif } /* * Constants for digital decay and forget: * 90% of (ts_estcpu) usage in 5 * loadav time * 95% of (ts_pctcpu) usage in 60 seconds (load insensitive) * Note that, as ps(1) mentions, this can let percentages * total over 100% (I've seen 137.9% for 3 processes). * * Note that schedclock() updates ts_estcpu and p_cpticks asynchronously. * * We wish to decay away 90% of ts_estcpu in (5 * loadavg) seconds. * That is, the system wants to compute a value of decay such * that the following for loop: * for (i = 0; i < (5 * loadavg); i++) * ts_estcpu *= decay; * will compute * ts_estcpu *= 0.1; * for all values of loadavg: * * Mathematically this loop can be expressed by saying: * decay ** (5 * loadavg) ~= .1 * * The system computes decay as: * decay = (2 * loadavg) / (2 * loadavg + 1) * * We wish to prove that the system's computation of decay * will always fulfill the equation: * decay ** (5 * loadavg) ~= .1 * * If we compute b as: * b = 2 * loadavg * then * decay = b / (b + 1) * * We now need to prove two things: * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) * * Facts: * For x close to zero, exp(x) =~ 1 + x, since * exp(x) = 0! + x**1/1! + x**2/2! + ... . * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 
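* (and (b-1)/b =~ b/(b+1), both being 1 - 1/b to first order, which is the form used in the proof of (1) below)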
* For x close to zero, ln(1+x) =~ x, since * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). * ln(.1) =~ -2.30 * * Proof of (1): * Solve (factor)**(power) =~ .1 given power (5*loadav): * solving for factor, * ln(factor) =~ (-2.30/5*loadav), or * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED * * Proof of (2): * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): * solving for power, * power*ln(b/(b+1)) =~ -2.30, or * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED * * Actual power values for the implemented algorithm are as follows: * loadav: 1 2 3 4 * power: 5.68 10.32 14.94 19.55 */ /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ #define loadfactor(loadav) (2 * (loadav)) #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) /* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); /* * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). * * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). * * If you don't want to bother with the faster/more-accurate formula, you * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate * (more general) method of calculating the %age of CPU used by a process. */ #define CCPU_SHIFT 11 /* * Recompute process priorities, every hz ticks. * MP-safe, called without the Giant mutex. */ /* ARGSUSED */ static void schedcpu(void) { - register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); + fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); struct thread *td; struct proc *p; struct td_sched *ts; int awake; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } FOREACH_THREAD_IN_PROC(p, td) { awake = 0; ts = td_get_sched(td); thread_lock(td); /* * Increment sleep time (if sleeping). We * ignore overflow, as above. */ /* * The td_sched slptimes are not touched in wakeup * because the thread may not HAVE everything in * memory? XXX I think this is out of date. */ if (TD_ON_RUNQ(td)) { awake = 1; td->td_flags &= ~TDF_DIDRUN; } else if (TD_IS_RUNNING(td)) { awake = 1; /* Do not clear TDF_DIDRUN */ } else if (td->td_flags & TDF_DIDRUN) { awake = 1; td->td_flags &= ~TDF_DIDRUN; } /* * ts_pctcpu is only for ps and ttyinfo(). */ ts->ts_pctcpu = (ts->ts_pctcpu * ccpu) >> FSHIFT; /* * If the td_sched has been idle the entire second, * stop recalculating its priority until * it wakes up. */ if (ts->ts_cpticks != 0) { #if (FSHIFT >= CCPU_SHIFT) ts->ts_pctcpu += (realstathz == 100) ? ((fixpt_t) ts->ts_cpticks) << (FSHIFT - CCPU_SHIFT) : 100 * (((fixpt_t) ts->ts_cpticks) << (FSHIFT - CCPU_SHIFT)) / realstathz; #else ts->ts_pctcpu += ((FSCALE - ccpu) * (ts->ts_cpticks * FSCALE / realstathz)) >> FSHIFT; #endif ts->ts_cpticks = 0; } /* * If there are ANY running threads in this process, * then don't count it as sleeping. * XXX: this is broken. 
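* (in this version 'awake' is in fact computed per-thread earlier in the loop, so the per-process wording above overstates what the code checks)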
*/ if (awake) { if (ts->ts_slptime > 1) { /* * In an ideal world, this should not * happen, because whoever woke us * up from the long sleep should have * unwound the slptime and reset our * priority before we run at the stale * priority. Should KASSERT at some * point when all the cases are fixed. */ updatepri(td); } ts->ts_slptime = 0; } else ts->ts_slptime++; if (ts->ts_slptime > 1) { thread_unlock(td); continue; } ts->ts_estcpu = decay_cpu(loadfac, ts->ts_estcpu); resetpriority(td); resetpriority_thread(td); thread_unlock(td); } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); } /* * Main loop for a kthread that executes schedcpu once a second. */ static void schedcpu_thread(void) { for (;;) { schedcpu(); pause("-", hz); } } /* * Recalculate the priority of a process after it has slept for a while. * For all load averages >= 1 and max ts_estcpu of 255, sleeping for at * least six times the loadfactor will decay ts_estcpu to zero. */ static void updatepri(struct thread *td) { struct td_sched *ts; fixpt_t loadfac; unsigned int newcpu; ts = td_get_sched(td); loadfac = loadfactor(averunnable.ldavg[0]); if (ts->ts_slptime > 5 * loadfac) ts->ts_estcpu = 0; else { newcpu = ts->ts_estcpu; ts->ts_slptime--; /* was incremented in schedcpu() */ while (newcpu && --ts->ts_slptime) newcpu = decay_cpu(loadfac, newcpu); ts->ts_estcpu = newcpu; } } /* * Compute the priority of a process when running in user mode. * Arrange to reschedule if the resulting priority is better * than that of the current process. */ static void resetpriority(struct thread *td) { u_int newpriority; if (td->td_pri_class != PRI_TIMESHARE) return; newpriority = PUSER + td_get_sched(td)->ts_estcpu / INVERSE_ESTCPU_WEIGHT + NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN); newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), PRI_MAX_TIMESHARE); sched_user_prio(td, newpriority); } /* * Update the thread's priority when the associated process's user * priority changes. */ static void resetpriority_thread(struct thread *td) { /* Only change threads with a time sharing user priority. */ if (td->td_priority < PRI_MIN_TIMESHARE || td->td_priority > PRI_MAX_TIMESHARE) return; /* XXX the whole needresched thing is broken, but not silly. */ maybe_resched(td); sched_prio(td, td->td_user_pri); } /* ARGSUSED */ static void sched_setup(void *dummy) { setup_runqs(); /* Account for thread0. */ sched_load_add(); } /* * This routine determines time constants after stathz and hz are setup. */ static void sched_initticks(void *dummy) { realstathz = stathz ? stathz : hz; sched_slice = realstathz / 10; /* ~100ms */ hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) / realstathz); } /* External interfaces start here */ /* * Very early in the boot some setup of scheduler-specific * parts of proc0 and of some scheduler resources needs to be done. * Called from: * proc0_init() */ void schedinit(void) { /* * Set up the scheduler specific parts of thread0. */ thread0.td_lock = &sched_lock; td_get_sched(&thread0)->ts_slice = sched_slice; mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE); } int sched_runnable(void) { #ifdef SMP return runq_check(&runq) + runq_check(&runq_pcpu[PCPU_GET(cpuid)]); #else return runq_check(&runq); #endif } int sched_rr_interval(void) { /* Convert sched_slice from stathz to hz. */ return (imax(1, (sched_slice * hz + realstathz / 2) / realstathz)); } /* * We adjust the priority of the current process. The priority of a * process gets worse as it accumulates CPU time. 
The cpu usage * estimator (ts_estcpu) is increased here. resetpriority() will * compute a different priority each time ts_estcpu increases by * INVERSE_ESTCPU_WEIGHT (until PRI_MAX_TIMESHARE is reached). The * cpu usage estimator ramps up quite quickly when the process is * running (linearly), and decays away exponentially, at a rate which * is proportionally slower when the system is busy. The basic * principle is that the system will 90% forget that the process used * a lot of CPU time in 5 * loadav seconds. This causes the system to * favor processes which haven't run much recently, and to round-robin * among other processes. */ void sched_clock(struct thread *td) { struct pcpuidlestat *stat; struct td_sched *ts; THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td_get_sched(td); ts->ts_cpticks++; ts->ts_estcpu = ESTCPULIM(ts->ts_estcpu + 1); if ((ts->ts_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { resetpriority(td); resetpriority_thread(td); } /* * Force a context switch if the current thread has used up a full * time slice (default is 100ms). */ if (!TD_IS_IDLETHREAD(td) && --ts->ts_slice <= 0) { ts->ts_slice = sched_slice; td->td_flags |= TDF_NEEDRESCHED | TDF_SLICEEND; } stat = DPCPU_PTR(idlestat); stat->oldidlecalls = stat->idlecalls; stat->idlecalls = 0; } /* * Charge child's scheduling CPU usage to parent. */ void sched_exit(struct proc *p, struct thread *td) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "proc exit", "prio:%d", td->td_priority); PROC_LOCK_ASSERT(p, MA_OWNED); sched_exit_thread(FIRST_THREAD_IN_PROC(p), td); } void sched_exit_thread(struct thread *td, struct thread *child) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "exit", "prio:%d", child->td_priority); thread_lock(td); td_get_sched(td)->ts_estcpu = ESTCPULIM(td_get_sched(td)->ts_estcpu + td_get_sched(child)->ts_estcpu); thread_unlock(td); thread_lock(child); if ((child->td_flags & TDF_NOLOAD) == 0) sched_load_rem(); thread_unlock(child); } void sched_fork(struct thread *td, struct thread *childtd) { sched_fork_thread(td, childtd); } void sched_fork_thread(struct thread *td, struct thread *childtd) { struct td_sched *ts, *tsc; childtd->td_oncpu = NOCPU; childtd->td_lastcpu = NOCPU; childtd->td_lock = &sched_lock; childtd->td_cpuset = cpuset_ref(td->td_cpuset); childtd->td_priority = childtd->td_base_pri; ts = td_get_sched(childtd); bzero(ts, sizeof(*ts)); tsc = td_get_sched(td); ts->ts_estcpu = tsc->ts_estcpu; ts->ts_flags |= (tsc->ts_flags & TSF_AFFINITY); ts->ts_slice = 1; } void sched_nice(struct proc *p, int nice) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); p->p_nice = nice; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); resetpriority(td); resetpriority_thread(td); thread_unlock(td); } } void sched_class(struct thread *td, int class) { THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_pri_class = class; } /* * Adjust the priority of a thread. 
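* If the thread sits on a run queue and its queue index (prio / RQ_PPQ) changes, it is dequeued and re-queued so it lands on the right queue.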
*/ static void sched_priority(struct thread *td, u_char prio) { KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "priority change", "prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED, sched_tdname(curthread)); SDT_PROBE3(sched, , , change__pri, td, td->td_proc, prio); if (td != curthread && prio > td->td_priority) { KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread), "lend prio", "prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED, sched_tdname(td)); SDT_PROBE4(sched, , , lend__pri, td, td->td_proc, prio, curthread); } THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_priority == prio) return; td->td_priority = prio; if (TD_ON_RUNQ(td) && td->td_rqindex != (prio / RQ_PPQ)) { sched_rem(td); sched_add(td, SRQ_BORING); } } /* * Update a thread's priority when it is lent another thread's * priority. */ void sched_lend_prio(struct thread *td, u_char prio) { td->td_flags |= TDF_BORROWING; sched_priority(td, prio); } /* * Restore a thread's priority when priority propagation is * over. The prio argument is the minimum priority the thread * needs to have to satisfy other possible priority lending * requests. If the thread's regulary priority is less * important than prio the thread will keep a priority boost * of prio. */ void sched_unlend_prio(struct thread *td, u_char prio) { u_char base_pri; if (td->td_base_pri >= PRI_MIN_TIMESHARE && td->td_base_pri <= PRI_MAX_TIMESHARE) base_pri = td->td_user_pri; else base_pri = td->td_base_pri; if (prio >= base_pri) { td->td_flags &= ~TDF_BORROWING; sched_prio(td, base_pri); } else sched_lend_prio(td, prio); } void sched_prio(struct thread *td, u_char prio) { u_char oldprio; /* First, update the base priority. */ td->td_base_pri = prio; /* * If the thread is borrowing another thread's priority, don't ever * lower the priority. */ if (td->td_flags & TDF_BORROWING && td->td_priority < prio) return; /* Change the real priority. */ oldprio = td->td_priority; sched_priority(td, prio); /* * If the thread is on a turnstile, then let the turnstile update * its state. */ if (TD_ON_LOCK(td) && oldprio != prio) turnstile_adjust(td, oldprio); } void sched_user_prio(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_base_user_pri = prio; if (td->td_lend_user_pri <= prio) return; td->td_user_pri = prio; } void sched_lend_user_prio(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_lend_user_pri = prio; td->td_user_pri = min(prio, td->td_base_user_pri); if (td->td_priority > td->td_user_pri) sched_prio(td, td->td_user_pri); else if (td->td_priority != td->td_user_pri) td->td_flags |= TDF_NEEDRESCHED; } void sched_sleep(struct thread *td, int pri) { THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_slptick = ticks; td_get_sched(td)->ts_slptime = 0; if (pri != 0 && PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, pri); if (TD_IS_SUSPENDED(td) || pri >= PSOCK) td->td_flags |= TDF_CANSWAP; } void sched_switch(struct thread *td, struct thread *newtd, int flags) { struct mtx *tmtx; struct td_sched *ts; struct proc *p; int preempted; tmtx = NULL; ts = td_get_sched(td); p = td->td_proc; THREAD_LOCK_ASSERT(td, MA_OWNED); /* * Switch to the sched lock to fix things up and pick * a new thread. * Block the td_lock in order to avoid breaking the critical path. 
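* (thread_lock_block() leaves td_lock pointing at a placeholder blocked lock, so other CPUs taking the thread lock spin until cpu_switch() installs the new lock.)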
*/ if (td->td_lock != &sched_lock) { mtx_lock_spin(&sched_lock); tmtx = thread_lock_block(td); } if ((td->td_flags & TDF_NOLOAD) == 0) sched_load_rem(); td->td_lastcpu = td->td_oncpu; preempted = (td->td_flags & TDF_SLICEEND) == 0 && (flags & SW_PREEMPT) != 0; td->td_flags &= ~(TDF_NEEDRESCHED | TDF_SLICEEND); td->td_owepreempt = 0; td->td_oncpu = NOCPU; /* * At the last moment, if this thread is still marked RUNNING, * then put it back on the run queue as it has not been suspended * or stopped or any thing else similar. We never put the idle * threads on the run queue, however. */ if (td->td_flags & TDF_IDLETD) { TD_SET_CAN_RUN(td); #ifdef SMP CPU_CLR(PCPU_GET(cpuid), &idle_cpus_mask); #endif } else { if (TD_IS_RUNNING(td)) { /* Put us back on the run queue. */ sched_add(td, preempted ? SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : SRQ_OURSELF|SRQ_YIELDING); } } if (newtd) { /* * The thread we are about to run needs to be counted * as if it had been added to the run queue and selected. * It came from: * * A preemption * * An upcall * * A followon */ KASSERT((newtd->td_inhibitors == 0), ("trying to run inhibited thread")); newtd->td_flags |= TDF_DIDRUN; TD_SET_RUNNING(newtd); if ((newtd->td_flags & TDF_NOLOAD) == 0) sched_load_add(); } else { newtd = choosethread(); MPASS(newtd->td_lock == &sched_lock); } #if (KTR_COMPILE & KTR_SCHED) != 0 if (TD_IS_IDLETHREAD(td)) KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle", "prio:%d", td->td_priority); else KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td), "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, "lockname:\"%s\"", td->td_lockname); #endif if (td != newtd) { #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc); /* I feel sleepy */ lock_profile_release_lock(&sched_lock.lock_object); #ifdef KDTRACE_HOOKS /* * If DTrace has set the active vtime enum to anything * other than INACTIVE (0), then it should have set the * function to call. */ if (dtrace_vtime_active) (*dtrace_vtime_switch_func)(newtd); #endif cpu_switch(td, newtd, tmtx != NULL ? tmtx : td->td_lock); lock_profile_obtain_lock_success(&sched_lock.lock_object, 0, 0, __FILE__, __LINE__); /* * Where am I? What year is it? * We are in the same thread that went to sleep above, * but any amount of time may have passed. All our context * will still be available as will local variables. * PCPU values however may have changed as we may have * changed CPU so don't trust cached values of them. * New threads will go to fork_exit() instead of here * so if you change things here you may need to change * things there too. * * If the thread above was exiting it will never wake * up again here, so either it has saved everything it * needed to, or the thread_wait() or wait() will * need to reap it. 
*/ SDT_PROBE0(sched, , , on__cpu); #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); #endif } else SDT_PROBE0(sched, , , remain__cpu); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", "prio:%d", td->td_priority); #ifdef SMP if (td->td_flags & TDF_IDLETD) CPU_SET(PCPU_GET(cpuid), &idle_cpus_mask); #endif sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); MPASS(td->td_lock == &sched_lock); } void sched_wakeup(struct thread *td) { struct td_sched *ts; THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td_get_sched(td); td->td_flags &= ~TDF_CANSWAP; if (ts->ts_slptime > 1) { updatepri(td); resetpriority(td); } td->td_slptick = 0; ts->ts_slptime = 0; ts->ts_slice = sched_slice; sched_add(td, SRQ_BORING); } #ifdef SMP static int forward_wakeup(int cpunum) { struct pcpu *pc; cpuset_t dontuse, map, map2; u_int id, me; int iscpuset; mtx_assert(&sched_lock, MA_OWNED); CTR0(KTR_RUNQ, "forward_wakeup()"); if ((!forward_wakeup_enabled) || (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0)) return (0); if (!smp_started || panicstr) return (0); forward_wakeups_requested++; /* * Check the idle mask we received against what we calculated * before in the old version. */ me = PCPU_GET(cpuid); /* Don't bother if we should be doing it ourself. */ if (CPU_ISSET(me, &idle_cpus_mask) && (cpunum == NOCPU || me == cpunum)) return (0); CPU_SETOF(me, &dontuse); CPU_OR(&dontuse, &stopped_cpus); CPU_OR(&dontuse, &hlt_cpus_mask); CPU_ZERO(&map2); if (forward_wakeup_use_loop) { STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpuid; if (!CPU_ISSET(id, &dontuse) && pc->pc_curthread == pc->pc_idlethread) { CPU_SET(id, &map2); } } } if (forward_wakeup_use_mask) { map = idle_cpus_mask; CPU_NAND(&map, &dontuse); /* If they are both on, compare and use loop if different. */ if (forward_wakeup_use_loop) { if (CPU_CMP(&map, &map2)) { printf("map != map2, loop method preferred\n"); map = map2; } } } else { map = map2; } /* If we only allow a specific CPU, then mask off all the others. */ if (cpunum != NOCPU) { KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum.")); iscpuset = CPU_ISSET(cpunum, &map); if (iscpuset == 0) CPU_ZERO(&map); else CPU_SETOF(cpunum, &map); } if (!CPU_EMPTY(&map)) { forward_wakeups_delivered++; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpuid; if (!CPU_ISSET(id, &map)) continue; if (cpu_idle_wakeup(pc->pc_cpuid)) CPU_CLR(id, &map); } if (!CPU_EMPTY(&map)) ipi_selected(map, IPI_AST); return (1); } if (cpunum == NOCPU) printf("forward_wakeup: Idle processor not found\n"); return (0); } static void kick_other_cpu(int pri, int cpuid) { struct pcpu *pcpu; int cpri; pcpu = pcpu_find(cpuid); if (CPU_ISSET(cpuid, &idle_cpus_mask)) { forward_wakeups_delivered++; if (!cpu_idle_wakeup(cpuid)) ipi_cpu(cpuid, IPI_AST); return; } cpri = pcpu->pc_curthread->td_priority; if (pri >= cpri) return; #if defined(IPI_PREEMPTION) && defined(PREEMPTION) #if !defined(FULL_PREEMPTION) if (pri <= PRI_MAX_ITHD) #endif /* ! 
FULL_PREEMPTION */ { ipi_cpu(cpuid, IPI_PREEMPT); return; } #endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */ pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED; ipi_cpu(cpuid, IPI_AST); return; } #endif /* SMP */ #ifdef SMP static int sched_pickcpu(struct thread *td) { int best, cpu; mtx_assert(&sched_lock, MA_OWNED); if (td->td_lastcpu != NOCPU && THREAD_CAN_SCHED(td, td->td_lastcpu)) best = td->td_lastcpu; else best = NOCPU; CPU_FOREACH(cpu) { if (!THREAD_CAN_SCHED(td, cpu)) continue; if (best == NOCPU) best = cpu; else if (runq_length[cpu] < runq_length[best]) best = cpu; } KASSERT(best != NOCPU, ("no valid CPUs")); return (best); } #endif void sched_add(struct thread *td, int flags) #ifdef SMP { cpuset_t tidlemsk; struct td_sched *ts; u_int cpu, cpuid; int forwarded = 0; int single_cpu = 0; ts = td_get_sched(td); THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT((td->td_inhibitors == 0), ("sched_add: trying to run inhibited thread")); KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), ("sched_add: bad thread state")); KASSERT(td->td_flags & TDF_INMEM, ("sched_add: thread swapped out")); KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq add", "prio:%d", td->td_priority, KTR_ATTR_LINKED, sched_tdname(curthread)); KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup", KTR_ATTR_LINKED, sched_tdname(td)); SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, flags & SRQ_PREEMPTED); /* * Now that the thread is moving to the run-queue, set the lock * to the scheduler's lock. */ if (td->td_lock != &sched_lock) { mtx_lock_spin(&sched_lock); thread_lock_set(td, &sched_lock); } TD_SET_RUNQ(td); /* * If SMP is started and the thread is pinned or otherwise limited to * a specific set of CPUs, queue the thread to a per-CPU run queue. * Otherwise, queue the thread to the global run queue. * * If SMP has not yet been started we must use the global run queue * as per-CPU state may not be initialized yet and we may crash if we * try to access the per-CPU run queues. */ if (smp_started && (td->td_pinned != 0 || td->td_flags & TDF_BOUND || ts->ts_flags & TSF_AFFINITY)) { if (td->td_pinned != 0) cpu = td->td_lastcpu; else if (td->td_flags & TDF_BOUND) { /* Find CPU from bound runq. 
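(sched_bind() pointed ts_runq at one of the runq_pcpu[] queues, so the CPU number falls out of the pointer arithmetic below)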
*/ KASSERT(SKE_RUNQ_PCPU(ts), ("sched_add: bound td_sched not on cpu runq")); cpu = ts->ts_runq - &runq_pcpu[0]; } else /* Find a valid CPU for our cpuset */ cpu = sched_pickcpu(td); ts->ts_runq = &runq_pcpu[cpu]; single_cpu = 1; CTR3(KTR_RUNQ, "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td, cpu); } else { CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to gbl runq", ts, td); cpu = NOCPU; ts->ts_runq = &runq; } if ((td->td_flags & TDF_NOLOAD) == 0) sched_load_add(); runq_add(ts->ts_runq, td, flags); if (cpu != NOCPU) runq_length[cpu]++; cpuid = PCPU_GET(cpuid); if (single_cpu && cpu != cpuid) { kick_other_cpu(td->td_priority, cpu); } else { if (!single_cpu) { tidlemsk = idle_cpus_mask; CPU_NAND(&tidlemsk, &hlt_cpus_mask); CPU_CLR(cpuid, &tidlemsk); if (!CPU_ISSET(cpuid, &idle_cpus_mask) && ((flags & SRQ_INTR) == 0) && !CPU_EMPTY(&tidlemsk)) forwarded = forward_wakeup(cpu); } if (!forwarded) { if (!maybe_preempt(td)) maybe_resched(td); } } } #else /* SMP */ { struct td_sched *ts; ts = td_get_sched(td); THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT((td->td_inhibitors == 0), ("sched_add: trying to run inhibited thread")); KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), ("sched_add: bad thread state")); KASSERT(td->td_flags & TDF_INMEM, ("sched_add: thread swapped out")); KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq add", "prio:%d", td->td_priority, KTR_ATTR_LINKED, sched_tdname(curthread)); KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup", KTR_ATTR_LINKED, sched_tdname(td)); SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, flags & SRQ_PREEMPTED); /* * Now that the thread is moving to the run-queue, set the lock * to the scheduler's lock. */ if (td->td_lock != &sched_lock) { mtx_lock_spin(&sched_lock); thread_lock_set(td, &sched_lock); } TD_SET_RUNQ(td); CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td); ts->ts_runq = &runq; if ((td->td_flags & TDF_NOLOAD) == 0) sched_load_add(); runq_add(ts->ts_runq, td, flags); if (!maybe_preempt(td)) maybe_resched(td); } #endif /* SMP */ void sched_rem(struct thread *td) { struct td_sched *ts; ts = td_get_sched(td); KASSERT(td->td_flags & TDF_INMEM, ("sched_rem: thread swapped out")); KASSERT(TD_ON_RUNQ(td), ("sched_rem: thread not on run queue")); mtx_assert(&sched_lock, MA_OWNED); KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq rem", "prio:%d", td->td_priority, KTR_ATTR_LINKED, sched_tdname(curthread)); SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL); if ((td->td_flags & TDF_NOLOAD) == 0) sched_load_rem(); #ifdef SMP if (ts->ts_runq != &runq) runq_length[ts->ts_runq - runq_pcpu]--; #endif runq_remove(ts->ts_runq, td); TD_SET_CAN_RUN(td); } /* * Select threads to run. Note that running threads still consume a * slot. 
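* On SMP the head of this CPU's private queue is chosen when the global queue is empty or the private head has a numerically lower (better) priority.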
*/ struct thread * sched_choose(void) { struct thread *td; struct runq *rq; mtx_assert(&sched_lock, MA_OWNED); #ifdef SMP struct thread *tdcpu; rq = &runq; td = runq_choose_fuzz(&runq, runq_fuzz); tdcpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]); if (td == NULL || (tdcpu != NULL && tdcpu->td_priority < td->td_priority)) { CTR2(KTR_RUNQ, "choosing td %p from pcpu runq %d", tdcpu, PCPU_GET(cpuid)); td = tdcpu; rq = &runq_pcpu[PCPU_GET(cpuid)]; } else { CTR1(KTR_RUNQ, "choosing td_sched %p from main runq", td); } #else rq = &runq; td = runq_choose(&runq); #endif if (td) { #ifdef SMP if (td == tdcpu) runq_length[PCPU_GET(cpuid)]--; #endif runq_remove(rq, td); td->td_flags |= TDF_DIDRUN; KASSERT(td->td_flags & TDF_INMEM, ("sched_choose: thread swapped out")); return (td); } return (PCPU_GET(idlethread)); } void sched_preempt(struct thread *td) { SDT_PROBE2(sched, , , surrender, td, td->td_proc); thread_lock(td); if (td->td_critnest > 1) td->td_owepreempt = 1; else mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL); thread_unlock(td); } void sched_userret(struct thread *td) { /* * XXX we cheat slightly on the locking here to avoid locking in * the usual case. Setting td_priority here is essentially an * incomplete workaround for not setting it properly elsewhere. * Now that some interrupt handlers are threads, not setting it * properly elsewhere can clobber it in the window between setting * it here and returning to user mode, so don't waste time setting * it perfectly here. */ KASSERT((td->td_flags & TDF_BORROWING) == 0, ("thread with borrowed priority returning to userland")); if (td->td_priority != td->td_user_pri) { thread_lock(td); td->td_priority = td->td_user_pri; td->td_base_pri = td->td_user_pri; thread_unlock(td); } } void sched_bind(struct thread *td, int cpu) { struct td_sched *ts; THREAD_LOCK_ASSERT(td, MA_OWNED|MA_NOTRECURSED); KASSERT(td == curthread, ("sched_bind: can only bind curthread")); ts = td_get_sched(td); td->td_flags |= TDF_BOUND; #ifdef SMP ts->ts_runq = &runq_pcpu[cpu]; if (PCPU_GET(cpuid) == cpu) return; mi_switch(SW_VOL, NULL); #endif } void sched_unbind(struct thread* td) { THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(td == curthread, ("sched_unbind: can only bind curthread")); td->td_flags &= ~TDF_BOUND; } int sched_is_bound(struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); return (td->td_flags & TDF_BOUND); } void sched_relinquish(struct thread *td) { thread_lock(td); mi_switch(SW_VOL | SWT_RELINQUISH, NULL); thread_unlock(td); } int sched_load(void) { return (sched_tdcnt); } int sched_sizeof_proc(void) { return (sizeof(struct proc)); } int sched_sizeof_thread(void) { return (sizeof(struct thread) + sizeof(struct td_sched)); } fixpt_t sched_pctcpu(struct thread *td) { struct td_sched *ts; THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td_get_sched(td); return (ts->ts_pctcpu); } #ifdef RACCT /* * Calculates the contribution to the thread cpu usage for the latest * (unfinished) second. */ fixpt_t sched_pctcpu_delta(struct thread *td) { struct td_sched *ts; fixpt_t delta; int realstathz; THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td_get_sched(td); delta = 0; realstathz = stathz ? stathz : hz; if (ts->ts_cpticks != 0) { #if (FSHIFT >= CCPU_SHIFT) delta = (realstathz == 100) ? 
((fixpt_t) ts->ts_cpticks) << (FSHIFT - CCPU_SHIFT) : 100 * (((fixpt_t) ts->ts_cpticks) << (FSHIFT - CCPU_SHIFT)) / realstathz; #else delta = ((FSCALE - ccpu) * (ts->ts_cpticks * FSCALE / realstathz)) >> FSHIFT; #endif } return (delta); } #endif u_int sched_estcpu(struct thread *td) { return (td_get_sched(td)->ts_estcpu); } /* * The actual idle process. */ void sched_idletd(void *dummy) { struct pcpuidlestat *stat; THREAD_NO_SLEEPING(); stat = DPCPU_PTR(idlestat); for (;;) { mtx_assert(&Giant, MA_NOTOWNED); while (sched_runnable() == 0) { cpu_idle(stat->idlecalls + stat->oldidlecalls > 64); stat->idlecalls++; } mtx_lock_spin(&sched_lock); mi_switch(SW_VOL | SWT_IDLE, NULL); mtx_unlock_spin(&sched_lock); } } /* * A CPU is entering for the first time or a thread is exiting. */ void sched_throw(struct thread *td) { /* * Correct spinlock nesting. The idle thread context that we are * borrowing was created so that it would start out with a single * spin lock (sched_lock) held in fork_trampoline(). Since we've * explicitly acquired locks in this function, the nesting count * is now 2 rather than 1. Since we are nested, calling * spinlock_exit() will simply adjust the counts without allowing * spin lock using code to interrupt us. */ if (td == NULL) { mtx_lock_spin(&sched_lock); spinlock_exit(); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); } else { lock_profile_release_lock(&sched_lock.lock_object); MPASS(td->td_lock == &sched_lock); td->td_lastcpu = td->td_oncpu; td->td_oncpu = NOCPU; } mtx_assert(&sched_lock, MA_OWNED); KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); cpu_throw(td, choosethread()); /* doesn't return */ } void sched_fork_exit(struct thread *td) { /* * Finish setting up thread glue so that it begins execution in a * non-nested critical section with sched_lock held but not recursed. */ td->td_oncpu = PCPU_GET(cpuid); sched_lock.mtx_lock = (uintptr_t)td; lock_profile_obtain_lock_success(&sched_lock.lock_object, 0, 0, __FILE__, __LINE__); THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", "prio:%d", td->td_priority); SDT_PROBE0(sched, , , on__cpu); } char * sched_tdname(struct thread *td) { #ifdef KTR struct td_sched *ts; ts = td_get_sched(td); if (ts->ts_name[0] == '\0') snprintf(ts->ts_name, sizeof(ts->ts_name), "%s tid %d", td->td_name, td->td_tid); return (ts->ts_name); #else return (td->td_name); #endif } #ifdef KTR void sched_clear_tdname(struct thread *td) { struct td_sched *ts; ts = td_get_sched(td); ts->ts_name[0] = '\0'; } #endif void sched_affinity(struct thread *td) { #ifdef SMP struct td_sched *ts; int cpu; THREAD_LOCK_ASSERT(td, MA_OWNED); /* * Set the TSF_AFFINITY flag if there is at least one CPU this * thread can't run on. */ ts = td_get_sched(td); ts->ts_flags &= ~TSF_AFFINITY; CPU_FOREACH(cpu) { if (!THREAD_CAN_SCHED(td, cpu)) { ts->ts_flags |= TSF_AFFINITY; break; } } /* * If this thread can run on all CPUs, nothing else to do. */ if (!(ts->ts_flags & TSF_AFFINITY)) return; /* Pinned threads and bound threads should be left alone. */ if (td->td_pinned != 0 || td->td_flags & TDF_BOUND) return; switch (td->td_state) { case TDS_RUNQ: /* * If we are on a per-CPU runqueue that is in the set, * then nothing needs to be done. */ if (ts->ts_runq != &runq && THREAD_CAN_SCHED(td, ts->ts_runq - runq_pcpu)) return; /* Put this thread on a valid per-CPU runqueue. */ sched_rem(td); sched_add(td, SRQ_BORING); break; case TDS_RUNNING: /* * See if our current CPU is in the set. 
If not, force a * context switch. */ if (THREAD_CAN_SCHED(td, td->td_oncpu)) return; td->td_flags |= TDF_NEEDRESCHED; if (td != curthread) ipi_cpu(cpu, IPI_AST); break; default: break; } #endif } Index: stable/11/sys/kern/sysv_msg.c =================================================================== --- stable/11/sys/kern/sysv_msg.c (revision 331642) +++ stable/11/sys/kern/sysv_msg.c (revision 331643) @@ -1,1920 +1,1900 @@ /*- * Implementation of SVID messages * * Author: Daniel Boulet * * Copyright 1993 Daniel Boulet and RTMX Inc. * * This system call was implemented by Daniel Boulet under contract from RTMX. * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. */ /*- * Copyright (c) 2003-2005 McAfee, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project in part by McAfee * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research * program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_sysvipc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(sysv_msg, "System V message queues support"); static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues"); static int msginit(void); static int msgunload(void); static int sysvmsg_modload(struct module *, int, void *); static void msq_remove(struct msqid_kernel *); static struct prison *msg_find_prison(struct ucred *); static int msq_prison_cansee(struct prison *, struct msqid_kernel *); static int msg_prison_check(void *, void *); static int msg_prison_set(void *, void *); static int msg_prison_get(void *, void *); static int msg_prison_remove(void *, void *); static void msg_prison_cleanup(struct prison *); #ifdef MSG_DEBUG #define DPRINTF(a) printf a #else #define DPRINTF(a) (void)0 #endif static void msg_freehdr(struct msg *msghdr); #ifndef MSGSSZ #define MSGSSZ 8 /* Each segment must be 2^N long */ #endif #ifndef MSGSEG #define MSGSEG 2048 /* must be less than 32767 */ #endif #define MSGMAX (MSGSSZ*MSGSEG) #ifndef MSGMNB #define MSGMNB 2048 /* max # of bytes in a queue */ #endif #ifndef MSGMNI #define MSGMNI 40 #endif #ifndef MSGTQL #define MSGTQL 40 #endif /* * Based on the configuration parameters described in an SVR2 (yes, two) * config(1m) man page. * * Each message is broken up and stored in segments that are msgssz bytes * long. For efficiency reasons, this should be a power of two. Also, * it doesn't make sense if it is less than 8 or greater than about 256. * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of * two between 8 and 1024 inclusive (and panic's if it isn't). */ struct msginfo msginfo = { MSGMAX, /* max chars in a message */ MSGMNI, /* # of message queue identifiers */ MSGMNB, /* max chars in a queue */ MSGTQL, /* max messages in system */ MSGSSZ, /* size of a message segment */ /* (must be small power of 2 greater than 4) */ MSGSEG /* number of message segments */ }; /* * macros to convert between msqid_ds's and msqid's. * (specific to this implementation) */ #define MSQID(ix,ds) ((ix) & 0xffff | (((ds).msg_perm.seq << 16) & 0xffff0000)) #define MSQID_IX(id) ((id) & 0xffff) #define MSQID_SEQ(id) (((id) >> 16) & 0xffff) /* * The rest of this file is specific to this particular implementation. */ struct msgmap { short next; /* next segment in buffer */ /* -1 -> available */ /* 0..(MSGSEG-1) -> index of next segment */ }; #define MSG_LOCKED 01000 /* Is this msqid_ds locked? */ static int nfree_msgmaps; /* # of free map entries */ static short free_msgmaps; /* head of linked list of free map entries */ static struct msg *free_msghdrs;/* list of free msg headers */ static char *msgpool; /* MSGMAX byte long msg buffer pool */ static struct msgmap *msgmaps; /* MSGSEG msgmap structures */ static struct msg *msghdrs; /* MSGTQL msg headers */ static struct msqid_kernel *msqids; /* MSGMNI msqid_kernel struct's */ static struct mtx msq_mtx; /* global mutex for message queues. 
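(a single lock serializing all msgget/msgctl/msgsnd/msgrcv processing)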
*/ static unsigned msg_prison_slot;/* prison OSD slot */ static struct syscall_helper_data msg_syscalls[] = { SYSCALL_INIT_HELPER(msgctl), SYSCALL_INIT_HELPER(msgget), SYSCALL_INIT_HELPER(msgsnd), SYSCALL_INIT_HELPER(msgrcv), #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCALL_INIT_HELPER(msgsys), SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl), #endif SYSCALL_INIT_LAST }; #ifdef COMPAT_FREEBSD32 #include #include #include #include #include #include static struct syscall_helper_data msg32_syscalls[] = { SYSCALL32_INIT_HELPER(freebsd32_msgctl), SYSCALL32_INIT_HELPER(freebsd32_msgsnd), SYSCALL32_INIT_HELPER(freebsd32_msgrcv), SYSCALL32_INIT_HELPER_COMPAT(msgget), SYSCALL32_INIT_HELPER(freebsd32_msgsys), #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl), #endif SYSCALL_INIT_LAST }; #endif static int msginit() { struct prison *pr; void **rsv; int i, error; osd_method_t methods[PR_MAXMETHOD] = { [PR_METHOD_CHECK] = msg_prison_check, [PR_METHOD_SET] = msg_prison_set, [PR_METHOD_GET] = msg_prison_get, [PR_METHOD_REMOVE] = msg_prison_remove, }; msginfo.msgmax = msginfo.msgseg * msginfo.msgssz; msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK); msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK); msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK); msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG, M_WAITOK | M_ZERO); /* * msginfo.msgssz should be a power of two for efficiency reasons. * It is also pretty silly if msginfo.msgssz is less than 8 * or greater than about 256 so ... */ i = 8; while (i < 1024 && i != msginfo.msgssz) i <<= 1; if (i != msginfo.msgssz) { DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz, msginfo.msgssz)); panic("msginfo.msgssz not a small power of 2"); } if (msginfo.msgseg > 32767) { DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg)); panic("msginfo.msgseg > 32767"); } for (i = 0; i < msginfo.msgseg; i++) { if (i > 0) msgmaps[i-1].next = i; msgmaps[i].next = -1; /* implies entry is available */ } free_msgmaps = 0; nfree_msgmaps = msginfo.msgseg; for (i = 0; i < msginfo.msgtql; i++) { msghdrs[i].msg_type = 0; if (i > 0) msghdrs[i-1].msg_next = &msghdrs[i]; msghdrs[i].msg_next = NULL; #ifdef MAC mac_sysvmsg_init(&msghdrs[i]); #endif } free_msghdrs = &msghdrs[0]; for (i = 0; i < msginfo.msgmni; i++) { msqids[i].u.msg_qbytes = 0; /* implies entry is available */ msqids[i].u.msg_perm.seq = 0; /* reset to a known value */ msqids[i].u.msg_perm.mode = 0; #ifdef MAC mac_sysvmsq_init(&msqids[i]); #endif } mtx_init(&msq_mtx, "msq", NULL, MTX_DEF); /* Set current prisons according to their allow.sysvipc. 
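(register the jail OSD slot, then walk allprison and attach it to prison0 and to every existing jail that has PR_ALLOW_SYSVIPC set)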
*/ msg_prison_slot = osd_jail_register(NULL, methods); rsv = osd_reserve(msg_prison_slot); prison_lock(&prison0); (void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0); prison_unlock(&prison0); rsv = NULL; sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) { if (rsv == NULL) rsv = osd_reserve(msg_prison_slot); prison_lock(pr); if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv, &prison0); rsv = NULL; } prison_unlock(pr); } if (rsv != NULL) osd_free_reserved(rsv); sx_sunlock(&allprison_lock); error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 error = syscall32_helper_register(msg32_syscalls, SY_THR_STATIC_KLD); if (error != 0) return (error); #endif return (0); } static int msgunload() { struct msqid_kernel *msqkptr; int msqid; #ifdef MAC int i; #endif syscall_helper_unregister(msg_syscalls); #ifdef COMPAT_FREEBSD32 syscall32_helper_unregister(msg32_syscalls); #endif for (msqid = 0; msqid < msginfo.msgmni; msqid++) { msqkptr = &msqids[msqid]; if (msqkptr->u.msg_qbytes != 0 || (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) break; } if (msqid != msginfo.msgmni) return (EBUSY); if (msg_prison_slot != 0) osd_jail_deregister(msg_prison_slot); #ifdef MAC for (i = 0; i < msginfo.msgtql; i++) mac_sysvmsg_destroy(&msghdrs[i]); for (msqid = 0; msqid < msginfo.msgmni; msqid++) mac_sysvmsq_destroy(&msqids[msqid]); #endif free(msgpool, M_MSG); free(msgmaps, M_MSG); free(msghdrs, M_MSG); free(msqids, M_MSG); mtx_destroy(&msq_mtx); return (0); } static int sysvmsg_modload(struct module *module, int cmd, void *arg) { int error = 0; switch (cmd) { case MOD_LOAD: error = msginit(); if (error != 0) msgunload(); break; case MOD_UNLOAD: error = msgunload(); break; case MOD_SHUTDOWN: break; default: error = EINVAL; break; } return (error); } static moduledata_t sysvmsg_mod = { "sysvmsg", &sysvmsg_modload, NULL }; DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST); MODULE_VERSION(sysvmsg, 1); static void msg_freehdr(msghdr) struct msg *msghdr; { while (msghdr->msg_ts > 0) { short next; if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg) panic("msghdr->msg_spot out of range"); next = msgmaps[msghdr->msg_spot].next; msgmaps[msghdr->msg_spot].next = free_msgmaps; free_msgmaps = msghdr->msg_spot; nfree_msgmaps++; msghdr->msg_spot = next; if (msghdr->msg_ts >= msginfo.msgssz) msghdr->msg_ts -= msginfo.msgssz; else msghdr->msg_ts = 0; } if (msghdr->msg_spot != -1) panic("msghdr->msg_spot != -1"); msghdr->msg_next = free_msghdrs; free_msghdrs = msghdr; #ifdef MAC mac_sysvmsg_cleanup(msghdr); #endif } static void msq_remove(struct msqid_kernel *msqkptr) { struct msg *msghdr; racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1); racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum); racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes); crfree(msqkptr->cred); msqkptr->cred = NULL; /* Free the message headers */ msghdr = msqkptr->u.msg_first; while (msghdr != NULL) { struct msg *msghdr_tmp; /* Free the segments of each message */ msqkptr->u.msg_cbytes -= msghdr->msg_ts; msqkptr->u.msg_qnum--; msghdr_tmp = msghdr; msghdr = msghdr->msg_next; msg_freehdr(msghdr_tmp); } if (msqkptr->u.msg_cbytes != 0) panic("msg_cbytes is screwed up"); if (msqkptr->u.msg_qnum != 0) panic("msg_qnum is screwed up"); msqkptr->u.msg_qbytes = 0; /* Mark it as free */ #ifdef MAC mac_sysvmsq_cleanup(msqkptr); #endif wakeup(msqkptr); } 
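/*
 * Illustrative sketch (added for exposition; not part of the original
 * file): how the MSQID()/MSQID_IX()/MSQID_SEQ() packing above behaves.
 * The slot index lives in the low 16 bits and the slot's reuse
 * generation (msg_perm.seq) in the high 16 bits:
 *
 *	int ix = 3;				// slot in msqids[]
 *	u_short seq = 5;			// bumped on each reuse
 *	int id = (ix & 0xffff) | ((seq << 16) & 0xffff0000);
 *	// id == 0x00050003
 *	// MSQID_IX(id) == 3, MSQID_SEQ(id) == 5
 *
 * Because the generation is part of the id, an id captured before a
 * slot was recycled fails the msg_perm.seq comparison in kern_msgctl()
 * below and is rejected with EINVAL instead of touching the new queue.
 */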
static struct prison * msg_find_prison(struct ucred *cred) { struct prison *pr, *rpr; pr = cred->cr_prison; prison_lock(pr); rpr = osd_jail_get(pr, msg_prison_slot); prison_unlock(pr); return rpr; } static int msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr) { if (msqkptr->cred == NULL || !(rpr == msqkptr->cred->cr_prison || prison_ischild(rpr, msqkptr->cred->cr_prison))) return (EINVAL); return (0); } #ifndef _SYS_SYSPROTO_H_ struct msgctl_args { int msqid; int cmd; struct msqid_ds *buf; }; #endif int -sys_msgctl(td, uap) - struct thread *td; - register struct msgctl_args *uap; +sys_msgctl(struct thread *td, struct msgctl_args *uap) { int msqid = uap->msqid; int cmd = uap->cmd; struct msqid_ds msqbuf; int error; DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf)); if (cmd == IPC_SET && (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0) return (error); error = kern_msgctl(td, msqid, cmd, &msqbuf); if (cmd == IPC_STAT && error == 0) error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds)); return (error); } int -kern_msgctl(td, msqid, cmd, msqbuf) - struct thread *td; - int msqid; - int cmd; - struct msqid_ds *msqbuf; +kern_msgctl(struct thread *td, int msqid, int cmd, struct msqid_ds *msqbuf) { int rval, error, msqix; - register struct msqid_kernel *msqkptr; + struct msqid_kernel *msqkptr; struct prison *rpr; rpr = msg_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); msqix = IPCID_TO_IX(msqid); if (msqix < 0 || msqix >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, msginfo.msgmni)); return (EINVAL); } msqkptr = &msqids[msqix]; mtx_lock(&msq_mtx); if (msqkptr->u.msg_qbytes == 0) { DPRINTF(("no such msqid\n")); error = EINVAL; goto done2; } if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } error = msq_prison_cansee(rpr, msqkptr); if (error != 0) { DPRINTF(("requester can't see prison\n")); goto done2; } #ifdef MAC error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd); if (error != 0) goto done2; #endif error = 0; rval = 0; switch (cmd) { case IPC_RMID: { #ifdef MAC struct msg *msghdr; #endif if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M))) goto done2; #ifdef MAC /* * Check that the thread has MAC access permissions to * individual msghdrs. Note: We need to do this in a * separate loop because the actual loop alters the * msq/msghdr info as it progresses, and there is no going * back if half the way through we discover that the * thread cannot free a certain msghdr. The msq will get * into an inconsistent state. */ for (msghdr = msqkptr->u.msg_first; msghdr != NULL; msghdr = msghdr->msg_next) { error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr); if (error != 0) goto done2; } #endif msq_remove(msqkptr); } break; case IPC_SET: if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M))) goto done2; if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) { error = priv_check(td, PRIV_IPC_MSGSIZE); if (error) goto done2; } if (msqbuf->msg_qbytes > msginfo.msgmnb) { DPRINTF(("can't increase msg_qbytes beyond %d " "(truncating)\n", msginfo.msgmnb)); msqbuf->msg_qbytes = msginfo.msgmnb; /* silently restrict qbytes to system limit */ } if (msqbuf->msg_qbytes == 0) { DPRINTF(("can't reduce msg_qbytes to 0\n")); error = EINVAL; /* non-standard errno! 
*/ goto done2; } msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid; /* change the owner */ msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid; /* change the owner */ msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) | (msqbuf->msg_perm.mode & 0777); msqkptr->u.msg_qbytes = msqbuf->msg_qbytes; msqkptr->u.msg_ctime = time_second; break; case IPC_STAT: if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) { DPRINTF(("requester doesn't have read access\n")); goto done2; } *msqbuf = msqkptr->u; if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison) msqbuf->msg_perm.key = IPC_PRIVATE; break; default: DPRINTF(("invalid command %d\n", cmd)); error = EINVAL; goto done2; } if (error == 0) td->td_retval[0] = rval; done2: mtx_unlock(&msq_mtx); return (error); } #ifndef _SYS_SYSPROTO_H_ struct msgget_args { key_t key; int msgflg; }; #endif int -sys_msgget(td, uap) - struct thread *td; - register struct msgget_args *uap; +sys_msgget(struct thread *td, struct msgget_args *uap) { int msqid, error = 0; int key = uap->key; int msgflg = uap->msgflg; struct ucred *cred = td->td_ucred; - register struct msqid_kernel *msqkptr = NULL; + struct msqid_kernel *msqkptr = NULL; DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg)); if (msg_find_prison(cred) == NULL) return (ENOSYS); mtx_lock(&msq_mtx); if (key != IPC_PRIVATE) { for (msqid = 0; msqid < msginfo.msgmni; msqid++) { msqkptr = &msqids[msqid]; if (msqkptr->u.msg_qbytes != 0 && msqkptr->cred != NULL && msqkptr->cred->cr_prison == cred->cr_prison && msqkptr->u.msg_perm.key == key) break; } if (msqid < msginfo.msgmni) { DPRINTF(("found public key\n")); if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) { DPRINTF(("not exclusive\n")); error = EEXIST; goto done2; } if ((error = ipcperm(td, &msqkptr->u.msg_perm, msgflg & 0700))) { DPRINTF(("requester doesn't have 0%o access\n", msgflg & 0700)); goto done2; } #ifdef MAC error = mac_sysvmsq_check_msqget(cred, msqkptr); if (error != 0) goto done2; #endif goto found; } } DPRINTF(("need to allocate the msqid_ds\n")); if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) { for (msqid = 0; msqid < msginfo.msgmni; msqid++) { /* * Look for an unallocated and unlocked msqid_ds. * msqid_ds's can be locked by msgsnd or msgrcv while * they are copying the message in/out. We can't * re-use the entry until they release it. 
*/ msqkptr = &msqids[msqid]; if (msqkptr->u.msg_qbytes == 0 && (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0) break; } if (msqid == msginfo.msgmni) { DPRINTF(("no more msqid_ds's available\n")); error = ENOSPC; goto done2; } #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); error = racct_add(td->td_proc, RACCT_NMSGQ, 1); PROC_UNLOCK(td->td_proc); if (error != 0) { error = ENOSPC; goto done2; } } #endif DPRINTF(("msqid %d is available\n", msqid)); msqkptr->u.msg_perm.key = key; msqkptr->u.msg_perm.cuid = cred->cr_uid; msqkptr->u.msg_perm.uid = cred->cr_uid; msqkptr->u.msg_perm.cgid = cred->cr_gid; msqkptr->u.msg_perm.gid = cred->cr_gid; msqkptr->u.msg_perm.mode = (msgflg & 0777); msqkptr->cred = crhold(cred); /* Make sure that the returned msqid is unique */ msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff; msqkptr->u.msg_first = NULL; msqkptr->u.msg_last = NULL; msqkptr->u.msg_cbytes = 0; msqkptr->u.msg_qnum = 0; msqkptr->u.msg_qbytes = msginfo.msgmnb; msqkptr->u.msg_lspid = 0; msqkptr->u.msg_lrpid = 0; msqkptr->u.msg_stime = 0; msqkptr->u.msg_rtime = 0; msqkptr->u.msg_ctime = time_second; #ifdef MAC mac_sysvmsq_create(cred, msqkptr); #endif } else { DPRINTF(("didn't find it and wasn't asked to create it\n")); error = ENOENT; goto done2; } found: /* Construct the unique msqid */ td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm); done2: mtx_unlock(&msq_mtx); return (error); } #ifndef _SYS_SYSPROTO_H_ struct msgsnd_args { int msqid; - const void *msgp; + const void *msgp; /* XXX msgp is actually mtext. */ size_t msgsz; int msgflg; }; #endif int -kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype) - struct thread *td; - int msqid; - const void *msgp; /* XXX msgp is actually mtext. */ - size_t msgsz; - int msgflg; - long mtype; +kern_msgsnd(struct thread *td, int msqid, const void *msgp, + size_t msgsz, int msgflg, long mtype) { int msqix, segs_needed, error = 0; - register struct msqid_kernel *msqkptr; - register struct msg *msghdr; + struct msqid_kernel *msqkptr; + struct msg *msghdr; struct prison *rpr; short next; #ifdef RACCT size_t saved_msgsz; #endif rpr = msg_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); mtx_lock(&msq_mtx); msqix = IPCID_TO_IX(msqid); if (msqix < 0 || msqix >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, msginfo.msgmni)); error = EINVAL; goto done2; } msqkptr = &msqids[msqix]; if (msqkptr->u.msg_qbytes == 0) { DPRINTF(("no such message queue id\n")); error = EINVAL; goto done2; } if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } if ((error = msq_prison_cansee(rpr, msqkptr))) { DPRINTF(("requester can't see prison\n")); goto done2; } if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) { DPRINTF(("requester doesn't have write access\n")); goto done2; } #ifdef MAC error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr); if (error != 0) goto done2; #endif #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) { PROC_UNLOCK(td->td_proc); error = EAGAIN; goto done2; } saved_msgsz = msgsz; if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) { racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); PROC_UNLOCK(td->td_proc); error = EAGAIN; goto done2; } PROC_UNLOCK(td->td_proc); } #endif segs_needed = howmany(msgsz, msginfo.msgssz); DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz, segs_needed)); for (;;) { int need_more_resources = 0; /* * check msgsz * (inside 
this loop in case msg_qbytes changes while we sleep) */ if (msgsz > msqkptr->u.msg_qbytes) { DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n")); error = EINVAL; goto done3; } if (msqkptr->u.msg_perm.mode & MSG_LOCKED) { DPRINTF(("msqid is locked\n")); need_more_resources = 1; } if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) { DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n")); need_more_resources = 1; } if (segs_needed > nfree_msgmaps) { DPRINTF(("segs_needed > nfree_msgmaps\n")); need_more_resources = 1; } if (free_msghdrs == NULL) { DPRINTF(("no more msghdrs\n")); need_more_resources = 1; } if (need_more_resources) { int we_own_it; if ((msgflg & IPC_NOWAIT) != 0) { DPRINTF(("need more resources but caller " "doesn't want to wait\n")); error = EAGAIN; goto done3; } if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) { DPRINTF(("we don't own the msqid_ds\n")); we_own_it = 0; } else { /* Force later arrivals to wait for our request */ DPRINTF(("we own the msqid_ds\n")); msqkptr->u.msg_perm.mode |= MSG_LOCKED; we_own_it = 1; } DPRINTF(("msgsnd: goodnight\n")); error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH, "msgsnd", hz); DPRINTF(("msgsnd: good morning, error=%d\n", error)); if (we_own_it) msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; if (error == EWOULDBLOCK) { DPRINTF(("msgsnd: timed out\n")); continue; } if (error != 0) { DPRINTF(("msgsnd: interrupted system call\n")); error = EINTR; goto done3; } /* * Make sure that the msq queue still exists */ if (msqkptr->u.msg_qbytes == 0) { DPRINTF(("msqid deleted\n")); error = EIDRM; goto done3; } } else { DPRINTF(("got all the resources that we need\n")); break; } } /* * We have the resources that we need. * Make sure! */ if (msqkptr->u.msg_perm.mode & MSG_LOCKED) panic("msg_perm.mode & MSG_LOCKED"); if (segs_needed > nfree_msgmaps) panic("segs_needed > nfree_msgmaps"); if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) panic("msgsz + msg_cbytes > msg_qbytes"); if (free_msghdrs == NULL) panic("no more msghdrs"); /* * Re-lock the msqid_ds in case we page-fault when copying in the * message */ if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) panic("msqid_ds is already locked"); msqkptr->u.msg_perm.mode |= MSG_LOCKED; /* * Allocate a message header */ msghdr = free_msghdrs; free_msghdrs = msghdr->msg_next; msghdr->msg_spot = -1; msghdr->msg_ts = msgsz; msghdr->msg_type = mtype; #ifdef MAC /* * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here * immediately? Or, should it be checked just before the msg is * enqueued in the msgq (as it is done now)? 
*/ mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr); #endif /* * Allocate space for the message */ while (segs_needed > 0) { if (nfree_msgmaps <= 0) panic("not enough msgmaps"); if (free_msgmaps == -1) panic("nil free_msgmaps"); next = free_msgmaps; if (next <= -1) panic("next too low #1"); if (next >= msginfo.msgseg) panic("next out of range #1"); DPRINTF(("allocating segment %d to message\n", next)); free_msgmaps = msgmaps[next].next; nfree_msgmaps--; msgmaps[next].next = msghdr->msg_spot; msghdr->msg_spot = next; segs_needed--; } /* * Validate the message type */ if (msghdr->msg_type < 1) { msg_freehdr(msghdr); msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; wakeup(msqkptr); DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type)); error = EINVAL; goto done3; } /* * Copy in the message body */ next = msghdr->msg_spot; while (msgsz > 0) { size_t tlen; if (msgsz > msginfo.msgssz) tlen = msginfo.msgssz; else tlen = msgsz; if (next <= -1) panic("next too low #2"); if (next >= msginfo.msgseg) panic("next out of range #2"); mtx_unlock(&msq_mtx); if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz], tlen)) != 0) { mtx_lock(&msq_mtx); DPRINTF(("error %d copying in message segment\n", error)); msg_freehdr(msghdr); msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; wakeup(msqkptr); goto done3; } mtx_lock(&msq_mtx); msgsz -= tlen; msgp = (const char *)msgp + tlen; next = msgmaps[next].next; } if (next != -1) panic("didn't use all the msg segments"); /* * We've got the message. Unlock the msqid_ds. */ msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; /* * Make sure that the msqid_ds is still allocated. */ if (msqkptr->u.msg_qbytes == 0) { msg_freehdr(msghdr); wakeup(msqkptr); error = EIDRM; goto done3; } #ifdef MAC /* * Note: Since the task/thread allocates the msghdr and usually * primes it with its own MAC label, for a majority of policies, it * won't be necessary to check whether the msghdr has access * permissions to the msgq. The mac_sysvmsq_check_msqsnd check would * suffice in that case. However, this hook may be required where * individual policies derive a non-identical label for the msghdr * from the current thread label and may want to check the msghdr * enqueue permissions, along with read/write permissions to the * msgq. 
*/ error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr); if (error != 0) { msg_freehdr(msghdr); wakeup(msqkptr); goto done3; } #endif /* * Put the message into the queue */ if (msqkptr->u.msg_first == NULL) { msqkptr->u.msg_first = msghdr; msqkptr->u.msg_last = msghdr; } else { msqkptr->u.msg_last->msg_next = msghdr; msqkptr->u.msg_last = msghdr; } msqkptr->u.msg_last->msg_next = NULL; msqkptr->u.msg_cbytes += msghdr->msg_ts; msqkptr->u.msg_qnum++; msqkptr->u.msg_lspid = td->td_proc->p_pid; msqkptr->u.msg_stime = time_second; wakeup(msqkptr); td->td_retval[0] = 0; done3: #ifdef RACCT if (racct_enable && error != 0) { PROC_LOCK(td->td_proc); racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz); PROC_UNLOCK(td->td_proc); } #endif done2: mtx_unlock(&msq_mtx); return (error); } int -sys_msgsnd(td, uap) - struct thread *td; - register struct msgsnd_args *uap; +sys_msgsnd(struct thread *td, struct msgsnd_args *uap) { int error; long mtype; DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp, uap->msgsz, uap->msgflg)); if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) { DPRINTF(("error %d copying the message type\n", error)); return (error); } return (kern_msgsnd(td, uap->msqid, (const char *)uap->msgp + sizeof(mtype), uap->msgsz, uap->msgflg, mtype)); } #ifndef _SYS_SYSPROTO_H_ struct msgrcv_args { int msqid; void *msgp; size_t msgsz; long msgtyp; int msgflg; }; #endif +/* XXX msgp is actually mtext. */ int -kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype) - struct thread *td; - int msqid; - void *msgp; /* XXX msgp is actually mtext. */ - size_t msgsz; - long msgtyp; - int msgflg; - long *mtype; +kern_msgrcv(struct thread *td, int msqid, void *msgp, size_t msgsz, long msgtyp, + int msgflg, long *mtype) { size_t len; - register struct msqid_kernel *msqkptr; - register struct msg *msghdr; + struct msqid_kernel *msqkptr; + struct msg *msghdr; struct prison *rpr; int msqix, error = 0; short next; rpr = msg_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); msqix = IPCID_TO_IX(msqid); if (msqix < 0 || msqix >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, msginfo.msgmni)); return (EINVAL); } msqkptr = &msqids[msqix]; mtx_lock(&msq_mtx); if (msqkptr->u.msg_qbytes == 0) { DPRINTF(("no such message queue id\n")); error = EINVAL; goto done2; } if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } if ((error = msq_prison_cansee(rpr, msqkptr))) { DPRINTF(("requester can't see prison\n")); goto done2; } if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) { DPRINTF(("requester doesn't have read access\n")); goto done2; } #ifdef MAC error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr); if (error != 0) goto done2; #endif msghdr = NULL; while (msghdr == NULL) { if (msgtyp == 0) { msghdr = msqkptr->u.msg_first; if (msghdr != NULL) { if (msgsz < msghdr->msg_ts && (msgflg & MSG_NOERROR) == 0) { DPRINTF(("first message on the queue " "is too big (want %zu, got %d)\n", msgsz, msghdr->msg_ts)); error = E2BIG; goto done2; } #ifdef MAC error = mac_sysvmsq_check_msgrcv(td->td_ucred, msghdr); if (error != 0) goto done2; #endif if (msqkptr->u.msg_first == msqkptr->u.msg_last) { msqkptr->u.msg_first = NULL; msqkptr->u.msg_last = NULL; } else { msqkptr->u.msg_first = msghdr->msg_next; if (msqkptr->u.msg_first == NULL) panic("msg_first/last screwed up #1"); } } } else { struct msg *previous; struct msg **prev; previous = 
NULL; prev = &(msqkptr->u.msg_first); while ((msghdr = *prev) != NULL) { /* * Is this message's type an exact match or is * this message's type less than or equal to * the absolute value of a negative msgtyp? * Note that the second half of this test can * NEVER be true if msgtyp is positive since * msg_type is always positive! */ if (msgtyp == msghdr->msg_type || msghdr->msg_type <= -msgtyp) { DPRINTF(("found message type %ld, " "requested %ld\n", msghdr->msg_type, msgtyp)); if (msgsz < msghdr->msg_ts && (msgflg & MSG_NOERROR) == 0) { DPRINTF(("requested message " "on the queue is too big " "(want %zu, got %hu)\n", msgsz, msghdr->msg_ts)); error = E2BIG; goto done2; } #ifdef MAC error = mac_sysvmsq_check_msgrcv( td->td_ucred, msghdr); if (error != 0) goto done2; #endif *prev = msghdr->msg_next; if (msghdr == msqkptr->u.msg_last) { if (previous == NULL) { if (prev != &msqkptr->u.msg_first) panic("msg_first/last screwed up #2"); msqkptr->u.msg_first = NULL; msqkptr->u.msg_last = NULL; } else { if (prev == &msqkptr->u.msg_first) panic("msg_first/last screwed up #3"); msqkptr->u.msg_last = previous; } } break; } previous = msghdr; prev = &(msghdr->msg_next); } } /* * We've either extracted the msghdr for the appropriate * message or there isn't one. * If there is one then bail out of this loop. */ if (msghdr != NULL) break; /* * Hmph! No message found. Does the user want to wait? */ if ((msgflg & IPC_NOWAIT) != 0) { DPRINTF(("no appropriate message found (msgtyp=%ld)\n", msgtyp)); /* The SVID says to return ENOMSG. */ error = ENOMSG; goto done2; } /* * Wait for something to happen */ DPRINTF(("msgrcv: goodnight\n")); error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH, "msgrcv", 0); DPRINTF(("msgrcv: good morning (error=%d)\n", error)); if (error != 0) { DPRINTF(("msgrcv: interrupted system call\n")); error = EINTR; goto done2; } /* * Make sure that the msq queue still exists */ if (msqkptr->u.msg_qbytes == 0 || msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { DPRINTF(("msqid deleted\n")); error = EIDRM; goto done2; } } /* * Return the message to the user. * * First, do the bookkeeping (before we risk being interrupted). */ msqkptr->u.msg_cbytes -= msghdr->msg_ts; msqkptr->u.msg_qnum--; msqkptr->u.msg_lrpid = td->td_proc->p_pid; msqkptr->u.msg_rtime = time_second; racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1); racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts); /* * Make msgsz the actual amount that we'll be returning. * Note that this effectively truncates the message if it is too long * (since msgsz is never increased). */ DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz, msghdr->msg_ts)); if (msgsz > msghdr->msg_ts) msgsz = msghdr->msg_ts; *mtype = msghdr->msg_type; /* * Return the segments to the user */ next = msghdr->msg_spot; for (len = 0; len < msgsz; len += msginfo.msgssz) { size_t tlen; if (msgsz - len > msginfo.msgssz) tlen = msginfo.msgssz; else tlen = msgsz - len; if (next <= -1) panic("next too low #3"); if (next >= msginfo.msgseg) panic("next out of range #3"); mtx_unlock(&msq_mtx); error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen); mtx_lock(&msq_mtx); if (error != 0) { DPRINTF(("error (%d) copying out message segment\n", error)); msg_freehdr(msghdr); wakeup(msqkptr); goto done2; } msgp = (char *)msgp + tlen; next = msgmaps[next].next; } /* * Done, return the actual number of bytes copied out. 
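* A caller-side illustration (hedged; the buffer shape and type value are
* invented): without MSG_NOERROR an oversized message fails the E2BIG checks
* in the matching loop above, while
*
*	struct { long mtype; char mtext[128]; } m;
*	ssize_t n = msgrcv(id, &m, sizeof(m.mtext), 42, MSG_NOERROR);
*
* accepts it, and the msgsz clamp here silently truncates the copy-out.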
*/ msg_freehdr(msghdr); wakeup(msqkptr); td->td_retval[0] = msgsz; done2: mtx_unlock(&msq_mtx); return (error); } int -sys_msgrcv(td, uap) - struct thread *td; - register struct msgrcv_args *uap; +sys_msgrcv(struct thread *td, struct msgrcv_args *uap) { int error; long mtype; DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid, uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg)); if ((error = kern_msgrcv(td, uap->msqid, (char *)uap->msgp + sizeof(mtype), uap->msgsz, uap->msgtyp, uap->msgflg, &mtype)) != 0) return (error); if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0) DPRINTF(("error %d copying the message type\n", error)); return (error); } static int sysctl_msqids(SYSCTL_HANDLER_ARGS) { struct msqid_kernel tmsqk; #ifdef COMPAT_FREEBSD32 struct msqid_kernel32 tmsqk32; #endif struct prison *pr, *rpr; void *outaddr; size_t outsize; int error, i; pr = req->td->td_ucred->cr_prison; rpr = msg_find_prison(req->td->td_ucred); error = 0; for (i = 0; i < msginfo.msgmni; i++) { mtx_lock(&msq_mtx); if (msqids[i].u.msg_qbytes == 0 || rpr == NULL || msq_prison_cansee(rpr, &msqids[i]) != 0) bzero(&tmsqk, sizeof(tmsqk)); else { tmsqk = msqids[i]; if (tmsqk.cred->cr_prison != pr) tmsqk.u.msg_perm.key = IPC_PRIVATE; } mtx_unlock(&msq_mtx); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { bzero(&tmsqk32, sizeof(tmsqk32)); freebsd32_ipcperm_out(&tmsqk.u.msg_perm, &tmsqk32.u.msg_perm); /* Don't copy u.msg_first or u.msg_last */ CP(tmsqk, tmsqk32, u.msg_cbytes); CP(tmsqk, tmsqk32, u.msg_qnum); CP(tmsqk, tmsqk32, u.msg_qbytes); CP(tmsqk, tmsqk32, u.msg_lspid); CP(tmsqk, tmsqk32, u.msg_lrpid); CP(tmsqk, tmsqk32, u.msg_stime); CP(tmsqk, tmsqk32, u.msg_rtime); CP(tmsqk, tmsqk32, u.msg_ctime); /* Don't copy label or cred */ outaddr = &tmsqk32; outsize = sizeof(tmsqk32); } else #endif { /* Don't leak kernel pointers */ tmsqk.u.msg_first = NULL; tmsqk.u.msg_last = NULL; tmsqk.label = NULL; tmsqk.cred = NULL; /* * XXX: some padding also exists, but we take care to * allocate our pool of msqid_kernel structs with * zeroed memory so this should be OK. */ outaddr = &tmsqk; outsize = sizeof(tmsqk); } error = SYSCTL_OUT(req, outaddr, outsize); if (error != 0) break; } return (error); } SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "Maximum message size"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "Number of message queue identifiers"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0, "Maximum number of bytes in a queue"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0, "Maximum number of messages in the system"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "Size of a message segment"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "Number of message segments"); SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_msqids, "", "Array of struct msqid_kernel for each potential message queue"); static int msg_prison_check(void *obj, void *data) { struct prison *pr = obj; struct prison *prpr; struct vfsoptlist *opts = data; int error, jsys; /* * sysvmsg is a jailsys integer. * It must be "disable" if the parent jail is disabled. 
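* The three jailsys values: JAIL_SYS_DISABLE makes SysV messaging unavailable
* in the jail, JAIL_SYS_NEW makes the jail the root of its own message
* namespace, and JAIL_SYS_INHERIT shares the parent's namespace; e.g.
* "jail -c name=j1 sysvmsg=new ..." requests a private namespace.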
*/ error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)); if (error != ENOENT) { if (error != 0) return (error); switch (jsys) { case JAIL_SYS_DISABLE: break; case JAIL_SYS_NEW: case JAIL_SYS_INHERIT: prison_lock(pr->pr_parent); prpr = osd_jail_get(pr->pr_parent, msg_prison_slot); prison_unlock(pr->pr_parent); if (prpr == NULL) return (EPERM); break; default: return (EINVAL); } } return (0); } static int msg_prison_set(void *obj, void *data) { struct prison *pr = obj; struct prison *tpr, *orpr, *nrpr, *trpr; struct vfsoptlist *opts = data; void *rsv; int jsys, descend; /* * sysvmsg controls which jail is the root of the associated msgs (this * jail or same as the parent), or if the feature is available at all. */ if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT) jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) ? JAIL_SYS_INHERIT : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) ? JAIL_SYS_DISABLE : -1; if (jsys == JAIL_SYS_DISABLE) { prison_lock(pr); orpr = osd_jail_get(pr, msg_prison_slot); if (orpr != NULL) osd_jail_del(pr, msg_prison_slot); prison_unlock(pr); if (orpr != NULL) { if (orpr == pr) msg_prison_cleanup(pr); /* Disable all child jails as well. */ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { prison_lock(tpr); trpr = osd_jail_get(tpr, msg_prison_slot); if (trpr != NULL) { osd_jail_del(tpr, msg_prison_slot); prison_unlock(tpr); if (trpr == tpr) msg_prison_cleanup(tpr); } else { prison_unlock(tpr); descend = 0; } } } } else if (jsys != -1) { if (jsys == JAIL_SYS_NEW) nrpr = pr; else { prison_lock(pr->pr_parent); nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot); prison_unlock(pr->pr_parent); } rsv = osd_reserve(msg_prison_slot); prison_lock(pr); orpr = osd_jail_get(pr, msg_prison_slot); if (orpr != nrpr) (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv, nrpr); else osd_free_reserved(rsv); prison_unlock(pr); if (orpr != nrpr) { if (orpr == pr) msg_prison_cleanup(pr); if (orpr != NULL) { /* Change child jails matching the old root, */ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { prison_lock(tpr); trpr = osd_jail_get(tpr, msg_prison_slot); if (trpr == orpr) { (void)osd_jail_set(tpr, msg_prison_slot, nrpr); prison_unlock(tpr); if (trpr == tpr) msg_prison_cleanup(tpr); } else { prison_unlock(tpr); descend = 0; } } } } } return (0); } static int msg_prison_get(void *obj, void *data) { struct prison *pr = obj; struct prison *rpr; struct vfsoptlist *opts = data; int error, jsys; /* Set sysvmsg based on the jail's root prison. */ prison_lock(pr); rpr = osd_jail_get(pr, msg_prison_slot); prison_unlock(pr); jsys = rpr == NULL ? JAIL_SYS_DISABLE : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys)); if (error == ENOENT) error = 0; return (error); } static int msg_prison_remove(void *obj, void *data __unused) { struct prison *pr = obj; struct prison *rpr; prison_lock(pr); rpr = osd_jail_get(pr, msg_prison_slot); prison_unlock(pr); if (rpr == pr) msg_prison_cleanup(pr); return (0); } static void msg_prison_cleanup(struct prison *pr) { struct msqid_kernel *msqkptr; int i; /* Remove any msqs that belong to this jail. 
*/ mtx_lock(&msq_mtx); for (i = 0; i < msginfo.msgmni; i++) { msqkptr = &msqids[i]; if (msqkptr->u.msg_qbytes != 0 && msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr) msq_remove(msqkptr); } mtx_unlock(&msq_mtx); } SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues"); #ifdef COMPAT_FREEBSD32 int freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap) { #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) switch (uap->which) { case 0: return (freebsd7_freebsd32_msgctl(td, (struct freebsd7_freebsd32_msgctl_args *)&uap->a2)); case 2: return (freebsd32_msgsnd(td, (struct freebsd32_msgsnd_args *)&uap->a2)); case 3: return (freebsd32_msgrcv(td, (struct freebsd32_msgrcv_args *)&uap->a2)); default: return (sys_msgsys(td, (struct msgsys_args *)uap)); } #else return (nosys(td, NULL)); #endif } #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) int freebsd7_freebsd32_msgctl(struct thread *td, struct freebsd7_freebsd32_msgctl_args *uap) { struct msqid_ds msqbuf; struct msqid_ds32_old msqbuf32; int error; if (uap->cmd == IPC_SET) { error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32)); if (error) return (error); freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm); PTRIN_CP(msqbuf32, msqbuf, msg_first); PTRIN_CP(msqbuf32, msqbuf, msg_last); CP(msqbuf32, msqbuf, msg_cbytes); CP(msqbuf32, msqbuf, msg_qnum); CP(msqbuf32, msqbuf, msg_qbytes); CP(msqbuf32, msqbuf, msg_lspid); CP(msqbuf32, msqbuf, msg_lrpid); CP(msqbuf32, msqbuf, msg_stime); CP(msqbuf32, msqbuf, msg_rtime); CP(msqbuf32, msqbuf, msg_ctime); } error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf); if (error) return (error); if (uap->cmd == IPC_STAT) { bzero(&msqbuf32, sizeof(msqbuf32)); freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm); PTROUT_CP(msqbuf, msqbuf32, msg_first); PTROUT_CP(msqbuf, msqbuf32, msg_last); CP(msqbuf, msqbuf32, msg_cbytes); CP(msqbuf, msqbuf32, msg_qnum); CP(msqbuf, msqbuf32, msg_qbytes); CP(msqbuf, msqbuf32, msg_lspid); CP(msqbuf, msqbuf32, msg_lrpid); CP(msqbuf, msqbuf32, msg_stime); CP(msqbuf, msqbuf32, msg_rtime); CP(msqbuf, msqbuf32, msg_ctime); error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32)); } return (error); } #endif int freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap) { struct msqid_ds msqbuf; struct msqid_ds32 msqbuf32; int error; if (uap->cmd == IPC_SET) { error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32)); if (error) return (error); freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm); PTRIN_CP(msqbuf32, msqbuf, msg_first); PTRIN_CP(msqbuf32, msqbuf, msg_last); CP(msqbuf32, msqbuf, msg_cbytes); CP(msqbuf32, msqbuf, msg_qnum); CP(msqbuf32, msqbuf, msg_qbytes); CP(msqbuf32, msqbuf, msg_lspid); CP(msqbuf32, msqbuf, msg_lrpid); CP(msqbuf32, msqbuf, msg_stime); CP(msqbuf32, msqbuf, msg_rtime); CP(msqbuf32, msqbuf, msg_ctime); } error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf); if (error) return (error); if (uap->cmd == IPC_STAT) { freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm); PTROUT_CP(msqbuf, msqbuf32, msg_first); PTROUT_CP(msqbuf, msqbuf32, msg_last); CP(msqbuf, msqbuf32, msg_cbytes); CP(msqbuf, msqbuf32, msg_qnum); CP(msqbuf, msqbuf32, msg_qbytes); CP(msqbuf, msqbuf32, msg_lspid); CP(msqbuf, msqbuf32, msg_lrpid); CP(msqbuf, msqbuf32, msg_stime); CP(msqbuf, msqbuf32, msg_rtime); CP(msqbuf, msqbuf32, msg_ctime); error = copyout(&msqbuf32, uap->buf, 
sizeof(struct msqid_ds32)); } return (error); } int freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap) { const void *msgp; long mtype; int32_t mtype32; int error; msgp = PTRIN(uap->msgp); if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0) return (error); mtype = mtype32; return (kern_msgsnd(td, uap->msqid, (const char *)msgp + sizeof(mtype32), uap->msgsz, uap->msgflg, mtype)); } int freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap) { void *msgp; long mtype; int32_t mtype32; int error; msgp = PTRIN(uap->msgp); if ((error = kern_msgrcv(td, uap->msqid, (char *)msgp + sizeof(mtype32), uap->msgsz, uap->msgtyp, uap->msgflg, &mtype)) != 0) return (error); mtype32 = (int32_t)mtype; return (copyout(&mtype32, msgp, sizeof(mtype32))); } #endif #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *msgcalls[] = { (sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget, (sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv }; /* * Entry point for all MSG calls. + * + * XXX actually varargs. + * struct msgsys_args { + * int which; + * int a2; + * int a3; + * int a4; + * int a5; + * int a6; + * } *uap; */ int -sys_msgsys(td, uap) - struct thread *td; - /* XXX actually varargs. */ - struct msgsys_args /* { - int which; - int a2; - int a3; - int a4; - int a5; - int a6; - } */ *uap; +sys_msgsys(struct thread *td, struct msgsys_args *uap) { int error; if (uap->which < 0 || uap->which >= nitems(msgcalls)) return (EINVAL); error = (*msgcalls[uap->which])(td, &uap->a2); return (error); } #ifndef CP #define CP(src, dst, fld) do { (dst).fld = (src).fld; } while (0) #endif #ifndef _SYS_SYSPROTO_H_ struct freebsd7_msgctl_args { int msqid; int cmd; struct msqid_ds_old *buf; }; #endif int -freebsd7_msgctl(td, uap) - struct thread *td; - struct freebsd7_msgctl_args *uap; +freebsd7_msgctl(struct thread *td, struct freebsd7_msgctl_args *uap) { struct msqid_ds_old msqold; struct msqid_ds msqbuf; int error; DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd, uap->buf)); if (uap->cmd == IPC_SET) { error = copyin(uap->buf, &msqold, sizeof(msqold)); if (error) return (error); ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm); CP(msqold, msqbuf, msg_first); CP(msqold, msqbuf, msg_last); CP(msqold, msqbuf, msg_cbytes); CP(msqold, msqbuf, msg_qnum); CP(msqold, msqbuf, msg_qbytes); CP(msqold, msqbuf, msg_lspid); CP(msqold, msqbuf, msg_lrpid); CP(msqold, msqbuf, msg_stime); CP(msqold, msqbuf, msg_rtime); CP(msqold, msqbuf, msg_ctime); } error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf); if (error) return (error); if (uap->cmd == IPC_STAT) { bzero(&msqold, sizeof(msqold)); ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm); CP(msqbuf, msqold, msg_first); CP(msqbuf, msqold, msg_last); CP(msqbuf, msqold, msg_cbytes); CP(msqbuf, msqold, msg_qnum); CP(msqbuf, msqold, msg_qbytes); CP(msqbuf, msqold, msg_lspid); CP(msqbuf, msqold, msg_lrpid); CP(msqbuf, msqold, msg_stime); CP(msqbuf, msqold, msg_rtime); CP(msqbuf, msqold, msg_ctime); error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old)); } return (error); } #undef CP #endif /* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 || COMPAT_FREEBSD7 */ Index: stable/11/sys/kern/vfs_export.c =================================================================== --- stable/11/sys/kern/vfs_export.c (revision 331642) +++ stable/11/sys/kern/vfs_export.c (revision 331643) @@ -1,522 
+1,522 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NETADDR, "export_host", "Export host address structure"); static struct radix_node_head *vfs_create_addrlist_af( struct radix_node_head **prnh, int off); static void vfs_free_addrlist(struct netexport *nep); static int vfs_free_netcred(struct radix_node *rn, void *w); static void vfs_free_addrlist_af(struct radix_node_head **prnh); static int vfs_hang_addrlist(struct mount *mp, struct netexport *nep, struct export_args *argp); static struct netcred *vfs_export_lookup(struct mount *, struct sockaddr *); /* * Network address lookup element */ struct netcred { struct radix_node netc_rnodes[2]; int netc_exflags; struct ucred *netc_anon; int netc_numsecflavors; int netc_secflavors[MAXSECFLAVORS]; }; /* * Network export information */ struct netexport { struct netcred ne_defexported; /* Default export */ struct radix_node_head *ne4; struct radix_node_head *ne6; }; /* * Build hash lists of net addresses and hang them off the mount point. * Called by vfs_export() to set up the lists of export addresses. 
*/ static int vfs_hang_addrlist(struct mount *mp, struct netexport *nep, struct export_args *argp) { - register struct netcred *np; - register struct radix_node_head *rnh; - register int i; + struct netcred *np; + struct radix_node_head *rnh; + int i; struct radix_node *rn; struct sockaddr *saddr, *smask = NULL; #if defined(INET6) || defined(INET) int off; #endif int error; /* * XXX: This routine converts from a `struct xucred' * (argp->ex_anon) to a `struct ucred' (np->netc_anon). This * operation is questionable; for example, what should be done * with fields like cr_uidinfo and cr_prison? Currently, this * routine does not touch them (leaves them as NULL). */ if (argp->ex_anon.cr_version != XUCRED_VERSION) { vfs_mount_error(mp, "ex_anon.cr_version: %d != %d", argp->ex_anon.cr_version, XUCRED_VERSION); return (EINVAL); } if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) { vfs_mount_error(mp, "MNT_DEFEXPORTED already set for mount %p", mp); return (EPERM); } np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = crget(); np->netc_anon->cr_uid = argp->ex_anon.cr_uid; crsetgroups(np->netc_anon, argp->ex_anon.cr_ngroups, argp->ex_anon.cr_groups); np->netc_anon->cr_prison = &prison0; prison_hold(np->netc_anon->cr_prison); np->netc_numsecflavors = argp->ex_numsecflavors; bcopy(argp->ex_secflavors, np->netc_secflavors, sizeof(np->netc_secflavors)); MNT_ILOCK(mp); mp->mnt_flag |= MNT_DEFEXPORTED; MNT_IUNLOCK(mp); return (0); } #if MSIZE <= 256 if (argp->ex_addrlen > MLEN) { vfs_mount_error(mp, "ex_addrlen %d is greater than %d", argp->ex_addrlen, MLEN); return (EINVAL); } #endif i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK | M_ZERO); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, saddr, argp->ex_addrlen))) goto out; if (saddr->sa_family == AF_UNSPEC || saddr->sa_family > AF_MAX) { error = EINVAL; vfs_mount_error(mp, "Invalid saddr->sa_family: %d"); goto out; } if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } rnh = NULL; switch (saddr->sa_family) { #ifdef INET case AF_INET: if ((rnh = nep->ne4) == NULL) { off = offsetof(struct sockaddr_in, sin_addr) << 3; rnh = vfs_create_addrlist_af(&nep->ne4, off); } break; #endif #ifdef INET6 case AF_INET6: if ((rnh = nep->ne6) == NULL) { off = offsetof(struct sockaddr_in6, sin6_addr) << 3; rnh = vfs_create_addrlist_af(&nep->ne6, off); } break; #endif } if (rnh == NULL) { error = ENOBUFS; vfs_mount_error(mp, "%s %s %d", "Unable to initialize radix node head ", "for address family", saddr->sa_family); goto out; } RADIX_NODE_HEAD_LOCK(rnh); rn = (*rnh->rnh_addaddr)(saddr, smask, &rnh->rh, np->netc_rnodes); RADIX_NODE_HEAD_UNLOCK(rnh); if (rn == NULL || np != (struct netcred *)rn) { /* already exists */ error = EPERM; vfs_mount_error(mp, "netcred already exists for given addr/mask"); goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = crget(); np->netc_anon->cr_uid = argp->ex_anon.cr_uid; crsetgroups(np->netc_anon, argp->ex_anon.cr_ngroups, argp->ex_anon.cr_groups); np->netc_anon->cr_prison = &prison0; prison_hold(np->netc_anon->cr_prison); np->netc_numsecflavors = argp->ex_numsecflavors; bcopy(argp->ex_secflavors, np->netc_secflavors, 
sizeof(np->netc_secflavors)); return (0); out: free(np, M_NETADDR); return (error); } /* Helper for vfs_free_addrlist. */ /* ARGSUSED */ static int vfs_free_netcred(struct radix_node *rn, void *w) { struct radix_node_head *rnh = (struct radix_node_head *) w; struct ucred *cred; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, &rnh->rh); cred = ((struct netcred *)rn)->netc_anon; if (cred != NULL) crfree(cred); free(rn, M_NETADDR); return (0); } static struct radix_node_head * vfs_create_addrlist_af(struct radix_node_head **prnh, int off) { if (rn_inithead((void **)prnh, off) == 0) return (NULL); RADIX_NODE_HEAD_LOCK_INIT(*prnh); return (*prnh); } static void vfs_free_addrlist_af(struct radix_node_head **prnh) { struct radix_node_head *rnh; rnh = *prnh; RADIX_NODE_HEAD_LOCK(rnh); (*rnh->rnh_walktree)(&rnh->rh, vfs_free_netcred, rnh); RADIX_NODE_HEAD_UNLOCK(rnh); RADIX_NODE_HEAD_DESTROY(rnh); rn_detachhead((void **)prnh); prnh = NULL; } /* * Free the net address hash lists that are hanging off the mount points. */ static void vfs_free_addrlist(struct netexport *nep) { struct ucred *cred; if (nep->ne4 != NULL) vfs_free_addrlist_af(&nep->ne4); if (nep->ne6 != NULL) vfs_free_addrlist_af(&nep->ne6); cred = nep->ne_defexported.netc_anon; if (cred != NULL) crfree(cred); } /* * High level function to manipulate export options on a mount point * and the passed in netexport. * Struct export_args *argp is the variable used to twiddle options, * the structure is described in sys/mount.h */ int vfs_export(struct mount *mp, struct export_args *argp) { struct netexport *nep; int error; if (argp->ex_numsecflavors < 0 || argp->ex_numsecflavors >= MAXSECFLAVORS) return (EINVAL); error = 0; lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL); nep = mp->mnt_export; if (argp->ex_flags & MNT_DELEXPORT) { if (nep == NULL) { error = ENOENT; goto out; } if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_EXPUBLIC; MNT_IUNLOCK(mp); } vfs_free_addrlist(nep); mp->mnt_export = NULL; free(nep, M_MOUNT); nep = NULL; MNT_ILOCK(mp); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); MNT_IUNLOCK(mp); } if (argp->ex_flags & MNT_EXPORTED) { if (nep == NULL) { nep = malloc(sizeof(struct netexport), M_MOUNT, M_WAITOK | M_ZERO); mp->mnt_export = nep; } if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) goto out; MNT_ILOCK(mp); mp->mnt_flag |= MNT_EXPUBLIC; MNT_IUNLOCK(mp); } if ((error = vfs_hang_addrlist(mp, nep, argp))) goto out; MNT_ILOCK(mp); mp->mnt_flag |= MNT_EXPORTED; MNT_IUNLOCK(mp); } out: lockmgr(&mp->mnt_explock, LK_RELEASE, NULL); /* * Once we have executed the vfs_export() command, we do * not want to keep the "export" option around in the * options list, since that will cause subsequent MNT_UPDATE * calls to fail. The export information is saved in * mp->mnt_export, so we can safely delete the "export" mount option * here. */ vfs_deleteopt(mp->mnt_optnew, "export"); vfs_deleteopt(mp->mnt_opt, "export"); return (error); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(struct mount *mp, struct netexport *nep, struct export_args *argp) { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. 
*/ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { free(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero(&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rvp))) return (error); if ((error = VOP_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { if (nfs_pub.np_index == NULL) nfs_pub.np_index = malloc(MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { free(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } /* * Used by the filesystems to determine if a given network address * (passed in 'nam') is present in their exports list, returns a pointer * to struct netcred so that the filesystem can examine it for * access rights (read/write/etc). */ static struct netcred * vfs_export_lookup(struct mount *mp, struct sockaddr *nam) { struct netexport *nep; - register struct netcred *np; - register struct radix_node_head *rnh; + struct netcred *np; + struct radix_node_head *rnh; struct sockaddr *saddr; nep = mp->mnt_export; if (nep == NULL) return (NULL); np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = NULL; switch (saddr->sa_family) { case AF_INET: rnh = nep->ne4; break; case AF_INET6: rnh = nep->ne6; break; } if (rnh != NULL) { RADIX_NODE_HEAD_RLOCK(rnh); np = (struct netcred *) (*rnh->rnh_matchaddr)(saddr, &rnh->rh); RADIX_NODE_HEAD_RUNLOCK(rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * XXX: This comment comes from the deprecated ufs_check_export() * XXX: and may not entirely apply, but lacking something better: * This is the generic part of fhtovp called after the underlying * filesystem has validated the file handle. * * Verify that a host should have access to a filesystem. 
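* A hedged consumer-side sketch (argument names follow the prototype below):
* the NFS server reaches this via the VFS_CHECKEXP() macro once a file handle
* has been validated, roughly
*
*	error = VFS_CHECKEXP(mp, nam, &exflags, &credanon,
*	    &numsecflavors, &secflavors);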
*/ int vfs_stdcheckexp(struct mount *mp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp, int *numsecflavors, int **secflavors) { struct netcred *np; lockmgr(&mp->mnt_explock, LK_SHARED, NULL); np = vfs_export_lookup(mp, nam); if (np == NULL) { lockmgr(&mp->mnt_explock, LK_RELEASE, NULL); *credanonp = NULL; return (EACCES); } *extflagsp = np->netc_exflags; if ((*credanonp = np->netc_anon) != NULL) crhold(*credanonp); if (numsecflavors) *numsecflavors = np->netc_numsecflavors; if (secflavors) *secflavors = np->netc_secflavors; lockmgr(&mp->mnt_explock, LK_RELEASE, NULL); return (0); } Index: stable/11/sys/kern/vfs_mount.c =================================================================== --- stable/11/sys/kern/vfs_mount.c (revision 331642) +++ stable/11/sys/kern/vfs_mount.c (revision 331643) @@ -1,2015 +1,2001 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1999-2004 Poul-Henning Kamp * Copyright (c) 1999 Michael Smith * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define VFS_MOUNTARG_SIZE_MAX (1024 * 64) static int vfs_domount(struct thread *td, const char *fstype, char *fspath, uint64_t fsflags, struct vfsoptlist **optlist); static void free_mntarg(struct mntarg *ma); static int usermount = 0; SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "Unprivileged users may mount and unmount file systems"); MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure"); static uma_zone_t mount_zone; /* List of mounted filesystems. */ struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* For any iteration/modification of mountlist */ struct mtx mountlist_mtx; MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF); /* * Global opts, taken by all filesystems */ static const char *global_opts[] = { "errmsg", "fstype", "fspath", "ro", "rw", "nosuid", "noexec", NULL }; static int mount_init(void *mem, int size, int flags) { struct mount *mp; mp = (struct mount *)mem; mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); return (0); } static void mount_fini(void *mem, int size) { struct mount *mp; mp = (struct mount *)mem; lockdestroy(&mp->mnt_explock); mtx_destroy(&mp->mnt_mtx); } static void vfs_mount_init(void *dummy __unused) { mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL, NULL, mount_init, mount_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); } SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL); /* * --------------------------------------------------------------------- * Functions for building and sanitizing the mount options */ /* Remove one mount option. */ static void vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) { TAILQ_REMOVE(opts, opt, link); free(opt->name, M_MOUNT); if (opt->value != NULL) free(opt->value, M_MOUNT); free(opt, M_MOUNT); } /* Release all resources related to the mount options. */ void vfs_freeopts(struct vfsoptlist *opts) { struct vfsopt *opt; while (!TAILQ_EMPTY(opts)) { opt = TAILQ_FIRST(opts); vfs_freeopt(opts, opt); } free(opts, M_MOUNT); } void vfs_deleteopt(struct vfsoptlist *opts, const char *name) { struct vfsopt *opt, *temp; if (opts == NULL) return; TAILQ_FOREACH_SAFE(opt, opts, link, temp) { if (strcmp(opt->name, name) == 0) vfs_freeopt(opts, opt); } } static int vfs_isopt_ro(const char *opt) { if (strcmp(opt, "ro") == 0 || strcmp(opt, "rdonly") == 0 || strcmp(opt, "norw") == 0) return (1); return (0); } static int vfs_isopt_rw(const char *opt) { if (strcmp(opt, "rw") == 0 || strcmp(opt, "noro") == 0) return (1); return (0); } /* * Check if options are equal (with or without the "no" prefix). */ static int vfs_equalopts(const char *opt1, const char *opt2) { char *p; /* "opt" vs. "opt" or "noopt" vs. "noopt" */ if (strcmp(opt1, opt2) == 0) return (1); /* "noopt" vs. "opt" */ if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) return (1); /* "opt" vs. "noopt" */ if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) return (1); while ((p = strchr(opt1, '.')) != NULL && !strncmp(opt1, opt2, ++p - opt1)) { opt2 += p - opt1; opt1 = p; /* "foo.noopt" vs. "foo.opt" */ if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) return (1); /* "foo.opt" vs. 
"foo.noopt" */ if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) return (1); } /* "ro" / "rdonly" / "norw" / "rw" / "noro" */ if ((vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1)) && (vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2))) return (1); return (0); } /* * If a mount option is specified several times, * (with or without the "no" prefix) only keep * the last occurrence of it. */ static void vfs_sanitizeopts(struct vfsoptlist *opts) { struct vfsopt *opt, *opt2, *tmp; TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { opt2 = TAILQ_PREV(opt, vfsoptlist, link); while (opt2 != NULL) { if (vfs_equalopts(opt->name, opt2->name)) { tmp = TAILQ_PREV(opt2, vfsoptlist, link); vfs_freeopt(opts, opt2); opt2 = tmp; } else { opt2 = TAILQ_PREV(opt2, vfsoptlist, link); } } } } /* * Build a linked list of mount options from a struct uio. */ int vfs_buildopts(struct uio *auio, struct vfsoptlist **options) { struct vfsoptlist *opts; struct vfsopt *opt; size_t memused, namelen, optlen; unsigned int i, iovcnt; int error; opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); TAILQ_INIT(opts); memused = 0; iovcnt = auio->uio_iovcnt; for (i = 0; i < iovcnt; i += 2) { namelen = auio->uio_iov[i].iov_len; optlen = auio->uio_iov[i + 1].iov_len; memused += sizeof(struct vfsopt) + optlen + namelen; /* * Avoid consuming too much memory, and attempts to overflow * memused. */ if (memused > VFS_MOUNTARG_SIZE_MAX || optlen > VFS_MOUNTARG_SIZE_MAX || namelen > VFS_MOUNTARG_SIZE_MAX) { error = EINVAL; goto bad; } opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); opt->name = malloc(namelen, M_MOUNT, M_WAITOK); opt->value = NULL; opt->len = 0; opt->pos = i / 2; opt->seen = 0; /* * Do this early, so jumps to "bad" will free the current * option. */ TAILQ_INSERT_TAIL(opts, opt, link); if (auio->uio_segflg == UIO_SYSSPACE) { bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); } else { error = copyin(auio->uio_iov[i].iov_base, opt->name, namelen); if (error) goto bad; } /* Ensure names are null-terminated strings. */ if (namelen == 0 || opt->name[namelen - 1] != '\0') { error = EINVAL; goto bad; } if (optlen != 0) { opt->len = optlen; opt->value = malloc(optlen, M_MOUNT, M_WAITOK); if (auio->uio_segflg == UIO_SYSSPACE) { bcopy(auio->uio_iov[i + 1].iov_base, opt->value, optlen); } else { error = copyin(auio->uio_iov[i + 1].iov_base, opt->value, optlen); if (error) goto bad; } } } vfs_sanitizeopts(opts); *options = opts; return (0); bad: vfs_freeopts(opts); return (error); } /* * Merge the old mount options with the new ones passed * in the MNT_UPDATE case. * * XXX: This function will keep a "nofoo" option in the new * options. E.g, if the option's canonical name is "foo", * "nofoo" ends up in the mount point's active options. */ static void vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *oldopts) { struct vfsopt *opt, *new; TAILQ_FOREACH(opt, oldopts, link) { new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); new->name = strdup(opt->name, M_MOUNT); if (opt->len != 0) { new->value = malloc(opt->len, M_MOUNT, M_WAITOK); bcopy(opt->value, new->value, opt->len); } else new->value = NULL; new->len = opt->len; new->seen = opt->seen; TAILQ_INSERT_HEAD(toopts, new, link); } vfs_sanitizeopts(toopts); } /* * Mount a filesystem. 
*/ +#ifndef _SYS_SYSPROTO_H_ +struct nmount_args { + struct iovec *iovp; + unsigned int iovcnt; + int flags; +}; +#endif int -sys_nmount(td, uap) - struct thread *td; - struct nmount_args /* { - struct iovec *iovp; - unsigned int iovcnt; - int flags; - } */ *uap; +sys_nmount(struct thread *td, struct nmount_args *uap) { struct uio *auio; int error; u_int iovcnt; uint64_t flags; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__, uap->iovp, uap->iovcnt, flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; iovcnt = uap->iovcnt; /* * Check that we have an even number of iovec's * and that we have at least two options. */ if ((iovcnt & 1) || (iovcnt < 4)) { CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__, uap->iovcnt); return (EINVAL); } error = copyinuio(uap->iovp, iovcnt, &auio); if (error) { CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno", __func__, error); return (error); } error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return (error); } /* * --------------------------------------------------------------------- * Various utility functions */ void vfs_ref(struct mount *mp) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_REF(mp); MNT_IUNLOCK(mp); } void vfs_rel(struct mount *mp) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_REL(mp); MNT_IUNLOCK(mp); } /* * Allocate and initialize the mount point struct. */ struct mount * vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath, struct ucred *cred) { struct mount *mp; mp = uma_zalloc(mount_zone, M_WAITOK); bzero(&mp->mnt_startzero, __rangeof(struct mount, mnt_startzero, mnt_endzero)); TAILQ_INIT(&mp->mnt_nvnodelist); mp->mnt_nvnodelistsize = 0; TAILQ_INIT(&mp->mnt_activevnodelist); mp->mnt_activevnodelistsize = 0; mp->mnt_ref = 0; (void) vfs_busy(mp, MBF_NOWAIT); atomic_add_acq_int(&vfsp->vfc_refcount, 1); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_gen++; strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_vnodecovered = vp; mp->mnt_cred = crdup(cred); mp->mnt_stat.f_owner = cred->cr_uid; strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); mp->mnt_iosize_max = DFLTPHYS; #ifdef MAC mac_mount_init(mp); mac_mount_create(cred, mp); #endif arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); TAILQ_INIT(&mp->mnt_uppers); return (mp); } /* * Destroy the mount struct previously allocated by vfs_mount_alloc(). 
*/ void vfs_mount_destroy(struct mount *mp) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_REFEXPIRE; if (mp->mnt_kern_flag & MNTK_MWAIT) { mp->mnt_kern_flag &= ~MNTK_MWAIT; wakeup(mp); } while (mp->mnt_ref) msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0); KASSERT(mp->mnt_ref == 0, ("%s: invalid refcount in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); if (mp->mnt_writeopcount != 0) panic("vfs_mount_destroy: nonzero writeopcount"); if (mp->mnt_secondary_writes != 0) panic("vfs_mount_destroy: nonzero secondary_writes"); atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1); if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) { struct vnode *vp; TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) vn_printf(vp, "dangling vnode "); panic("unmount: dangling vnode"); } KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers")); if (mp->mnt_nvnodelistsize != 0) panic("vfs_mount_destroy: nonzero nvnodelistsize"); if (mp->mnt_activevnodelistsize != 0) panic("vfs_mount_destroy: nonzero activevnodelistsize"); if (mp->mnt_lockref != 0) panic("vfs_mount_destroy: nonzero lock refcount"); MNT_IUNLOCK(mp); if (mp->mnt_vnodecovered != NULL) vrele(mp->mnt_vnodecovered); #ifdef MAC mac_mount_destroy(mp); #endif if (mp->mnt_opt != NULL) vfs_freeopts(mp->mnt_opt); crfree(mp->mnt_cred); uma_zfree(mount_zone, mp); } int vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions) { struct vfsoptlist *optlist; struct vfsopt *opt, *tmp_opt; char *fstype, *fspath, *errmsg; int error, fstypelen, fspathlen, errmsg_len, errmsg_pos; errmsg = fspath = NULL; errmsg_len = fspathlen = 0; errmsg_pos = -1; error = vfs_buildopts(fsoptions, &optlist); if (error) return (error); if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0) errmsg_pos = vfs_getopt_pos(optlist, "errmsg"); /* * We need these two options before the others, * and they are mandatory for any filesystem. * Ensure they are NUL terminated as well. */ fstypelen = 0; error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); if (error || fstype[fstypelen - 1] != '\0') { error = EINVAL; if (errmsg != NULL) strncpy(errmsg, "Invalid fstype", errmsg_len); goto bail; } fspathlen = 0; error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); if (error || fspath[fspathlen - 1] != '\0') { error = EINVAL; if (errmsg != NULL) strncpy(errmsg, "Invalid fspath", errmsg_len); goto bail; } /* * We need to see if we have the "update" option * before we call vfs_domount(), since vfs_domount() has special * logic based on MNT_UPDATE. This is very important * when we want to update the root filesystem. 
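 *
 * The loop below also rewrites positive option names into a
 * doubly-negated form (e.g. "atime" becomes "nonoatime") so that
 * vfs_mergeopts()/vfs_sanitizeopts() can cancel a pre-existing
 * "noatime" with a plain "foo"/"nofoo" string comparison.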
*/ TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) { if (strcmp(opt->name, "update") == 0) { fsflags |= MNT_UPDATE; vfs_freeopt(optlist, opt); } else if (strcmp(opt->name, "async") == 0) fsflags |= MNT_ASYNC; else if (strcmp(opt->name, "force") == 0) { fsflags |= MNT_FORCE; vfs_freeopt(optlist, opt); } else if (strcmp(opt->name, "reload") == 0) { fsflags |= MNT_RELOAD; vfs_freeopt(optlist, opt); } else if (strcmp(opt->name, "multilabel") == 0) fsflags |= MNT_MULTILABEL; else if (strcmp(opt->name, "noasync") == 0) fsflags &= ~MNT_ASYNC; else if (strcmp(opt->name, "noatime") == 0) fsflags |= MNT_NOATIME; else if (strcmp(opt->name, "atime") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonoatime", M_MOUNT); } else if (strcmp(opt->name, "noclusterr") == 0) fsflags |= MNT_NOCLUSTERR; else if (strcmp(opt->name, "clusterr") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonoclusterr", M_MOUNT); } else if (strcmp(opt->name, "noclusterw") == 0) fsflags |= MNT_NOCLUSTERW; else if (strcmp(opt->name, "clusterw") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonoclusterw", M_MOUNT); } else if (strcmp(opt->name, "noexec") == 0) fsflags |= MNT_NOEXEC; else if (strcmp(opt->name, "exec") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonoexec", M_MOUNT); } else if (strcmp(opt->name, "nosuid") == 0) fsflags |= MNT_NOSUID; else if (strcmp(opt->name, "suid") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonosuid", M_MOUNT); } else if (strcmp(opt->name, "nosymfollow") == 0) fsflags |= MNT_NOSYMFOLLOW; else if (strcmp(opt->name, "symfollow") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonosymfollow", M_MOUNT); } else if (strcmp(opt->name, "noro") == 0) fsflags &= ~MNT_RDONLY; else if (strcmp(opt->name, "rw") == 0) fsflags &= ~MNT_RDONLY; else if (strcmp(opt->name, "ro") == 0) fsflags |= MNT_RDONLY; else if (strcmp(opt->name, "rdonly") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("ro", M_MOUNT); fsflags |= MNT_RDONLY; } else if (strcmp(opt->name, "suiddir") == 0) fsflags |= MNT_SUIDDIR; else if (strcmp(opt->name, "sync") == 0) fsflags |= MNT_SYNCHRONOUS; else if (strcmp(opt->name, "union") == 0) fsflags |= MNT_UNION; else if (strcmp(opt->name, "automounted") == 0) { fsflags |= MNT_AUTOMOUNTED; vfs_freeopt(optlist, opt); } } /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (fstypelen > MFSNAMELEN || fspathlen > MNAMELEN) { error = ENAMETOOLONG; goto bail; } error = vfs_domount(td, fstype, fspath, fsflags, &optlist); bail: /* copyout the errmsg */ if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt) && errmsg_len > 0 && errmsg != NULL) { if (fsoptions->uio_segflg == UIO_SYSSPACE) { bcopy(errmsg, fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); } else { copyout(errmsg, fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); } } if (optlist != NULL) vfs_freeopts(optlist); return (error); } /* * Old mount API. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int -sys_mount(td, uap) - struct thread *td; - struct mount_args /* { - char *type; - char *path; - int flags; - caddr_t data; - } */ *uap; +sys_mount(struct thread *td, struct mount_args *uap) { char *fstype; struct vfsconf *vfsp = NULL; struct mntarg *ma = NULL; uint64_t flags; int error; /* * Mount flags are now 64-bits. 
On 32-bit architectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of mount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); if (error) { free(fstype, M_TEMP); return (error); } AUDIT_ARG_TEXT(fstype); vfsp = vfs_byname_kld(fstype, td, &error); free(fstype, M_TEMP); if (vfsp == NULL) return (ENOENT); if (vfsp->vfc_vfsops->vfs_cmount == NULL) return (EOPNOTSUPP); ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN); ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN); ma = mount_argb(ma, flags & MNT_RDONLY, "noro"); ma = mount_argb(ma, !(flags & MNT_NOSUID), "nosuid"); ma = mount_argb(ma, !(flags & MNT_NOEXEC), "noexec"); error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, flags); return (error); } /* * vfs_domount_first(): first file system mount (not update) */ static int vfs_domount_first( struct thread *td, /* Calling thread. */ struct vfsconf *vfsp, /* File system type. */ char *fspath, /* Mount path. */ struct vnode *vp, /* Vnode to be covered. */ uint64_t fsflags, /* Flags common to all filesystems. */ struct vfsoptlist **optlist /* Options local to the filesystem. */ ) { struct vattr va; struct mount *mp; struct vnode *newdp; int error; ASSERT_VOP_ELOCKED(vp, __func__); KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here")); /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ error = VOP_GETATTR(vp, &va, td->td_ucred); if (error == 0 && va.va_uid != td->td_ucred->cr_uid) error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN, 0); if (error == 0) error = vinvalbuf(vp, V_SAVE, 0, 0); if (error == 0 && vp->v_type != VDIR) error = ENOTDIR; if (error == 0) { VI_LOCK(vp); if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL) vp->v_iflag |= VI_MOUNT; else error = EBUSY; VI_UNLOCK(vp); } if (error != 0) { vput(vp); return (error); } VOP_UNLOCK(vp, 0); /* Allocate and initialize the filesystem. */ mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred); /* XXXMAC: pass to vfs_mount_alloc? */ mp->mnt_optnew = *optlist; /* Set the mount level flags. */ mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY)); /* * Mount the filesystem. * XXX The final recipients of VFS_MOUNT just overwrite the ndp they * get. No freeing of cn_pnbuf. */ error = VFS_MOUNT(mp); if (error != 0) { vfs_unbusy(mp); mp->mnt_vnodecovered = NULL; vfs_mount_destroy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vrele(vp); return (error); } if (mp->mnt_opt != NULL) vfs_freeopts(mp->mnt_opt); mp->mnt_opt = mp->mnt_optnew; *optlist = NULL; (void)VFS_STATFS(mp, &mp->mnt_stat); /* * Prevent external consumers of mount options from reading mnt_optnew. */ mp->mnt_optnew = NULL; MNT_ILOCK(mp); if ((mp->mnt_flag & MNT_ASYNC) != 0 && (mp->mnt_kern_flag & MNTK_NOASYNC) == 0) mp->mnt_kern_flag |= MNTK_ASYNC; else mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); cache_purge(vp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vp->v_mountedhere = mp; /* Place the new filesystem at the end of the mount list. 
*/ mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) panic("mount: lost mount"); VOP_UNLOCK(vp, 0); EVENTHANDLER_INVOKE(vfs_mounted, mp, newdp, td); VOP_UNLOCK(newdp, 0); mountcheckdirs(vp, newdp); vrele(newdp); if ((mp->mnt_flag & MNT_RDONLY) == 0) vfs_allocate_syncvnode(mp); vfs_unbusy(mp); return (0); } /* * vfs_domount_update(): update of mounted file system */ static int vfs_domount_update( struct thread *td, /* Calling thread. */ struct vnode *vp, /* Mount point vnode. */ uint64_t fsflags, /* Flags common to all filesystems. */ struct vfsoptlist **optlist /* Options local to the filesystem. */ ) { struct export_args export; void *bufp; struct mount *mp; int error, export_error, len; uint64_t flag; ASSERT_VOP_ELOCKED(vp, __func__); KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here")); mp = vp->v_mount; if ((vp->v_vflag & VV_ROOT) == 0) { if (vfs_copyopt(*optlist, "export", &export, sizeof(export)) == 0) error = EXDEV; else error = EINVAL; vput(vp); return (error); } /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ flag = mp->mnt_flag; if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } /* * Only privileged root, or (if MNT_USER is set) the user that * did the original mount is permitted to update it. */ error = vfs_suser(mp, td); if (error != 0) { vput(vp); return (error); } if (vfs_busy(mp, MBF_NOWAIT)) { vput(vp); return (EBUSY); } VI_LOCK(vp); if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) { VI_UNLOCK(vp); vfs_unbusy(mp); vput(vp); return (EBUSY); } vp->v_iflag |= VI_MOUNT; VI_UNLOCK(vp); VOP_UNLOCK(vp, 0); MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { MNT_IUNLOCK(mp); error = EBUSY; goto end; } mp->mnt_flag &= ~MNT_UPDATEMASK; mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY); if ((mp->mnt_flag & MNT_ASYNC) == 0) mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); mp->mnt_optnew = *optlist; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); /* * Mount the filesystem. * XXX The final recipients of VFS_MOUNT just overwrite the ndp they * get. No freeing of cn_pnbuf. */ error = VFS_MOUNT(mp); export_error = 0; /* Process the export option. */ if (error == 0 && vfs_getopt(mp->mnt_optnew, "export", &bufp, &len) == 0) { /* Assume that there is only 1 ABI for each length. */ switch (len) { case (sizeof(struct oexport_args)): bzero(&export, sizeof(export)); /* FALLTHROUGH */ case (sizeof(export)): bcopy(bufp, &export, len); export_error = vfs_export(mp, &export); break; default: export_error = EINVAL; break; } } MNT_ILOCK(mp); if (error == 0) { mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); } else { /* * If we fail, restore old mount flags. MNT_QUOTA is special, * because it is not part of MNT_UPDATEMASK, but it could have * changed in the meantime if quotactl(2) was called. * All in all we want current value of MNT_QUOTA, not the old * one. 
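 *
 * Worked example: if quotactl(2) turned MNT_QUOTA on while VFS_MOUNT
 * was running, the expression below keeps that fresh bit and restores
 * every other flag from the saved value.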
*/ mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA); } if ((mp->mnt_flag & MNT_ASYNC) != 0 && (mp->mnt_kern_flag & MNTK_NOASYNC) == 0) mp->mnt_kern_flag |= MNTK_ASYNC; else mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); if (error != 0) goto end; if (mp->mnt_opt != NULL) vfs_freeopts(mp->mnt_opt); mp->mnt_opt = mp->mnt_optnew; *optlist = NULL; (void)VFS_STATFS(mp, &mp->mnt_stat); /* * Prevent external consumers of mount options from reading * mnt_optnew. */ mp->mnt_optnew = NULL; if ((mp->mnt_flag & MNT_RDONLY) == 0) vfs_allocate_syncvnode(mp); else vfs_deallocate_syncvnode(mp); end: vfs_unbusy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vrele(vp); return (error != 0 ? error : export_error); } /* * vfs_domount(): actually attempt a filesystem mount. */ static int vfs_domount( struct thread *td, /* Calling thread. */ const char *fstype, /* Filesystem type. */ char *fspath, /* Mount path. */ uint64_t fsflags, /* Flags common to all filesystems. */ struct vfsoptlist **optlist /* Options local to the filesystem. */ ) { struct vfsconf *vfsp; struct nameidata nd; struct vnode *vp; char *pathbuf; int error; /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) return (ENAMETOOLONG); if (jailed(td->td_ucred) || usermount == 0) { if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) return (error); } /* * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. */ if (fsflags & MNT_EXPORTED) { error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED); if (error) return (error); } if (fsflags & MNT_SUIDDIR) { error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR); if (error) return (error); } /* * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users. */ if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) { if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0) fsflags |= MNT_NOSUID | MNT_USER; } /* Load KLDs before we lock the covered vnode to avoid reversals. */ vfsp = NULL; if ((fsflags & MNT_UPDATE) == 0) { /* Don't try to load KLDs if we're mounting the root. */ if (fsflags & MNT_ROOTFS) vfsp = vfs_byname(fstype); else vfsp = vfs_byname_kld(fstype, td, &error); if (vfsp == NULL) return (ENODEV); if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL)) return (EPERM); } /* * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE. */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, fspath, td); error = namei(&nd); if (error != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if ((fsflags & MNT_UPDATE) == 0) { pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); strcpy(pathbuf, fspath); error = vn_path_to_global_path(td, vp, pathbuf, MNAMELEN); /* debug.disablefullpath == 1 results in ENODEV */ if (error == 0 || error == ENODEV) { error = vfs_domount_first(td, vfsp, pathbuf, vp, fsflags, optlist); } free(pathbuf, M_TEMP); } else error = vfs_domount_update(td, vp, fsflags, optlist); return (error); } /* * Unmount a filesystem. * * Note: unmount takes a path to the vnode mounted on as argument, not * special file (as before). 
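 *
 * With MNT_BYFSID the path argument instead encodes the filesystem
 * ID as "FSID:val0:val1" (decoded by the sscanf() below), e.g. a
 * hypothetical unmount("FSID:712241952:0", MNT_BYFSID).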
*/ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int sys_unmount(struct thread *td, struct unmount_args *uap) { struct nameidata nd; struct mount *mp; char *pathbuf; int error, id0, id1; AUDIT_ARG_VALUE(uap->flags); if (jailed(td->td_ucred) || usermount == 0) { error = priv_check(td, PRIV_VFS_UNMOUNT); if (error) return (error); } pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL); if (error) { free(pathbuf, M_TEMP); return (error); } if (uap->flags & MNT_BYFSID) { AUDIT_ARG_TEXT(pathbuf); /* Decode the filesystem ID. */ if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) { free(pathbuf, M_TEMP); return (EINVAL); } mtx_lock(&mountlist_mtx); TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == id0 && mp->mnt_stat.f_fsid.val[1] == id1) { vfs_ref(mp); break; } } mtx_unlock(&mountlist_mtx); } else { /* * Try to find global path for path argument. */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, pathbuf, td); if (namei(&nd) == 0) { NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_path_to_global_path(td, nd.ni_vp, pathbuf, MNAMELEN); if (error == 0 || error == ENODEV) vput(nd.ni_vp); } mtx_lock(&mountlist_mtx); TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) { vfs_ref(mp); break; } } mtx_unlock(&mountlist_mtx); } free(pathbuf, M_TEMP); if (mp == NULL) { /* * Previously we returned ENOENT for a nonexistent path and * EINVAL for a non-mountpoint. We cannot tell these apart * now, so in the !MNT_BYFSID case return the more likely * EINVAL for compatibility. */ return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL); } /* * Don't allow unmounting the root filesystem. */ if (mp->mnt_flag & MNT_ROOTFS) { vfs_rel(mp); return (EINVAL); } error = dounmount(mp, uap->flags, td); return (error); } /* * Return error if any of the vnodes, ignoring the root vnode * and the syncer vnode, have non-zero usecount. * * This function is purely advisory - it can return false positives * and negatives. */ static int vfs_check_usecounts(struct mount *mp) { struct vnode *vp, *mvp; MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if ((vp->v_vflag & VV_ROOT) == 0 && vp->v_type != VNON && vp->v_usecount != 0) { VI_UNLOCK(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (EBUSY); } VI_UNLOCK(vp); } return (0); } static void dounmount_cleanup(struct mount *mp, struct vnode *coveredvp, int mntkflags) { mtx_assert(MNT_MTX(mp), MA_OWNED); mp->mnt_kern_flag &= ~mntkflags; if ((mp->mnt_kern_flag & MNTK_MWAIT) != 0) { mp->mnt_kern_flag &= ~MNTK_MWAIT; wakeup(mp); } MNT_IUNLOCK(mp); if (coveredvp != NULL) { VOP_UNLOCK(coveredvp, 0); vdrop(coveredvp); } vn_finished_write(mp); } /* * Do the actual filesystem unmount. */ int dounmount(struct mount *mp, int flags, struct thread *td) { struct vnode *coveredvp, *fsrootvp; int error; uint64_t async_flag; int mnt_gen_r; if ((coveredvp = mp->mnt_vnodecovered) != NULL) { mnt_gen_r = mp->mnt_gen; VI_LOCK(coveredvp); vholdl(coveredvp); vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); /* * Check for mp being unmounted while waiting for the * covered vnode lock. */ if (coveredvp->v_mountedhere != mp || coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) { VOP_UNLOCK(coveredvp, 0); vdrop(coveredvp); vfs_rel(mp); return (EBUSY); } } /* * Only privileged root, or (if MNT_USER is set) the user that did the * original mount is permitted to unmount this filesystem. 
*/ error = vfs_suser(mp, td); if (error != 0) { if (coveredvp != NULL) { VOP_UNLOCK(coveredvp, 0); vdrop(coveredvp); } vfs_rel(mp); return (error); } vn_start_write(NULL, &mp, V_WAIT | V_MNTREF); MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 || (mp->mnt_flag & MNT_UPDATE) != 0 || !TAILQ_EMPTY(&mp->mnt_uppers)) { dounmount_cleanup(mp, coveredvp, 0); return (EBUSY); } mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ; if (flags & MNT_NONBUSY) { MNT_IUNLOCK(mp); error = vfs_check_usecounts(mp); MNT_ILOCK(mp); if (error != 0) { dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT | MNTK_NOINSMNTQ); return (error); } } /* Allow filesystems to detect that a forced unmount is in progress. */ if (flags & MNT_FORCE) { mp->mnt_kern_flag |= MNTK_UNMOUNTF; MNT_IUNLOCK(mp); /* * Must be done after setting MNTK_UNMOUNTF and before * waiting for mnt_lockref to become 0. */ VFS_PURGE(mp); MNT_ILOCK(mp); } error = 0; if (mp->mnt_lockref) { mp->mnt_kern_flag |= MNTK_DRAINING; error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS, "mount drain", 0); } MNT_IUNLOCK(mp); KASSERT(mp->mnt_lockref == 0, ("%s: invalid lock refcount in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); KASSERT(error == 0, ("%s: invalid return value for msleep in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); /* * From now on, we can claim that the use reference on the * coveredvp is ours, and the ref can be released only by * successful unmount by us, or left for a later unmount * attempt. The previously acquired hold reference is no * longer needed to protect the vnode from reuse. */ if (coveredvp != NULL) vdrop(coveredvp); vfs_msync(mp, MNT_WAIT); MNT_ILOCK(mp); async_flag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); cache_purgevfs(mp, false); /* remove cache entries for this file sys */ vfs_deallocate_syncvnode(mp); /* * For forced unmounts, move process cdir/rdir refs on the fs root * vnode to the covered vnode. For non-forced unmounts we want * such references to cause an EBUSY error. */ if ((flags & MNT_FORCE) && VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) { if (mp->mnt_vnodecovered != NULL && (mp->mnt_flag & MNT_IGNORE) == 0) mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); if (fsrootvp == rootvnode) { vrele(rootvnode); rootvnode = NULL; } vput(fsrootvp); } if ((mp->mnt_flag & MNT_RDONLY) != 0 || (flags & MNT_FORCE) != 0 || (error = VFS_SYNC(mp, MNT_WAIT)) == 0) error = VFS_UNMOUNT(mp, flags); vn_finished_write(mp); /* * If we failed to flush the dirty blocks for this mount point, * undo all the cdir/rdir and rootvnode changes we made above. * Unless we failed to do so because the device is reporting that * it doesn't exist anymore.
*/ if (error && error != ENXIO) { if ((flags & MNT_FORCE) && VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) { if (mp->mnt_vnodecovered != NULL && (mp->mnt_flag & MNT_IGNORE) == 0) mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); if (rootvnode == NULL) { rootvnode = fsrootvp; vref(rootvnode); } vput(fsrootvp); } MNT_ILOCK(mp); mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ; if ((mp->mnt_flag & MNT_RDONLY) == 0) { MNT_IUNLOCK(mp); vfs_allocate_syncvnode(mp); MNT_ILOCK(mp); } mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); mp->mnt_flag |= async_flag; if ((mp->mnt_flag & MNT_ASYNC) != 0 && (mp->mnt_kern_flag & MNTK_NOASYNC) == 0) mp->mnt_kern_flag |= MNTK_ASYNC; if (mp->mnt_kern_flag & MNTK_MWAIT) { mp->mnt_kern_flag &= ~MNTK_MWAIT; wakeup(mp); } MNT_IUNLOCK(mp); if (coveredvp) VOP_UNLOCK(coveredvp, 0); return (error); } mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); EVENTHANDLER_INVOKE(vfs_unmounted, mp, td); if (coveredvp != NULL) { coveredvp->v_mountedhere = NULL; VOP_UNLOCK(coveredvp, 0); } vfs_event_signal(NULL, VQ_UNMOUNT, 0); if (mp == rootdevmp) rootdevmp = NULL; vfs_mount_destroy(mp); return (0); } /* * Report errors during filesystem mounting. */ void vfs_mount_error(struct mount *mp, const char *fmt, ...) { struct vfsoptlist *moptlist = mp->mnt_optnew; va_list ap; int error, len; char *errmsg; error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len); if (error || errmsg == NULL || len <= 0) return; va_start(ap, fmt); vsnprintf(errmsg, (size_t)len, fmt, ap); va_end(ap); } void vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...) { va_list ap; int error, len; char *errmsg; error = vfs_getopt(opts, "errmsg", (void **)&errmsg, &len); if (error || errmsg == NULL || len <= 0) return; va_start(ap, fmt); vsnprintf(errmsg, (size_t)len, fmt, ap); va_end(ap); } /* * --------------------------------------------------------------------- * Functions for querying mount options/arguments from filesystems. */ /* * Check that no unknown options are given */ int vfs_filteropt(struct vfsoptlist *opts, const char **legal) { struct vfsopt *opt; char errmsg[255]; const char **t, *p, *q; int ret = 0; TAILQ_FOREACH(opt, opts, link) { p = opt->name; q = NULL; if (p[0] == 'n' && p[1] == 'o') q = p + 2; for(t = global_opts; *t != NULL; t++) { if (strcmp(*t, p) == 0) break; if (q != NULL) { if (strcmp(*t, q) == 0) break; } } if (*t != NULL) continue; for(t = legal; *t != NULL; t++) { if (strcmp(*t, p) == 0) break; if (q != NULL) { if (strcmp(*t, q) == 0) break; } } if (*t != NULL) continue; snprintf(errmsg, sizeof(errmsg), "mount option <%s> is unknown", p); ret = EINVAL; } if (ret != 0) { TAILQ_FOREACH(opt, opts, link) { if (strcmp(opt->name, "errmsg") == 0) { strncpy((char *)opt->value, errmsg, opt->len); break; } } if (opt == NULL) printf("%s\n", errmsg); } return (ret); } /* * Get a mount option by its name. * * Return 0 if the option was found, ENOENT otherwise. * If len is non-NULL it will be filled with the length * of the option. If buf is non-NULL, it will be filled * with the address of the option. 
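 *
 * A typical consumer in a filesystem's mount routine looks like the
 * following sketch (the "from" option is only an example):
 *
 *	void *buf;
 *	int len;
 *
 *	if (vfs_getopt(mp->mnt_optnew, "from", &buf, &len) != 0 ||
 *	    len <= 0 || ((char *)buf)[len - 1] != '\0')
 *		return (EINVAL);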
*/ int -vfs_getopt(opts, name, buf, len) - struct vfsoptlist *opts; - const char *name; - void **buf; - int *len; +vfs_getopt(struct vfsoptlist *opts, const char *name, void **buf, int *len) { struct vfsopt *opt; KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { opt->seen = 1; if (len != NULL) *len = opt->len; if (buf != NULL) *buf = opt->value; return (0); } } return (ENOENT); } int vfs_getopt_pos(struct vfsoptlist *opts, const char *name) { struct vfsopt *opt; if (opts == NULL) return (-1); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { opt->seen = 1; return (opt->pos); } } return (-1); } int vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value) { char *opt_value, *vtp; quad_t iv; int error, opt_len; error = vfs_getopt(opts, name, (void **)&opt_value, &opt_len); if (error != 0) return (error); if (opt_len == 0 || opt_value == NULL) return (EINVAL); if (opt_value[0] == '\0' || opt_value[opt_len - 1] != '\0') return (EINVAL); iv = strtoq(opt_value, &vtp, 0); if (vtp == opt_value || (vtp[0] != '\0' && vtp[1] != '\0')) return (EINVAL); if (iv < 0) return (EINVAL); switch (vtp[0]) { case 't': case 'T': iv *= 1024; case 'g': case 'G': iv *= 1024; case 'm': case 'M': iv *= 1024; case 'k': case 'K': iv *= 1024; case '\0': break; default: return (EINVAL); } *value = iv; return (0); } char * vfs_getopts(struct vfsoptlist *opts, const char *name, int *error) { struct vfsopt *opt; *error = 0; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->len == 0 || ((char *)opt->value)[opt->len - 1] != '\0') { *error = EINVAL; return (NULL); } return (opt->value); } *error = ENOENT; return (NULL); } int vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w, uint64_t val) { struct vfsopt *opt; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { opt->seen = 1; if (w != NULL) *w |= val; return (1); } } if (w != NULL) *w &= ~val; return (0); } int vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...) 
{ va_list ap; struct vfsopt *opt; int ret; KASSERT(opts != NULL, ("vfs_scanopt: caller passed 'opts' as NULL")); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->len == 0 || opt->value == NULL) return (0); if (((char *)opt->value)[opt->len - 1] != '\0') return (0); va_start(ap, fmt); ret = vsscanf(opt->value, fmt, ap); va_end(ap); return (ret); } return (0); } int vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len) { struct vfsopt *opt; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->value == NULL) opt->len = len; else { if (opt->len != len) return (EINVAL); bcopy(value, opt->value, len); } return (0); } return (ENOENT); } int vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len) { struct vfsopt *opt; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->value == NULL) opt->len = len; else { if (opt->len < len) return (EINVAL); opt->len = len; bcopy(value, opt->value, len); } return (0); } return (ENOENT); } int vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value) { struct vfsopt *opt; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->value == NULL) opt->len = strlen(value) + 1; else if (strlcpy(opt->value, value, opt->len) >= opt->len) return (EINVAL); return (0); } return (ENOENT); } /* * Find and copy a mount option. * * The size of the buffer has to be specified * in len; if it is not the same length as the * mount option, EINVAL is returned. * Returns ENOENT if the option is not found. */ int -vfs_copyopt(opts, name, dest, len) - struct vfsoptlist *opts; - const char *name; - void *dest; - int len; +vfs_copyopt(struct vfsoptlist *opts, const char *name, void *dest, int len) { struct vfsopt *opt; KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { opt->seen = 1; if (len != opt->len) return (EINVAL); bcopy(opt->value, dest, opt->len); return (0); } } return (ENOENT); } int __vfs_statfs(struct mount *mp, struct statfs *sbp) { int error; error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat); if (sbp != &mp->mnt_stat) *sbp = mp->mnt_stat; return (error); } void vfs_mountedfrom(struct mount *mp, const char *from) { bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname); strlcpy(mp->mnt_stat.f_mntfromname, from, sizeof mp->mnt_stat.f_mntfromname); } /* * --------------------------------------------------------------------- * This is the API for building mount args and mounting filesystems from * inside the kernel. * * The API works by accumulation of individual args; the first error is * latched. * * XXX: should be documented in new manpage kernel_mount(9) */ /* A memory allocation which must be freed when we are done */ struct mntaarg { SLIST_ENTRY(mntaarg) next; }; /* The header for the mount arguments */ struct mntarg { struct iovec *v; int len; int error; SLIST_HEAD(, mntaarg) list; }; /* * Add a boolean argument. * * flag is the boolean value. * name must start with "no". */ struct mntarg * mount_argb(struct mntarg *ma, int flag, const char *name) { KASSERT(name[0] == 'n' && name[1] == 'o', ("mount_argb(...,%s): name must start with 'no'", name)); return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0)); } /* * Add an argument, printf style. */ struct mntarg * mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
{ va_list ap; struct mntaarg *maa; struct sbuf *sb; int len; if (ma == NULL) { ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INIT(&ma->list); } if (ma->error) return (ma); ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), M_MOUNT, M_WAITOK); ma->v[ma->len].iov_base = (void *)(uintptr_t)name; ma->v[ma->len].iov_len = strlen(name) + 1; ma->len++; sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); len = sbuf_len(sb) + 1; maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INSERT_HEAD(&ma->list, maa, next); bcopy(sbuf_data(sb), maa + 1, len); sbuf_delete(sb); ma->v[ma->len].iov_base = maa + 1; ma->v[ma->len].iov_len = len; ma->len++; return (ma); } /* * Add an argument which is a userland string. */ struct mntarg * mount_argsu(struct mntarg *ma, const char *name, const void *val, int len) { struct mntaarg *maa; char *tbuf; if (val == NULL) return (ma); if (ma == NULL) { ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INIT(&ma->list); } if (ma->error) return (ma); maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INSERT_HEAD(&ma->list, maa, next); tbuf = (void *)(maa + 1); ma->error = copyinstr(val, tbuf, len, NULL); return (mount_arg(ma, name, tbuf, -1)); } /* * Plain argument. * * If length is -1, treat value as a C string. */ struct mntarg * mount_arg(struct mntarg *ma, const char *name, const void *val, int len) { if (ma == NULL) { ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INIT(&ma->list); } if (ma->error) return (ma); ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), M_MOUNT, M_WAITOK); ma->v[ma->len].iov_base = (void *)(uintptr_t)name; ma->v[ma->len].iov_len = strlen(name) + 1; ma->len++; ma->v[ma->len].iov_base = (void *)(uintptr_t)val; if (len < 0) ma->v[ma->len].iov_len = strlen(val) + 1; else ma->v[ma->len].iov_len = len; ma->len++; return (ma); } /* * Free a mntarg structure */ static void free_mntarg(struct mntarg *ma) { struct mntaarg *maa; while (!SLIST_EMPTY(&ma->list)) { maa = SLIST_FIRST(&ma->list); SLIST_REMOVE_HEAD(&ma->list, next); free(maa, M_MOUNT); } free(ma->v, M_MOUNT); free(ma, M_MOUNT); } /* * Mount a filesystem */ int kernel_mount(struct mntarg *ma, uint64_t flags) { struct uio auio; int error; KASSERT(ma != NULL, ("kernel_mount NULL ma")); KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v")); KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len)); auio.uio_iov = ma->v; auio.uio_iovcnt = ma->len; auio.uio_segflg = UIO_SYSSPACE; error = ma->error; if (!error) error = vfs_donmount(curthread, flags, &auio); free_mntarg(ma); return (error); } /* * A printflike function to mount a filesystem. */ int kernel_vmount(int flags, ...) { struct mntarg *ma = NULL; va_list ap; const char *cp; const void *vp; int error; va_start(ap, flags); for (;;) { cp = va_arg(ap, const char *); if (cp == NULL) break; vp = va_arg(ap, const void *); ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0)); } va_end(ap); error = kernel_mount(ma, flags); return (error); } void vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp) { bcopy(oexp, exp, sizeof(*oexp)); exp->ex_numsecflavors = 0; } Index: stable/11/sys/kern/vfs_syscalls.c =================================================================== --- stable/11/sys/kern/vfs_syscalls.c (revision 331642) +++ stable/11/sys/kern/vfs_syscalls.c (revision 331643) @@ -1,4656 +1,4367 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. 
All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_compat.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); SDT_PROVIDER_DEFINE(vfs); SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); static int kern_chflagsat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, u_long flags, int atflag); static int setfflags(struct thread *td, struct vnode *, u_long); static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); static int getutimens(const struct timespec *, enum uio_seg, struct timespec *, int *); static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); /* * Sync each mounted filesystem. 
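 *
 * This is a best-effort pass: busy and read-only mounts are skipped,
 * and vfs_msync()/VFS_SYNC() run with MNT_NOWAIT, so sync(2) may
 * return before all dirty data has reached stable storage.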
*/ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif /* ARGSUSED */ int -sys_sync(td, uap) - struct thread *td; - struct sync_args *uap; +sys_sync(struct thread *td, struct sync_args *uap) { struct mount *mp, *nmp; int save; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { save = curthread_pflags_set(TDP_SYNCIO); vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT); curthread_pflags_restore(save); vn_finished_write(mp); } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); return (0); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif int -sys_quotactl(td, uap) - struct thread *td; - register struct quotactl_args /* { - char *path; - int cmd; - int uid; - caddr_t arg; - } */ *uap; +sys_quotactl(struct thread *td, struct quotactl_args *uap) { struct mount *mp; struct nameidata nd; int error; AUDIT_ARG_CMD(uap->cmd); AUDIT_ARG_UID(uap->uid); if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) return (EPERM); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); mp = nd.ni_vp->v_mount; vfs_ref(mp); vput(nd.ni_vp); error = vfs_busy(mp, 0); vfs_rel(mp); if (error != 0) return (error); error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); /* * Since the quota-on operation typically needs to open the quota * file, the Q_QUOTAON handler needs to unbusy the mount point * before calling into namei. Otherwise, unmount might be * started between two vfs_busy() invocations (first is ours, * second is from mount point cross-walk code in lookup()), * causing deadlock. * * Require that Q_QUOTAON handles the vfs_busy() reference on * its own, always returning with an unbusied mount point. */ if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) vfs_unbusy(mp); return (error); } /* * Used by statfs conversion routines to scale the block size up if * necessary so that all of the block counts are <= 'max_size'. Note * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero * value of 'n'. */ void statfs_scale_blocks(struct statfs *sf, long max_size) { uint64_t count; int shift; KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); /* * Attempt to scale the block counts to give a more accurate * overview to userland of the ratio of free space to used * space. To do this, find the largest block count and compute * a divisor that lets it fit into a signed integer <= max_size. */ if (sf->f_bavail < 0) count = -sf->f_bavail; else count = sf->f_bavail; count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); if (count <= max_size) return; count >>= flsl(max_size); shift = 0; while (count > 0) { shift++; count >>= 1; } sf->f_bsize <<= shift; sf->f_blocks >>= shift; sf->f_bfree >>= shift; sf->f_bavail >>= shift; } static int kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) { struct statfs *sp; int error; if (mp == NULL) return (EBADF); error = vfs_busy(mp, 0); vfs_rel(mp); if (error != 0) return (error); #ifdef MAC error = mac_mount_check_stat(td->td_ucred, mp); if (error != 0) goto out; #endif /* * Set these in case the underlying filesystem fails to do so.
*/ sp = &mp->mnt_stat; sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp); if (error != 0) goto out; *buf = *sp; if (priv_check(td, PRIV_VFS_GENERATION)) { buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, buf); } out: vfs_unbusy(mp); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif int -sys_statfs(td, uap) - struct thread *td; - register struct statfs_args /* { - char *path; - struct statfs *buf; - } */ *uap; +sys_statfs(struct thread *td, struct statfs_args *uap) { struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(struct statfs)); free(sfp, M_STATFS); return (error); } int kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, struct statfs *buf) { struct mount *mp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, td); error = namei(&nd); if (error != 0) return (error); mp = nd.ni_vp->v_mount; vfs_ref(mp); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); return (kern_do_statfs(td, mp, buf)); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif int -sys_fstatfs(td, uap) - struct thread *td; - register struct fstatfs_args /* { - int fd; - struct statfs *buf; - } */ *uap; +sys_fstatfs(struct thread *td, struct fstatfs_args *uap) { struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(struct statfs)); free(sfp, M_STATFS); return (error); } int kern_fstatfs(struct thread *td, int fd, struct statfs *buf) { struct file *fp; struct mount *mp; struct vnode *vp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); if (error != 0) return (error); vp = fp->f_vnode; vn_lock(vp, LK_SHARED | LK_RETRY); #ifdef AUDIT AUDIT_ARG_VNODE1(vp); #endif mp = vp->v_mount; if (mp != NULL) vfs_ref(mp); VOP_UNLOCK(vp, 0); fdrop(fp, td); return (kern_do_statfs(td, mp, buf)); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int mode; }; #endif int -sys_getfsstat(td, uap) - struct thread *td; - register struct getfsstat_args /* { - struct statfs *buf; - long bufsize; - int mode; - } */ *uap; +sys_getfsstat(struct thread *td, struct getfsstat_args *uap) { size_t count; int error; if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) return (EINVAL); error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, UIO_USERSPACE, uap->mode); if (error == 0) td->td_retval[0] = count; return (error); } /* * If (bufsize > 0 && bufseg == UIO_SYSSPACE) * The caller is responsible for freeing memory which will be allocated * in '*buf'. 
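 *
 * A kernel caller therefore follows this pattern (sketch only):
 *
 *	struct statfs *buf;
 *	size_t count;
 *
 *	error = kern_getfsstat(td, &buf, bufsize, &count,
 *	    UIO_SYSSPACE, MNT_NOWAIT);
 *	if (error == 0) {
 *		... examine buf[0 .. count - 1] ...
 *		free(buf, M_STATFS);
 *	}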
*/ int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, size_t *countp, enum uio_seg bufseg, int mode) { struct mount *mp, *nmp; struct statfs *sfsp, *sp, *sptmp, *tofree; size_t count, maxcount; int error; switch (mode) { case MNT_WAIT: case MNT_NOWAIT: break; default: return (EINVAL); } restart: maxcount = bufsize / sizeof(struct statfs); if (bufsize == 0) { sfsp = NULL; tofree = NULL; } else if (bufseg == UIO_USERSPACE) { sfsp = *buf; tofree = NULL; } else /* if (bufseg == UIO_SYSSPACE) */ { count = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { count++; } mtx_unlock(&mountlist_mtx); if (maxcount > count) maxcount = count; tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_STATFS, M_WAITOK); } count = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (prison_canseemount(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #ifdef MAC if (mac_mount_check_stat(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #endif if (mode == MNT_WAIT) { if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { /* * If vfs_busy() failed, and MBF_NOWAIT * wasn't passed, then the mp is gone. * Furthermore, because of MBF_MNTLSTLOCK, * the mountlist_mtx was dropped. We have * no other choice than to start over. */ mtx_unlock(&mountlist_mtx); free(tofree, M_STATFS); goto restart; } } else { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } } if (sfsp != NULL && count < maxcount) { sp = &mp->mnt_stat; /* * Set these in case the underlying filesystem * fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; /* * If MNT_NOWAIT is specified, do not refresh * the fsstat cache. */ if (mode != MNT_NOWAIT) { error = VFS_STATFS(mp, sp); if (error != 0) { mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); continue; } } if (priv_check(td, PRIV_VFS_GENERATION)) { sptmp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); *sptmp = *sp; sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, sptmp); sp = sptmp; } else sptmp = NULL; if (bufseg == UIO_SYSSPACE) { bcopy(sp, sfsp, sizeof(*sp)); free(sptmp, M_STATFS); } else /* if (bufseg == UIO_USERSPACE) */ { error = copyout(sp, sfsp, sizeof(*sp)); free(sptmp, M_STATFS); if (error != 0) { vfs_unbusy(mp); return (error); } } sfsp++; } count++; mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); if (sfsp != NULL && count > maxcount) *countp = maxcount; else *countp = count; return (0); } #ifdef COMPAT_FREEBSD4 /* * Get old format filesystem statistics. */ static void cvtstatfs(struct statfs *, struct ostatfs *); #ifndef _SYS_SYSPROTO_H_ struct freebsd4_statfs_args { char *path; struct ostatfs *buf; }; #endif int -freebsd4_statfs(td, uap) - struct thread *td; - struct freebsd4_statfs_args /* { - char *path; - struct ostatfs *buf; - } */ *uap; +freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) { struct ostatfs osb; struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); if (error == 0) { cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Get filesystem statistics. 
*/ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fstatfs_args { int fd; struct ostatfs *buf; }; #endif int -freebsd4_fstatfs(td, uap) - struct thread *td; - struct freebsd4_fstatfs_args /* { - int fd; - struct ostatfs *buf; - } */ *uap; +freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) { struct ostatfs osb; struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sfp); if (error == 0) { cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_getfsstat_args { struct ostatfs *buf; long bufsize; int mode; }; #endif int -freebsd4_getfsstat(td, uap) - struct thread *td; - register struct freebsd4_getfsstat_args /* { - struct ostatfs *buf; - long bufsize; - int mode; - } */ *uap; +freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) { struct statfs *buf, *sp; struct ostatfs osb; size_t count, size; int error; if (uap->bufsize < 0) return (EINVAL); count = uap->bufsize / sizeof(struct ostatfs); if (count > SIZE_MAX / sizeof(struct statfs)) return (EINVAL); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); td->td_retval[0] = count; if (size != 0) { sp = buf; while (count != 0 && error == 0) { cvtstatfs(sp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); sp++; uap->buf++; count--; } free(buf, M_STATFS); } return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fhstatfs_args { struct fhandle *u_fhp; struct ostatfs *buf; }; #endif int -freebsd4_fhstatfs(td, uap) - struct thread *td; - struct freebsd4_fhstatfs_args /* { - struct fhandle *u_fhp; - struct ostatfs *buf; - } */ *uap; +freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) { struct ostatfs osb; struct statfs *sfp; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sfp); if (error == 0) { cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Convert a new format statfs structure to an old format statfs structure. */ static void -cvtstatfs(nsp, osp) - struct statfs *nsp; - struct ostatfs *osp; +cvtstatfs(struct statfs *nsp, struct ostatfs *osp) { statfs_scale_blocks(nsp, LONG_MAX); bzero(osp, sizeof(*osp)); osp->f_bsize = nsp->f_bsize; osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); osp->f_blocks = nsp->f_blocks; osp->f_bfree = nsp->f_bfree; osp->f_bavail = nsp->f_bavail; osp->f_files = MIN(nsp->f_files, LONG_MAX); osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); osp->f_owner = nsp->f_owner; osp->f_type = nsp->f_type; osp->f_flags = nsp->f_flags; osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); strlcpy(osp->f_fstypename, nsp->f_fstypename, MIN(MFSNAMELEN, OMFSNAMELEN)); strlcpy(osp->f_mntonname, nsp->f_mntonname, MIN(MNAMELEN, OMNAMELEN)); strlcpy(osp->f_mntfromname, nsp->f_mntfromname, MIN(MNAMELEN, OMNAMELEN)); osp->f_fsid = nsp->f_fsid; } #endif /* COMPAT_FREEBSD4 */ /* * Change current working directory to a given file descriptor. 
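 *
 * If the descriptor refers to a mount point, the loop in the body
 * below follows v_mountedhere through any stacked file systems so
 * that the new working directory becomes the root of the topmost
 * mount.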
*/ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif int -sys_fchdir(td, uap) - struct thread *td; - struct fchdir_args /* { - int fd; - } */ *uap; +sys_fchdir(struct thread *td, struct fchdir_args *uap) { struct vnode *vp, *tdp; struct mount *mp; struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->fd); error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), &fp); if (error != 0) return (error); vp = fp->f_vnode; vrefact(vp); fdrop(fp, td); vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); error = change_dir(vp, td); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0)) continue; error = VFS_ROOT(mp, LK_SHARED, &tdp); vfs_unbusy(mp); if (error != 0) break; vput(vp); vp = tdp; } if (error != 0) { vput(vp); return (error); } VOP_UNLOCK(vp, 0); pwd_chdir(td, vp); return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif int -sys_chdir(td, uap) - struct thread *td; - struct chdir_args /* { - char *path; - } */ *uap; +sys_chdir(struct thread *td, struct chdir_args *uap) { return (kern_chdir(td, uap->path, UIO_USERSPACE)); } int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); if ((error = change_dir(nd.ni_vp, td)) != 0) { vput(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } VOP_UNLOCK(nd.ni_vp, 0); NDFREE(&nd, NDF_ONLY_PNBUF); pwd_chdir(td, nd.ni_vp); return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif int -sys_chroot(td, uap) - struct thread *td; - struct chroot_args /* { - char *path; - } */ *uap; +sys_chroot(struct thread *td, struct chroot_args *uap) { struct nameidata nd; int error; error = priv_check(td, PRIV_VFS_CHROOT); if (error != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error != 0) goto error; error = change_dir(nd.ni_vp, td); if (error != 0) goto e_vunlock; #ifdef MAC error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); if (error != 0) goto e_vunlock; #endif VOP_UNLOCK(nd.ni_vp, 0); error = pwd_chroot(td, nd.ni_vp); vrele(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); e_vunlock: vput(nd.ni_vp); error: NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Common routine for chroot and chdir. Callers must provide a locked vnode * instance. 
*/ int -change_dir(vp, td) - struct vnode *vp; - struct thread *td; +change_dir(struct vnode *vp, struct thread *td) { #ifdef MAC int error; #endif ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); if (vp->v_type != VDIR) return (ENOTDIR); #ifdef MAC error = mac_vnode_check_chdir(td->td_ucred, vp); if (error != 0) return (error); #endif return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); } static __inline void flags_to_rights(int flags, cap_rights_t *rightsp) { if (flags & O_EXEC) { cap_rights_set(rightsp, CAP_FEXECVE); } else { switch ((flags & O_ACCMODE)) { case O_RDONLY: cap_rights_set(rightsp, CAP_READ); break; case O_RDWR: cap_rights_set(rightsp, CAP_READ); /* FALLTHROUGH */ case O_WRONLY: cap_rights_set(rightsp, CAP_WRITE); if (!(flags & (O_APPEND | O_TRUNC))) cap_rights_set(rightsp, CAP_SEEK); break; } } if (flags & O_CREAT) cap_rights_set(rightsp, CAP_CREATE); if (flags & O_TRUNC) cap_rights_set(rightsp, CAP_FTRUNCATE); if (flags & (O_SYNC | O_FSYNC)) cap_rights_set(rightsp, CAP_FSYNC); if (flags & (O_EXLOCK | O_SHLOCK)) cap_rights_set(rightsp, CAP_FLOCK); } /* * Check permissions, allocate an open file structure, and call the device * open routine if any. */ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int -sys_open(td, uap) - struct thread *td; - register struct open_args /* { - char *path; - int flags; - int mode; - } */ *uap; +sys_open(struct thread *td, struct open_args *uap) { return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct openat_args { int fd; char *path; int flag; int mode; }; #endif int sys_openat(struct thread *td, struct openat_args *uap) { AUDIT_ARG_FD(uap->fd); return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, uap->mode)); } int kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flags, int mode) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct nameidata nd; cap_rights_t rights; int cmode, error, indx; indx = -1; AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); cap_rights_init(&rights, CAP_LOOKUP); flags_to_rights(flags, &rights); /* * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags * may be specified. */ if (flags & O_EXEC) { if (flags & O_ACCMODE) return (EINVAL); } else if ((flags & O_ACCMODE) == O_ACCMODE) { return (EINVAL); } else { flags = FFLAGS(flags); } /* * Allocate a file structure. The descriptor to reference it * is allocated and set by finstall() below. */ error = falloc_noinstall(td, &fp); if (error != 0) return (error); /* * An extra reference on `fp' has been held for us by * falloc_noinstall(). */ /* Set the flags early so the finit in devfs can pick them up. */ fp->f_flag = flags & FMASK; cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, &rights, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, fp); if (error != 0) { /* * If the vn_open replaced the method vector, something * wondrous happened deep below and we just pass it up, * pretending we know what we do. */ if (error == ENXIO && fp->f_ops != &badfileops) goto success; /* * Handle special fdopen() case. bleh. * * Don't do this for relative (capability) lookups; we don't * understand exactly what would happen, and we don't think * that it ever should.
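 *
 * (When /dev/fd/N is opened, the open routine records N in td_dupfd
 * and fails with ENODEV or ENXIO; dupfdopen() below then installs a
 * duplicate of descriptor N instead of a freshly opened vnode.)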
*/ if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && (error == ENODEV || error == ENXIO) && td->td_dupfd >= 0) { error = dupfdopen(td, fdp, td->td_dupfd, flags, error, &indx); if (error == 0) goto success; } goto bad; } td->td_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; /* * Store the vnode, for any f_type. Typically, the vnode use * count is decremented by direct call to vn_closefile() for * files that switched type in the cdevsw fdopen() method. */ fp->f_vnode = vp; /* * If the file wasn't claimed by devfs bind it to the normal * vnode operations here. */ if (fp->f_ops == &badfileops) { KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); fp->f_seqcount = 1; finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, &vnops); } VOP_UNLOCK(vp, 0); if (flags & O_TRUNC) { error = fo_truncate(fp, 0, td->td_ucred, td); if (error != 0) goto bad; } success: /* * If we haven't already installed the FD (for dupfdopen), do so now. */ if (indx == -1) { struct filecaps *fcaps; #ifdef CAPABILITIES if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) fcaps = &nd.ni_filecaps; else #endif fcaps = NULL; error = finstall(td, fp, &indx, flags, fcaps); /* On success finstall() consumes fcaps. */ if (error != 0) { filecaps_free(&nd.ni_filecaps); goto bad; } } else { filecaps_free(&nd.ni_filecaps); } /* * Release our private reference, leaving the one associated with * the descriptor table intact. */ fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: KASSERT(indx == -1, ("indx=%d, should be -1", indx)); fdrop(fp, td); return (error); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int -ocreat(td, uap) - struct thread *td; - register struct ocreat_args /* { - char *path; - int mode; - } */ *uap; +ocreat(struct thread *td, struct ocreat_args *uap) { return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); } #endif /* COMPAT_43 */ /* * Create a special file. 
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif int -sys_mknod(td, uap) - struct thread *td; - register struct mknod_args /* { - char *path; - int mode; - int dev; - } */ *uap; +sys_mknod(struct thread *td, struct mknod_args *uap) { return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } #ifndef _SYS_SYSPROTO_H_ struct mknodat_args { int fd; char *path; mode_t mode; dev_t dev; }; #endif int sys_mknodat(struct thread *td, struct mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } int kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int mode, int dev) { struct vnode *vp; struct mount *mp; struct vattr vattr; struct nameidata nd; cap_rights_t rights; int error, whiteout = 0; AUDIT_ARG_MODE(mode); AUDIT_ARG_DEV(dev); switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: error = priv_check(td, PRIV_VFS_MKNOD_DEV); if (error == 0 && dev == VNOVAL) error = EINVAL; break; case S_IFWHT: error = priv_check(td, PRIV_VFS_MKNOD_WHT); break; case S_IFIFO: if (dev == 0) return (kern_mkfifoat(td, fd, path, pathseg, mode)); /* FALLTHROUGH */ default: error = EINVAL; break; } if (error != 0) return (error); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } else { VATTR_NULL(&vattr); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; vattr.va_rdev = dev; whiteout = 0; switch (mode & S_IFMT) { case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: panic("kern_mknod: invalid mode"); } } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC if (error == 0 && !whiteout) error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (error == 0) { if (whiteout) error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); return (error); } /* * Create a named pipe. 
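 *
 * (mkfifo(2) is the unprivileged subset of mknod(2): kern_mkfifoat()
 * below simply issues VOP_MKNOD() with va_type = VFIFO and, unlike
 * device special files, demands no PRIV_VFS_MKNOD_* privilege.)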
*/ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif int -sys_mkfifo(td, uap) - struct thread *td; - register struct mkfifo_args /* { - char *path; - int mode; - } */ *uap; +sys_mkfifo(struct thread *td, struct mkfifo_args *uap) { return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct mkfifoat_args { int fd; char *path; mode_t mode; }; #endif int sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) { return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int mode) { struct mount *mp; struct vattr vattr; struct nameidata nd; cap_rights_t rights; int error; AUDIT_ARG_MODE(mode); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out; #endif error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); #ifdef MAC out: #endif vput(nd.ni_dvp); vn_finished_write(mp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif int -sys_link(td, uap) - struct thread *td; - register struct link_args /* { - char *path; - char *link; - } */ *uap; +sys_link(struct thread *td, struct link_args *uap) { return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, UIO_USERSPACE, FOLLOW)); } #ifndef _SYS_SYSPROTO_H_ struct linkat_args { int fd1; char *path1; int fd2; char *path2; int flag; }; #endif int sys_linkat(struct thread *td, struct linkat_args *uap) { int flag; flag = uap->flag; if (flag & ~AT_SYMLINK_FOLLOW) return (EINVAL); return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); } int hardlink_check_uid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, &hardlink_check_uid, 0, "Unprivileged processes cannot create hard links to files owned by other " "users"); static int hardlink_check_gid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, &hardlink_check_gid, 0, "Unprivileged processes cannot create hard links to files owned by other " "groups"); static int can_hardlink(struct vnode *vp, struct ucred *cred) { struct vattr va; int error; if (!hardlink_check_uid && !hardlink_check_gid) return (0); error = VOP_GETATTR(vp, &va, cred); if (error != 0) return (error); if (hardlink_check_uid && cred->cr_uid != va.va_uid) { error = priv_check_cred(cred, PRIV_VFS_LINK, 0); if (error != 0) return (error); } if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { error = priv_check_cred(cred, PRIV_VFS_LINK, 0); if (error != 0) return (error); } return (0); } int kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, enum uio_seg segflg, int follow) { struct vnode *vp; struct mount *mp; struct nameidata nd; cap_rights_t rights; int error; again: bwillwrite(); NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vp->v_type == VDIR) { vrele(vp); return (EPERM); /* POSIX */ } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT_TARGET), td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); vrele(vp); return (EEXIST); } else if (nd.ni_dvp->v_mount != vp->v_mount) { /* * Cross-device link. No need to recheck * vp->v_type, since it cannot change, except * to VBAD. */ NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vrele(vp); return (EXDEV); } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { error = can_hardlink(vp, td->td_ucred); #ifdef MAC if (error == 0) error = mac_vnode_check_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); #endif if (error != 0) { vput(vp); vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } error = vn_start_write(vp, &mp, V_NOWAIT); if (error != 0) { vput(vp); vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error != 0) return (error); goto again; } error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0); vput(nd.ni_dvp); vn_finished_write(mp); NDFREE(&nd, NDF_ONLY_PNBUF); } else { vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); goto again; } } vrele(vp); return (error); } /* * Make a symbolic link. 
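 *
 * (The link target, path1, is copied in as an uninterpreted string and
 * handed to VOP_SYMLINK() verbatim; it need not resolve to anything,
 * so dangling symlinks are created without complaint.)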
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif int -sys_symlink(td, uap) - struct thread *td; - register struct symlink_args /* { - char *path; - char *link; - } */ *uap; +sys_symlink(struct thread *td, struct symlink_args *uap) { return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct symlinkat_args { char *path1; int fd; char *path2; }; #endif int sys_symlinkat(struct thread *td, struct symlinkat_args *uap) { return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, UIO_USERSPACE)); } int kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, enum uio_seg segflg) { struct mount *mp; struct vattr vattr; char *syspath; struct nameidata nd; int error; cap_rights_t rights; if (segflg == UIO_SYSSPACE) { syspath = path1; } else { syspath = uma_zalloc(namei_zone, M_WAITOK); if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) goto out; } AUDIT_ARG_TEXT(syspath); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) goto out; goto restart; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; #ifdef MAC vattr.va_type = VLNK; error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out2; #endif error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); if (error == 0) vput(nd.ni_vp); #ifdef MAC out2: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); out: if (segflg != UIO_SYSSPACE) uma_zfree(namei_zone, syspath); return (error); } /* * Delete a whiteout from the filesystem. */ +#ifndef _SYS_SYSPROTO_H_ +struct undelete_args { + char *path; +}; +#endif int -sys_undelete(td, uap) - struct thread *td; - register struct undelete_args /* { - char *path; - } */ *uap; +sys_undelete(struct thread *td, struct undelete_args *uap) { struct mount *mp; struct nameidata nd; int error; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error != 0) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); return (error); } /* * Delete a name from the filesystem.
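 *
 * (kern_unlinkat()'s oldinum argument, when non-zero, makes the unlink
 * conditional: if the path no longer resolves to that inode number the
 * call fails with EIDRM rather than removing an unrelated file.)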
*/ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif int -sys_unlink(td, uap) - struct thread *td; - struct unlink_args /* { - char *path; - } */ *uap; +sys_unlink(struct thread *td, struct unlink_args *uap) { return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); } #ifndef _SYS_SYSPROTO_H_ struct unlinkat_args { int fd; char *path; int flag; }; #endif int sys_unlinkat(struct thread *td, struct unlinkat_args *uap) { int flag = uap->flag; int fd = uap->fd; char *path = uap->path; if (flag & ~AT_REMOVEDIR) return (EINVAL); if (flag & AT_REMOVEDIR) return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); else return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); } int kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, ino_t oldinum) { struct mount *mp; struct vnode *vp; struct nameidata nd; struct stat sb; cap_rights_t rights; int error; restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? EPERM : error); vp = nd.ni_vp; if (vp->v_type == VDIR && oldinum == 0) { error = EPERM; /* POSIX */ } else if (oldinum != 0 && ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && sb.st_ino != oldinum) { error = EIDRM; /* Identifier removed */ } else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_vflag & VV_ROOT) error = EBUSY; } if (error == 0) { if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error != 0) goto out; #endif vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: #endif vn_finished_write(mp); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); return (error); } /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int sys_lseek(struct thread *td, struct lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } int kern_lseek(struct thread *td, int fd, off_t offset, int whence) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); error = fget(td, fd, cap_rights_init(&rights, CAP_SEEK), &fp); if (error != 0) return (error); error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? fo_seek(fp, offset, whence, td) : ESPIPE; fdrop(fp, td); return (error); } #if defined(COMPAT_43) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(struct thread *td, struct olseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif /* COMPAT_43 */ #if defined(COMPAT_FREEBSD6) /* Version with the 'pad' argument */ int freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif /* * Check access permissions using passed credentials. 
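 *
 * (vn_access() translates the user-visible amode bits into vnode
 * accmode_t bits one for one; for example R_OK | X_OK becomes
 * VREAD | VEXEC, while amode == 0 (F_OK) checks bare existence.)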
*/ static int -vn_access(vp, user_flags, cred, td) - struct vnode *vp; - int user_flags; - struct ucred *cred; - struct thread *td; +vn_access(struct vnode *vp, int user_flags, struct ucred *cred, + struct thread *td) { accmode_t accmode; int error; /* Flags == 0 means only check for existence. */ if (user_flags == 0) return (0); accmode = 0; if (user_flags & R_OK) accmode |= VREAD; if (user_flags & W_OK) accmode |= VWRITE; if (user_flags & X_OK) accmode |= VEXEC; #ifdef MAC error = mac_vnode_check_access(cred, vp, accmode); if (error != 0) return (error); #endif if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, accmode, cred, td); return (error); } /* * Check access permissions using "real" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int amode; }; #endif int -sys_access(td, uap) - struct thread *td; - register struct access_args /* { - char *path; - int amode; - } */ *uap; +sys_access(struct thread *td, struct access_args *uap) { return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0, uap->amode)); } #ifndef _SYS_SYSPROTO_H_ struct faccessat_args { int fd; char *path; int amode; int flag; }; #endif int sys_faccessat(struct thread *td, struct faccessat_args *uap) { return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, uap->amode)); } int kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flag, int amode) { struct ucred *cred, *usecred; struct vnode *vp; struct nameidata nd; cap_rights_t rights; int error; if (flag & ~AT_EACCESS) return (EINVAL); if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) return (EINVAL); /* * Create and modify a temporary credential instead of one that * is potentially shared (if we need one). */ cred = td->td_ucred; if ((flag & AT_EACCESS) == 0 && ((cred->cr_uid != cred->cr_ruid || cred->cr_rgid != cred->cr_groups[0]))) { usecred = crdup(cred); usecred->cr_uid = cred->cr_ruid; usecred->cr_groups[0] = cred->cr_rgid; td->td_ucred = usecred; } else usecred = cred; AUDIT_ARG_VALUE(amode); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), td); if ((error = namei(&nd)) != 0) goto out; vp = nd.ni_vp; error = vn_access(vp, amode, usecred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); out: if (usecred != cred) { td->td_ucred = cred; crfree(usecred); } return (error); } /* * Check access permissions using "effective" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct eaccess_args { char *path; int amode; }; #endif int -sys_eaccess(td, uap) - struct thread *td; - register struct eaccess_args /* { - char *path; - int amode; - } */ *uap; +sys_eaccess(struct thread *td, struct eaccess_args *uap) { return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, AT_EACCESS, uap->amode)); } #if defined(COMPAT_43) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif int -ostat(td, uap) - struct thread *td; - register struct ostat_args /* { - char *path; - struct ostat *ub; - } */ *uap; +ostat(struct thread *td, struct ostat_args *uap) { struct stat sb; struct ostat osb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); cvtstat(&sb, &osb); return (copyout(&osb, uap->ub, sizeof (osb))); } /* * Get file status; this version does not follow links.
*/ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif int -olstat(td, uap) - struct thread *td; - register struct olstat_args /* { - char *path; - struct ostat *ub; - } */ *uap; +olstat(struct thread *td, struct olstat_args *uap) { struct stat sb; struct ostat osb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); cvtstat(&sb, &osb); return (copyout(&osb, uap->ub, sizeof (osb))); } /* * Convert from an old to a new stat structure. */ void -cvtstat(st, ost) - struct stat *st; - struct ostat *ost; +cvtstat(struct stat *st, struct ostat *ost) { bzero(ost, sizeof(*ost)); ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atim = st->st_atim; ost->st_mtim = st->st_mtim; ost->st_ctim = st->st_ctim; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif int -sys_stat(td, uap) - struct thread *td; - register struct stat_args /* { - char *path; - struct stat *ub; - } */ *uap; +sys_stat(struct thread *td, struct stat_args *uap) { struct stat sb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error == 0) error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fstatat_args { int fd; char *path; struct stat *buf; int flag; }; #endif int sys_fstatat(struct thread *td, struct fstatat_args *uap) { struct stat sb; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb, NULL); if (error == 0) error = copyout(&sb, uap->buf, sizeof (sb)); return (error); } int kern_statat(struct thread *td, int flag, int fd, char *path, enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)) { struct nameidata nd; struct stat sb; cap_rights_t rights; int error; if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), td); if ((error = namei(&nd)) != 0) return (error); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); if (error == 0) { SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); if (S_ISREG(sb.st_mode)) SDT_PROBE2(vfs, , stat, reg, path, pathseg); if (__predict_false(hook != NULL)) hook(nd.ni_vp, &sb); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); if (error != 0) return (error); *sbp = sb; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrstat(&sb); #endif return (0); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif int -sys_lstat(td, uap) - struct thread *td; - register struct lstat_args /* { - char *path; - struct stat *ub; - } */ *uap; +sys_lstat(struct thread *td, struct lstat_args *uap) { struct stat sb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error == 0) error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } /* * Implementation of the NetBSD [l]stat() functions.
*/ void -cvtnstat(sb, nsb) - struct stat *sb; - struct nstat *nsb; +cvtnstat(struct stat *sb, struct nstat *nsb) { bzero(nsb, sizeof *nsb); nsb->st_dev = sb->st_dev; nsb->st_ino = sb->st_ino; nsb->st_mode = sb->st_mode; nsb->st_nlink = sb->st_nlink; nsb->st_uid = sb->st_uid; nsb->st_gid = sb->st_gid; nsb->st_rdev = sb->st_rdev; nsb->st_atim = sb->st_atim; nsb->st_mtim = sb->st_mtim; nsb->st_ctim = sb->st_ctim; nsb->st_size = sb->st_size; nsb->st_blocks = sb->st_blocks; nsb->st_blksize = sb->st_blksize; nsb->st_flags = sb->st_flags; nsb->st_gen = sb->st_gen; nsb->st_birthtim = sb->st_birthtim; } #ifndef _SYS_SYSPROTO_H_ struct nstat_args { char *path; struct nstat *ub; }; #endif int -sys_nstat(td, uap) - struct thread *td; - register struct nstat_args /* { - char *path; - struct nstat *ub; - } */ *uap; +sys_nstat(struct thread *td, struct nstat_args *uap) { struct stat sb; struct nstat nsb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); cvtnstat(&sb, &nsb); return (copyout(&nsb, uap->ub, sizeof (nsb))); } /* * NetBSD lstat. Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct nlstat_args { char *path; struct nstat *ub; }; #endif int -sys_nlstat(td, uap) - struct thread *td; - register struct nlstat_args /* { - char *path; - struct nstat *ub; - } */ *uap; +sys_nlstat(struct thread *td, struct nlstat_args *uap) { struct stat sb; struct nstat nsb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); cvtnstat(&sb, &nsb); return (copyout(&nsb, uap->ub, sizeof (nsb))); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif int -sys_pathconf(td, uap) - struct thread *td; - register struct pathconf_args /* { - char *path; - int name; - } */ *uap; +sys_pathconf(struct thread *td, struct pathconf_args *uap) { return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); } #ifndef _SYS_SYSPROTO_H_ struct lpathconf_args { char *path; int name; }; #endif int -sys_lpathconf(td, uap) - struct thread *td; - register struct lpathconf_args /* { - char *path; - int name; - } */ *uap; +sys_lpathconf(struct thread *td, struct lpathconf_args *uap) { return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW)); } int kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, u_long flags) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link.
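 *
 * The link text is not NUL-terminated; td->td_retval[0] reports how
 * many bytes were actually copied.  An illustrative, never-compiled
 * sketch of an in-kernel caller (path here is assumed to be a
 * kernel-space string) reading a link into a kernel buffer:
 */
#if 0
	char buf[MAXPATHLEN];

	error = kern_readlinkat(td, AT_FDCWD, path, UIO_SYSSPACE,
	    buf, UIO_SYSSPACE, sizeof(buf));
	/* on success, the first td->td_retval[0] bytes of buf are valid */
#endif
/*
 * End of sketch.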
*/ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; size_t count; }; #endif int -sys_readlink(td, uap) - struct thread *td; - register struct readlink_args /* { - char *path; - char *buf; - size_t count; - } */ *uap; +sys_readlink(struct thread *td, struct readlink_args *uap) { return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->count)); } #ifndef _SYS_SYSPROTO_H_ struct readlinkat_args { int fd; char *path; char *buf; size_t bufsize; }; #endif int sys_readlinkat(struct thread *td, struct readlinkat_args *uap) { return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->bufsize)); } int kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) { struct vnode *vp; struct iovec aiov; struct uio auio; struct nameidata nd; int error; if (count > IOSIZE_MAX) return (EINVAL); NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; #ifdef MAC error = mac_vnode_check_readlink(td->td_ucred, vp); if (error != 0) { vput(vp); return (error); } #endif if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) error = EINVAL; else { aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; auio.uio_resid = count; error = VOP_READLINK(vp, &auio, td->td_ucred); td->td_retval[0] = count - auio.uio_resid; } vput(vp); return (error); } /* * Common implementation code for chflags() and fchflags(). */ static int -setfflags(td, vp, flags) - struct thread *td; - struct vnode *vp; - u_long flags; +setfflags(struct thread *td, struct vnode *vp, u_long flags) { struct mount *mp; struct vattr vattr; int error; /* We can't support the value matching VNOVAL. */ if (flags == VNOVAL) return (EOPNOTSUPP); /* * Prevent non-root users from setting flags on devices. When * a device is reused, users can retain ownership of the device * if they are allowed to set flags and programs assume that * chown can't fail when done as root. */ if (vp->v_type == VCHR || vp->v_type == VBLK) { error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); if (error != 0) return (error); } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VATTR_NULL(&vattr); vattr.va_flags = flags; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef MAC error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } /* * Change flags of a file given a path name. 
*/ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { const char *path; u_long flags; }; #endif int -sys_chflags(td, uap) - struct thread *td; - register struct chflags_args /* { - const char *path; - u_long flags; - } */ *uap; +sys_chflags(struct thread *td, struct chflags_args *uap) { return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, 0)); } #ifndef _SYS_SYSPROTO_H_ struct chflagsat_args { int fd; const char *path; u_long flags; int atflag; }; #endif int sys_chflagsat(struct thread *td, struct chflagsat_args *uap) { int fd = uap->fd; const char *path = uap->path; u_long flags = uap->flags; int atflag = uap->atflag; if (atflag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); } /* * Same as chflags() but doesn't follow symlinks. */ +#ifndef _SYS_SYSPROTO_H_ +struct lchflags_args { + const char *path; + u_long flags; +}; +#endif int -sys_lchflags(td, uap) - struct thread *td; - register struct lchflags_args /* { - const char *path; - u_long flags; - } */ *uap; +sys_lchflags(struct thread *td, struct lchflags_args *uap) { return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, AT_SYMLINK_NOFOLLOW)); } static int kern_chflagsat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, u_long flags, int atflag) { struct nameidata nd; cap_rights_t rights; int error, follow; AUDIT_ARG_FFLAGS(flags); follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FCHFLAGS), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, flags); vrele(nd.ni_vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; u_long flags; }; #endif int -sys_fchflags(td, uap) - struct thread *td; - register struct fchflags_args /* { - int fd; - u_long flags; - } */ *uap; +sys_fchflags(struct thread *td, struct fchflags_args *uap) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_FFLAGS(uap->flags); error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode, 0); #endif error = setfflags(td, fp->f_vnode, uap->flags); fdrop(fp, td); return (error); } /* * Common implementation code for chmod(), lchmod() and fchmod(). */ int -setfmode(td, cred, vp, mode) - struct thread *td; - struct ucred *cred; - struct vnode *vp; - int mode; +setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) { struct mount *mp; struct vattr vattr; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; #ifdef MAC error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } /* * Change mode of a file given path name.
*/ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif int -sys_chmod(td, uap) - struct thread *td; - register struct chmod_args /* { - char *path; - int mode; - } */ *uap; +sys_chmod(struct thread *td, struct chmod_args *uap) { return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, 0)); } #ifndef _SYS_SYSPROTO_H_ struct fchmodat_args { int fd; char *path; mode_t mode; int flag; }; #endif int sys_fchmodat(struct thread *td, struct fchmodat_args *uap) { int flag = uap->flag; int fd = uap->fd; char *path = uap->path; mode_t mode = uap->mode; if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); } /* * Change mode of a file given path name (don't follow links.) */ #ifndef _SYS_SYSPROTO_H_ struct lchmod_args { char *path; int mode; }; #endif int -sys_lchmod(td, uap) - struct thread *td; - register struct lchmod_args /* { - char *path; - int mode; - } */ *uap; +sys_lchmod(struct thread *td, struct lchmod_args *uap) { return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, AT_SYMLINK_NOFOLLOW)); } int kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, mode_t mode, int flag) { struct nameidata nd; cap_rights_t rights; int error, follow; AUDIT_ARG_MODE(mode); follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FCHMOD), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, td->td_ucred, nd.ni_vp, mode); vrele(nd.ni_vp); return (error); } /* * Change mode of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif int sys_fchmod(struct thread *td, struct fchmod_args *uap) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_MODE(uap->mode); error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); if (error != 0) return (error); error = fo_chmod(fp, uap->mode, td->td_ucred, td); fdrop(fp, td); return (error); } /* * Common implementation for chown(), lchown(), and fchown() */ int -setfown(td, cred, vp, uid, gid) - struct thread *td; - struct ucred *cred; - struct vnode *vp; - uid_t uid; - gid_t gid; +setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, + gid_t gid) { struct mount *mp; struct vattr vattr; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VATTR_NULL(&vattr); vattr.va_uid = uid; vattr.va_gid = gid; #ifdef MAC error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, vattr.va_gid); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } /* * Set ownership given a path name.
*/ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif int -sys_chown(td, uap) - struct thread *td; - register struct chown_args /* { - char *path; - int uid; - int gid; - } */ *uap; +sys_chown(struct thread *td, struct chown_args *uap) { return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, uap->gid, 0)); } #ifndef _SYS_SYSPROTO_H_ struct fchownat_args { int fd; const char * path; uid_t uid; gid_t gid; int flag; }; #endif int sys_fchownat(struct thread *td, struct fchownat_args *uap) { int flag; flag = uap->flag; if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, uap->gid, uap->flag)); } int kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int uid, int gid, int flag) { struct nameidata nd; cap_rights_t rights; int error, follow; AUDIT_ARG_OWNER(uid, gid); follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FCHOWN), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); vrele(nd.ni_vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif int -sys_lchown(td, uap) - struct thread *td; - register struct lchown_args /* { - char *path; - int uid; - int gid; - } */ *uap; +sys_lchown(struct thread *td, struct lchown_args *uap) { return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif int -sys_fchown(td, uap) - struct thread *td; - register struct fchown_args /* { - int fd; - int uid; - int gid; - } */ *uap; +sys_fchown(struct thread *td, struct fchown_args *uap) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_OWNER(uap->uid, uap->gid); error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); if (error != 0) return (error); error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); fdrop(fp, td); return (error); } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int -getutimes(usrtvp, tvpseg, tsp) - const struct timeval *usrtvp; - enum uio_seg tvpseg; - struct timespec *tsp; +getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, + struct timespec *tsp) { struct timeval tv[2]; const struct timeval *tvp; int error; if (usrtvp == NULL) { vfs_timestamp(&tsp[0]); tsp[1] = tsp[0]; } else { if (tvpseg == UIO_SYSSPACE) { tvp = usrtvp; } else { if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) return (error); tvp = tv; } if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) return (EINVAL); TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); } return (0); } /* * Common implementation code for futimens(), utimensat(). 
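 *
 * (getutimens() resolves UTIME_NOW/UTIME_OMIT per timestamp and reports
 * two summary flags: UTIMENS_NULL when the effect equals a NULL times
 * pointer, so VA_UTIMES_NULL can relax the permission check to plain
 * write access, and UTIMENS_EXIT when both stamps are UTIME_OMIT so the
 * caller may return early without touching the vnode.)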
*/ #define UTIMENS_NULL 0x1 #define UTIMENS_EXIT 0x2 static int getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, struct timespec *tsp, int *retflags) { struct timespec tsnow; int error; vfs_timestamp(&tsnow); *retflags = 0; if (usrtsp == NULL) { tsp[0] = tsnow; tsp[1] = tsnow; *retflags |= UTIMENS_NULL; return (0); } if (tspseg == UIO_SYSSPACE) { tsp[0] = usrtsp[0]; tsp[1] = usrtsp[1]; } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) return (error); if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) *retflags |= UTIMENS_EXIT; if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) *retflags |= UTIMENS_NULL; if (tsp[0].tv_nsec == UTIME_OMIT) tsp[0].tv_sec = VNOVAL; else if (tsp[0].tv_nsec == UTIME_NOW) tsp[0] = tsnow; else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) return (EINVAL); if (tsp[1].tv_nsec == UTIME_OMIT) tsp[1].tv_sec = VNOVAL; else if (tsp[1].tv_nsec == UTIME_NOW) tsp[1] = tsnow; else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) return (EINVAL); return (0); } /* * Common implementation code for utimes(), lutimes(), futimes(), futimens(), * and utimensat(). */ static int -setutimes(td, vp, ts, numtimes, nullflag) - struct thread *td; - struct vnode *vp; - const struct timespec *ts; - int numtimes; - int nullflag; +setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, + int numtimes, int nullflag) { struct mount *mp; struct vattr vattr; int error, setbirthtime; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); setbirthtime = 0; if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && timespeccmp(&ts[1], &vattr.va_birthtime, < )) setbirthtime = 1; VATTR_NULL(&vattr); vattr.va_atime = ts[0]; vattr.va_mtime = ts[1]; if (setbirthtime) vattr.va_birthtime = ts[1]; if (numtimes > 2) vattr.va_birthtime = ts[2]; if (nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; #ifdef MAC error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, vattr.va_mtime); #endif if (error == 0) error = VOP_SETATTR(vp, &vattr, td->td_ucred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif int -sys_utimes(td, uap) - struct thread *td; - register struct utimes_args /* { - char *path; - struct timeval *tptr; - } */ *uap; +sys_utimes(struct thread *td, struct utimes_args *uap) { return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct futimesat_args { int fd; const char * path; const struct timeval * times; }; #endif int sys_futimesat(struct thread *td, struct futimesat_args *uap) { return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, uap->times, UIO_USERSPACE)); } int kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct nameidata nd; struct timespec ts[2]; cap_rights_t rights; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FUTIMES), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. 
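 *
 * An illustrative, never-compiled userland-style sketch of the
 * UTIME_NOW/UTIME_OMIT semantics implemented by getutimens() above:
 * bump only the modification time of the open file fd, leaving the
 * access time untouched:
 */
#if 0
	struct timespec times[2];

	times[0].tv_nsec = UTIME_OMIT;		/* atime: leave unchanged */
	times[1].tv_nsec = UTIME_NOW;		/* mtime: set to "now" */
	if (futimens(fd, times) == -1)
		err(1, "futimens");
#endif
/*
 * End of sketch.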
*/ #ifndef _SYS_SYSPROTO_H_ struct lutimes_args { char *path; struct timeval *tptr; }; #endif int -sys_lutimes(td, uap) - struct thread *td; - register struct lutimes_args /* { - char *path; - struct timeval *tptr; - } */ *uap; +sys_lutimes(struct thread *td, struct lutimes_args *uap) { return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct nameidata nd; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct futimes_args { int fd; struct timeval *tptr; }; #endif int -sys_futimes(td, uap) - struct thread *td; - register struct futimes_args /* { - int fd; - struct timeval *tptr; - } */ *uap; +sys_futimes(struct thread *td, struct futimes_args *uap) { return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); } int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); error = getutimes(tptr, tptrseg, ts); if (error != 0) return (error); error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode, 0); #endif error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); fdrop(fp, td); return (error); } int sys_futimens(struct thread *td, struct futimens_args *uap) { return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); } int kern_futimens(struct thread *td, int fd, struct timespec *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; cap_rights_t rights; int error, flags; AUDIT_ARG_FD(fd); error = getutimens(tptr, tptrseg, ts, &flags); if (error != 0) return (error); if (flags & UTIMENS_EXIT) return (0); error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode, 0); #endif error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); fdrop(fp, td); return (error); } int sys_utimensat(struct thread *td, struct utimensat_args *uap) { return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, uap->times, UIO_USERSPACE, uap->flag)); } int kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, int flag) { struct nameidata nd; struct timespec ts[2]; cap_rights_t rights; int error, flags; if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) return (error); NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW) | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FUTIMES), td); if ((error = namei(&nd)) != 0) return (error); /* * We are allowed to call namei() regardless of 2xUTIME_OMIT. * POSIX states: * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." * "Search permission is denied by a component of the path prefix." 
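 *
 * That is, the lookup must happen even when UTIMENS_EXIT will make the
 * call a no-op below, so that an unsearchable path prefix still yields
 * EACCES as POSIX permits.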
*/ NDFREE(&nd, NDF_ONLY_PNBUF); if ((flags & UTIMENS_EXIT) == 0) error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); vrele(nd.ni_vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif int -sys_truncate(td, uap) - struct thread *td; - register struct truncate_args /* { - char *path; - int pad; - off_t length; - } */ *uap; +sys_truncate(struct thread *td, struct truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) { struct mount *mp; struct vnode *vp; void *rl_cookie; struct vattr vattr; struct nameidata nd; int error; if (length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vn_rangelock_unlock(vp, rl_cookie); vrele(vp); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { VATTR_NULL(&vattr); vattr.va_size = length; error = VOP_SETATTR(vp, &vattr, td->td_ucred); } VOP_UNLOCK(vp, 0); vn_finished_write(mp); vn_rangelock_unlock(vp, rl_cookie); vrele(vp); return (error); } #if defined(COMPAT_43) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif int otruncate(struct thread *td, struct otruncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } #endif /* COMPAT_43 */ #if defined(COMPAT_FREEBSD6) /* Versions with the pad argument */ int freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, uap->length)); } #endif int kern_fsync(struct thread *td, int fd, bool fullsync) { struct vnode *vp; struct mount *mp; struct file *fp; cap_rights_t rights; int error, lock_flags; AUDIT_ARG_FD(fd); error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); if (error != 0) return (error); vp = fp->f_vnode; #if 0 if (!fullsync) /* XXXKIB: compete outstanding aio writes */; #endif error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error != 0) goto drop; if (MNT_SHARED_WRITES(mp) || ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { lock_flags = LK_SHARED; } else { lock_flags = LK_EXCLUSIVE; } vn_lock(vp, lock_flags | LK_RETRY); AUDIT_ARG_VNODE1(vp); if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); drop: fdrop(fp, td); return (error); } /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif int sys_fsync(struct thread *td, struct fsync_args *uap) { return (kern_fsync(td, uap->fd, true)); } int sys_fdatasync(struct thread *td, struct fdatasync_args *uap) { return (kern_fsync(td, uap->fd, false)); } /* * Rename files. 
Source and destination must either both be directories, or * both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif int -sys_rename(td, uap) - struct thread *td; - register struct rename_args /* { - char *from; - char *to; - } */ *uap; +sys_rename(struct thread *td, struct rename_args *uap) { return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, uap->to, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct renameat_args { int oldfd; char *old; int newfd; char *new; }; #endif int sys_renameat(struct thread *td, struct renameat_args *uap) { return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, UIO_USERSPACE)); } int kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, enum uio_seg pathseg) { struct mount *mp = NULL; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; cap_rights_t rights; int error; again: bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | AUDITVNODE1, pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); #else NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); #endif if ((error = namei(&fromnd)) != 0) return (error); #ifdef MAC error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd); VOP_UNLOCK(fromnd.ni_dvp, 0); if (fromnd.ni_dvp != fromnd.ni_vp) VOP_UNLOCK(fromnd.ni_vp, 0); #endif fvp = fromnd.ni_vp; NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2, pathseg, new, newfd, cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; error = vn_start_write(fvp, &mp, V_NOWAIT); if (error != 0) { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tvp != NULL) vput(tvp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); vrele(tond.ni_startdir); if (fromnd.ni_startdir != NULL) vrele(fromnd.ni_startdir); error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error != 0) return (error); goto again; } if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } #ifdef CAPABILITIES if (newfd != AT_FDCWD) { /* * If the target already exists we require CAP_UNLINKAT * from 'newfd'. */ error = cap_check(&tond.ni_filecaps.fc_rights, cap_rights_init(&rights, CAP_UNLINKAT)); if (error != 0) goto out; } #endif } if (fvp == tdvp) { error = EINVAL; goto out; } /* * If the source is the same as the destination (that is, if they * are links to the same vnode), then there is nothing to do. 
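 *
 * The error value -1 set below is a private sentinel: it skips the
 * VOP_RENAME() call yet still routes through the common cleanup, and
 * is translated back to plain success (0) past out1.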
*/ if (fvp == tvp) error = -1; #ifdef MAC else error = mac_vnode_check_rename_to(td->td_ucred, tdvp, tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); #endif out: if (error == 0) { error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tvp != NULL) vput(tvp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); vn_finished_write(mp); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif int -sys_mkdir(td, uap) - struct thread *td; - register struct mkdir_args /* { - char *path; - int mode; - } */ *uap; +sys_mkdir(struct thread *td, struct mkdir_args *uap) { return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct mkdirat_args { int fd; char *path; mode_t mode; }; #endif int sys_mkdirat(struct thread *td, struct mkdirat_args *uap) { return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, int mode) { struct mount *mp; struct vnode *vp; struct vattr vattr; struct nameidata nd; cap_rights_t rights; int error; AUDIT_ARG_MODE(mode); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); /* * XXX namei called with LOCKPARENT but not LOCKLEAF has * the strange behaviour of leaving the vnode unlocked * if the target is the same vnode as the parent. */ if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out; #endif error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (error == 0) vput(nd.ni_vp); vn_finished_write(mp); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif int -sys_rmdir(td, uap) - struct thread *td; - struct rmdir_args /* { - char *path; - } */ *uap; +sys_rmdir(struct thread *td, struct rmdir_args *uap) { return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); } int kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; struct nameidata nd; cap_rights_t rights; int error; restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. 
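 *
 * (A trailing "." makes namei() return the directory itself as both
 * parent and leaf, so the nd.ni_dvp == vp test below is exactly the
 * rmdir(".") case and draws EINVAL, as POSIX requires.)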
*/ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) { error = EBUSY; goto out; } #ifdef MAC error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error != 0) goto out; #endif if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) { long loff; int error; error = kern_ogetdirentries(td, uap, &loff); if (error == 0) error = copyout(&loff, uap->basep, sizeof(long)); return (error); } int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff) { struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; cap_rights_t rights; caddr_t dirbuf; int error, eofflag, readcnt; long loff; off_t foffset; /* XXX arbitrary sanity limit on `count'. */ if (uap->count > 64 * 1024) return (EINVAL); error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; foffset = foffset_lock(fp, 0); unionread: if (vp->v_type != VDIR) { foffset_unlock(fp, foffset, 0); fdrop(fp, td); return (EINVAL); } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; vn_lock(vp, LK_SHARED | LK_RETRY); loff = auio.uio_offset = foffset; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error != 0) { VOP_UNLOCK(vp, 0); foffset_unlock(fp, foffset, FOF_NOUPDATE); fdrop(fp, td); return (error); } #endif # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); foffset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = uap->count; dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); foffset = kuio.uio_offset; if (error == 0) { readcnt = uap->count - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. 
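 *
 * (The old 4.3BSD dirent carried a 16-bit d_namlen and no d_type; the
 * modern layout splits that field into two one-byte fields, d_type and
 * d_namlen.  The in-place fixup here rewrites each entry so that old
 * binaries see the layout they expect.)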
*/ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } free(dirbuf, M_TEMP); } if (error != 0) { VOP_UNLOCK(vp, 0); foffset_unlock(fp, foffset, 0); fdrop(fp, td); return (error); } if (uap->count == auio.uio_resid && (vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; foffset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0); foffset_unlock(fp, foffset, 0); fdrop(fp, td); td->td_retval[0] = uap->count - auio.uio_resid; if (error == 0) *ploff = loff; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int -sys_getdirentries(td, uap) - struct thread *td; - register struct getdirentries_args /* { - int fd; - char *buf; - u_int count; - long *basep; - } */ *uap; +sys_getdirentries(struct thread *td, struct getdirentries_args *uap) { long base; int error; error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL, UIO_USERSPACE); if (error != 0) return (error); if (uap->basep != NULL) error = copyout(&base, uap->basep, sizeof(long)); return (error); } int kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, long *basep, ssize_t *residp, enum uio_seg bufseg) { struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; cap_rights_t rights; long loff; int error, eofflag; off_t foffset; AUDIT_ARG_FD(fd); if (count > IOSIZE_MAX) return (EINVAL); auio.uio_resid = count; error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; foffset = foffset_lock(fp, 0); unionread: if (vp->v_type != VDIR) { error = EINVAL; goto fail; } aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); loff = auio.uio_offset = foffset; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); foffset = auio.uio_offset; if (error != 0) { VOP_UNLOCK(vp, 0); goto fail; } if (count == auio.uio_resid && (vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; foffset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0); *basep = loff; if (residp != NULL) *residp = auio.uio_resid; td->td_retval[0] = count - auio.uio_resid; fail: foffset_unlock(fp, foffset, 0); fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getdents_args { int fd; char *buf; size_t count; }; #endif int -sys_getdents(td, uap) - struct thread *td; - register struct getdents_args /* { - int fd; - char *buf; - u_int count; - } */ *uap; +sys_getdents(struct thread *td, struct getdents_args *uap) { struct getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return (sys_getdirentries(td, &ap)); } /* * Set the mode mask for creation of filesystem nodes. 
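The d_reclen walk in the compatibility loop above is the same walk a consumer of getdirentries(2) performs in userspace. A minimal sketch reading the current directory:

#include <sys/types.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        char buf[4096], *p;
        struct dirent *dp;
        long base;
        int fd, nbytes;

        fd = open(".", O_RDONLY | O_DIRECTORY);
        if (fd == -1)
                return (1);
        while ((nbytes = getdirentries(fd, buf, sizeof(buf), &base)) > 0) {
                p = buf;
                while (p < buf + nbytes) {
                        dp = (struct dirent *)p;
                        if (dp->d_reclen == 0)  /* same EIO guard as above */
                                break;
                        if (dp->d_fileno != 0)
                                printf("%s\n", dp->d_name);
                        p += dp->d_reclen;
                }
        }
        close(fd);
        return (0);
}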
*/ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int -sys_umask(td, uap) - struct thread *td; - struct umask_args /* { - int newmask; - } */ *uap; +sys_umask(struct thread *td, struct umask_args *uap) { struct filedesc *fdp; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = uap->newmask & ALLPERMS; FILEDESC_XUNLOCK(fdp); return (0); } /* * Void all references to file by ripping underlying filesystem away from * vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif int -sys_revoke(td, uap) - struct thread *td; - register struct revoke_args /* { - char *path; - } */ *uap; +sys_revoke(struct thread *td, struct revoke_args *uap) { struct vnode *vp; struct vattr vattr; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VCHR || vp->v_rdev == NULL) { error = EINVAL; goto out; } #ifdef MAC error = mac_vnode_check_revoke(td->td_ucred, vp); if (error != 0) goto out; #endif error = VOP_GETATTR(vp, &vattr, td->td_ucred); if (error != 0) goto out; if (td->td_ucred->cr_uid != vattr.va_uid) { error = priv_check(td, PRIV_VFS_ADMIN); if (error != 0) goto out; } if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); out: vput(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry and check that, if it * is a capability, the correct rights are present. A reference on the file * entry is held upon returning. */ int getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { struct file *fp; int error; error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); if (error != 0) return (error); /* * The file may not be of the vnode type, or it may not yet be * fully initialized, in which case the f_vnode pointer * may be set, but f_ops is still badfileops. E.g., * devfs_open() transiently creates such a situation to * facilitate csw d_fdopen(). * * Dupfdopen() handling in kern_openat() installs the * half-baked file into the process descriptor table, allowing * another thread to dereference it. Guard against the race by * checking f_ops. */ if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { fdrop(fp, td); return (EINVAL); } *fpp = fp; return (0); } /* * Get an (NFS) file handle. 
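revoke(2), implemented just above, only applies to character devices; anything else trips the v_type != VCHR check. A userspace sketch, with a hypothetical terminal path:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        const char *tty = "/dev/ttyv1";         /* hypothetical device */

        /*
         * Succeeds only for a character device we own (or with
         * PRIV_VFS_ADMIN); a regular file fails with EINVAL.
         */
        if (revoke(tty) == -1)
                fprintf(stderr, "revoke(%s): %s\n", tty, strerror(errno));
        return (0);
}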
*/ #ifndef _SYS_SYSPROTO_H_ struct lgetfh_args { char *fname; fhandle_t *fhp; }; #endif int -sys_lgetfh(td, uap) - struct thread *td; - register struct lgetfh_args *uap; +sys_lgetfh(struct thread *td, struct lgetfh_args *uap) { struct nameidata nd; fhandle_t fh; - register struct vnode *vp; + struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_GETFH); if (error != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fh.fh_fid); vput(vp); if (error == 0) error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getfh_args { char *fname; fhandle_t *fhp; }; #endif int -sys_getfh(td, uap) - struct thread *td; - register struct getfh_args *uap; +sys_getfh(struct thread *td, struct getfh_args *uap) { struct nameidata nd; fhandle_t fh; - register struct vnode *vp; + struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_GETFH); if (error != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fh.fh_fid); vput(vp); if (error == 0) error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } /* * syscall for the rpc.lockd to use to translate a NFS file handle into an * open descriptor. * * warning: do not remove the priv_check() call or this becomes one giant * security hole. */ #ifndef _SYS_SYSPROTO_H_ struct fhopen_args { const struct fhandle *u_fhp; int flags; }; #endif int -sys_fhopen(td, uap) - struct thread *td; - struct fhopen_args /* { - const struct fhandle *u_fhp; - int flags; - } */ *uap; +sys_fhopen(struct thread *td, struct fhopen_args *uap) { struct mount *mp; struct vnode *vp; struct fhandle fhp; struct file *fp; int fmode, error; int indx; error = priv_check(td, PRIV_VFS_FHOPEN); if (error != 0) return (error); indx = -1; fmode = FFLAGS(uap->flags); /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); if (error != 0) return(error); /* find the mount point */ mp = vfs_busyfs(&fhp.fh_fsid); if (mp == NULL) return (ESTALE); /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error != 0) return (error); error = falloc_noinstall(td, &fp); if (error != 0) { vput(vp); return (error); } /* * An extra reference on `fp' has been held for us by * falloc_noinstall(). */ #ifdef INVARIANTS td->td_dupfd = -1; #endif error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); if (error != 0) { KASSERT(fp->f_ops == &badfileops, ("VOP_OPEN in fhopen() set f_ops")); KASSERT(td->td_dupfd < 0, ("fhopen() encountered fdopen()")); vput(vp); goto bad; } #ifdef INVARIANTS td->td_dupfd = 0; #endif fp->f_vnode = vp; fp->f_seqcount = 1; finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, &vnops); VOP_UNLOCK(vp, 0); if ((fmode & O_TRUNC) != 0) { error = fo_truncate(fp, 0, td->td_ucred, td); if (error != 0) goto bad; } error = finstall(td, fp, &indx, fmode, NULL); bad: fdrop(fp, td); td->td_retval[0] = indx; return (error); } /* * Stat an (NFS) file handle. 
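getfh(2) and fhopen(2) above pair up for userspace NFS helpers such as rpc.lockd: translate a path into an opaque fhandle_t, then reopen the file later without a path. Both are privileged, per the priv_check() calls. A sketch (run as root, path taken from the command line):

#include <sys/param.h>
#include <sys/mount.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
        fhandle_t fh;
        int fd;

        if (argc < 2)
                return (1);
        if (getfh(argv[1], &fh) == -1) {
                fprintf(stderr, "getfh: %s\n", strerror(errno));
                return (1);
        }
        /* O_CREAT and non-read/write modes are rejected by sys_fhopen(). */
        fd = fhopen(&fh, O_RDONLY);
        if (fd == -1) {
                fprintf(stderr, "fhopen: %s\n", strerror(errno));
                return (1);
        }
        close(fd);
        return (0);
}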
*/ #ifndef _SYS_SYSPROTO_H_ struct fhstat_args { struct fhandle *u_fhp; struct stat *sb; }; #endif int -sys_fhstat(td, uap) - struct thread *td; - register struct fhstat_args /* { - struct fhandle *u_fhp; - struct stat *sb; - } */ *uap; +sys_fhstat(struct thread *td, struct fhstat_args *uap) { struct stat sb; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fh)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error == 0) error = copyout(&sb, uap->sb, sizeof(sb)); return (error); } int kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) { struct mount *mp; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_FHSTAT); if (error != 0) return (error); if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) return (ESTALE); error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error != 0) return (error); error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); vput(vp); return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct fhstatfs_args { struct fhandle *u_fhp; struct statfs *buf; }; #endif int -sys_fhstatfs(td, uap) - struct thread *td; - struct fhstatfs_args /* { - struct fhandle *u_fhp; - struct statfs *buf; - } */ *uap; +sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) { struct statfs *sfp; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(*sfp)); free(sfp, M_STATFS); return (error); } int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) { struct statfs *sp; struct mount *mp; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_FHSTATFS); if (error != 0) return (error); if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) return (ESTALE); error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); if (error != 0) { vfs_unbusy(mp); return (error); } vput(vp); error = prison_canseemount(td->td_ucred, mp); if (error != 0) goto out; #ifdef MAC error = mac_mount_check_stat(td->td_ucred, mp); if (error != 0) goto out; #endif /* * Set these in case the underlying filesystem fails to do so. */ sp = &mp->mnt_stat; sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp); if (error == 0) *buf = *sp; out: vfs_unbusy(mp); return (error); } int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) { struct file *fp; struct mount *mp; struct vnode *vp; cap_rights_t rights; off_t olen, ooffset; int error; if (offset < 0 || len <= 0) return (EINVAL); /* Check for wrap. */ if (offset > OFF_MAX - len) return (EFBIG); error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); if (error != 0) return (error); if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { error = ESPIPE; goto out; } if ((fp->f_flag & FWRITE) == 0) { error = EBADF; goto out; } if (fp->f_type != DTYPE_VNODE) { error = ENODEV; goto out; } vp = fp->f_vnode; if (vp->v_type != VREG) { error = ENODEV; goto out; } /* Allocating blocks may take a long time, so iterate. 
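One consequence of kern_posix_error(), used by sys_posix_fallocate() after the allocation loop below, is the POSIX calling convention: the error number is the return value and errno is left untouched. A minimal userspace sketch with a hypothetical file name:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        int error, fd;

        fd = open("prealloc.dat", O_RDWR | O_CREAT, 0644);
        if (fd == -1)
                return (1);
        /* The error is the return value itself; errno is not set. */
        error = posix_fallocate(fd, 0, 16 * 1024 * 1024);
        if (error != 0)
                fprintf(stderr, "posix_fallocate: %s\n", strerror(error));
        close(fd);
        return (0);
}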
*/ for (;;) { olen = len; ooffset = offset; bwillwrite(); mp = NULL; error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error != 0) break; error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) { vn_finished_write(mp); break; } #ifdef MAC error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); if (error == 0) #endif error = VOP_ALLOCATE(vp, &offset, &len); VOP_UNLOCK(vp, 0); vn_finished_write(mp); if (olen + ooffset != offset + len) { panic("offset + len changed from %jx/%jx to %jx/%jx", ooffset, olen, offset, len); } if (error != 0 || len == 0) break; KASSERT(olen > len, ("Iteration did not make progress?")); maybe_yield(); } out: fdrop(fp, td); return (error); } int sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); return (kern_posix_error(td, error)); } /* * Unlike madvise(2), we do not make a best effort to remember every * possible caching hint. Instead, we remember the last setting with * the exception that we will allow POSIX_FADV_NORMAL to adjust the * region of any current setting. */ int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice) { struct fadvise_info *fa, *new; struct file *fp; struct vnode *vp; cap_rights_t rights; off_t end; int error; if (offset < 0 || len < 0 || offset > OFF_MAX - len) return (EINVAL); switch (advice) { case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_RANDOM: case POSIX_FADV_NOREUSE: new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); break; case POSIX_FADV_NORMAL: case POSIX_FADV_WILLNEED: case POSIX_FADV_DONTNEED: new = NULL; break; default: return (EINVAL); } /* XXX: CAP_POSIX_FADVISE? */ error = fget(td, fd, cap_rights_init(&rights), &fp); if (error != 0) goto out; if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { error = ESPIPE; goto out; } if (fp->f_type != DTYPE_VNODE) { error = ENODEV; goto out; } vp = fp->f_vnode; if (vp->v_type != VREG) { error = ENODEV; goto out; } if (len == 0) end = OFF_MAX; else end = offset + len - 1; switch (advice) { case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_RANDOM: case POSIX_FADV_NOREUSE: /* * Try to merge any existing non-standard region with * this new region if possible, otherwise create a new * non-standard region for this request. */ mtx_pool_lock(mtxpool_sleep, fp); fa = fp->f_advice; if (fa != NULL && fa->fa_advice == advice && ((fa->fa_start <= end && fa->fa_end >= offset) || (end != OFF_MAX && fa->fa_start == end + 1) || (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { if (offset < fa->fa_start) fa->fa_start = offset; if (end > fa->fa_end) fa->fa_end = end; } else { new->fa_advice = advice; new->fa_start = offset; new->fa_end = end; fp->f_advice = new; new = fa; } mtx_pool_unlock(mtxpool_sleep, fp); break; case POSIX_FADV_NORMAL: /* * If the "normal" region overlaps with an existing * non-standard region, trim or remove the * non-standard region. */ mtx_pool_lock(mtxpool_sleep, fp); fa = fp->f_advice; if (fa != NULL) { if (offset <= fa->fa_start && end >= fa->fa_end) { new = fa; fp->f_advice = NULL; } else if (offset <= fa->fa_start && end >= fa->fa_start) fa->fa_start = end + 1; else if (offset <= fa->fa_end && end >= fa->fa_end) fa->fa_end = offset - 1; else if (offset >= fa->fa_start && end <= fa->fa_end) { /* * If the "normal" region is a middle * portion of the existing * non-standard region, just remove * the whole thing rather than picking * one side or the other to * preserve. 
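The merge condition in the POSIX_FADV_SEQUENTIAL/RANDOM/NOREUSE case above is dense; here it is restated as a standalone predicate, assuming the same inclusive fa_start/fa_end bounds and using a local stand-in for OFF_MAX:

#include <sys/types.h>
#include <stdbool.h>

#define XOFF_MAX        ((off_t)0x7fffffffffffffff)     /* stand-in for OFF_MAX */

struct region {
        off_t   start;          /* like fa_start */
        off_t   end;            /* like fa_end, inclusive */
};

/*
 * Regions merge when they overlap or are exactly adjacent; the
 * XOFF_MAX tests keep the +1 arithmetic from overflowing off_t.
 */
static bool
regions_mergeable(const struct region *fa, off_t offset, off_t end)
{
        return ((fa->start <= end && fa->end >= offset) ||
            (end != XOFF_MAX && fa->start == end + 1) ||
            (fa->end != XOFF_MAX && fa->end + 1 == offset));
}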
*/ new = fa; fp->f_advice = NULL; } } mtx_pool_unlock(mtxpool_sleep, fp); break; case POSIX_FADV_WILLNEED: case POSIX_FADV_DONTNEED: error = VOP_ADVISE(vp, offset, end, advice); break; } out: if (fp != NULL) fdrop(fp, td); free(new, M_FADVISE); return (error); } int sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, uap->advice); return (kern_posix_error(td, error)); } Index: stable/11/sys/kern/vfs_vnops.c =================================================================== --- stable/11/sys/kern/vfs_vnops.c (revision 331642) +++ stable/11/sys/kern/vfs_vnops.c (revision 331643) @@ -1,2502 +1,2488 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Copyright (c) 2012 Konstantin Belousov * Copyright (c) 2013, 2014 The FreeBSD Foundation * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif static fo_rdwr_t vn_read; static fo_rdwr_t vn_write; static fo_rdwr_t vn_io_fault; static fo_truncate_t vn_truncate; static fo_ioctl_t vn_ioctl; static fo_poll_t vn_poll; static fo_kqfilter_t vn_kqfilter; static fo_stat_t vn_statfile; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; struct fileops vnops = { .fo_read = vn_io_fault, .fo_write = vn_io_fault, .fo_truncate = vn_truncate, .fo_ioctl = vn_ioctl, .fo_poll = vn_poll, .fo_kqfilter = vn_kqfilter, .fo_stat = vn_statfile, .fo_close = vn_closefile, .fo_chmod = vn_chmod, .fo_chown = vn_chown, .fo_sendfile = vn_sendfile, .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; static const int io_hold_cnt = 16; static int vn_io_fault_enable = 1; SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RW, &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance"); static int vn_io_fault_prefault = 0; SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_prefault, CTLFLAG_RW, &vn_io_fault_prefault, 0, "Enable vn_io_fault prefaulting"); static u_long vn_io_faults_cnt; SYSCTL_ULONG(_debug, OID_AUTO, vn_io_faults, CTLFLAG_RD, &vn_io_faults_cnt, 0, "Count of vn_io_fault lock avoidance triggers"); /* * Returns true if vn_io_fault mode of handling the i/o request should * be used. */ static bool do_vn_io_fault(struct vnode *vp, struct uio *uio) { struct mount *mp; return (uio->uio_segflg == UIO_USERSPACE && vp->v_type == VREG && (mp = vp->v_mount) != NULL && (mp->mnt_kern_flag & MNTK_NO_IOPF) != 0 && vn_io_fault_enable); } /* * Structure used to pass arguments to vn_io_fault1(), to do either * file- or vnode-based I/O calls. */ struct vn_io_fault_args { enum { VN_IO_FAULT_FOP, VN_IO_FAULT_VOP } kind; struct ucred *cred; int flags; union { struct fop_args_tag { struct file *fp; fo_rdwr_t *doio; } fop_args; struct vop_args_tag { struct vnode *vp; } vop_args; } args; }; static int vn_io_fault1(struct vnode *vp, struct uio *uio, struct vn_io_fault_args *args, struct thread *td); int vn_open(ndp, flagp, cmode, fp) struct nameidata *ndp; int *flagp, cmode; struct file *fp; { struct thread *td = ndp->ni_cnd.cn_thread; return (vn_open_cred(ndp, flagp, cmode, 0, td->td_ucred, fp)); } /* * Common code for vnode open operations via a name lookup. * Lookup the vnode and invoke VOP_CREATE if needed. * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. * * Note that this does NOT free nameidata for the successful case, * due to the NDINIT being done elsewhere. 
*/ int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags, struct ucred *cred, struct file *fp) { struct vnode *vp; struct mount *mp; struct thread *td = ndp->ni_cnd.cn_thread; struct vattr vat; struct vattr *vap = &vat; int fmode, error; restart: fmode = *flagp; if ((fmode & (O_CREAT | O_EXCL | O_DIRECTORY)) == (O_CREAT | O_EXCL | O_DIRECTORY)) return (EINVAL); else if ((fmode & (O_CREAT | O_DIRECTORY)) == O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; /* * Set NOCACHE to avoid flushing the cache when * rolling in many files at once. */ ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF | NOCACHE; if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) ndp->ni_cnd.cn_flags |= FOLLOW; if (!(vn_open_flags & VN_OPEN_NOAUDIT)) ndp->ni_cnd.cn_flags |= AUDITVNODE1; if (vn_open_flags & VN_OPEN_NOCAPCHECK) ndp->ni_cnd.cn_flags |= NOCAPCHECK; bwillwrite(); if ((error = namei(ndp)) != 0) return (error); if (ndp->ni_vp == NULL) { VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; if (fmode & O_EXCL) vap->va_vaflags |= VA_EXCLUSIVE; if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(ndp, NDF_ONLY_PNBUF); vput(ndp->ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0) ndp->ni_cnd.cn_flags |= MAKEENTRY; #ifdef MAC error = mac_vnode_check_create(cred, ndp->ni_dvp, &ndp->ni_cnd, vap); if (error == 0) #endif error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, vap); vput(ndp->ni_dvp); vn_finished_write(mp); if (error) { NDFREE(ndp, NDF_ONLY_PNBUF); return (error); } fmode &= ~O_TRUNC; vp = ndp->ni_vp; } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); ndp->ni_dvp = NULL; vp = ndp->ni_vp; if (fmode & O_EXCL) { error = EEXIST; goto bad; } fmode &= ~O_CREAT; } } else { ndp->ni_cnd.cn_nameiop = LOOKUP; ndp->ni_cnd.cn_flags = ISOPEN | ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF; if (!(fmode & FWRITE)) ndp->ni_cnd.cn_flags |= LOCKSHARED; if (!(vn_open_flags & VN_OPEN_NOAUDIT)) ndp->ni_cnd.cn_flags |= AUDITVNODE1; if (vn_open_flags & VN_OPEN_NOCAPCHECK) ndp->ni_cnd.cn_flags |= NOCAPCHECK; if ((error = namei(ndp)) != 0) return (error); vp = ndp->ni_vp; } error = vn_open_vnode(vp, fmode, cred, td, fp); if (error) goto bad; *flagp = fmode; return (0); bad: NDFREE(ndp, NDF_ONLY_PNBUF); vput(vp); *flagp = fmode; ndp->ni_vp = NULL; return (error); } /* * Common code for vnode open operations once a vnode is located. * Check permissions, and call the VOP_OPEN routine. 
*/ int vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, struct thread *td, struct file *fp) { accmode_t accmode; struct flock lf; int error, lock_flags, type; if (vp->v_type == VLNK) return (EMLINK); if (vp->v_type == VSOCK) return (EOPNOTSUPP); if (vp->v_type != VDIR && fmode & O_DIRECTORY) return (ENOTDIR); accmode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) return (EISDIR); accmode |= VWRITE; } if (fmode & FREAD) accmode |= VREAD; if (fmode & FEXEC) accmode |= VEXEC; if ((fmode & O_APPEND) && (fmode & FWRITE)) accmode |= VAPPEND; #ifdef MAC if (fmode & O_CREAT) accmode |= VCREAT; if (fmode & O_VERIFY) accmode |= VVERIFY; error = mac_vnode_check_open(cred, vp, accmode); if (error) return (error); accmode &= ~(VCREAT | VVERIFY); #endif if ((fmode & O_CREAT) == 0) { if (accmode & VWRITE) { error = vn_writechk(vp); if (error) return (error); } if (accmode) { error = VOP_ACCESS(vp, accmode, cred, td); if (error) return (error); } } if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vn_lock(vp, LK_UPGRADE | LK_RETRY); if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0) return (error); while ((fmode & (O_EXLOCK | O_SHLOCK)) != 0) { KASSERT(fp != NULL, ("open with flock requires fp")); if (fp->f_type != DTYPE_NONE && fp->f_type != DTYPE_VNODE) { error = EOPNOTSUPP; break; } lock_flags = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (fmode & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); if (error == 0) fp->f_flag |= FHASLOCK; vn_lock(vp, lock_flags | LK_RETRY); if (error != 0) break; if ((vp->v_iflag & VI_DOOMED) != 0) { error = ENOENT; break; } /* * Another thread might have used this vnode as an * executable while the vnode lock was dropped. * Ensure the vnode is still able to be opened for * writing after the lock has been obtained. */ if ((accmode & VWRITE) != 0) error = vn_writechk(vp); break; } if (error != 0) { fp->f_flag |= FOPENFAILED; fp->f_vnode = vp; if (fp->f_ops == &badfileops) { fp->f_type = DTYPE_VNODE; fp->f_ops = &vnops; } vref(vp); } else if ((fmode & FWRITE) != 0) { VOP_ADD_WRITECOUNT(vp, 1); CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp, vp->v_writecount); } ASSERT_VOP_LOCKED(vp, "vn_open_vnode"); return (error); } /* * Check for write permissions on the specified vnode. * Prototype text segments cannot be written. */ int -vn_writechk(vp) - register struct vnode *vp; +vn_writechk(struct vnode *vp) { ASSERT_VOP_LOCKED(vp, "vn_writechk"); /* * If there's shared text associated with * the vnode, try to free it up once. If * we fail, we can't allow writing. 
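The O_EXLOCK/O_SHLOCK loop in vn_open_vnode() above is what gives FreeBSD its open-time flock semantics. A userspace sketch with a hypothetical lock file; without O_NONBLOCK the open sleeps (F_WAIT) until the exclusive lock is granted:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        int fd;

        fd = open("lockfile", O_RDWR | O_CREAT | O_EXLOCK, 0644);
        if (fd == -1) {
                fprintf(stderr, "open: %s\n", strerror(errno));
                return (1);
        }
        /* FHASLOCK was set on the file; closing drops the flock. */
        close(fd);
        return (0);
}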
*/ if (VOP_IS_TEXT(vp)) return (ETXTBSY); return (0); } /* * Vnode close call */ static int vn_close1(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td, bool keep_ref) { struct mount *mp; int error, lock_flags; if (vp->v_type != VFIFO && (flags & FWRITE) == 0 && MNT_EXTENDED_SHARED(vp->v_mount)) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, lock_flags | LK_RETRY); AUDIT_ARG_VNODE1(vp); if ((flags & (FWRITE | FOPENFAILED)) == FWRITE) { VNASSERT(vp->v_writecount > 0, vp, ("vn_close: negative writecount")); VOP_ADD_WRITECOUNT(vp, -1); CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, vp, vp->v_writecount); } error = VOP_CLOSE(vp, flags, file_cred, td); if (keep_ref) VOP_UNLOCK(vp, 0); else vput(vp); vn_finished_write(mp); return (error); } int vn_close(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td) { return (vn_close1(vp, flags, file_cred, td, false)); } /* * Heuristic to detect sequential operation. */ static int sequential_heuristic(struct uio *uio, struct file *fp) { ASSERT_VOP_LOCKED(fp->f_vnode, __func__); if (fp->f_flag & FRDAHEAD) return (fp->f_seqcount << IO_SEQSHIFT); /* * Offset 0 is handled specially. open() sets f_seqcount to 1 so * that the first I/O is normally considered to be slightly * sequential. Seeking to offset 0 doesn't change sequentiality * unless previous seeks have reduced f_seqcount to 0, in which * case offset 0 is not special. */ if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || uio->uio_offset == fp->f_nextoff) { /* * f_seqcount is in units of fixed-size blocks so that it * depends mainly on the amount of sequential I/O and not * much on the number of sequential I/O's. The fixed size * of 16384 is hard-coded here since it is (not quite) just * a magic size that works well here. This size is more * closely related to the best I/O size for real disks than * to any block size used by software. */ fp->f_seqcount += howmany(uio->uio_resid, 16384); if (fp->f_seqcount > IO_SEQMAX) fp->f_seqcount = IO_SEQMAX; return (fp->f_seqcount << IO_SEQSHIFT); } /* Not sequential. Quickly draw-down sequentiality. */ if (fp->f_seqcount > 1) fp->f_seqcount = 1; else fp->f_seqcount = 0; return (0); } /* * Package up an I/O request on a vnode into a uio and do it. 
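sequential_heuristic() above feeds read-ahead sizing: f_seqcount grows with the amount of sequential I/O in 16384-byte units, saturates, and is shifted into the ioflag. A standalone model of that arithmetic; the IO_SEQMAX/IO_SEQSHIFT values are assumed to match sys/vnode.h:

#include <sys/param.h>  /* howmany() */
#include <sys/types.h>

#define XIO_SEQMAX      127     /* assumed IO_SEQMAX */
#define XIO_SEQSHIFT    16      /* assumed IO_SEQSHIFT */

static int
seq_model(int *seqcount, ssize_t resid)
{
        /* Grow with the amount of sequential I/O, in 16384-byte blocks. */
        *seqcount += howmany(resid, 16384);
        if (*seqcount > XIO_SEQMAX)
                *seqcount = XIO_SEQMAX;
        /* Encoded into the ioflag bits above XIO_SEQSHIFT. */
        return (*seqcount << XIO_SEQSHIFT);
}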
*/ int vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid, struct thread *td) { struct uio auio; struct iovec aiov; struct mount *mp; struct ucred *cred; void *rl_cookie; struct vn_io_fault_args args; int error, lock_flags; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; aiov.iov_len = len; auio.uio_resid = len; auio.uio_offset = offset; auio.uio_segflg = segflg; auio.uio_rw = rw; auio.uio_td = td; error = 0; if ((ioflg & IO_NODELOCKED) == 0) { if ((ioflg & IO_RANGELOCKED) == 0) { if (rw == UIO_READ) { rl_cookie = vn_rangelock_rlock(vp, offset, offset + len); } else { rl_cookie = vn_rangelock_wlock(vp, offset, offset + len); } } else rl_cookie = NULL; mp = NULL; if (rw == UIO_WRITE) { if (vp->v_type != VCHR && (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto out; if (MNT_SHARED_WRITES(mp) || ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; } else lock_flags = LK_SHARED; vn_lock(vp, lock_flags | LK_RETRY); } else rl_cookie = NULL; ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); #ifdef MAC if ((ioflg & IO_NOMACCHECK) == 0) { if (rw == UIO_READ) error = mac_vnode_check_read(active_cred, file_cred, vp); else error = mac_vnode_check_write(active_cred, file_cred, vp); } #endif if (error == 0) { if (file_cred != NULL) cred = file_cred; else cred = active_cred; if (do_vn_io_fault(vp, &auio)) { args.kind = VN_IO_FAULT_VOP; args.cred = cred; args.flags = ioflg; args.args.vop_args.vp = vp; error = vn_io_fault1(vp, &auio, &args, td); } else if (rw == UIO_READ) { error = VOP_READ(vp, &auio, ioflg, cred); } else /* if (rw == UIO_WRITE) */ { error = VOP_WRITE(vp, &auio, ioflg, cred); } } if (aresid) *aresid = auio.uio_resid; else if (auio.uio_resid && error == 0) error = EIO; if ((ioflg & IO_NODELOCKED) == 0) { VOP_UNLOCK(vp, 0); if (mp != NULL) vn_finished_write(mp); } out: if (rl_cookie != NULL) vn_rangelock_unlock(vp, rl_cookie); return (error); } /* * Package up an I/O request on a vnode into a uio and do it. The I/O * request is split up into smaller chunks and we try to avoid saturating * the buffer cache while potentially holding a vnode locked, so we * check bwillwrite() before calling vn_rdwr(). We also call kern_yield() * to give other processes a chance to lock the vnode (either other processes * core'ing the same binary, or unrelated processes scanning the directory). */ int vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred, aresid, td) enum uio_rw rw; struct vnode *vp; void *base; size_t len; off_t offset; enum uio_seg segflg; int ioflg; struct ucred *active_cred; struct ucred *file_cred; size_t *aresid; struct thread *td; { int error = 0; ssize_t iaresid; do { int chunk; /* * Force `offset' to a multiple of MAXBSIZE except possibly * for the first chunk, so that filesystems only need to * write full blocks except possibly for the first and last * chunks. 
*/ chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE; if (chunk > len) chunk = len; if (rw != UIO_READ && vp->v_type == VREG) bwillwrite(); iaresid = 0; error = vn_rdwr(rw, vp, base, chunk, offset, segflg, ioflg, active_cred, file_cred, &iaresid, td); len -= chunk; /* aresid calc already includes length */ if (error) break; offset += chunk; base = (char *)base + chunk; kern_yield(PRI_USER); } while (len); if (aresid) *aresid = len + iaresid; return (error); } off_t foffset_lock(struct file *fp, int flags) { struct mtx *mtxp; off_t res; KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed")); #if OFF_MAX <= LONG_MAX /* * Caller only wants the current f_offset value. Assume that * reads of long and shorter integer types are atomic. */ if ((flags & FOF_NOLOCK) != 0) return (fp->f_offset); #endif /* * According to McKusick the vn lock was protecting f_offset here. * It is now protected by the FOFFSET_LOCKED flag. */ mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if ((flags & FOF_NOLOCK) == 0) { while (fp->f_vnread_flags & FOFFSET_LOCKED) { fp->f_vnread_flags |= FOFFSET_LOCK_WAITING; msleep(&fp->f_vnread_flags, mtxp, PUSER - 1, "vofflock", 0); } fp->f_vnread_flags |= FOFFSET_LOCKED; } res = fp->f_offset; mtx_unlock(mtxp); return (res); } void foffset_unlock(struct file *fp, off_t val, int flags) { struct mtx *mtxp; KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed")); #if OFF_MAX <= LONG_MAX if ((flags & FOF_NOLOCK) != 0) { if ((flags & FOF_NOUPDATE) == 0) fp->f_offset = val; if ((flags & FOF_NEXTOFF) != 0) fp->f_nextoff = val; return; } #endif mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if ((flags & FOF_NOUPDATE) == 0) fp->f_offset = val; if ((flags & FOF_NEXTOFF) != 0) fp->f_nextoff = val; if ((flags & FOF_NOLOCK) == 0) { KASSERT((fp->f_vnread_flags & FOFFSET_LOCKED) != 0, ("Lost FOFFSET_LOCKED")); if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING) wakeup(&fp->f_vnread_flags); fp->f_vnread_flags = 0; } mtx_unlock(mtxp); } void foffset_lock_uio(struct file *fp, struct uio *uio, int flags) { if ((flags & FOF_OFFSET) == 0) uio->uio_offset = foffset_lock(fp, flags); } void foffset_unlock_uio(struct file *fp, struct uio *uio, int flags) { if ((flags & FOF_OFFSET) == 0) foffset_unlock(fp, uio->uio_offset, flags); } static int get_advice(struct file *fp, struct uio *uio) { struct mtx *mtxp; int ret; ret = POSIX_FADV_NORMAL; if (fp->f_advice == NULL || fp->f_vnode->v_type != VREG) return (ret); mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if (fp->f_advice != NULL && uio->uio_offset >= fp->f_advice->fa_start && uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end) ret = fp->f_advice->fa_advice; mtx_unlock(mtxp); return (ret); } /* * File table vnode read routine. */ static int vn_read(fp, uio, active_cred, flags, td) struct file *fp; struct uio *uio; struct ucred *active_cred; int flags; struct thread *td; { struct vnode *vp; off_t orig_offset; int error, ioflag; int advice; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET")); vp = fp->f_vnode; ioflag = 0; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; advice = get_advice(fp, uio); vn_lock(vp, LK_SHARED | LK_RETRY); switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_NOREUSE: ioflag |= sequential_heuristic(uio, fp); break; case POSIX_FADV_RANDOM: /* Disable read-ahead for random I/O. 
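The FOFFSET_LOCKED/FOFFSET_LOCK_WAITING dance in foffset_lock()/foffset_unlock() above is a hand-rolled sleep lock over f_offset, built from a pool mutex plus msleep()/wakeup(). A userspace analogue in pthreads, for illustration only:

#include <sys/types.h>
#include <pthread.h>
#include <stdbool.h>

struct offset_lock {
        pthread_mutex_t mtx;    /* plays the pool mutex */
        pthread_cond_t  cv;     /* plays msleep()/wakeup() */
        bool            locked; /* plays FOFFSET_LOCKED */
        off_t           offset;
};

static off_t
offset_lock(struct offset_lock *ol)
{
        off_t res;

        pthread_mutex_lock(&ol->mtx);
        while (ol->locked)      /* would set FOFFSET_LOCK_WAITING */
                pthread_cond_wait(&ol->cv, &ol->mtx);
        ol->locked = true;
        res = ol->offset;
        pthread_mutex_unlock(&ol->mtx);
        return (res);
}

static void
offset_unlock(struct offset_lock *ol, off_t val)
{
        pthread_mutex_lock(&ol->mtx);
        ol->offset = val;
        ol->locked = false;
        pthread_cond_broadcast(&ol->cv);
        pthread_mutex_unlock(&ol->mtx);
}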
*/ break; } orig_offset = uio->uio_offset; #ifdef MAC error = mac_vnode_check_read(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_READ(vp, uio, ioflag, fp->f_cred); fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0); if (error == 0 && advice == POSIX_FADV_NOREUSE && orig_offset != uio->uio_offset) /* * Use POSIX_FADV_DONTNEED to flush pages and buffers * for the backing file after a POSIX_FADV_NOREUSE * read(2). */ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1, POSIX_FADV_DONTNEED); return (error); } /* * File table vnode write routine. */ static int vn_write(fp, uio, active_cred, flags, td) struct file *fp; struct uio *uio; struct ucred *active_cred; int flags; struct thread *td; { struct vnode *vp; struct mount *mp; off_t orig_offset; int error, ioflag, lock_flags; int advice; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET")); vp = fp->f_vnode; if (vp->v_type == VREG) bwillwrite(); ioflag = IO_UNIT; if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; if ((fp->f_flag & O_FSYNC) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; mp = NULL; if (vp->v_type != VCHR && (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto unlock; advice = get_advice(fp, uio); if (MNT_SHARED_WRITES(mp) || (mp == NULL && MNT_SHARED_WRITES(vp->v_mount))) { lock_flags = LK_SHARED; } else { lock_flags = LK_EXCLUSIVE; } vn_lock(vp, lock_flags | LK_RETRY); switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_NOREUSE: ioflag |= sequential_heuristic(uio, fp); break; case POSIX_FADV_RANDOM: /* XXX: Is this correct? */ break; } orig_offset = uio->uio_offset; #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_WRITE(vp, uio, ioflag, fp->f_cred); fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0); if (vp->v_type != VCHR) vn_finished_write(mp); if (error == 0 && advice == POSIX_FADV_NOREUSE && orig_offset != uio->uio_offset) /* * Use POSIX_FADV_DONTNEED to flush pages and buffers * for the backing file after a POSIX_FADV_NOREUSE * write(2). */ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1, POSIX_FADV_DONTNEED); unlock: return (error); } /* * The vn_io_fault() is a wrapper around vn_read() and vn_write() to * prevent the following deadlock: * * Assume that the thread A reads from the vnode vp1 into userspace * buffer buf1 backed by the pages of vnode vp2. If a page in buf1 is * currently not resident, then system ends up with the call chain * vn_read() -> VOP_READ(vp1) -> uiomove() -> [Page Fault] -> * vm_fault(buf1) -> vnode_pager_getpages(vp2) -> VOP_GETPAGES(vp2) * which establishes lock order vp1->vn_lock, then vp2->vn_lock. * If, at the same time, thread B reads from vnode vp2 into buffer buf2 * backed by the pages of vnode vp1, and some page in buf2 is not * resident, we get a reversed order vp2->vn_lock, then vp1->vn_lock. * * To prevent the lock order reversal and deadlock, vn_io_fault() does * not allow page faults to happen during VOP_READ() or VOP_WRITE(). * Instead, it first tries to do the whole range i/o with pagefaults * disabled. If all pages in the i/o buffer are resident and mapped, * VOP will succeed (ignoring the genuine filesystem errors). 
* Otherwise, we get back EFAULT, and vn_io_fault() falls back to do * i/o in chunks, with all pages in the chunk prefaulted and held * using vm_fault_quick_hold_pages(). * * Filesystems using this deadlock avoidance scheme should use the * array of the held pages from uio, saved in the curthread->td_ma, * instead of doing uiomove(). A helper function * vn_io_fault_uiomove() converts uiomove request into * uiomove_fromphys() over td_ma array. * * Since vnode locks do not cover the whole i/o anymore, rangelocks * make the current i/o request atomic with respect to other i/os and * truncations. */ /* * Decode vn_io_fault_args and perform the corresponding i/o. */ static int vn_io_fault_doio(struct vn_io_fault_args *args, struct uio *uio, struct thread *td) { switch (args->kind) { case VN_IO_FAULT_FOP: return ((args->args.fop_args.doio)(args->args.fop_args.fp, uio, args->cred, args->flags, td)); case VN_IO_FAULT_VOP: if (uio->uio_rw == UIO_READ) { return (VOP_READ(args->args.vop_args.vp, uio, args->flags, args->cred)); } else if (uio->uio_rw == UIO_WRITE) { return (VOP_WRITE(args->args.vop_args.vp, uio, args->flags, args->cred)); } break; } panic("vn_io_fault_doio: unknown kind of io %d %d", args->kind, uio->uio_rw); } static int vn_io_fault_touch(char *base, const struct uio *uio) { int r; r = fubyte(base); if (r == -1 || (uio->uio_rw == UIO_READ && subyte(base, r) == -1)) return (EFAULT); return (0); } static int vn_io_fault_prefault_user(const struct uio *uio) { char *base; const struct iovec *iov; size_t len; ssize_t resid; int error, i; KASSERT(uio->uio_segflg == UIO_USERSPACE, ("vn_io_fault_prefault userspace")); error = i = 0; iov = uio->uio_iov; resid = uio->uio_resid; base = iov->iov_base; len = iov->iov_len; while (resid > 0) { error = vn_io_fault_touch(base, uio); if (error != 0) break; if (len < PAGE_SIZE) { if (len != 0) { error = vn_io_fault_touch(base + len - 1, uio); if (error != 0) break; resid -= len; } if (++i >= uio->uio_iovcnt) break; iov = uio->uio_iov + i; base = iov->iov_base; len = iov->iov_len; } else { len -= PAGE_SIZE; base += PAGE_SIZE; resid -= PAGE_SIZE; } } return (error); } /* * Common code for vn_io_fault(), agnostic to the kind of i/o request. * Uses vn_io_fault_doio() to make the call to an actual i/o function. * Used from vn_rdwr() and vn_io_fault(), which encode the i/o request * into args and call vn_io_fault1() to handle faults during the user * mode buffer accesses. */ static int vn_io_fault1(struct vnode *vp, struct uio *uio, struct vn_io_fault_args *args, struct thread *td) { vm_page_t ma[io_hold_cnt + 2]; struct uio *uio_clone, short_uio; struct iovec short_iovec[1]; vm_page_t *prev_td_ma; vm_prot_t prot; vm_offset_t addr, end; size_t len, resid; ssize_t adv; int error, cnt, save, saveheld, prev_td_ma_cnt; if (vn_io_fault_prefault) { error = vn_io_fault_prefault_user(uio); if (error != 0) return (error); /* Or ignore ? */ } prot = uio->uio_rw == UIO_READ ? VM_PROT_WRITE : VM_PROT_READ; /* * The UFS follows IO_UNIT directive and replays back both * uio_offset and uio_resid if an error is encountered during the * operation. But, since the iovec may be already advanced, * uio is still in an inconsistent state. * * Cache a copy of the original uio, which is advanced to the redo * point using UIO_NOCOPY below. 
*/ uio_clone = cloneuio(uio); resid = uio->uio_resid; short_uio.uio_segflg = UIO_USERSPACE; short_uio.uio_rw = uio->uio_rw; short_uio.uio_td = uio->uio_td; save = vm_fault_disable_pagefaults(); error = vn_io_fault_doio(args, uio, td); if (error != EFAULT) goto out; atomic_add_long(&vn_io_faults_cnt, 1); uio_clone->uio_segflg = UIO_NOCOPY; uiomove(NULL, resid - uio->uio_resid, uio_clone); uio_clone->uio_segflg = uio->uio_segflg; saveheld = curthread_pflags_set(TDP_UIOHELD); prev_td_ma = td->td_ma; prev_td_ma_cnt = td->td_ma_cnt; while (uio_clone->uio_resid != 0) { len = uio_clone->uio_iov->iov_len; if (len == 0) { KASSERT(uio_clone->uio_iovcnt >= 1, ("iovcnt underflow")); uio_clone->uio_iov++; uio_clone->uio_iovcnt--; continue; } if (len > io_hold_cnt * PAGE_SIZE) len = io_hold_cnt * PAGE_SIZE; addr = (uintptr_t)uio_clone->uio_iov->iov_base; end = round_page(addr + len); if (end < addr) { error = EFAULT; break; } cnt = atop(end - trunc_page(addr)); /* * A perfectly misaligned address and length could cause * both the start and the end of the chunk to use partial * page. +2 accounts for such a situation. */ cnt = vm_fault_quick_hold_pages(&td->td_proc->p_vmspace->vm_map, addr, len, prot, ma, io_hold_cnt + 2); if (cnt == -1) { error = EFAULT; break; } short_uio.uio_iov = &short_iovec[0]; short_iovec[0].iov_base = (void *)addr; short_uio.uio_iovcnt = 1; short_uio.uio_resid = short_iovec[0].iov_len = len; short_uio.uio_offset = uio_clone->uio_offset; td->td_ma = ma; td->td_ma_cnt = cnt; error = vn_io_fault_doio(args, &short_uio, td); vm_page_unhold_pages(ma, cnt); adv = len - short_uio.uio_resid; uio_clone->uio_iov->iov_base = (char *)uio_clone->uio_iov->iov_base + adv; uio_clone->uio_iov->iov_len -= adv; uio_clone->uio_resid -= adv; uio_clone->uio_offset += adv; uio->uio_resid -= adv; uio->uio_offset += adv; if (error != 0 || adv == 0) break; } td->td_ma = prev_td_ma; td->td_ma_cnt = prev_td_ma_cnt; curthread_pflags_restore(saveheld); out: vm_fault_enable_pagefaults(save); free(uio_clone, M_IOV); return (error); } static int vn_io_fault(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { fo_rdwr_t *doio; struct vnode *vp; void *rl_cookie; struct vn_io_fault_args args; int error; doio = uio->uio_rw == UIO_READ ? vn_read : vn_write; vp = fp->f_vnode; foffset_lock_uio(fp, uio, flags); if (do_vn_io_fault(vp, uio)) { args.kind = VN_IO_FAULT_FOP; args.args.fop_args.fp = fp; args.args.fop_args.doio = doio; args.cred = active_cred; args.flags = flags | FOF_OFFSET; if (uio->uio_rw == UIO_READ) { rl_cookie = vn_rangelock_rlock(vp, uio->uio_offset, uio->uio_offset + uio->uio_resid); } else if ((fp->f_flag & O_APPEND) != 0 || (flags & FOF_OFFSET) == 0) { /* For appenders, punt and lock the whole range. */ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); } else { rl_cookie = vn_rangelock_wlock(vp, uio->uio_offset, uio->uio_offset + uio->uio_resid); } error = vn_io_fault1(vp, uio, &args, td); vn_rangelock_unlock(vp, rl_cookie); } else { error = doio(fp, uio, active_cred, flags | FOF_OFFSET, td); } foffset_unlock_uio(fp, uio, flags); return (error); } /* * Helper function to perform the requested uiomove operation using * the held pages for io->uio_iov[0].iov_base buffer instead of * copyin/copyout. 
Access to the pages with uiomove_fromphys() * instead of iov_base prevents page faults that could occur due to * pmap_collect() invalidating the mapping created by * vm_fault_quick_hold_pages(), or pageout daemon, page laundry or * object cleanup revoking the write access from page mappings. * * Filesystems specified MNTK_NO_IOPF shall use vn_io_fault_uiomove() * instead of plain uiomove(). */ int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio) { struct uio transp_uio; struct iovec transp_iov[1]; struct thread *td; size_t adv; int error, pgadv; td = curthread; if ((td->td_pflags & TDP_UIOHELD) == 0 || uio->uio_segflg != UIO_USERSPACE) return (uiomove(data, xfersize, uio)); KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt)); transp_iov[0].iov_base = data; transp_uio.uio_iov = &transp_iov[0]; transp_uio.uio_iovcnt = 1; if (xfersize > uio->uio_resid) xfersize = uio->uio_resid; transp_uio.uio_resid = transp_iov[0].iov_len = xfersize; transp_uio.uio_offset = 0; transp_uio.uio_segflg = UIO_SYSSPACE; /* * Since transp_iov points to data, and td_ma page array * corresponds to original uio->uio_iov, we need to invert the * direction of the i/o operation as passed to * uiomove_fromphys(). */ switch (uio->uio_rw) { case UIO_WRITE: transp_uio.uio_rw = UIO_READ; break; case UIO_READ: transp_uio.uio_rw = UIO_WRITE; break; } transp_uio.uio_td = uio->uio_td; error = uiomove_fromphys(td->td_ma, ((vm_offset_t)uio->uio_iov->iov_base) & PAGE_MASK, xfersize, &transp_uio); adv = xfersize - transp_uio.uio_resid; pgadv = (((vm_offset_t)uio->uio_iov->iov_base + adv) >> PAGE_SHIFT) - (((vm_offset_t)uio->uio_iov->iov_base) >> PAGE_SHIFT); td->td_ma += pgadv; KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt, pgadv)); td->td_ma_cnt -= pgadv; uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + adv; uio->uio_iov->iov_len -= adv; uio->uio_resid -= adv; uio->uio_offset += adv; return (error); } int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, struct uio *uio) { struct thread *td; vm_offset_t iov_base; int cnt, pgadv; td = curthread; if ((td->td_pflags & TDP_UIOHELD) == 0 || uio->uio_segflg != UIO_USERSPACE) return (uiomove_fromphys(ma, offset, xfersize, uio)); KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt)); cnt = xfersize > uio->uio_resid ? uio->uio_resid : xfersize; iov_base = (vm_offset_t)uio->uio_iov->iov_base; switch (uio->uio_rw) { case UIO_WRITE: pmap_copy_pages(td->td_ma, iov_base & PAGE_MASK, ma, offset, cnt); break; case UIO_READ: pmap_copy_pages(ma, offset, td->td_ma, iov_base & PAGE_MASK, cnt); break; } pgadv = ((iov_base + cnt) >> PAGE_SHIFT) - (iov_base >> PAGE_SHIFT); td->td_ma += pgadv; KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt, pgadv)); td->td_ma_cnt -= pgadv; uio->uio_iov->iov_base = (char *)(iov_base + cnt); uio->uio_iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; return (0); } /* * File table truncate routine. */ static int vn_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { struct vattr vattr; struct mount *mp; struct vnode *vp; void *rl_cookie; int error; vp = fp->f_vnode; /* * Lock the whole range for truncation. Otherwise split i/o * might happen partly before and partly after the truncation. 
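In both helpers above, pgadv counts the page boundaries crossed by the buffer pointer, which is not the same as bytes moved divided by the page size. A worked example, assuming 4K pages as on amd64 and a hypothetical buffer address:

#include <stdio.h>

#define XPAGE_SHIFT     12      /* assumed 4K pages */

int
main(void)
{
        unsigned long iov_base = 0x7fffdffffffcUL;      /* hypothetical */
        unsigned long adv = 8;                          /* bytes moved */
        long pgadv;

        /* An 8-byte move crossing a page boundary consumes one held page. */
        pgadv = (long)(((iov_base + adv) >> XPAGE_SHIFT) -
            (iov_base >> XPAGE_SHIFT));
        printf("pgadv = %ld\n", pgadv);                 /* prints 1 */
        return (0);
}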
*/ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) goto out1; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_type == VDIR) { error = EISDIR; goto out; } #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error) goto out; #endif error = vn_writechk(vp); if (error == 0) { VATTR_NULL(&vattr); vattr.va_size = length; if ((fp->f_flag & O_FSYNC) != 0) vattr.va_vaflags |= VA_SYNC; error = VOP_SETATTR(vp, &vattr, fp->f_cred); } out: VOP_UNLOCK(vp, 0); vn_finished_write(mp); out1: vn_rangelock_unlock(vp, rl_cookie); return (error); } /* * File table vnode stat routine. */ static int vn_statfile(fp, sb, active_cred, td) struct file *fp; struct stat *sb; struct ucred *active_cred; struct thread *td; { struct vnode *vp = fp->f_vnode; int error; vn_lock(vp, LK_SHARED | LK_RETRY); error = vn_stat(vp, sb, active_cred, fp->f_cred, td); VOP_UNLOCK(vp, 0); return (error); } /* * Stat a vnode; implementation for the stat syscall */ int -vn_stat(vp, sb, active_cred, file_cred, td) - struct vnode *vp; - register struct stat *sb; - struct ucred *active_cred; - struct ucred *file_cred; - struct thread *td; +vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred, + struct ucred *file_cred, struct thread *td) { struct vattr vattr; - register struct vattr *vap; + struct vattr *vap; int error; u_short mode; AUDIT_ARG_VNODE1(vp); #ifdef MAC error = mac_vnode_check_stat(active_cred, file_cred, vp); if (error) return (error); #endif vap = &vattr; /* * Initialize defaults for new and unusual fields, so that file * systems which don't support these fields don't need to know * about them. */ vap->va_birthtime.tv_sec = -1; vap->va_birthtime.tv_nsec = 0; vap->va_fsid = VNOVAL; vap->va_rdev = NODEV; error = VOP_GETATTR(vp, vap, active_cred); if (error) return (error); /* * Zero the spare stat fields */ bzero(sb, sizeof *sb); /* * Copy from vattr table */ if (vap->va_fsid != VNOVAL) sb->st_dev = vap->va_fsid; else sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; sb->st_ino = vap->va_fileid; mode = vap->va_mode; switch (vap->va_type) { case VREG: mode |= S_IFREG; break; case VDIR: mode |= S_IFDIR; break; case VBLK: mode |= S_IFBLK; break; case VCHR: mode |= S_IFCHR; break; case VLNK: mode |= S_IFLNK; break; case VSOCK: mode |= S_IFSOCK; break; case VFIFO: mode |= S_IFIFO; break; default: return (EBADF); } sb->st_mode = mode; sb->st_nlink = vap->va_nlink; sb->st_uid = vap->va_uid; sb->st_gid = vap->va_gid; sb->st_rdev = vap->va_rdev; if (vap->va_size > OFF_MAX) return (EOVERFLOW); sb->st_size = vap->va_size; sb->st_atim = vap->va_atime; sb->st_mtim = vap->va_mtime; sb->st_ctim = vap->va_ctime; sb->st_birthtim = vap->va_birthtime; /* * According to www.opengroup.org, the meaning of st_blksize is * "a filesystem-specific preferred I/O block size for this * object. In some filesystem types, this may vary from file * to file" * Use minimum/default of PAGE_SIZE (e.g. for VCHR). */ sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize); sb->st_flags = vap->va_flags; if (priv_check(td, PRIV_VFS_GENERATION)) sb->st_gen = 0; else sb->st_gen = vap->va_gen; sb->st_blocks = vap->va_bytes / S_BLKSIZE; return (0); } /* * File table vnode ioctl routine. 
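The va_type switch in vn_stat() above is a straight table from vnode type to S_IF* file-type bits, with unknown types surfacing as EBADF. The same mapping as a standalone function; the local xvtype enum stands in for the kernel's enum vtype:

#include <sys/stat.h>

enum xvtype { XVREG, XVDIR, XVBLK, XVCHR, XVLNK, XVSOCK, XVFIFO };

static unsigned short
vtype_to_ifmt(enum xvtype t)
{
        switch (t) {
        case XVREG:     return (S_IFREG);
        case XVDIR:     return (S_IFDIR);
        case XVBLK:     return (S_IFBLK);
        case XVCHR:     return (S_IFCHR);
        case XVLNK:     return (S_IFLNK);
        case XVSOCK:    return (S_IFSOCK);
        case XVFIFO:    return (S_IFIFO);
        default:        return (0);     /* vn_stat() returns EBADF here */
        }
}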
*/ static int -vn_ioctl(fp, com, data, active_cred, td) - struct file *fp; - u_long com; - void *data; - struct ucred *active_cred; - struct thread *td; +vn_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, + struct thread *td) { struct vattr vattr; struct vnode *vp; int error; vp = fp->f_vnode; switch (vp->v_type) { case VDIR: case VREG: switch (com) { case FIONREAD: vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, active_cred); VOP_UNLOCK(vp, 0); if (error == 0) *(int *)data = vattr.va_size - fp->f_offset; return (error); case FIONBIO: case FIOASYNC: return (0); default: return (VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td)); } default: return (ENOTTY); } } /* * File table vnode poll routine. */ static int -vn_poll(fp, events, active_cred, td) - struct file *fp; - int events; - struct ucred *active_cred; - struct thread *td; +vn_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) { struct vnode *vp; int error; vp = fp->f_vnode; #ifdef MAC vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(vp); error = mac_vnode_check_poll(active_cred, fp->f_cred, vp); VOP_UNLOCK(vp, 0); if (!error) #endif error = VOP_POLL(vp, events, fp->f_cred, td); return (error); } /* * Acquire the requested lock and then check for validity. LK_RETRY * permits vn_lock to return doomed vnodes. */ int _vn_lock(struct vnode *vp, int flags, char *file, int line) { int error; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("vn_lock: no locktype")); VNASSERT(vp->v_holdcnt != 0, vp, ("vn_lock: zero hold count")); retry: error = VOP_LOCK1(vp, flags, file, line); flags &= ~LK_INTERLOCK; /* Interlock is always dropped. */ KASSERT((flags & LK_RETRY) == 0 || error == 0, ("vn_lock: error %d incompatible with flags %#x", error, flags)); if ((flags & LK_RETRY) == 0) { if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) { VOP_UNLOCK(vp, 0); error = ENOENT; } } else if (error != 0) goto retry; return (error); } /* * File table vnode close routine. */ static int vn_closefile(struct file *fp, struct thread *td) { struct vnode *vp; struct flock lf; int error; bool ref; vp = fp->f_vnode; fp->f_ops = &badfileops; ref= (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE; error = vn_close1(vp, fp->f_flag, fp->f_cred, td, ref); if (__predict_false(ref)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); vrele(vp); } return (error); } static bool vn_suspendable(struct mount *mp) { return (mp->mnt_op->vfs_susp_clean != NULL); } /* * Preparing to start a filesystem write operation. If the operation is * permitted, then we bump the count of operations in progress and * proceed. If a suspend request is in progress, we wait until the * suspension is over, and then proceed. */ static int vn_start_write_locked(struct mount *mp, int flags) { int error, mflags; mtx_assert(MNT_MTX(mp), MA_OWNED); error = 0; /* * Check on status of suspension. */ if ((curthread->td_pflags & TDP_IGNSUSP) == 0 || mp->mnt_susp_owner != curthread) { mflags = ((mp->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0 ? 
(flags & PCATCH) : 0) | (PUSER - 1); while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { if (flags & V_NOWAIT) { error = EWOULDBLOCK; goto unlock; } error = msleep(&mp->mnt_flag, MNT_MTX(mp), mflags, "suspfs", 0); if (error) goto unlock; } } if (flags & V_XSLEEP) goto unlock; mp->mnt_writeopcount++; unlock: if (error != 0 || (flags & V_XSLEEP) != 0) MNT_REL(mp); MNT_IUNLOCK(mp); return (error); } int vn_start_write(struct vnode *vp, struct mount **mpp, int flags) { struct mount *mp; int error; KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL), ("V_MNTREF requires mp")); error = 0; /* * If a vnode is provided, get and return the mount point to * which it will write. */ if (vp != NULL) { if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { *mpp = NULL; if (error != EOPNOTSUPP) return (error); return (0); } } if ((mp = *mpp) == NULL) return (0); if (!vn_suspendable(mp)) { if (vp != NULL || (flags & V_MNTREF) != 0) vfs_rel(mp); return (0); } /* * VOP_GETWRITEMOUNT() returns with the mp refcount held through * a vfs_ref(). * As long as a vnode is not provided we need to acquire a * refcount for the provided mountpoint too, in order to * emulate a vfs_ref(). */ MNT_ILOCK(mp); if (vp == NULL && (flags & V_MNTREF) == 0) MNT_REF(mp); return (vn_start_write_locked(mp, flags)); } /* * Secondary suspension. Used by operations such as vop_inactive * routines that are needed by the higher level functions. These * are allowed to proceed until all the higher level functions have * completed (indicated by mnt_writeopcount dropping to zero). At that * time, these operations are halted until the suspension is over. */ int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, int flags) { struct mount *mp; int error; KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL), ("V_MNTREF requires mp")); retry: if (vp != NULL) { if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { *mpp = NULL; if (error != EOPNOTSUPP) return (error); return (0); } } /* * If we are not suspended or have not yet reached suspended * mode, then let the operation proceed. */ if ((mp = *mpp) == NULL) return (0); if (!vn_suspendable(mp)) { if (vp != NULL || (flags & V_MNTREF) != 0) vfs_rel(mp); return (0); } /* * VOP_GETWRITEMOUNT() returns with the mp refcount held through * a vfs_ref(). * As long as a vnode is not provided we need to acquire a * refcount for the provided mountpoint too, in order to * emulate a vfs_ref(). */ MNT_ILOCK(mp); if (vp == NULL && (flags & V_MNTREF) == 0) MNT_REF(mp); if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) { mp->mnt_secondary_writes++; mp->mnt_secondary_accwrites++; MNT_IUNLOCK(mp); return (0); } if (flags & V_NOWAIT) { MNT_REL(mp); MNT_IUNLOCK(mp); return (EWOULDBLOCK); } /* * Wait for the suspension to finish. */ error = msleep(&mp->mnt_flag, MNT_MTX(mp), (PUSER - 1) | PDROP | ((mp->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0 ? (flags & PCATCH) : 0), "suspfs", 0); vfs_rel(mp); if (error == 0) goto retry; return (error); } /* * Filesystem write operation has completed. If we are suspending and this * operation is the last one, notify the suspender that the suspension is * now in effect. 
*/ void -vn_finished_write(mp) - struct mount *mp; +vn_finished_write(struct mount *mp) { if (mp == NULL || !vn_suspendable(mp)) return; MNT_ILOCK(mp); MNT_REL(mp); mp->mnt_writeopcount--; if (mp->mnt_writeopcount < 0) panic("vn_finished_write: neg cnt"); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && mp->mnt_writeopcount <= 0) wakeup(&mp->mnt_writeopcount); MNT_IUNLOCK(mp); } /* * Filesystem secondary write operation has completed. If we are * suspending and this operation is the last one, notify the suspender * that the suspension is now in effect. */ void -vn_finished_secondary_write(mp) - struct mount *mp; +vn_finished_secondary_write(struct mount *mp) { if (mp == NULL || !vn_suspendable(mp)) return; MNT_ILOCK(mp); MNT_REL(mp); mp->mnt_secondary_writes--; if (mp->mnt_secondary_writes < 0) panic("vn_finished_secondary_write: neg cnt"); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && mp->mnt_secondary_writes <= 0) wakeup(&mp->mnt_secondary_writes); MNT_IUNLOCK(mp); } /* * Request a filesystem to suspend write operations. */ int vfs_write_suspend(struct mount *mp, int flags) { int error; MPASS(vn_suspendable(mp)); MNT_ILOCK(mp); if (mp->mnt_susp_owner == curthread) { MNT_IUNLOCK(mp); return (EALREADY); } while (mp->mnt_kern_flag & MNTK_SUSPEND) msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0); /* * Unmount holds a write reference on the mount point. If we * own a busy reference and drain for writers, we deadlock with * the reference draining in the unmount path. Callers of * vfs_write_suspend() must specify VS_SKIP_UNMOUNT if * a vfs_busy() reference is owned and the caller is not in the * unmount context. */ if ((flags & VS_SKIP_UNMOUNT) != 0 && (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { MNT_IUNLOCK(mp); return (EBUSY); } mp->mnt_kern_flag |= MNTK_SUSPEND; mp->mnt_susp_owner = curthread; if (mp->mnt_writeopcount > 0) (void) msleep(&mp->mnt_writeopcount, MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0); else MNT_IUNLOCK(mp); if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0) vfs_write_resume(mp, 0); return (error); } /* * Request a filesystem to resume write operations. */ void vfs_write_resume(struct mount *mp, int flags) { MPASS(vn_suspendable(mp)); MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { KASSERT(mp->mnt_susp_owner == curthread, ("mnt_susp_owner")); mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 | MNTK_SUSPENDED); mp->mnt_susp_owner = NULL; wakeup(&mp->mnt_writeopcount); wakeup(&mp->mnt_flag); curthread->td_pflags &= ~TDP_IGNSUSP; if ((flags & VR_START_WRITE) != 0) { MNT_REF(mp); mp->mnt_writeopcount++; } MNT_IUNLOCK(mp); if ((flags & VR_NO_SUSPCLR) == 0) VFS_SUSP_CLEAN(mp); } else if ((flags & VR_START_WRITE) != 0) { MNT_REF(mp); vn_start_write_locked(mp, 0); } else { MNT_IUNLOCK(mp); } } /* * Helper loop around vfs_write_suspend() for filesystem unmount VFS * methods. */ int vfs_write_suspend_umnt(struct mount *mp) { int error; MPASS(vn_suspendable(mp)); KASSERT((curthread->td_pflags & TDP_IGNSUSP) == 0, ("vfs_write_suspend_umnt: recursed")); /* dounmount() already called vn_start_write(). */ for (;;) { vn_finished_write(mp); error = vfs_write_suspend(mp, 0); if (error != 0) { vn_start_write(NULL, &mp, V_WAIT); return (error); } MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_SUSPENDED) != 0) break; MNT_IUNLOCK(mp); vn_start_write(NULL, &mp, V_WAIT); } mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2); wakeup(&mp->mnt_flag); MNT_IUNLOCK(mp); curthread->td_pflags |= TDP_IGNSUSP; return (0); } /* * Implement kqueues for files by translating them to vnode operations.
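The suspend/resume pair defined above is what snapshot-style code uses to quiesce a filesystem. A minimal sketch, again assuming kernel context and a hypothetical caller that owns a vfs_busy() reference:

static int
example_quiesce(struct mount *mp)
{
	int error;

	/* Fail with EBUSY instead of deadlocking against unmount. */
	error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT);
	if (error != 0)
		return (error);
	/* ... mnt_writeopcount has drained and VFS_SYNC() has run ... */
	vfs_write_resume(mp, 0);
	return (0);
}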
*/ static int vn_kqfilter(struct file *fp, struct knote *kn) { return (VOP_KQFILTER(fp->f_vnode, kn)); } /* * Simplified in-kernel wrapper calls for extended attribute access. * Both calls pass in a NULL credential, authorizing as "kernel" access. * Set IO_NODELOCKED in ioflg if the vnode is already locked. */ int vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int *buflen, char *buf, struct thread *td) { struct uio auio; struct iovec iov; int error; iov.iov_len = *buflen; iov.iov_base = buf; auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_offset = 0; auio.uio_resid = *buflen; if ((ioflg & IO_NODELOCKED) == 0) vn_lock(vp, LK_SHARED | LK_RETRY); ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute retrieval as kernel */ error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) VOP_UNLOCK(vp, 0); if (error == 0) { *buflen = *buflen - auio.uio_resid; } return (error); } /* * XXX failure mode if partially written? */ int vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int buflen, char *buf, struct thread *td) { struct uio auio; struct iovec iov; struct mount *mp; int error; iov.iov_len = buflen; iov.iov_base = buf; auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_offset = 0; auio.uio_resid = buflen; if ((ioflg & IO_NODELOCKED) == 0) { if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute setting as kernel */ error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) { vn_finished_write(mp); VOP_UNLOCK(vp, 0); } return (error); } int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, struct thread *td) { struct mount *mp; int error; if ((ioflg & IO_NODELOCKED) == 0) { if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute removal as kernel */ error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) { vn_finished_write(mp); VOP_UNLOCK(vp, 0); } return (error); } static int vn_get_ino_alloc_vget(struct mount *mp, void *arg, int lkflags, struct vnode **rvp) { return (VFS_VGET(mp, *(ino_t *)arg, lkflags, rvp)); } int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp) { return (vn_vget_ino_gen(vp, vn_get_ino_alloc_vget, &ino, lkflags, rvp)); } int vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc, void *alloc_arg, int lkflags, struct vnode **rvp) { struct mount *mp; int ltype, error; ASSERT_VOP_LOCKED(vp, "vn_vget_ino_get"); mp = vp->v_mount; ltype = VOP_ISLOCKED(vp); KASSERT(ltype == LK_EXCLUSIVE || ltype == LK_SHARED, ("vn_vget_ino: vp not locked")); error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(vp, 0); error = vfs_busy(mp, 0); vn_lock(vp, ltype | LK_RETRY); vfs_rel(mp); if (error != 0) return (ENOENT); if (vp->v_iflag & VI_DOOMED) { vfs_unbusy(mp); return (ENOENT); } } VOP_UNLOCK(vp, 0); error = alloc(mp, alloc_arg, lkflags, rvp); vfs_unbusy(mp); if (*rvp 
!= vp) vn_lock(vp, ltype | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { if (error == 0) { if (*rvp == vp) vunref(vp); else vput(*rvp); } error = ENOENT; } return (error); } int vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, struct thread *td) { if (vp->v_type != VREG || td == NULL) return (0); if ((uoff_t)uio->uio_offset + uio->uio_resid > lim_cur(td, RLIMIT_FSIZE)) { PROC_LOCK(td->td_proc); kern_psignal(td->td_proc, SIGXFSZ); PROC_UNLOCK(td->td_proc); return (EFBIG); } return (0); } int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { struct vnode *vp; vp = fp->f_vnode; #ifdef AUDIT vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif return (setfmode(td, active_cred, vp, mode)); } int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { struct vnode *vp; vp = fp->f_vnode; #ifdef AUDIT vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif return (setfown(td, active_cred, vp, uid, gid)); } void vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) { vm_object_t object; if ((object = vp->v_object) == NULL) return; VM_OBJECT_WLOCK(object); vm_object_page_remove(object, start, end, 0); VM_OBJECT_WUNLOCK(object); } int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) { struct vattr va; daddr_t bn, bnp; uint64_t bsize; off_t noff; int error; KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, ("Wrong command %lu", cmd)); if (vn_lock(vp, LK_SHARED) != 0) return (EBADF); if (vp->v_type != VREG) { error = ENOTTY; goto unlock; } error = VOP_GETATTR(vp, &va, cred); if (error != 0) goto unlock; noff = *off; if (noff >= va.va_size) { error = ENXIO; goto unlock; } bsize = vp->v_mount->mnt_stat.f_iosize; for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize) { error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); if (error == EOPNOTSUPP) { error = ENOTTY; goto unlock; } if ((bnp == -1 && cmd == FIOSEEKHOLE) || (bnp != -1 && cmd == FIOSEEKDATA)) { noff = bn * bsize; if (noff < *off) noff = *off; goto unlock; } } if (noff > va.va_size) noff = va.va_size; /* noff == va.va_size. There is an implicit hole at the end of file. */ if (cmd == FIOSEEKDATA) error = ENXIO; unlock: VOP_UNLOCK(vp, 0); if (error == 0) *off = noff; return (error); } int vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) { struct ucred *cred; struct vnode *vp; struct vattr vattr; off_t foffset, size; int error, noneg; cred = td->td_ucred; vp = fp->f_vnode; foffset = foffset_lock(fp, 0); noneg = (vp->v_type != VCHR); error = 0; switch (whence) { case L_INCR: if (noneg && (foffset < 0 || (offset > 0 && foffset > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += foffset; break; case L_XTND: vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); if (error) break; /* * If the file references a disk device, then fetch * the media size and use that to determine the ending * offset. 
*/ if (vattr.va_size == 0 && vp->v_type == VCHR && fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0) vattr.va_size = size; if (noneg && (vattr.va_size > OFF_MAX || (offset > 0 && vattr.va_size > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += vattr.va_size; break; case L_SET: break; case SEEK_DATA: error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td); break; case SEEK_HOLE: error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td); break; default: error = EINVAL; } if (error == 0 && noneg && offset < 0) error = EINVAL; if (error != 0) goto drop; VFS_KNOTE_UNLOCKED(vp, 0); td->td_uretoff.tdu_off = offset; drop: foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0); return (error); } int vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { int error; /* * Grant permission if the caller is the owner of the file, or * the super-user, or has ACL_WRITE_ATTRIBUTES permission * on the file. If the time pointer is null, then write * permission on the file is also sufficient. * * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes: * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES * will be allowed to set the times [..] to the current * server time. */ error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); if (error != 0 && (vap->va_vaflags & VA_UTIMES_NULL) != 0) error = VOP_ACCESS(vp, VWRITE, cred, td); return (error); } int vn_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { struct vnode *vp; int error; if (fp->f_type == DTYPE_FIFO) kif->kf_type = KF_TYPE_FIFO; else kif->kf_type = KF_TYPE_VNODE; vp = fp->f_vnode; vref(vp); FILEDESC_SUNLOCK(fdp); error = vn_fill_kinfo_vnode(vp, kif); vrele(vp); FILEDESC_SLOCK(fdp); return (error); } static inline void vn_fill_junk(struct kinfo_file *kif) { size_t len, olen; /* * Simulate vn_fullpath returning changing values for a given * vp during e.g. coredump. */ len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1; olen = strlen(kif->kf_path); if (len < olen) strcpy(&kif->kf_path[len - 1], "$"); else for (; olen < len; olen++) strcpy(&kif->kf_path[olen], "A"); } int vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif) { struct vattr va; char *fullpath, *freepath; int error; kif->kf_vnode_type = vntype_to_kinfo(vp->v_type); freepath = NULL; fullpath = "-"; error = vn_fullpath(curthread, vp, &fullpath, &freepath); if (error == 0) { strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path)); } if (freepath != NULL) free(freepath, M_TEMP); KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path, vn_fill_junk(kif); ); /* * Retrieve vnode attributes.
*/ va.va_fsid = VNOVAL; va.va_rdev = NODEV; vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &va, curthread->td_ucred); VOP_UNLOCK(vp, 0); if (error != 0) return (error); if (va.va_fsid != VNOVAL) kif->kf_un.kf_file.kf_file_fsid = va.va_fsid; else kif->kf_un.kf_file.kf_file_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; kif->kf_un.kf_file.kf_file_fileid = va.va_fileid; kif->kf_un.kf_file.kf_file_mode = MAKEIMODE(va.va_type, va.va_mode); kif->kf_un.kf_file.kf_file_size = va.va_size; kif->kf_un.kf_file.kf_file_rdev = va.va_rdev; return (0); } int vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td) { #ifdef HWPMC_HOOKS struct pmckern_map_in pkm; #endif struct mount *mp; struct vnode *vp; vm_object_t object; vm_prot_t maxprot; boolean_t writecounted; int error; #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) /* * POSIX shared-memory objects are defined to have * kernel persistence, and are not defined to support * read(2)/write(2) -- or even open(2). Thus, we can * use MAP_ASYNC to trade on-disk coherence for speed. * The shm_open(3) library routine turns on the FPOSIXSHM * flag to request this behavior. */ if ((fp->f_flag & FPOSIXSHM) != 0) flags |= MAP_NOSYNC; #endif vp = fp->f_vnode; /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ mp = vp->v_mount; if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { maxprot = VM_PROT_NONE; if ((prot & VM_PROT_EXECUTE) != 0) return (EACCES); } else maxprot = VM_PROT_EXECUTE; if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_READ; else if ((prot & VM_PROT_READ) != 0) return (EACCES); /* * If we are sharing potential changes via MAP_SHARED and we * are trying to get write permission although we opened it * without asking for it, bail out. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_flag & FWRITE) != 0) maxprot |= VM_PROT_WRITE; else if ((prot & VM_PROT_WRITE) != 0) return (EACCES); } else { maxprot |= VM_PROT_WRITE; cap_maxprot |= VM_PROT_WRITE; } maxprot &= cap_maxprot; /* * For regular files and shared memory, POSIX requires that * the value of foff be a legitimate offset within the data * object. In particular, negative offsets are invalid. * Blocking negative offsets and overflows here avoids * possible wraparound or user-level access into reserved * ranges of the data object later. In contrast, POSIX does * not dictate how offsets are used by device drivers, so in * the case of a device mapping a negative offset is passed * on. */ if ( #ifdef _LP64 size > OFF_MAX || #endif foff < 0 || foff > OFF_MAX - size) return (EINVAL); writecounted = FALSE; error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp, &foff, &object, &writecounted); if (error != 0) return (error); error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, foff, writecounted, td); if (error != 0) { /* * If this mapping was accounted for in the vnode's * writecount, then undo that now. */ if (writecounted) vnode_pager_release_writecount(object, 0, size); vm_object_deallocate(object); } #ifdef HWPMC_HOOKS /* Inform hwpmc(4) if an executable is being mapped. 
*/ if (PMC_HOOK_INSTALLED(PMC_FN_MMAP)) { if ((prot & VM_PROT_EXECUTE) != 0 && error == 0) { pkm.pm_file = vp; pkm.pm_address = (uintptr_t) *addr; PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); } } #endif return (error); } Index: stable/11/sys/libkern/zlib.c =================================================================== --- stable/11/sys/libkern/zlib.c (revision 331642) +++ stable/11/sys/libkern/zlib.c (revision 331643) @@ -1,5414 +1,5414 @@ /* * This file is derived from various .h and .c files from the zlib-1.0.4 * distribution by Jean-loup Gailly and Mark Adler, with some additions * by Paul Mackerras to aid in implementing Deflate compression and * decompression for PPP packets. See zlib.h for conditions of * distribution and use. * * Changes that have been made include: * - added Z_PACKET_FLUSH (see zlib.h for details) * - added inflateIncomp and deflateOutputPending * - allow strm->next_out to be NULL, meaning discard the output * * $FreeBSD$ */ /* * ==FILEVERSION 971210== * * This marker is used by the Linux installation script to determine * whether an up-to-date version of this file is already installed. */ #define NO_DUMMY_DECL #define NO_ZCFUNCS #define MY_ZCALLOC #if defined(__FreeBSD__) && defined(_KERNEL) #define _tr_init _zlib104_tr_init #define _tr_align _zlib104_tr_align #define _tr_tally _zlib104_tr_tally #define _tr_flush_block _zlib104_tr_flush_block #define _tr_stored_block _zlib104_tr_stored_block #define inflate_fast _zlib104_inflate_fast #define inflate _zlib104_inflate #define zlibVersion _zlib104_Version #endif /* +++ zutil.h */ /*- * zutil.h -- internal interface and configuration of the compression library * Copyright (C) 1995-1996 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. */ /* From: zutil.h,v 1.16 1996/07/24 13:41:13 me Exp $ */ #ifndef _Z_UTIL_H #define _Z_UTIL_H #ifdef _KERNEL #include #else #include "zlib.h" #endif #ifdef _KERNEL /* Assume this is a *BSD or SVR4 kernel */ #include #include #include #include #include #include # define HAVE_MEMCPY #else #if defined(__KERNEL__) /* Assume this is a Linux kernel */ #include #define HAVE_MEMCPY #else /* not kernel */ #if defined(MSDOS)||defined(VMS)||defined(CRAY)||defined(WIN32)||defined(RISCOS) # include # include #else extern int errno; #endif #ifdef STDC # include # include #endif #endif /* __KERNEL__ */ #endif /* _KERNEL */ #ifndef local # define local static #endif /* compile with -Dlocal if your debugger can't find static symbols */ typedef unsigned char uch; typedef uch FAR uchf; typedef unsigned short ush; typedef ush FAR ushf; typedef unsigned long ulg; static const char *z_errmsg[10]; /* indexed by 2-zlib_error */ /* (size given to avoid silly warnings with Visual C++) */ #define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] #define ERR_RETURN(strm,err) \ return (strm->msg = (const char*)ERR_MSG(err), (err)) /* To be used only when the state is known to be valid */ /* common constants */ #ifndef DEF_WBITS # define DEF_WBITS MAX_WBITS #endif /* default windowBits for decompression. 
MAX_WBITS is for compression only */ #if MAX_MEM_LEVEL >= 8 # define DEF_MEM_LEVEL 8 #else # define DEF_MEM_LEVEL MAX_MEM_LEVEL #endif /* default memLevel */ #define STORED_BLOCK 0 #define STATIC_TREES 1 #define DYN_TREES 2 /* The three kinds of block type */ #define MIN_MATCH 3 #define MAX_MATCH 258 /* The minimum and maximum match lengths */ #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ /* target dependencies */ #ifdef MSDOS # define OS_CODE 0x00 # ifdef __TURBOC__ # include <alloc.h> # else /* MSC or DJGPP */ # include <malloc.h> # endif #endif #ifdef OS2 # define OS_CODE 0x06 #endif #ifdef WIN32 /* Windows 95 & Windows NT */ # define OS_CODE 0x0b #endif #if defined(VAXC) || defined(VMS) # define OS_CODE 0x02 # define FOPEN(name, mode) \ fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") #endif #ifdef AMIGA # define OS_CODE 0x01 #endif #if defined(ATARI) || defined(atarist) # define OS_CODE 0x05 #endif #ifdef MACOS # define OS_CODE 0x07 #endif #ifdef __50SERIES /* Prime/PRIMOS */ # define OS_CODE 0x0F #endif #ifdef TOPS20 # define OS_CODE 0x0a #endif #if defined(_BEOS_) || defined(RISCOS) # define fdopen(fd,mode) NULL /* No fdopen() */ #endif /* Common defaults */ #ifndef OS_CODE # define OS_CODE 0x03 /* assume Unix */ #endif #ifndef FOPEN # define FOPEN(name, mode) fopen((name), (mode)) #endif /* functions */ #ifdef HAVE_STRERROR extern char *strerror OF((int)); # define zstrerror(errnum) strerror(errnum) #else # define zstrerror(errnum) "" #endif #if defined(pyr) # define NO_MEMCPY #endif #if (defined(M_I86SM) || defined(M_I86MM)) && !defined(_MSC_VER) /* Use our own functions for small and medium model with MSC <= 5.0. * You may have to use the same strategy for Borland C (untested). */ # define NO_MEMCPY #endif #if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) # define HAVE_MEMCPY #endif #ifdef HAVE_MEMCPY # ifdef SMALL_MEDIUM /* MSDOS small or medium model */ # define zmemcpy _fmemcpy # define zmemcmp _fmemcmp # define zmemzero(dest, len) _fmemset(dest, 0, len) # else # define zmemcpy memcpy # define zmemcmp memcmp # define zmemzero(dest, len) memset(dest, 0, len) # endif #else extern void zmemcpy OF((Bytef* dest, Bytef* source, uInt len)); extern int zmemcmp OF((Bytef* s1, Bytef* s2, uInt len)); extern void zmemzero OF((Bytef* dest, uInt len)); #endif /* Diagnostic functions */ #ifdef DEBUG_ZLIB # include <stdio.h> # ifndef verbose # define verbose 0 # endif extern void z_error OF((char *m)); # define Assert(cond,msg) {if(!(cond)) z_error(msg);} # define Trace(x) fprintf x # define Tracev(x) {if (verbose) fprintf x ;} # define Tracevv(x) {if (verbose>1) fprintf x ;} # define Tracec(c,x) {if (verbose && (c)) fprintf x ;} # define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} #else # define Assert(cond,msg) # define Trace(x) # define Tracev(x) # define Tracevv(x) # define Tracec(c,x) # define Tracecv(c,x) #endif typedef uLong (*check_func) OF((uLong check, const Bytef *buf, uInt len)); voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); void zcfree OF((voidpf opaque, voidpf ptr)); #define ZALLOC(strm, items, size) \ (*((strm)->zalloc))((strm)->opaque, (items), (size)) #define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) #define TRY_FREE(s, p) {if (p) ZFREE(s, p);} #endif /* _Z_UTIL_H */ /* --- zutil.h */ /* +++ deflate.h */ /* deflate.h -- internal compression state * Copyright (C) 1995-1996 Jean-loup Gailly * For conditions of distribution and use, see copyright notice in zlib.h */ /* WARNING: this file should *not* be
used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. */ /* From: deflate.h,v 1.10 1996/07/02 12:41:00 me Exp $ */ #ifndef _DEFLATE_H #define _DEFLATE_H /* #include "zutil.h" */ /* =========================================================================== * Internal compression state. */ #define LENGTH_CODES 29 /* number of length codes, not counting the special END_BLOCK code */ #define LITERALS 256 /* number of literal bytes 0..255 */ #define L_CODES (LITERALS+1+LENGTH_CODES) /* number of Literal or Length codes, including the END_BLOCK code */ #define D_CODES 30 /* number of distance codes */ #define BL_CODES 19 /* number of codes used to transfer the bit lengths */ #define HEAP_SIZE (2*L_CODES+1) /* maximum heap size */ #define MAX_BITS 15 /* All codes must not exceed MAX_BITS bits */ #define INIT_STATE 42 #define BUSY_STATE 113 #define FINISH_STATE 666 /* Stream status */ /* Data structure describing a single value and its code string. */ typedef struct ct_data_s { union { ush freq; /* frequency count */ ush code; /* bit string */ } fc; union { ush dad; /* father node in Huffman tree */ ush len; /* length of bit string */ } dl; } FAR ct_data; #define Freq fc.freq #define Code fc.code #define Dad dl.dad #define Len dl.len typedef struct static_tree_desc_s static_tree_desc; typedef struct tree_desc_s { ct_data *dyn_tree; /* the dynamic tree */ int max_code; /* largest code with non zero frequency */ static_tree_desc *stat_desc; /* the corresponding static tree */ } FAR tree_desc; typedef ush Pos; typedef Pos FAR Posf; typedef unsigned IPos; /* A Pos is an index in the character window. We use short instead of int to * save space in the various tables. IPos is used only for parameter passing. */ typedef struct deflate_state { z_streamp strm; /* pointer back to this zlib stream */ int status; /* as the name implies */ Bytef *pending_buf; /* output still pending */ ulg pending_buf_size; /* size of pending_buf */ Bytef *pending_out; /* next pending byte to output to the stream */ int pending; /* nb of bytes in the pending buffer */ int noheader; /* suppress zlib header and adler32 */ Byte data_type; /* UNKNOWN, BINARY or ASCII */ Byte method; /* STORED (for zip only) or DEFLATED */ int last_flush; /* value of flush param for previous deflate call */ /* used by deflate.c: */ uInt w_size; /* LZ77 window size (32K by default) */ uInt w_bits; /* log2(w_size) (8..16) */ uInt w_mask; /* w_size - 1 */ Bytef *window; /* Sliding window. Input bytes are read into the second half of the window, * and move to the first half later to keep a dictionary of at least wSize * bytes. With this organization, matches are limited to a distance of * wSize-MAX_MATCH bytes, but this ensures that IO is always * performed with a length multiple of the block size. Also, it limits * the window size to 64K, which is quite useful on MSDOS. * To do: use the user input buffer as sliding window. */ ulg window_size; /* Actual size of window: 2*wSize, except when the user input buffer * is directly used as sliding window. */ Posf *prev; /* Link to older string with same hash index. To limit the size of this * array to 64K, this link is maintained only for the last 32K strings. * An index in this array is thus a window index modulo 32K. */ Posf *head; /* Heads of the hash chains or NIL. 
*/ uInt ins_h; /* hash index of string to be inserted */ uInt hash_size; /* number of elements in hash table */ uInt hash_bits; /* log2(hash_size) */ uInt hash_mask; /* hash_size-1 */ uInt hash_shift; /* Number of bits by which ins_h must be shifted at each input * step. It must be such that after MIN_MATCH steps, the oldest * byte no longer takes part in the hash key, that is: * hash_shift * MIN_MATCH >= hash_bits */ long block_start; /* Window position at the beginning of the current output block. Gets * negative when the window is moved backwards. */ uInt match_length; /* length of best match */ IPos prev_match; /* previous match */ int match_available; /* set if previous match exists */ uInt strstart; /* start of string to insert */ uInt match_start; /* start of matching string */ uInt lookahead; /* number of valid bytes ahead in window */ uInt prev_length; /* Length of the best match at previous step. Matches not greater than this * are discarded. This is used in the lazy match evaluation. */ uInt max_chain_length; /* To speed up deflation, hash chains are never searched beyond this * length. A higher limit improves compression ratio but degrades the * speed. */ uInt max_lazy_match; /* Attempt to find a better match only when the current match is strictly * smaller than this value. This mechanism is used only for compression * levels >= 4. */ # define max_insert_length max_lazy_match /* Insert new strings in the hash table only if the match length is not * greater than this length. This saves time but degrades compression. * max_insert_length is used only for compression levels <= 3. */ int level; /* compression level (1..9) */ int strategy; /* favor or force Huffman coding */ uInt good_match; /* Use a faster search when the previous match is longer than this */ int nice_match; /* Stop searching when current match exceeds this */ /* used by trees.c: */ /* Didn't use ct_data typedef below to suppress compiler warning */ struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ struct tree_desc_s l_desc; /* desc. for literal tree */ struct tree_desc_s d_desc; /* desc. for distance tree */ struct tree_desc_s bl_desc; /* desc. for bit length tree */ ush bl_count[MAX_BITS+1]; /* number of codes at each bit length for an optimal tree */ int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ int heap_len; /* number of elements in the heap */ int heap_max; /* element of largest frequency */ /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. * The same heap array is used to build all trees. */ uch depth[2*L_CODES+1]; /* Depth of each subtree used as tie breaker for trees of equal frequency */ uchf *l_buf; /* buffer for literals or lengths */ uInt lit_bufsize; /* Size of match buffer for literals/lengths. There are 4 reasons for * limiting lit_bufsize to 64K: * - frequencies can be kept in 16 bit counters * - if compression is not successful for the first block, all input * data is still in the window so we can still emit a stored block even * when input comes from standard input. (This can also be done for * all blocks if lit_bufsize is not greater than 32K.) * - if compression is not successful for a file smaller than 64K, we can * even emit a stored file instead of a stored block (saving 5 bytes). * This is applicable only for zip (not gzip or zlib).
* - creating new Huffman trees less frequently may not provide fast * adaptation to changes in the input data statistics. (Take for * example a binary file with poorly compressible code followed by * a highly compressible string table.) Smaller buffer sizes give * fast adaptation but of course have the overhead of transmitting * trees more frequently. * - I can't count above 4 */ uInt last_lit; /* running index in l_buf */ ushf *d_buf; /* Buffer for distances. To simplify the code, d_buf and l_buf have * the same number of elements. To use different lengths, an extra flag * array would be necessary. */ ulg opt_len; /* bit length of current block with optimal trees */ ulg static_len; /* bit length of current block with static trees */ ulg compressed_len; /* total bit length of compressed file */ uInt matches; /* number of string matches in current block */ int last_eob_len; /* bit length of EOB code for last block */ #ifdef DEBUG_ZLIB ulg bits_sent; /* bit length of the compressed data */ #endif ush bi_buf; /* Output buffer. bits are inserted starting at the bottom (least * significant bits). */ int bi_valid; /* Number of valid bits in bi_buf. All bits above the last valid bit * are always zero. */ } FAR deflate_state; /* Output a byte on the stream. * IN assertion: there is enough room in pending_buf. */ #define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) /* Minimum amount of lookahead, except at the end of the input file. * See deflate.c for comments about the MIN_MATCH+1. */ #define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) /* In order to simplify the code, particularly on 16 bit machines, match * distances are limited to MAX_DIST instead of WSIZE. */ /* in trees.c */ void _tr_init OF((deflate_state *s)); int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); ulg _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, int eof)); void _tr_align OF((deflate_state *s)); void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, int eof)); void _tr_stored_type_only OF((deflate_state *)); #endif /* --- deflate.h */ /* +++ deflate.c */ /* deflate.c -- compress data using the deflation algorithm * Copyright (C) 1995-1996 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ /* * ALGORITHM * * The "deflation" process depends on being able to identify portions * of the input text which are identical to earlier input (within a * sliding window trailing behind the input currently being processed). * * The most straightforward technique turns out to be the fastest for * most input files: try all possible matches and select the longest. * The key feature of this algorithm is that insertions into the string * dictionary are very simple and thus fast, and deletions are avoided * completely. Insertions are performed at each input character, whereas * string matches are performed only when the previous match ends. So it * is preferable to spend more time in matches to allow very fast string * insertions and avoid deletions. The matching algorithm for small * strings is inspired by that of Rabin & Karp. A brute force approach * is used to find longer strings when a small match has been found. * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze * (by Leonid Broukhis).
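The Rabin & Karp style matcher keys its hash chains on a rolling hash of MIN_MATCH bytes. The following is a self-contained userland model of the UPDATE_HASH/INSERT_STRING bookkeeping defined further below; the constants are simplified and the program is illustrative only, not the kernel code:

#include <stdio.h>
#include <string.h>

#define MIN_MATCH	3
#define HASH_BITS	8
#define HASH_SIZE	(1 << HASH_BITS)
#define HASH_MASK	(HASH_SIZE - 1)
#define HASH_SHIFT	((HASH_BITS + MIN_MATCH - 1) / MIN_MATCH)
#define NIL		(-1)

/* After MIN_MATCH updates the oldest byte has shifted out of the key. */
#define UPDATE(h, c)	((h) = (((h) << HASH_SHIFT) ^ (c)) & HASH_MASK)

int
main(void)
{
	const char *win = "abcabcabc";	/* stand-in for the sliding window */
	int head[HASH_SIZE], prev[64];
	unsigned h = 0;
	size_t i, n = strlen(win);

	for (i = 0; i < HASH_SIZE; i++)
		head[i] = NIL;
	UPDATE(h, win[0]);	/* prime with the first MIN_MATCH-1 bytes */
	UPDATE(h, win[1]);
	for (i = 0; i + MIN_MATCH <= n; i++) {
		UPDATE(h, win[i + MIN_MATCH - 1]);
		prev[i] = head[h];	/* chain to most recent same-hash pos */
		head[h] = (int)i;
		if (prev[i] != NIL)
			printf("pos %zu chains to pos %d\n", i, prev[i]);
	}
	return (0);
}

On this input every position from 3 onward chains back three bytes, which is exactly the structure longest_match() later walks.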
* A previous version of this file used a more sophisticated algorithm * (by Fiala and Greene) which is guaranteed to run in linear amortized * time, but has a larger average cost, uses more memory and is patented. * However the F&G algorithm may be faster for some highly redundant * files if the parameter max_chain_length (described below) is too large. * * ACKNOWLEDGEMENTS * * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and * I found it in 'freeze' written by Leonid Broukhis. * Thanks to many people for bug reports and testing. * * REFERENCES * * Deutsch, L.P., "DEFLATE Compressed Data Format Specification". * Available in ftp://ds.internic.net/rfc/rfc1951.txt * * A description of the Rabin and Karp algorithm is given in the book * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. * * Fiala,E.R., and Greene,D.H. * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505 * */ /* From: deflate.c,v 1.15 1996/07/24 13:40:58 me Exp $ */ /* #include "deflate.h" */ char deflate_copyright[] = " deflate 1.0.4 Copyright 1995-1996 Jean-loup Gailly "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot include such an acknowledgment, I would appreciate that you keep this copyright string in the executable of your product. */ /* =========================================================================== * Function prototypes. */ typedef enum { need_more, /* block not completed, need more input or more output */ block_done, /* block flush performed */ finish_started, /* finish started, need only more output at next deflate */ finish_done /* finish done, accept no more input or output */ } block_state; typedef block_state (*compress_func) OF((deflate_state *s, int flush)); /* Compression function. Returns the block state after the call. */ local void fill_window OF((deflate_state *s)); local block_state deflate_stored OF((deflate_state *s, int flush)); local block_state deflate_fast OF((deflate_state *s, int flush)); local block_state deflate_slow OF((deflate_state *s, int flush)); local void lm_init OF((deflate_state *s)); local void putShortMSB OF((deflate_state *s, uInt b)); local void flush_pending OF((z_streamp strm)); local int read_buf OF((z_streamp strm, charf *buf, unsigned size)); #ifdef ASMV void match_init OF((void)); /* asm code initialization */ uInt longest_match OF((deflate_state *s, IPos cur_match)); #else local uInt longest_match OF((deflate_state *s, IPos cur_match)); #endif #ifdef DEBUG_ZLIB local void check_match OF((deflate_state *s, IPos start, IPos match, int length)); #endif /* =========================================================================== * Local data */ #define NIL 0 /* Tail of hash chains */ #ifndef TOO_FAR # define TOO_FAR 4096 #endif /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) /* Minimum amount of lookahead, except at the end of the input file. * See deflate.c for comments about the MIN_MATCH+1. */ /* Values for max_lazy_match, good_match and max_chain_length, depending on * the desired pack level (0..9). The values given below have been tuned to * exclude worst case performance for pathological files. Better values may be * found for specific files.
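Through the userland zlib API the compression level is precisely an index into a table like the one below; a small hedged demo (ordinary zlib linked with -lz, not this kernel copy) comparing level 1 and level 9 on mildly redundant input:

#include <stdio.h>
#include <zlib.h>

int
main(void)
{
	static unsigned char in[4096], out1[8192], out9[8192];
	uLongf n1 = sizeof(out1), n9 = sizeof(out9);
	size_t i;

	for (i = 0; i < sizeof(in); i++)	/* mildly redundant input */
		in[i] = (unsigned char)(i % 64);
	if (compress2(out1, &n1, in, sizeof(in), 1) != Z_OK ||
	    compress2(out9, &n9, in, sizeof(in), 9) != Z_OK)
		return (1);
	printf("level 1: %lu bytes, level 9: %lu bytes\n", n1, n9);
	return (0);
}

Level 1 maps to the fastest deflate_fast() row and level 9 to the deepest deflate_slow() row of the table.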
*/ typedef struct config_s { ush good_length; /* reduce lazy search above this match length */ ush max_lazy; /* do not perform lazy search above this match length */ ush nice_length; /* quit search above this match length */ ush max_chain; compress_func func; } config; local config configuration_table[10] = { /* good lazy nice chain */ /* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ /* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */ /* 2 */ {4, 5, 16, 8, deflate_fast}, /* 3 */ {4, 6, 32, 32, deflate_fast}, /* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ /* 5 */ {8, 16, 32, 32, deflate_slow}, /* 6 */ {8, 16, 128, 128, deflate_slow}, /* 7 */ {8, 32, 128, 256, deflate_slow}, /* 8 */ {32, 128, 258, 1024, deflate_slow}, /* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */ /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 * For deflate_fast() (levels <= 3) good is ignored and lazy has a different * meaning. */ #define EQUAL 0 /* result of memcmp for equal strings */ #ifndef NO_DUMMY_DECL struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ #endif /* =========================================================================== * Update a hash value with the given input byte * IN assertion: all calls to UPDATE_HASH are made with consecutive * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */ #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) /* =========================================================================== * Insert string str in the dictionary and set match_head to the previous head * of the hash chain (the most recent string with same hash key). Return * the previous length of the hash chain. * IN assertion: all calls to INSERT_STRING are made with consecutive * input characters and the first MIN_MATCH bytes of str are valid * (except for the last MIN_MATCH-1 bytes of the input file). */ #define INSERT_STRING(s, str, match_head) \ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \ s->head[s->ins_h] = (Pos)(str)) /* =========================================================================== * Initialize the hash table (avoiding 64K overflow for 16 bit systems). * prev[] will be initialized on the fly. */ #define CLEAR_HASH(s) \ s->head[s->hash_size-1] = NIL; \ zmemzero((charf *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); /* ========================================================================= */ int deflateInit_(strm, level, version, stream_size) z_streamp strm; int level; const char *version; int stream_size; { return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, version, stream_size); /* To do: ignore strm->next_in if we use it as window */ } /* ========================================================================= */ int deflateInit2_(strm, level, method, windowBits, memLevel, strategy, version, stream_size) z_streamp strm; int level; int method; int windowBits; int memLevel; int strategy; const char *version; int stream_size; { deflate_state *s; int noheader = 0; static char* my_version = ZLIB_VERSION; ushf *overlay; /* We overlay pending_buf and d_buf+l_buf. This works since the average * output size for (length,distance) codes is <= 24 bits.
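* Concretely, given the assignments below: pending_buf is allocated as * lit_bufsize*(sizeof(ush)+2) = 4*lit_bufsize bytes; d_buf aliases its * middle half (two bytes per buffered symbol) and l_buf its final quarter * (one byte per symbol), so the pending output written from the front of * the buffer never catches up with the symbol data while the 24-bit * average above holds.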
*/ if (version == Z_NULL || version[0] != my_version[0] || stream_size != sizeof(z_stream)) { return Z_VERSION_ERROR; } if (strm == Z_NULL) return Z_STREAM_ERROR; strm->msg = Z_NULL; #ifndef NO_ZCFUNCS if (strm->zalloc == Z_NULL) { strm->zalloc = zcalloc; strm->opaque = (voidpf)0; } if (strm->zfree == Z_NULL) strm->zfree = zcfree; #endif if (level == Z_DEFAULT_COMPRESSION) level = 6; if (windowBits < 0) { /* undocumented feature: suppress zlib header */ noheader = 1; windowBits = -windowBits; } if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || windowBits < 9 || windowBits > 15 || level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { return Z_STREAM_ERROR; } s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); if (s == Z_NULL) return Z_MEM_ERROR; strm->state = (struct internal_state FAR *)s; s->strm = strm; s->noheader = noheader; s->w_bits = windowBits; s->w_size = 1 << s->w_bits; s->w_mask = s->w_size - 1; s->hash_bits = memLevel + 7; s->hash_size = 1 << s->hash_bits; s->hash_mask = s->hash_size - 1; s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); s->pending_buf = (uchf *) overlay; s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || s->pending_buf == Z_NULL) { strm->msg = (const char*)ERR_MSG(Z_MEM_ERROR); deflateEnd (strm); return Z_MEM_ERROR; } s->d_buf = overlay + s->lit_bufsize/sizeof(ush); s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; s->level = level; s->strategy = strategy; s->method = (Byte)method; return deflateReset(strm); } /* ========================================================================= */ int deflateSetDictionary (strm, dictionary, dictLength) z_streamp strm; const Bytef *dictionary; uInt dictLength; { deflate_state *s; uInt length = dictLength; uInt n; IPos hash_head = 0; if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL) return Z_STREAM_ERROR; s = (deflate_state *) strm->state; if (s->status != INIT_STATE) return Z_STREAM_ERROR; strm->adler = adler32(strm->adler, dictionary, dictLength); if (length < MIN_MATCH) return Z_OK; if (length > MAX_DIST(s)) { length = MAX_DIST(s); #ifndef USE_DICT_HEAD dictionary += dictLength - length; /* use the tail of the dictionary */ #endif } zmemcpy((charf *)s->window, dictionary, length); s->strstart = length; s->block_start = (long)length; /* Insert all strings in the hash table (except for the last two bytes). * s->lookahead stays null, so s->ins_h will be recomputed at the next * call of fill_window. 
*/ s->ins_h = s->window[0]; UPDATE_HASH(s, s->ins_h, s->window[1]); for (n = 0; n <= length - MIN_MATCH; n++) { INSERT_STRING(s, n, hash_head); } if (hash_head) hash_head = 0; /* to make compiler happy */ return Z_OK; } /* ========================================================================= */ int deflateReset (strm) z_streamp strm; { deflate_state *s; if (strm == Z_NULL || strm->state == Z_NULL || strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR; strm->total_in = strm->total_out = 0; strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ strm->data_type = Z_UNKNOWN; s = (deflate_state *)strm->state; s->pending = 0; s->pending_out = s->pending_buf; if (s->noheader < 0) { s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */ } s->status = s->noheader ? BUSY_STATE : INIT_STATE; strm->adler = 1; s->last_flush = Z_NO_FLUSH; _tr_init(s); lm_init(s); return Z_OK; } /* ========================================================================= */ int deflateParams(strm, level, strategy) z_streamp strm; int level; int strategy; { deflate_state *s; compress_func func; int err = Z_OK; if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; s = (deflate_state *) strm->state; if (level == Z_DEFAULT_COMPRESSION) { level = 6; } if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { return Z_STREAM_ERROR; } func = configuration_table[s->level].func; if (func != configuration_table[level].func && strm->total_in != 0) { /* Flush the last buffer: */ err = deflate(strm, Z_PARTIAL_FLUSH); } if (s->level != level) { s->level = level; s->max_lazy_match = configuration_table[level].max_lazy; s->good_match = configuration_table[level].good_length; s->nice_match = configuration_table[level].nice_length; s->max_chain_length = configuration_table[level].max_chain; } s->strategy = strategy; return err; } /* ========================================================================= * Put a short in the pending buffer. The 16-bit value is put in MSB order. * IN assertion: the stream state is correct and there is enough room in * pending_buf. */ local void putShortMSB (s, b) deflate_state *s; uInt b; { put_byte(s, (Byte)(b >> 8)); put_byte(s, (Byte)(b & 0xff)); } /* ========================================================================= * Flush as much pending output as possible. All deflate() output goes * through this function so some applications may wish to modify it * to avoid allocating a large strm->next_out buffer and copying into it. * (See also read_buf()). 
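All output of the userland API drains through the same pending-buffer path described here; a minimal one-shot stream against ordinary zlib (not this kernel copy) shows the usual init/deflate/end sequence:

#include <stdio.h>
#include <string.h>
#include <zlib.h>

int
main(void)
{
	z_stream zs;
	unsigned char out[256];
	const char *in = "a tiny stream, deflated in one call";

	memset(&zs, 0, sizeof(zs));	/* Z_NULL zalloc/zfree: use defaults */
	if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK)
		return (1);
	zs.next_in = (Bytef *)in;
	zs.avail_in = (uInt)strlen(in);
	zs.next_out = out;
	zs.avail_out = sizeof(out);
	if (deflate(&zs, Z_FINISH) != Z_STREAM_END) {	/* fits one buffer */
		deflateEnd(&zs);
		return (1);
	}
	printf("deflated to %lu bytes\n", zs.total_out);
	return (deflateEnd(&zs) == Z_OK ? 0 : 1);
}

With a smaller output buffer the same loop would be repeated, calling deflate() with the same flush value until it reports Z_STREAM_END, which is exactly the contract the flush handling below implements.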
*/ local void flush_pending(strm) z_streamp strm; { deflate_state *s = (deflate_state *) strm->state; unsigned len = s->pending; if (len > strm->avail_out) len = strm->avail_out; if (len == 0) return; if (strm->next_out != Z_NULL) { zmemcpy(strm->next_out, s->pending_out, len); strm->next_out += len; } s->pending_out += len; strm->total_out += len; strm->avail_out -= len; s->pending -= len; if (s->pending == 0) { s->pending_out = s->pending_buf; } } /* ========================================================================= */ int deflate (strm, flush) z_streamp strm; int flush; { int old_flush; /* value of flush param for previous deflate call */ deflate_state *s; if (strm == Z_NULL || strm->state == Z_NULL || flush > Z_FINISH || flush < 0) { return Z_STREAM_ERROR; } s = (deflate_state *) strm->state; if ((strm->next_in == Z_NULL && strm->avail_in != 0) || (s->status == FINISH_STATE && flush != Z_FINISH)) { ERR_RETURN(strm, Z_STREAM_ERROR); } if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); s->strm = strm; /* just in case */ old_flush = s->last_flush; s->last_flush = flush; /* Write the zlib header */ if (s->status == INIT_STATE) { uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; uInt level_flags = (s->level-1) >> 1; if (level_flags > 3) level_flags = 3; header |= (level_flags << 6); if (s->strstart != 0) header |= PRESET_DICT; header += 31 - (header % 31); s->status = BUSY_STATE; putShortMSB(s, header); /* Save the adler32 of the preset dictionary: */ if (s->strstart != 0) { putShortMSB(s, (uInt)(strm->adler >> 16)); putShortMSB(s, (uInt)(strm->adler & 0xffff)); } strm->adler = 1L; } /* Flush as much pending output as possible */ if (s->pending != 0) { flush_pending(strm); if (strm->avail_out == 0) { /* Since avail_out is 0, deflate will be called again with * more output space, but possibly with both pending and * avail_in equal to zero. There won't be anything to do, * but this is not an error situation so make sure we * return OK instead of BUF_ERROR at next call of deflate: */ s->last_flush = -1; return Z_OK; } /* Make sure there is something to do and avoid duplicate consecutive * flushes. For repeated and useless calls with Z_FINISH, we keep * returning Z_STREAM_END instead of Z_BUF_ERROR. */ } else if (strm->avail_in == 0 && flush <= old_flush && flush != Z_FINISH) { ERR_RETURN(strm, Z_BUF_ERROR); } /* User must not provide more input after the first FINISH: */ if (s->status == FINISH_STATE && strm->avail_in != 0) { ERR_RETURN(strm, Z_BUF_ERROR); } /* Start a new block or continue the current one. */ if (strm->avail_in != 0 || s->lookahead != 0 || (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { block_state bstate; bstate = (*(configuration_table[s->level].func))(s, flush); if (bstate == finish_started || bstate == finish_done) { s->status = FINISH_STATE; } if (bstate == need_more || bstate == finish_started) { if (strm->avail_out == 0) { s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ } return Z_OK; /* If flush != Z_NO_FLUSH && avail_out == 0, the next call * of deflate should use the same flush parameter to make sure * that the flush is complete. So we don't have to output an * empty block here, this will be done at next call. This also * ensures that for a very small output buffer, we emit at most * one empty block. */ } if (bstate == block_done) { if (flush == Z_PARTIAL_FLUSH) { _tr_align(s); } else if (flush == Z_PACKET_FLUSH) { /* Output just the 3-bit `stored' block type value, but not a zero length.
*/ _tr_stored_type_only(s); } else { /* FULL_FLUSH or SYNC_FLUSH */ _tr_stored_block(s, (char*)0, 0L, 0); /* For a full flush, this empty block will be recognized * as a special marker by inflate_sync(). */ if (flush == Z_FULL_FLUSH) { CLEAR_HASH(s); /* forget history */ } } flush_pending(strm); if (strm->avail_out == 0) { s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ return Z_OK; } } } Assert(strm->avail_out > 0, "bug2"); if (flush != Z_FINISH) return Z_OK; if (s->noheader) return Z_STREAM_END; /* Write the zlib trailer (adler32) */ putShortMSB(s, (uInt)(strm->adler >> 16)); putShortMSB(s, (uInt)(strm->adler & 0xffff)); flush_pending(strm); /* If avail_out is zero, the application will call deflate again * to flush the rest. */ s->noheader = -1; /* write the trailer only once! */ return s->pending != 0 ? Z_OK : Z_STREAM_END; } /* ========================================================================= */ int deflateEnd (strm) z_streamp strm; { int status; deflate_state *s; if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; s = (deflate_state *) strm->state; status = s->status; if (status != INIT_STATE && status != BUSY_STATE && status != FINISH_STATE) { return Z_STREAM_ERROR; } /* Deallocate in reverse order of allocations: */ TRY_FREE(strm, s->pending_buf); TRY_FREE(strm, s->head); TRY_FREE(strm, s->prev); TRY_FREE(strm, s->window); ZFREE(strm, s); strm->state = Z_NULL; return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; } /* ========================================================================= * Copy the source state to the destination state. */ int deflateCopy (dest, source) z_streamp dest; z_streamp source; { deflate_state *ds; deflate_state *ss; ushf *overlay; if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) return Z_STREAM_ERROR; ss = (deflate_state *) source->state; zmemcpy(dest, source, sizeof(*dest)); ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); if (ds == Z_NULL) return Z_MEM_ERROR; dest->state = (struct internal_state FAR *) ds; zmemcpy(ds, ss, sizeof(*ds)); ds->strm = dest; ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); ds->pending_buf = (uchf *) overlay; if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || ds->pending_buf == Z_NULL) { deflateEnd (dest); return Z_MEM_ERROR; } /* ??? following zmemcpy doesn't work for 16-bit MSDOS */ zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; ds->l_desc.dyn_tree = ds->dyn_ltree; ds->d_desc.dyn_tree = ds->dyn_dtree; ds->bl_desc.dyn_tree = ds->bl_tree; return Z_OK; } /* =========================================================================== * Return the number of bytes of output which are immediately available * for output from the compressor.
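read_buf() below folds each consumed input chunk into the stream's running adler32; the public adler32() function composes the same way, as this small userland check (ordinary zlib) demonstrates:

#include <assert.h>
#include <zlib.h>

int
main(void)
{
	static const unsigned char data[] = "checksummed incrementally";
	uInt n = (uInt)sizeof(data) - 1;
	uLong whole, part;

	whole = adler32(adler32(0L, Z_NULL, 0), data, n);
	part = adler32(0L, Z_NULL, 0);		/* initial value */
	part = adler32(part, data, 5);		/* first chunk */
	part = adler32(part, data + 5, n - 5);	/* remainder */
	assert(whole == part);			/* incremental == one-shot */
	return (0);
}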
*/ int deflateOutputPending (strm) z_streamp strm; { if (strm == Z_NULL || strm->state == Z_NULL) return 0; return ((deflate_state *)(strm->state))->pending; } /* =========================================================================== * Read a new buffer from the current input stream, update the adler32 * and total number of bytes read. All deflate() input goes through * this function so some applications may wish to modify it to avoid * allocating a large strm->next_in buffer and copying from it. * (See also flush_pending()). */ local int read_buf(strm, buf, size) z_streamp strm; charf *buf; unsigned size; { unsigned len = strm->avail_in; if (len > size) len = size; if (len == 0) return 0; strm->avail_in -= len; if (!((deflate_state *)(strm->state))->noheader) { strm->adler = adler32(strm->adler, strm->next_in, len); } zmemcpy(buf, strm->next_in, len); strm->next_in += len; strm->total_in += len; return (int)len; } /* =========================================================================== * Initialize the "longest match" routines for a new zlib stream */ local void lm_init (s) deflate_state *s; { s->window_size = (ulg)2L*s->w_size; CLEAR_HASH(s); /* Set the default configuration parameters: */ s->max_lazy_match = configuration_table[s->level].max_lazy; s->good_match = configuration_table[s->level].good_length; s->nice_match = configuration_table[s->level].nice_length; s->max_chain_length = configuration_table[s->level].max_chain; s->strstart = 0; s->block_start = 0L; s->lookahead = 0; s->match_length = s->prev_length = MIN_MATCH-1; s->match_available = 0; s->ins_h = 0; #ifdef ASMV match_init(); /* initialize the asm code */ #endif } /* =========================================================================== * Set match_start to the longest match starting at the given string and * return its length. Matches shorter or equal to prev_length are discarded, * in which case the result is equal to prev_length and match_start is * garbage. * IN assertions: cur_match is the head of the hash chain for the current * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 * OUT assertion: the match length is not greater than s->lookahead. */ #ifndef ASMV /* For 80x86 and 680x0, an optimized version will be provided in match.asm or * match.S. The code will be functionally equivalent. */ local uInt longest_match(s, cur_match) deflate_state *s; IPos cur_match; /* current match */ { unsigned chain_length = s->max_chain_length;/* max hash chain length */ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ - register int len; /* length of current match */ + Bytef *scan = s->window + s->strstart; /* current string */ + Bytef *match; /* matched string */ + int len; /* length of current match */ int best_len = s->prev_length; /* best match length so far */ int nice_match = s->nice_match; /* stop if match long enough */ IPos limit = s->strstart > (IPos)MAX_DIST(s) ? s->strstart - (IPos)MAX_DIST(s) : NIL; /* Stop when cur_match becomes <= limit. To simplify the code, * we prevent matches with the string of window index 0. */ Posf *prev = s->prev; uInt wmask = s->w_mask; #ifdef UNALIGNED_OK /* Compare two bytes at a time. Note: this is not always beneficial. * Try with and without -DUNALIGNED_OK to check. 
*/ - register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; - register ush scan_start = *(ushf*)scan; - register ush scan_end = *(ushf*)(scan+best_len-1); + Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + ush scan_start = *(ushf*)scan; + ush scan_end = *(ushf*)(scan+best_len-1); #else - register Bytef *strend = s->window + s->strstart + MAX_MATCH; - register Byte scan_end1 = scan[best_len-1]; - register Byte scan_end = scan[best_len]; + Bytef *strend = s->window + s->strstart + MAX_MATCH; + Byte scan_end1 = scan[best_len-1]; + Byte scan_end = scan[best_len]; #endif /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. * It is easy to get rid of this optimization if necessary. */ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); /* Do not waste too much time if we already have a good match: */ if (s->prev_length >= s->good_match) { chain_length >>= 2; } /* Do not look for matches beyond the end of the input. This is necessary * to make deflate deterministic. */ if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); do { Assert(cur_match < s->strstart, "no future"); match = s->window + cur_match; /* Skip to next match if the match length cannot increase * or if the match length is less than 2: */ #if (defined(UNALIGNED_OK) && MAX_MATCH == 258) /* This code assumes sizeof(unsigned short) == 2. Do not use * UNALIGNED_OK if your compiler uses a different size. */ if (*(ushf*)(match+best_len-1) != scan_end || *(ushf*)match != scan_start) continue; /* It is not necessary to compare scan[2] and match[2] since they are * always equal when the other bytes match, given that the hash keys * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at * strstart+3, +5, ... up to strstart+257. We check for insufficient * lookahead only every 4th comparison; the 128th check will be made * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is * necessary to put more guard bytes at the end of the window, or * to check more often for insufficient lookahead. */ Assert(scan[2] == match[2], "scan[2]?"); scan++, match++; do { } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && *(ushf*)(scan+=2) == *(ushf*)(match+=2) && *(ushf*)(scan+=2) == *(ushf*)(match+=2) && *(ushf*)(scan+=2) == *(ushf*)(match+=2) && scan < strend); /* The funny "do {}" generates better code on most compilers */ /* Here, scan <= window+strstart+257 */ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); if (*scan == *match) scan++; len = (MAX_MATCH - 1) - (int)(strend-scan); scan = strend - (MAX_MATCH-1); #else /* UNALIGNED_OK */ if (match[best_len] != scan_end || match[best_len-1] != scan_end1 || *match != *scan || *++match != scan[1]) continue; /* The check at best_len-1 can be removed because it will be made * again later. (This heuristic is not always a win.) * It is not necessary to compare scan[2] and match[2] since they * are always equal when the other bytes match, given that * the hash keys are equal and that HASH_BITS >= 8. */ scan += 2, match++; Assert(*scan == *match, "match[2]?"); /* We check for insufficient lookahead only every 8th comparison; * the 256th check will be made at strstart+258. 
*/ do { } while (*++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && scan < strend); Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); len = MAX_MATCH - (int)(strend - scan); scan = strend - MAX_MATCH; #endif /* UNALIGNED_OK */ if (len > best_len) { s->match_start = cur_match; best_len = len; if (len >= nice_match) break; #ifdef UNALIGNED_OK scan_end = *(ushf*)(scan+best_len-1); #else scan_end1 = scan[best_len-1]; scan_end = scan[best_len]; #endif } } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0); if ((uInt)best_len <= s->lookahead) return best_len; return s->lookahead; } #endif /* ASMV */ #ifdef DEBUG_ZLIB /* =========================================================================== * Check that the match at match_start is indeed a match. */ local void check_match(s, start, match, length) deflate_state *s; IPos start, match; int length; { /* check that the match is indeed a match */ if (zmemcmp((charf *)s->window + match, (charf *)s->window + start, length) != EQUAL) { fprintf(stderr, " start %u, match %u, length %d\n", start, match, length); do { fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); } while (--length != 0); z_error("invalid match"); } if (z_verbose > 1) { fprintf(stderr,"\\[%d,%d]", start-match, length); do { putc(s->window[start++], stderr); } while (--length != 0); } } #else # define check_match(s, start, match, length) #endif /* =========================================================================== * Fill the window when the lookahead becomes insufficient. * Updates strstart and lookahead. * * IN assertion: lookahead < MIN_LOOKAHEAD * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD * At least one byte has been read, or avail_in == 0; reads are * performed for at least two bytes (required for the zip translate_eol * option -- not supported here). */ local void fill_window(s) deflate_state *s; { - register unsigned n, m; - register Posf *p; + unsigned n, m; + Posf *p; unsigned more; /* Amount of free space at the end of the window. */ uInt wsize = s->w_size; do { more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); /* Deal with !@#$% 64K limit: */ if (more == 0 && s->strstart == 0 && s->lookahead == 0) { more = wsize; } else if (more == (unsigned)(-1)) { /* Very unlikely, but possible on 16 bit machine if strstart == 0 * and lookahead == 1 (input done one byte at time) */ more--; /* If the window is almost full and there is insufficient lookahead, * move the upper half to the lower one to make room in the upper half. */ } else if (s->strstart >= wsize+MAX_DIST(s)) { zmemcpy((charf *)s->window, (charf *)s->window+wsize, (unsigned)wsize); s->match_start -= wsize; s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ s->block_start -= (long) wsize; /* Slide the hash table (could be avoided with 32 bit values at the expense of memory usage). We slide even when level == 0 to keep the hash table consistent if we switch back to level > 0 later. (Using level 0 permanently is not an optimal usage of zlib, so we don't care about this pathological case.) */ n = s->hash_size; p = &s->head[n]; do { m = *--p; *p = (Pos)(m >= wsize ? m-wsize : NIL); } while (--n); n = wsize; p = &s->prev[n]; do { m = *--p; *p = (Pos)(m >= wsize ? m-wsize : NIL); /* If n is not on any hash chain, prev[n] is garbage but * its value will never be used. 
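 *
 * (Worked example, illustration only: with w_size == 32768, once
 * strstart reaches 32768 + MAX_DIST(s) the upper half of the window
 * is copied down, 32768 is subtracted from strstart, match_start and
 * block_start, and each head[]/prev[] link is rebased by -32768, or
 * cleared to NIL if it pointed into the discarded half.)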
*/ } while (--n); more += wsize; } if (s->strm->avail_in == 0) return; /* If there was no sliding: * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && * more == window_size - lookahead - strstart * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) * => more >= window_size - 2*WSIZE + 2 * In the BIG_MEM or MMAP case (not yet supported), * window_size == input_size + MIN_LOOKAHEAD && * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. * Otherwise, window_size == 2*WSIZE so more >= 2. * If there was sliding, more >= WSIZE. So in all cases, more >= 2. */ Assert(more >= 2, "more < 2"); n = read_buf(s->strm, (charf *)s->window + s->strstart + s->lookahead, more); s->lookahead += n; /* Initialize the hash value now that we have some input: */ if (s->lookahead >= MIN_MATCH) { s->ins_h = s->window[s->strstart]; UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); #if MIN_MATCH != 3 Call UPDATE_HASH() MIN_MATCH-3 more times #endif } /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, * but this is not important since only literal bytes will be emitted. */ } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); } /* =========================================================================== * Flush the current block, with given end-of-file flag. * IN assertion: strstart is set to the end of the current match. */ #define FLUSH_BLOCK_ONLY(s, eof) { \ _tr_flush_block(s, (s->block_start >= 0L ? \ (charf *)&s->window[(unsigned)s->block_start] : \ (charf *)Z_NULL), \ (ulg)((long)s->strstart - s->block_start), \ (eof)); \ s->block_start = s->strstart; \ flush_pending(s->strm); \ Tracev((stderr,"[FLUSH]")); \ } /* Same but force premature exit if necessary. */ #define FLUSH_BLOCK(s, eof) { \ FLUSH_BLOCK_ONLY(s, eof); \ if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ } /* =========================================================================== * Copy without compression as much as possible from the input stream, return * the current block state. * This function does not insert new strings in the dictionary since * uncompressible data is probably not useful. This function is used * only for the level=0 compression option. * NOTE: this function should be optimized to avoid extra copying from * window to pending_buf. 
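 *
 * For reference (from the deflate specification, not this code): after
 * the 3 block-header bits are padded to a byte boundary, a stored
 * block is LEN (2 bytes, LSB first), NLEN (its one's complement), then
 * LEN raw bytes copied verbatim -- the "5 byte header" accounted for
 * in deflate_stored() below, and the reason stored blocks are capped
 * at 0xffff bytes.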
*/ local block_state deflate_stored(s, flush) deflate_state *s; int flush; { /* Stored blocks are limited to 0xffff bytes, pending_buf is limited * to pending_buf_size, and each stored block has a 5 byte header: */ ulg max_block_size = 0xffff; ulg max_start; if (max_block_size > s->pending_buf_size - 5) { max_block_size = s->pending_buf_size - 5; } /* Copy as much as possible from input to output: */ for (;;) { /* Fill the window as much as possible: */ if (s->lookahead <= 1) { Assert(s->strstart < s->w_size+MAX_DIST(s) || s->block_start >= (long)s->w_size, "slide too late"); fill_window(s); if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; if (s->lookahead == 0) break; /* flush the current block */ } Assert(s->block_start >= 0L, "block gone"); s->strstart += s->lookahead; s->lookahead = 0; /* Emit a stored block if pending_buf will be full: */ max_start = s->block_start + max_block_size; if (s->strstart == 0 || (ulg)s->strstart >= max_start) { /* strstart == 0 is possible when wraparound on 16-bit machine */ s->lookahead = (uInt)(s->strstart - max_start); s->strstart = (uInt)max_start; FLUSH_BLOCK(s, 0); } /* Flush if we may have to slide, otherwise block_start may become * negative and the data will be gone: */ if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { FLUSH_BLOCK(s, 0); } } FLUSH_BLOCK(s, flush == Z_FINISH); return flush == Z_FINISH ? finish_done : block_done; } /* =========================================================================== * Compress as much as possible from the input stream, return the current * block state. * This function does not perform lazy evaluation of matches and inserts * new strings in the dictionary only for unmatched strings or for short * matches. It is used only for the fast compression options. */ local block_state deflate_fast(s, flush) deflate_state *s; int flush; { IPos hash_head = NIL; /* head of the hash chain */ int bflush; /* set if current block must be flushed */ for (;;) { /* Make sure that we always have enough lookahead, except * at the end of the input file. We need MAX_MATCH bytes * for the next match, plus MIN_MATCH bytes to insert the * string following the next match. */ if (s->lookahead < MIN_LOOKAHEAD) { fill_window(s); if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { return need_more; } if (s->lookahead == 0) break; /* flush the current block */ } /* Insert the string window[strstart .. strstart+2] in the * dictionary, and set hash_head to the head of the hash chain: */ if (s->lookahead >= MIN_MATCH) { INSERT_STRING(s, s->strstart, hash_head); } /* Find the longest match, discarding those <= prev_length. * At this point we have always match_length < MIN_MATCH */ if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { /* To simplify the code, we prevent matches with the string * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ if (s->strategy != Z_HUFFMAN_ONLY) { s->match_length = longest_match (s, hash_head); } /* longest_match() sets match_start */ } if (s->match_length >= MIN_MATCH) { check_match(s, s->strstart, s->match_start, s->match_length); bflush = _tr_tally(s, s->strstart - s->match_start, s->match_length - MIN_MATCH); s->lookahead -= s->match_length; /* Insert new strings in the hash table only if the match length * is not too large. This saves time but degrades compression. 
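 *
 * INSERT_STRING (defined with the deflate state earlier in this file)
 * is, roughly, the following rolling-hash update -- a sketch rather
 * than the exact macro text:
 *
 *     UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
 *     hash_head = s->head[s->ins_h];
 *     s->prev[str & s->w_mask] = hash_head;
 *     s->head[s->ins_h] = (Pos)str;
 *
 * i.e. each inserted position becomes the new front of its hash chain
 * and hash_head receives the chain's previous front.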
*/ if (s->match_length <= s->max_insert_length && s->lookahead >= MIN_MATCH) { s->match_length--; /* string at strstart already in hash table */ do { s->strstart++; INSERT_STRING(s, s->strstart, hash_head); /* strstart never exceeds WSIZE-MAX_MATCH, so there are * always MIN_MATCH bytes ahead. */ } while (--s->match_length != 0); s->strstart++; } else { s->strstart += s->match_length; s->match_length = 0; s->ins_h = s->window[s->strstart]; UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); #if MIN_MATCH != 3 Call UPDATE_HASH() MIN_MATCH-3 more times #endif /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not * matter since it will be recomputed at next deflate call. */ } } else { /* No match, output a literal byte */ Tracevv((stderr,"%c", s->window[s->strstart])); bflush = _tr_tally (s, 0, s->window[s->strstart]); s->lookahead--; s->strstart++; } if (bflush) FLUSH_BLOCK(s, 0); } FLUSH_BLOCK(s, flush == Z_FINISH); return flush == Z_FINISH ? finish_done : block_done; } /* =========================================================================== * Same as above, but achieves better compression. We use a lazy * evaluation for matches: a match is finally adopted only if there is * no better match at the next window position. */ local block_state deflate_slow(s, flush) deflate_state *s; int flush; { IPos hash_head = NIL; /* head of hash chain */ int bflush; /* set if current block must be flushed */ /* Process the input block. */ for (;;) { /* Make sure that we always have enough lookahead, except * at the end of the input file. We need MAX_MATCH bytes * for the next match, plus MIN_MATCH bytes to insert the * string following the next match. */ if (s->lookahead < MIN_LOOKAHEAD) { fill_window(s); if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { return need_more; } if (s->lookahead == 0) break; /* flush the current block */ } /* Insert the string window[strstart .. strstart+2] in the * dictionary, and set hash_head to the head of the hash chain: */ if (s->lookahead >= MIN_MATCH) { INSERT_STRING(s, s->strstart, hash_head); } /* Find the longest match, discarding those <= prev_length. */ s->prev_length = s->match_length, s->prev_match = s->match_start; s->match_length = MIN_MATCH-1; if (hash_head != NIL && s->prev_length < s->max_lazy_match && s->strstart - hash_head <= MAX_DIST(s)) { /* To simplify the code, we prevent matches with the string * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ if (s->strategy != Z_HUFFMAN_ONLY) { s->match_length = longest_match (s, hash_head); } /* longest_match() sets match_start */ if (s->match_length <= 5 && (s->strategy == Z_FILTERED || (s->match_length == MIN_MATCH && s->strstart - s->match_start > TOO_FAR))) { /* If prev_match is also MIN_MATCH, match_start is garbage * but we will ignore the current match anyway. */ s->match_length = MIN_MATCH-1; } } /* If there was a match at the previous step and the current * match is not better, output the previous match: */ if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; /* Do not insert strings in hash table beyond this. */ check_match(s, s->strstart-1, s->prev_match, s->prev_length); bflush = _tr_tally(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH); /* Insert in hash table all strings up to the end of the match. * strstart-1 and strstart are already inserted. 
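 * (Worked example, for illustration: after adopting a 6-byte match
 * whose first byte is at position p == strstart-1, positions p and
 * p+1 are already in the table, and the loop below enters p+2 .. p+5,
 * stopping early at max_insert if the lookahead runs out.)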
If there is not * enough lookahead, the last two strings are not inserted in * the hash table. */ s->lookahead -= s->prev_length-1; s->prev_length -= 2; do { if (++s->strstart <= max_insert) { INSERT_STRING(s, s->strstart, hash_head); } } while (--s->prev_length != 0); s->match_available = 0; s->match_length = MIN_MATCH-1; s->strstart++; if (bflush) FLUSH_BLOCK(s, 0); } else if (s->match_available) { /* If there was no match at the previous position, output a * single literal. If there was a match but the current match * is longer, truncate the previous match to a single literal. */ Tracevv((stderr,"%c", s->window[s->strstart-1])); if (_tr_tally (s, 0, s->window[s->strstart-1])) { FLUSH_BLOCK_ONLY(s, 0); } s->strstart++; s->lookahead--; if (s->strm->avail_out == 0) return need_more; } else { /* There is no previous match to compare with, wait for * the next step to decide. */ s->match_available = 1; s->strstart++; s->lookahead--; } } Assert (flush != Z_NO_FLUSH, "no flush?"); if (s->match_available) { Tracevv((stderr,"%c", s->window[s->strstart-1])); _tr_tally (s, 0, s->window[s->strstart-1]); s->match_available = 0; } FLUSH_BLOCK(s, flush == Z_FINISH); return flush == Z_FINISH ? finish_done : block_done; } /* --- deflate.c */ /* +++ trees.c */ /* trees.c -- output deflated data using Huffman coding * Copyright (C) 1995-1996 Jean-loup Gailly * For conditions of distribution and use, see copyright notice in zlib.h */ /* * ALGORITHM * * The "deflation" process uses several Huffman trees. The more * common source values are represented by shorter bit sequences. * * Each code tree is stored in a compressed form which is itself * a Huffman encoding of the lengths of all the code strings (in * ascending order by source values). The actual code strings are * reconstructed from the lengths in the inflate process, as described * in the deflate specification. * * REFERENCES * * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc * * Storer, James A. * Data Compression: Methods and Theory, pp. 49-50. * Computer Science Press, 1988. ISBN 0-7167-8156-5. * * Sedgewick, R. * Algorithms, p290. * Addison-Wesley, 1983. ISBN 0-201-06672-6. */ /* From: trees.c,v 1.11 1996/07/24 13:41:06 me Exp $ */ /* #include "deflate.h" */ #ifdef DEBUG_ZLIB # include <ctype.h> #endif /* =========================================================================== * Constants */ #define MAX_BL_BITS 7 /* Bit length codes must not exceed MAX_BL_BITS bits */ #define END_BLOCK 256 /* end of block literal code */ #define REP_3_6 16 /* repeat previous bit length 3-6 times (2 bits of repeat count) */ #define REPZ_3_10 17 /* repeat a zero length 3-10 times (3 bits of repeat count) */ #define REPZ_11_138 18 /* repeat a zero length 11-138 times (7 bits of repeat count) */ local int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; local int extra_dbits[D_CODES] /* extra bits for each distance code */ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; local int extra_blbits[BL_CODES]/* extra bits for each bit length code */ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; local uch bl_order[BL_CODES] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; /* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */ #define Buf_size (8 * 2*sizeof(char)) /* Number of bits used within bi_buf.
(bi_buf might be implemented on * more than 16 bits on some systems.) */ /* =========================================================================== * Local data. These are initialized only once. */ local ct_data static_ltree[L_CODES+2]; /* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see _tr_init * below). */ local ct_data static_dtree[D_CODES]; /* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */ local uch dist_code[512]; /* distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */ local uch length_code[MAX_MATCH-MIN_MATCH+1]; /* length code for each normalized match length (0 == MIN_MATCH) */ local int base_length[LENGTH_CODES]; /* First normalized length for each code (0 = MIN_MATCH) */ local int base_dist[D_CODES]; /* First normalized distance for each code (0 = distance of 1) */ struct static_tree_desc_s { ct_data *static_tree; /* static tree or NULL */ intf *extra_bits; /* extra bits for each code or NULL */ int extra_base; /* base index for extra_bits */ int elems; /* max number of elements in the tree */ int max_length; /* max bit length for the codes */ }; local static_tree_desc static_l_desc = {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; local static_tree_desc static_d_desc = {static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; local static_tree_desc static_bl_desc = {(ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; /* =========================================================================== * Local (static) routines in this file. */ local void tr_static_init OF((void)); local void init_block OF((deflate_state *s)); local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); local void build_tree OF((deflate_state *s, tree_desc *desc)); local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); local int build_bl_tree OF((deflate_state *s)); local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, int blcodes)); local void compress_block OF((deflate_state *s, ct_data *ltree, ct_data *dtree)); local void set_data_type OF((deflate_state *s)); local unsigned bi_reverse OF((unsigned value, int length)); local void bi_windup OF((deflate_state *s)); local void bi_flush OF((deflate_state *s)); local void copy_block OF((deflate_state *s, charf *buf, unsigned len, int header)); #ifndef DEBUG_ZLIB # define send_code(s, c, tree) send_bits(s, tree[(c)].Code, tree[(c)].Len) /* Send a code of the given tree. c and tree must not have side effects */ #else /* DEBUG_ZLIB */ # define send_code(s, c, tree) \ { if (verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ send_bits(s, tree[c].Code, tree[c].Len); } #endif #define d_code(dist) \ ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)]) /* Mapping from a distance to a distance code. dist is the distance - 1 and * must not have side effects. dist_code[256] and dist_code[257] are never * used. */ /* =========================================================================== * Output a short LSB first on the stream. * IN assertion: there is enough room in pendingBuf. 
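 * For example, put_short(s, 0x1234) emits the byte 0x34 followed by
 * the byte 0x12.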
*/ #define put_short(s, w) { \ put_byte(s, (uch)((w) & 0xff)); \ put_byte(s, (uch)((ush)(w) >> 8)); \ } /* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. */ #ifdef DEBUG_ZLIB local void send_bits OF((deflate_state *s, int value, int length)); local void send_bits(s, value, length) deflate_state *s; int value; /* value to send */ int length; /* number of bits */ { Tracevv((stderr," l %2d v %4x ", length, value)); Assert(length > 0 && length <= 15, "invalid length"); s->bits_sent += (ulg)length; /* If not enough room in bi_buf, use (valid) bits from bi_buf and * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) * unused bits in value. */ if (s->bi_valid > (int)Buf_size - length) { s->bi_buf |= (value << s->bi_valid); put_short(s, s->bi_buf); s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); s->bi_valid += length - Buf_size; } else { s->bi_buf |= value << s->bi_valid; s->bi_valid += length; } } #else /* !DEBUG_ZLIB */ #define send_bits(s, value, length) \ { int len = (length);\ if ((s)->bi_valid > (int)Buf_size - len) {\ int val = (value);\ (s)->bi_buf |= (val << (s)->bi_valid);\ put_short((s), (s)->bi_buf);\ (s)->bi_buf = (ush)val >> (Buf_size - (s)->bi_valid);\ (s)->bi_valid += len - Buf_size;\ } else {\ (s)->bi_buf |= (value) << (s)->bi_valid;\ (s)->bi_valid += len;\ }\ } #endif /* DEBUG_ZLIB */ /* the arguments must not have side effects */ /* =========================================================================== * Initialize the various 'constant' tables. In a multi-threaded environment, * this function may be called by two threads concurrently, but this is * harmless since both invocations do exactly the same thing. 
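 *
 * (Concrete example of the tables built here, from the deflate spec:
 * match lengths 3..10 map one-to-one onto codes 257..264 with no
 * extra bits, lengths 11-12 share code 265 with 1 extra bit, and so
 * on up to code 285, which represents length 258 alone; base_length[]
 * and length_code[] record this mapping for normalized lengths
 * 0..255, and base_dist[] and dist_code[] the analogous one for
 * distances.)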
*/ local void tr_static_init() { static int static_init_done = 0; int n; /* iterates over tree elements */ int bits; /* bit counter */ int length; /* length value */ int code; /* code value */ int dist; /* distance index */ ush bl_count[MAX_BITS+1]; /* number of codes at each bit length for an optimal tree */ if (static_init_done) return; /* Initialize the mapping length (0..255) -> length code (0..28) */ length = 0; for (code = 0; code < LENGTH_CODES-1; code++) { base_length[code] = length; for (n = 0; n < (1<<extra_lbits[code]); n++) { length_code[length++] = (uch)code; } } Assert (length == 256, "tr_static_init: length != 256"); /* Note that the length 255 (match length 258) can be represented * in two different ways: code 284 + 5 bits or code 285, so we * overwrite length_code[255] to use the best encoding: */ length_code[length-1] = (uch)code; /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ dist = 0; for (code = 0 ; code < 16; code++) { base_dist[code] = dist; for (n = 0; n < (1<<extra_dbits[code]); n++) { dist_code[dist++] = (uch)code; } } Assert (dist == 256, "tr_static_init: dist != 256"); dist >>= 7; /* from now on, all distances are divided by 128 */ for ( ; code < D_CODES; code++) { base_dist[code] = dist << 7; for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { dist_code[256 + dist++] = (uch)code; } } Assert (dist == 256, "tr_static_init: 256+dist != 512"); /* Construct the codes of the static literal tree */ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; n = 0; while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; /* Codes 286 and 287 do not exist, but we must include them in the * tree construction to get a canonical Huffman tree (longest code * all ones) */ gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); /* The static distance tree is trivial: */ for (n = 0; n < D_CODES; n++) { static_dtree[n].Len = 5; static_dtree[n].Code = bi_reverse((unsigned)n, 5); } static_init_done = 1; } /* =========================================================================== * Initialize the tree data structures for a new zlib stream. */ void _tr_init(s) deflate_state *s; { tr_static_init(); s->compressed_len = 0L; s->l_desc.dyn_tree = s->dyn_ltree; s->l_desc.stat_desc = &static_l_desc; s->d_desc.dyn_tree = s->dyn_dtree; s->d_desc.stat_desc = &static_d_desc; s->bl_desc.dyn_tree = s->bl_tree; s->bl_desc.stat_desc = &static_bl_desc; s->bi_buf = 0; s->bi_valid = 0; s->last_eob_len = 8; /* enough lookahead for inflate */ #ifdef DEBUG_ZLIB s->bits_sent = 0L; #endif /* Initialize the first block of the first file: */ init_block(s); } /* =========================================================================== * Initialize a new block. */ local void init_block(s) deflate_state *s; { int n; /* iterates over tree elements */ /* Initialize the trees. */ for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; s->dyn_ltree[END_BLOCK].Freq = 1; s->opt_len = s->static_len = 0L; s->last_lit = s->matches = 0; } #define SMALLEST 1 /* Index within the heap array of least frequent node in the Huffman tree */ /* =========================================================================== * Remove the smallest element from the heap and recreate the heap with * one less element. Updates heap and heap_len. */ #define pqremove(s, tree, top) \ {\ top = s->heap[SMALLEST]; \ s->heap[SMALLEST] = s->heap[s->heap_len--]; \ pqdownheap(s, tree, SMALLEST); \ } /* =========================================================================== * Compares two subtrees, using the tree depth as tie breaker when * the subtrees have equal frequency. This minimizes the worst case length.
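 * E.g. with tree[n].Freq == tree[m].Freq and depth[n] == 1,
 * depth[m] == 3, smaller() prefers n, so the shallower subtree is
 * combined first and code lengths stay as balanced as possible.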
*/ #define smaller(tree, n, m, depth) \ (tree[n].Freq < tree[m].Freq || \ (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) /* =========================================================================== * Restore the heap property by moving down the tree starting at node k, * exchanging a node with the smallest of its two sons if necessary, stopping * when the heap property is re-established (each father smaller than its * two sons). */ local void pqdownheap(s, tree, k) deflate_state *s; ct_data *tree; /* the tree to restore */ int k; /* node to move down */ { int v = s->heap[k]; int j = k << 1; /* left son of k */ while (j <= s->heap_len) { /* Set j to the smallest of the two sons: */ if (j < s->heap_len && smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { j++; } /* Exit if v is smaller than both sons */ if (smaller(tree, v, s->heap[j], s->depth)) break; /* Exchange v with the smallest son */ s->heap[k] = s->heap[j]; k = j; /* And continue down the tree, setting j to the left son of k */ j <<= 1; } s->heap[k] = v; } /* =========================================================================== * Compute the optimal bit lengths for a tree and update the total bit length * for the current block. * IN assertion: the fields freq and dad are set, heap[heap_max] and * above are the tree nodes sorted by increasing frequency. * OUT assertions: the field len is set to the optimal bit length, the * array bl_count contains the frequencies for each bit length. * The length opt_len is updated; static_len is also updated if stree is * not null. */ local void gen_bitlen(s, desc) deflate_state *s; tree_desc *desc; /* the tree descriptor */ { ct_data *tree = desc->dyn_tree; int max_code = desc->max_code; ct_data *stree = desc->stat_desc->static_tree; intf *extra = desc->stat_desc->extra_bits; int base = desc->stat_desc->extra_base; int max_length = desc->stat_desc->max_length; int h; /* heap index */ int n, m; /* iterate over the tree elements */ int bits; /* bit length */ int xbits; /* extra bits */ ush f; /* frequency */ int overflow = 0; /* number of elements with bit length too large */ for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; /* In a first pass, compute the optimal bit lengths (which may * overflow in the case of the bit length tree). */ tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ for (h = s->heap_max+1; h < HEAP_SIZE; h++) { n = s->heap[h]; bits = tree[tree[n].Dad].Len + 1; if (bits > max_length) bits = max_length, overflow++; tree[n].Len = (ush)bits; /* We overwrite tree[n].Dad which is no longer needed */ if (n > max_code) continue; /* not a leaf node */ s->bl_count[bits]++; xbits = 0; if (n >= base) xbits = extra[n-base]; f = tree[n].Freq; s->opt_len += (ulg)f * (bits + xbits); if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); } if (overflow == 0) return; Trace((stderr,"\nbit length overflow\n")); /* This happens for example on obj2 and pic of the Calgary corpus */ /* Find the first bit length which could increase: */ do { bits = max_length-1; while (s->bl_count[bits] == 0) bits--; s->bl_count[bits]--; /* move one leaf down the tree */ s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ s->bl_count[max_length]--; /* The brother of the overflow item also moves one step up, * but this does not affect bl_count[max_length] */ overflow -= 2; } while (overflow > 0); /* Now recompute all bit lengths, scanning in increasing frequency. * h is still equal to HEAP_SIZE. 
(It is simpler to reconstruct all * lengths instead of fixing only the wrong ones. This idea is taken * from 'ar' written by Haruhiko Okumura.) */ for (bits = max_length; bits != 0; bits--) { n = s->bl_count[bits]; while (n != 0) { m = s->heap[--h]; if (m > max_code) continue; if (tree[m].Len != (unsigned) bits) { Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); s->opt_len += ((long)bits - (long)tree[m].Len) *(long)tree[m].Freq; tree[m].Len = (ush)bits; } n--; } } } /* =========================================================================== * Generate the codes for a given tree and bit counts (which need not be * optimal). * IN assertion: the array bl_count contains the bit length statistics for * the given tree and the field len is set for all tree elements. * OUT assertion: the field code is set for all tree elements of non * zero code length. */ local void gen_codes (tree, max_code, bl_count) ct_data *tree; /* the tree to decorate */ int max_code; /* largest code with non zero frequency */ ushf *bl_count; /* number of codes at each bit length */ { ush next_code[MAX_BITS+1]; /* next code value for each bit length */ ush code = 0; /* running code value */ int bits; /* bit index */ int n; /* code index */ /* The distribution counts are first used to generate the code values * without bit reversal. */ for (bits = 1; bits <= MAX_BITS; bits++) { next_code[bits] = code = (code + bl_count[bits-1]) << 1; } /* Check that the bit counts in bl_count are consistent. The last code * must be all ones. */ Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, "inconsistent bit counts"); Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); for (n = 0; n <= max_code; n++) { int len = tree[n].Len; if (len == 0) continue; /* Now reverse the bits */ tree[n].Code = bi_reverse(next_code[len]++, len); Tracec(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); } } /* =========================================================================== * Construct one Huffman tree and assign the code bit strings and lengths. * Update the total bit length for the current block. * IN assertion: the field freq is set for all tree elements. * OUT assertions: the fields len and code are set to the optimal bit length * and corresponding code. The length opt_len is updated; static_len is * also updated if stree is not null. The field max_code is set. */ local void build_tree(s, desc) deflate_state *s; tree_desc *desc; /* the tree descriptor */ { ct_data *tree = desc->dyn_tree; ct_data *stree = desc->stat_desc->static_tree; int elems = desc->stat_desc->elems; int n, m; /* iterate over heap elements */ int max_code = -1; /* largest code with non zero frequency */ int node; /* new node being created */ /* Construct the initial heap, with least frequent element in * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. * heap[0] is not used. */ s->heap_len = 0, s->heap_max = HEAP_SIZE; for (n = 0; n < elems; n++) { if (tree[n].Freq != 0) { s->heap[++(s->heap_len)] = max_code = n; s->depth[n] = 0; } else { tree[n].Len = 0; } } /* The pkzip format requires that at least one distance code exists, * and that at least one bit should be sent even if there is only one * possible code. So to avoid special checks later on we force at least * two codes of non zero frequency. */ while (s->heap_len < 2) { node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); tree[node].Freq = 1; s->depth[node] = 0; s->opt_len--; if (stree) s->static_len -= stree[node].Len; /* node is 0 or 1 so it does not have extra bits */ } desc->max_code = max_code; /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, * establish sub-heaps of increasing lengths: */ for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); /* Construct the Huffman tree by repeatedly combining the least two * frequent nodes.
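 *
 * (Tiny worked example, illustration only: with frequencies A:5 B:2
 * C:1 D:1 the loop first combines C and D into a node of frequency 2,
 * then that node with B into one of frequency 4, then that with A,
 * yielding code lengths A:1 B:2 C:3 D:3.)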
*/ node = elems; /* next internal node of the tree */ do { pqremove(s, tree, n); /* n = node of least frequency */ m = s->heap[SMALLEST]; /* m = node of next least frequency */ s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ s->heap[--(s->heap_max)] = m; /* Create a new node father of n and m */ tree[node].Freq = tree[n].Freq + tree[m].Freq; s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1); tree[n].Dad = tree[m].Dad = (ush)node; #ifdef DUMP_BL_TREE if (tree == s->bl_tree) { fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); } #endif /* and insert the new node in the heap */ s->heap[SMALLEST] = node++; pqdownheap(s, tree, SMALLEST); } while (s->heap_len >= 2); s->heap[--(s->heap_max)] = s->heap[SMALLEST]; /* At this point, the fields freq and dad are set. We can now * generate the bit lengths. */ gen_bitlen(s, (tree_desc *)desc); /* The field len is now set, we can generate the bit codes */ gen_codes ((ct_data *)tree, max_code, s->bl_count); } /* =========================================================================== * Scan a literal or distance tree to determine the frequencies of the codes * in the bit length tree. */ local void scan_tree (s, tree, max_code) deflate_state *s; ct_data *tree; /* the tree to be scanned */ int max_code; /* and its largest code of non zero frequency */ { int n; /* iterates over all tree elements */ int prevlen = -1; /* last emitted length */ int curlen; /* length of current code */ int nextlen = tree[0].Len; /* length of next code */ int count = 0; /* repeat count of the current code */ int max_count = 7; /* max repeat count */ int min_count = 4; /* min repeat count */ if (nextlen == 0) max_count = 138, min_count = 3; tree[max_code+1].Len = (ush)0xffff; /* guard */ for (n = 0; n <= max_code; n++) { curlen = nextlen; nextlen = tree[n+1].Len; if (++count < max_count && curlen == nextlen) { continue; } else if (count < min_count) { s->bl_tree[curlen].Freq += count; } else if (curlen != 0) { if (curlen != prevlen) s->bl_tree[curlen].Freq++; s->bl_tree[REP_3_6].Freq++; } else if (count <= 10) { s->bl_tree[REPZ_3_10].Freq++; } else { s->bl_tree[REPZ_11_138].Freq++; } count = 0; prevlen = curlen; if (nextlen == 0) { max_count = 138, min_count = 3; } else if (curlen == nextlen) { max_count = 6, min_count = 3; } else { max_count = 7, min_count = 4; } } } /* =========================================================================== * Send a literal or distance tree in compressed form, using the codes in * bl_tree. 
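 *
 * For example (illustration): a run of seven code lengths of 8 is
 * sent as the length 8 once followed by REP_3_6 with repeat count 6,
 * and a run of twelve zero lengths as a single REPZ_11_138 with
 * count 12.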
*/ local void send_tree (s, tree, max_code) deflate_state *s; ct_data *tree; /* the tree to be scanned */ int max_code; /* and its largest code of non zero frequency */ { int n; /* iterates over all tree elements */ int prevlen = -1; /* last emitted length */ int curlen; /* length of current code */ int nextlen = tree[0].Len; /* length of next code */ int count = 0; /* repeat count of the current code */ int max_count = 7; /* max repeat count */ int min_count = 4; /* min repeat count */ /* tree[max_code+1].Len = -1; */ /* guard already set */ if (nextlen == 0) max_count = 138, min_count = 3; for (n = 0; n <= max_code; n++) { curlen = nextlen; nextlen = tree[n+1].Len; if (++count < max_count && curlen == nextlen) { continue; } else if (count < min_count) { do { send_code(s, curlen, s->bl_tree); } while (--count != 0); } else if (curlen != 0) { if (curlen != prevlen) { send_code(s, curlen, s->bl_tree); count--; } Assert(count >= 3 && count <= 6, " 3_6?"); send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); } else if (count <= 10) { send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); } else { send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); } count = 0; prevlen = curlen; if (nextlen == 0) { max_count = 138, min_count = 3; } else if (curlen == nextlen) { max_count = 6, min_count = 3; } else { max_count = 7, min_count = 4; } } } /* =========================================================================== * Construct the Huffman tree for the bit lengths and return the index in * bl_order of the last bit length code to send. */ local int build_bl_tree(s) deflate_state *s; { int max_blindex; /* index of last bit length code of non zero freq */ /* Determine the bit length frequencies for literal and distance trees */ scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); /* Build the bit length tree: */ build_tree(s, (tree_desc *)(&(s->bl_desc))); /* opt_len now includes the length of the tree representations, except * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. */ /* Determine the number of bit length codes to send. The pkzip format * requires that at least 4 bit length codes be sent. (appnote.txt says * 3 but the actual value used is 4.) */ for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; } /* Update opt_len to include the bit length tree and counts */ s->opt_len += 3*(max_blindex+1) + 5+5+4; Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", s->opt_len, s->static_len)); return max_blindex; } /* =========================================================================== * Send the header for a block using dynamic Huffman trees: the counts, the * lengths of the bit length codes, the literal tree and the distance tree. * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
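 *
 * Wire layout sketch (from the deflate spec): 5 bits carry
 * lcodes-257, 5 bits dcodes-1 and 4 bits blcodes-4, then blcodes
 * 3-bit lengths in bl_order, then the literal and distance trees
 * themselves, run-length coded exactly as in send_tree() above.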
*/ local void send_all_trees(s, lcodes, dcodes, blcodes) deflate_state *s; int lcodes, dcodes, blcodes; /* number of codes for each tree */ { int rank; /* index in bl_order */ Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, "too many codes"); Tracev((stderr, "\nbl counts: ")); send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ send_bits(s, dcodes-1, 5); send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ for (rank = 0; rank < blcodes; rank++) { Tracev((stderr, "\nbl code %2d ", bl_order[rank])); send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); } Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); } /* =========================================================================== * Send a stored block */ void _tr_stored_block(s, buf, stored_len, eof) deflate_state *s; charf *buf; /* input block */ ulg stored_len; /* length of input block */ int eof; /* true if this is the last block for a file */ { send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; s->compressed_len += (stored_len + 4) << 3; copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ } /* Send just the `stored block' type code without any length bytes or data. */ void _tr_stored_type_only(s) deflate_state *s; { send_bits(s, (STORED_BLOCK << 1), 3); bi_windup(s); s->compressed_len = (s->compressed_len + 3) & ~7L; } /* =========================================================================== * Send one empty static block to give enough lookahead for inflate. * This takes 10 bits, of which 7 may remain in the bit buffer. * The current inflate code requires 9 bits of lookahead. If the * last two codes for the previous block (real code plus EOB) were coded * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode * the last real code. In this case we send two empty static blocks instead * of one. (There are no problems if the previous block is stored or fixed.) * To simplify the code, we assume the worst case of last real code encoded * on one bit only. */ void _tr_align(s) deflate_state *s; { send_bits(s, STATIC_TREES<<1, 3); send_code(s, END_BLOCK, static_ltree); s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ bi_flush(s); /* Of the 10 bits for the empty block, we have already sent * (10 - bi_valid) bits. The lookahead for the last real code (before * the EOB of the previous block) was thus at least one plus the length * of the EOB plus what we have just sent of the empty static block. */ if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { send_bits(s, STATIC_TREES<<1, 3); send_code(s, END_BLOCK, static_ltree); s->compressed_len += 10L; bi_flush(s); } s->last_eob_len = 7; } /* =========================================================================== * Determine the best encoding for the current block: dynamic trees, static * trees or store, and output the encoded block to the zip file. This function * returns the total compressed length for the file so far. 
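 *
 * In outline (an illustration of the decision below): compute
 * opt_lenb and static_lenb in bytes, then choose stored if
 * stored_len+4 <= opt_lenb and the input is still in the window,
 * static if static_lenb == opt_lenb (a tie goes to static, which
 * needs no tree transmission), and dynamic otherwise.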
*/ ulg _tr_flush_block(s, buf, stored_len, eof) deflate_state *s; charf *buf; /* input block, or NULL if too old */ ulg stored_len; /* length of input block */ int eof; /* true if this is the last block for a file */ { ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ int max_blindex = 0; /* index of last bit length code of non zero freq */ /* Build the Huffman trees unless a stored block is forced */ if (s->level > 0) { /* Check if the file is ascii or binary */ if (s->data_type == Z_UNKNOWN) set_data_type(s); /* Construct the literal and distance trees */ build_tree(s, (tree_desc *)(&(s->l_desc))); Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, s->static_len)); build_tree(s, (tree_desc *)(&(s->d_desc))); Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, s->static_len)); /* At this point, opt_len and static_len are the total bit lengths of * the compressed block data, excluding the tree representations. */ /* Build the bit length tree for the above two trees, and get the index * in bl_order of the last bit length code to send. */ max_blindex = build_bl_tree(s); /* Determine the best encoding. Compute first the block length in bytes*/ opt_lenb = (s->opt_len+3+7)>>3; static_lenb = (s->static_len+3+7)>>3; Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, s->last_lit)); if (static_lenb <= opt_lenb) opt_lenb = static_lenb; } else { Assert(buf != (char*)0, "lost buf"); opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ } /* If compression failed and this is the first and last block, * and if the .zip file can be seeked (to rewrite the local header), * the whole file is transformed into a stored file: */ #ifdef STORED_FILE_OK # ifdef FORCE_STORED_FILE if (eof && s->compressed_len == 0L) { /* force stored file */ # else if (stored_len <= opt_lenb && eof && s->compressed_len==0L && seekable()) { # endif /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ if (buf == (charf*)0) error ("block vanished"); copy_block(s, buf, (unsigned)stored_len, 0); /* without header */ s->compressed_len = stored_len << 3; s->method = STORED; } else #endif /* STORED_FILE_OK */ #ifdef FORCE_STORED if (buf != (char*)0) { /* force stored block */ #else if (stored_len+4 <= opt_lenb && buf != (char*)0) { /* 4: two words for the lengths */ #endif /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. * Otherwise we can't have processed more than WSIZE input bytes since * the last block flush, because compression would have been * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to * transform a block into a stored block. 
*/ _tr_stored_block(s, buf, stored_len, eof); #ifdef FORCE_STATIC } else if (static_lenb >= 0) { /* force static trees */ #else } else if (static_lenb == opt_lenb) { #endif send_bits(s, (STATIC_TREES<<1)+eof, 3); compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); s->compressed_len += 3 + s->static_len; } else { send_bits(s, (DYN_TREES<<1)+eof, 3); send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, max_blindex+1); compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); s->compressed_len += 3 + s->opt_len; } Assert (s->compressed_len == s->bits_sent, "bad compressed size"); init_block(s); if (eof) { bi_windup(s); s->compressed_len += 7; /* align on byte boundary */ } Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, s->compressed_len-7*eof)); return s->compressed_len >> 3; } /* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. */ int _tr_tally (s, dist, lc) deflate_state *s; unsigned dist; /* distance of matched string */ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ { s->d_buf[s->last_lit] = (ush)dist; s->l_buf[s->last_lit++] = (uch)lc; if (dist == 0) { /* lc is the unmatched char */ s->dyn_ltree[lc].Freq++; } else { s->matches++; /* Here, lc is the match length - MIN_MATCH */ dist--; /* dist = match distance - 1 */ Assert((ush)dist < (ush)MAX_DIST(s) && (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); s->dyn_ltree[length_code[lc]+LITERALS+1].Freq++; s->dyn_dtree[d_code(dist)].Freq++; } /* Try to guess if it is profitable to stop the current block here */ if (s->level > 2 && (s->last_lit & 0xfff) == 0) { /* Compute an upper bound for the compressed length */ ulg out_length = (ulg)s->last_lit*8L; ulg in_length = (ulg)((long)s->strstart - s->block_start); int dcode; for (dcode = 0; dcode < D_CODES; dcode++) { out_length += (ulg)s->dyn_dtree[dcode].Freq * (5L+extra_dbits[dcode]); } out_length >>= 3; Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", s->last_lit, in_length, out_length, 100L - out_length*100L/in_length)); if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; } return (s->last_lit == s->lit_bufsize-1); /* We avoid equality with lit_bufsize because of wraparound at 64K * on 16 bit machines and because stored blocks are restricted to * 64K-1 bytes. 
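 * (Reading the guess above: out_length starts at 8 bits per tally
 * entry, each used distance code adds about 5 bits plus its extra
 * bits, and the sum is converted to bytes; the block is cut early
 * only when matches make up less than half of the entries and the
 * estimate is below half of the input consumed so far.)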
*/ } /* =========================================================================== * Send the block data compressed using the given Huffman trees */ local void compress_block(s, ltree, dtree) deflate_state *s; ct_data *ltree; /* literal tree */ ct_data *dtree; /* distance tree */ { unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ unsigned lx = 0; /* running index in l_buf */ unsigned code; /* the code to send */ int extra; /* number of extra bits to send */ if (s->last_lit != 0) do { dist = s->d_buf[lx]; lc = s->l_buf[lx++]; if (dist == 0) { send_code(s, lc, ltree); /* send a literal byte */ Tracecv(isgraph(lc), (stderr," '%c' ", lc)); } else { /* Here, lc is the match length - MIN_MATCH */ code = length_code[lc]; send_code(s, code+LITERALS+1, ltree); /* send the length code */ extra = extra_lbits[code]; if (extra != 0) { lc -= base_length[code]; send_bits(s, lc, extra); /* send the extra length bits */ } dist--; /* dist is now the match distance - 1 */ code = d_code(dist); Assert (code < D_CODES, "bad d_code"); send_code(s, code, dtree); /* send the distance code */ extra = extra_dbits[code]; if (extra != 0) { dist -= base_dist[code]; send_bits(s, dist, extra); /* send the extra distance bits */ } } /* literal or match pair ? */ /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); } while (lx < s->last_lit); send_code(s, END_BLOCK, ltree); s->last_eob_len = ltree[END_BLOCK].Len; } /* =========================================================================== * Set the data type to ASCII or BINARY, using a crude approximation: * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. * IN assertion: the fields freq of dyn_ltree are set and the total of all * frequencies does not exceed 64K (to fit in an int on 16 bit machines). */ local void set_data_type(s) deflate_state *s; { int n = 0; unsigned ascii_freq = 0; unsigned bin_freq = 0; while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII); } /* =========================================================================== * Reverse the first len bits of a code, using straightforward code (a faster * method would use a table) * IN assertion: 1 <= len <= 15 */ local unsigned bi_reverse(code, len) unsigned code; /* the value to invert */ int len; /* its bit length */ { - register unsigned res = 0; + unsigned res = 0; do { res |= code & 1; code >>= 1, res <<= 1; } while (--len > 0); return res >> 1; } /* =========================================================================== * Flush the bit buffer, keeping at most 7 bits in it. 
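 * For example (illustration): with bi_valid == 13 the low byte of
 * bi_buf is written and the remaining 5 bits shift down; with
 * bi_valid == 16 the whole buffer is written and cleared.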
*/ local void bi_flush(s) deflate_state *s; { if (s->bi_valid == 16) { put_short(s, s->bi_buf); s->bi_buf = 0; s->bi_valid = 0; } else if (s->bi_valid >= 8) { put_byte(s, (Byte)s->bi_buf); s->bi_buf >>= 8; s->bi_valid -= 8; } } /* =========================================================================== * Flush the bit buffer and align the output on a byte boundary */ local void bi_windup(s) deflate_state *s; { if (s->bi_valid > 8) { put_short(s, s->bi_buf); } else if (s->bi_valid > 0) { put_byte(s, (Byte)s->bi_buf); } s->bi_buf = 0; s->bi_valid = 0; #ifdef DEBUG_ZLIB s->bits_sent = (s->bits_sent+7) & ~7; #endif } /* =========================================================================== * Copy a stored block, storing first the length and its * one's complement if requested. */ local void copy_block(s, buf, len, header) deflate_state *s; charf *buf; /* the input data */ unsigned len; /* its length */ int header; /* true if block header must be written */ { bi_windup(s); /* align on byte boundary */ s->last_eob_len = 8; /* enough lookahead for inflate */ if (header) { put_short(s, (ush)len); put_short(s, (ush)~len); #ifdef DEBUG_ZLIB s->bits_sent += 2*16; #endif } #ifdef DEBUG_ZLIB s->bits_sent += (ulg)len<<3; #endif /* bundle up the put_byte(s, *buf++) calls */ zmemcpy(&s->pending_buf[s->pending], buf, len); s->pending += len; } /* --- trees.c */ /* +++ inflate.c */ /* inflate.c -- zlib interface to inflate modules * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* #include "zutil.h" */ /* +++ infblock.h */ /* infblock.h -- header to use infblock.c * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. 
*/ struct inflate_blocks_state; typedef struct inflate_blocks_state FAR inflate_blocks_statef; extern inflate_blocks_statef * inflate_blocks_new OF(( z_streamp z, check_func c, /* check function */ uInt w)); /* window size */ extern int inflate_blocks OF(( inflate_blocks_statef *, z_streamp , int)); /* initial return code */ extern void inflate_blocks_reset OF(( inflate_blocks_statef *, z_streamp , uLongf *)); /* check value on output */ extern int inflate_blocks_free OF(( inflate_blocks_statef *, z_streamp , uLongf *)); /* check value on output */ extern void inflate_set_dictionary OF(( inflate_blocks_statef *s, const Bytef *d, /* dictionary */ uInt n)); /* dictionary length */ extern int inflate_addhistory OF(( inflate_blocks_statef *, z_streamp)); extern int inflate_packet_flush OF(( inflate_blocks_statef *)); /* --- infblock.h */ #ifndef NO_DUMMY_DECL struct inflate_blocks_state {int dummy;}; /* for buggy compilers */ #endif /* inflate private state */ struct internal_state { /* mode */ enum { METHOD, /* waiting for method byte */ FLAG, /* waiting for flag byte */ DICT4, /* four dictionary check bytes to go */ DICT3, /* three dictionary check bytes to go */ DICT2, /* two dictionary check bytes to go */ DICT1, /* one dictionary check byte to go */ DICT0, /* waiting for inflateSetDictionary */ BLOCKS, /* decompressing blocks */ CHECK4, /* four check bytes to go */ CHECK3, /* three check bytes to go */ CHECK2, /* two check bytes to go */ CHECK1, /* one check byte to go */ DONE, /* finished check, done */ BAD} /* got an error--stay here */ mode; /* current inflate mode */ /* mode dependent information */ union { uInt method; /* if FLAGS, method byte */ struct { uLong was; /* computed check value */ uLong need; /* stream check value */ } check; /* if CHECK, check values to compare */ uInt marker; /* if BAD, inflateSync's marker bytes count */ } sub; /* submode */ /* mode independent information */ int nowrap; /* flag for no wrapper */ uInt wbits; /* log2(window size) (8..15, defaults to 15) */ inflate_blocks_statef *blocks; /* current inflate_blocks state */ }; int inflateReset(z) z_streamp z; { uLong c; if (z == Z_NULL || z->state == Z_NULL) return Z_STREAM_ERROR; z->total_in = z->total_out = 0; z->msg = Z_NULL; z->state->mode = z->state->nowrap ? 
BLOCKS : METHOD; inflate_blocks_reset(z->state->blocks, z, &c); Trace((stderr, "inflate: reset\n")); return Z_OK; } int inflateEnd(z) z_streamp z; { uLong c; if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL) return Z_STREAM_ERROR; if (z->state->blocks != Z_NULL) inflate_blocks_free(z->state->blocks, z, &c); ZFREE(z, z->state); z->state = Z_NULL; Trace((stderr, "inflate: end\n")); return Z_OK; } int inflateInit2_(z, w, version, stream_size) z_streamp z; int w; const char *version; int stream_size; { if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || stream_size != sizeof(z_stream)) return Z_VERSION_ERROR; /* initialize state */ if (z == Z_NULL) return Z_STREAM_ERROR; z->msg = Z_NULL; #ifndef NO_ZCFUNCS if (z->zalloc == Z_NULL) { z->zalloc = zcalloc; z->opaque = (voidpf)0; } if (z->zfree == Z_NULL) z->zfree = zcfree; #endif if ((z->state = (struct internal_state FAR *) ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL) return Z_MEM_ERROR; z->state->blocks = Z_NULL; /* handle undocumented nowrap option (no zlib header or check) */ z->state->nowrap = 0; if (w < 0) { w = - w; z->state->nowrap = 1; } /* set window size */ if (w < 8 || w > 15) { inflateEnd(z); return Z_STREAM_ERROR; } z->state->wbits = (uInt)w; /* create inflate_blocks state */ if ((z->state->blocks = inflate_blocks_new(z, z->state->nowrap ? Z_NULL : adler32, (uInt)1 << w)) == Z_NULL) { inflateEnd(z); return Z_MEM_ERROR; } Trace((stderr, "inflate: allocated\n")); /* reset state */ inflateReset(z); return Z_OK; } int inflateInit_(z, version, stream_size) z_streamp z; const char *version; int stream_size; { return inflateInit2_(z, DEF_WBITS, version, stream_size); } #define NEEDBYTE {if(z->avail_in==0)goto empty;r=Z_OK;} #define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++) int inflate(z, f) z_streamp z; int f; { int r; uInt b; if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL || f < 0) return Z_STREAM_ERROR; r = Z_BUF_ERROR; while (1) switch (z->state->mode) { case METHOD: NEEDBYTE if (((z->state->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED) { z->state->mode = BAD; z->msg = (char*)"unknown compression method"; z->state->sub.marker = 5; /* can't try inflateSync */ break; } if ((z->state->sub.method >> 4) + 8 > z->state->wbits) { z->state->mode = BAD; z->msg = (char*)"invalid window size"; z->state->sub.marker = 5; /* can't try inflateSync */ break; } z->state->mode = FLAG; case FLAG: NEEDBYTE b = NEXTBYTE; if (((z->state->sub.method << 8) + b) % 31) { z->state->mode = BAD; z->msg = (char*)"incorrect header check"; z->state->sub.marker = 5; /* can't try inflateSync */ break; } Trace((stderr, "inflate: zlib header ok\n")); if (!(b & PRESET_DICT)) { z->state->mode = BLOCKS; break; } z->state->mode = DICT4; case DICT4: NEEDBYTE z->state->sub.check.need = (uLong)NEXTBYTE << 24; z->state->mode = DICT3; case DICT3: NEEDBYTE z->state->sub.check.need += (uLong)NEXTBYTE << 16; z->state->mode = DICT2; case DICT2: NEEDBYTE z->state->sub.check.need += (uLong)NEXTBYTE << 8; z->state->mode = DICT1; case DICT1: NEEDBYTE z->state->sub.check.need += (uLong)NEXTBYTE; z->adler = z->state->sub.check.need; z->state->mode = DICT0; return Z_NEED_DICT; case DICT0: z->state->mode = BAD; z->msg = (char*)"need dictionary"; z->state->sub.marker = 0; /* can try inflateSync */ return Z_STREAM_ERROR; case BLOCKS: r = inflate_blocks(z->state->blocks, z, r); if (f == Z_PACKET_FLUSH && z->avail_in == 0 && z->avail_out != 0) r = inflate_packet_flush(z->state->blocks); if (r == Z_DATA_ERROR) { z->state->mode = BAD; 
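/* A marker of 0 lets inflateSync() (below) hunt for the resync
 * pattern 00 00 ff ff -- the LEN/NLEN bytes of an empty stored
 * block -- which is exactly what its m < 2 ? 0 : 0xff scan matches. */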
z->state->sub.marker = 0; /* can try inflateSync */ break; } if (r != Z_STREAM_END) return r; r = Z_OK; inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was); if (z->state->nowrap) { z->state->mode = DONE; break; } z->state->mode = CHECK4; case CHECK4: NEEDBYTE z->state->sub.check.need = (uLong)NEXTBYTE << 24; z->state->mode = CHECK3; case CHECK3: NEEDBYTE z->state->sub.check.need += (uLong)NEXTBYTE << 16; z->state->mode = CHECK2; case CHECK2: NEEDBYTE z->state->sub.check.need += (uLong)NEXTBYTE << 8; z->state->mode = CHECK1; case CHECK1: NEEDBYTE z->state->sub.check.need += (uLong)NEXTBYTE; if (z->state->sub.check.was != z->state->sub.check.need) { z->state->mode = BAD; z->msg = (char*)"incorrect data check"; z->state->sub.marker = 5; /* can't try inflateSync */ break; } Trace((stderr, "inflate: zlib check ok\n")); z->state->mode = DONE; case DONE: return Z_STREAM_END; case BAD: return Z_DATA_ERROR; default: return Z_STREAM_ERROR; } empty: if (f != Z_PACKET_FLUSH) return r; z->state->mode = BAD; z->msg = (char *)"need more for packet flush"; z->state->sub.marker = 0; /* can try inflateSync */ return Z_DATA_ERROR; } int inflateSetDictionary(z, dictionary, dictLength) z_streamp z; const Bytef *dictionary; uInt dictLength; { uInt length = dictLength; if (z == Z_NULL || z->state == Z_NULL || z->state->mode != DICT0) return Z_STREAM_ERROR; if (adler32(1L, dictionary, dictLength) != z->adler) return Z_DATA_ERROR; z->adler = 1L; if (length >= ((uInt)1<<z->state->wbits)) { length = (1<<z->state->wbits)-1; dictionary += dictLength - length; } inflate_set_dictionary(z->state->blocks, dictionary, length); z->state->mode = BLOCKS; return Z_OK; } /* * This subroutine adds the data at next_in/avail_in to the output history * without performing any output. The output buffer must be "caught up"; * i.e. no pending output (hence s->read equals s->write), and the state must * be BLOCKS (i.e. we should be willing to see the start of a series of * BLOCKS). On exit, the output will also be caught up, and the checksum * will have been updated if need be. */ int inflateIncomp(z) z_stream *z; { if (z->state->mode != BLOCKS) return Z_DATA_ERROR; return inflate_addhistory(z->state->blocks, z); } int inflateSync(z) z_streamp z; { uInt n; /* number of bytes to look at */ Bytef *p; /* pointer to bytes */ uInt m; /* number of marker bytes found in a row */ uLong r, w; /* temporaries to save total_in and total_out */ /* set up */ if (z == Z_NULL || z->state == Z_NULL) return Z_STREAM_ERROR; if (z->state->mode != BAD) { z->state->mode = BAD; z->state->sub.marker = 0; } if ((n = z->avail_in) == 0) return Z_BUF_ERROR; p = z->next_in; m = z->state->sub.marker; /* search */ while (n && m < 4) { if (*p == (Byte)(m < 2 ?
0 : 0xff)) m++; else if (*p) m = 0; else m = 4 - m; p++, n--; } /* restore */ z->total_in += p - z->next_in; z->next_in = p; z->avail_in = n; z->state->sub.marker = m; /* return no joy or set up to restart on a new block */ if (m != 4) return Z_DATA_ERROR; r = z->total_in; w = z->total_out; inflateReset(z); z->total_in = r; z->total_out = w; z->state->mode = BLOCKS; return Z_OK; } #undef NEEDBYTE #undef NEXTBYTE /* --- inflate.c */ /* +++ infblock.c */ /* infblock.c -- interpret and process block types to last block * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* #include "zutil.h" */ /* #include "infblock.h" */ /* +++ inftrees.h */ /* inftrees.h -- header to use inftrees.c * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. */ /* Huffman code lookup table entry--this entry is four bytes for machines that have 16-bit pointers (e.g. PC's in the small or medium model). */ typedef struct inflate_huft_s FAR inflate_huft; struct inflate_huft_s { union { struct { Byte Exop; /* number of extra bits or operation */ Byte Bits; /* number of bits in this code or subcode */ } what; Bytef *pad; /* pad structure to a power of 2 (4 bytes for */ } word; /* 16-bit, 8 bytes for 32-bit machines) */ union { uInt Base; /* literal, length base, or distance base */ inflate_huft *Next; /* pointer to next level of table */ } more; }; #ifdef DEBUG_ZLIB extern uInt inflate_hufts; #endif extern int inflate_trees_bits OF(( uIntf *, /* 19 code lengths */ uIntf *, /* bits tree desired/actual depth */ inflate_huft * FAR *, /* bits tree result */ z_streamp )); /* for zalloc, zfree functions */ extern int inflate_trees_dynamic OF(( uInt, /* number of literal/length codes */ uInt, /* number of distance codes */ uIntf *, /* that many (total) code lengths */ uIntf *, /* literal desired/actual bit depth */ uIntf *, /* distance desired/actual bit depth */ inflate_huft * FAR *, /* literal/length tree result */ inflate_huft * FAR *, /* distance tree result */ z_streamp )); /* for zalloc, zfree functions */ extern int inflate_trees_fixed OF(( uIntf *, /* literal desired/actual bit depth */ uIntf *, /* distance desired/actual bit depth */ inflate_huft * FAR *, /* literal/length tree result */ inflate_huft * FAR *)); /* distance tree result */ extern int inflate_trees_free OF(( inflate_huft *, /* tables to free */ z_streamp )); /* for zfree function */ /* --- inftrees.h */ /* +++ infcodes.h */ /* infcodes.h -- header to use infcodes.c * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. 
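   As a gloss on the inflate_huft entry defined above: a decoded entry
   is interpreted through its Exop field.  Exop == 0 means Base holds a
   literal byte; the 0x10 bit set means a length/distance base with
   (Exop & 15) extra bits still to fetch; 0x20 set means end-of-block;
   0x40 set marks an invalid code; and with none of those set the entry
   chains to a Next sub-table that consumes Exop further bits.  This is
   the dispatch implemented in inflate_codes() and inflate_fast() below.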
*/ struct inflate_codes_state; typedef struct inflate_codes_state FAR inflate_codes_statef; extern inflate_codes_statef *inflate_codes_new OF(( uInt, uInt, inflate_huft *, inflate_huft *, z_streamp )); extern int inflate_codes OF(( inflate_blocks_statef *, z_streamp , int)); extern void inflate_codes_free OF(( inflate_codes_statef *, z_streamp )); /* --- infcodes.h */ /* +++ infutil.h */ /* infutil.h -- types and macros common to blocks and codes * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. */ #ifndef _INFUTIL_H #define _INFUTIL_H typedef enum { TYPE, /* get type bits (3, including end bit) */ LENS, /* get lengths for stored */ STORED, /* processing stored block */ TABLE, /* get table lengths */ BTREE, /* get bit lengths tree for a dynamic block */ DTREE, /* get length, distance trees for a dynamic block */ CODES, /* processing fixed or dynamic block */ DRY, /* output remaining window bytes */ DONEB, /* finished last block, done */ BADB} /* got a data error--stuck here */ inflate_block_mode; /* inflate blocks semi-private state */ struct inflate_blocks_state { /* mode */ inflate_block_mode mode; /* current inflate_block mode */ /* mode dependent information */ union { uInt left; /* if STORED, bytes left to copy */ struct { uInt table; /* table lengths (14 bits) */ uInt index; /* index into blens (or border) */ uIntf *blens; /* bit lengths of codes */ uInt bb; /* bit length tree depth */ inflate_huft *tb; /* bit length decoding tree */ } trees; /* if DTREE, decoding info for trees */ struct { inflate_huft *tl; inflate_huft *td; /* trees to free */ inflate_codes_statef *codes; } decode; /* if CODES, current state */ } sub; /* submode */ uInt last; /* true if this block is the last block */ /* mode independent information */ uInt bitk; /* bits in bit buffer */ uLong bitb; /* bit buffer */ Bytef *window; /* sliding window */ Bytef *end; /* one byte after sliding window */ Bytef *read; /* window read pointer */ Bytef *write; /* window write pointer */ check_func checkfn; /* check function */ uLong check; /* check on output */ }; /* defines for inflate input/output */ /* update pointers and return */ #define UPDBITS {s->bitb=b;s->bitk=k;} #define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} #define UPDOUT {s->write=q;} #define UPDATE {UPDBITS UPDIN UPDOUT} #define LEAVE {UPDATE return inflate_flush(s,z,r);} /* get bytes and bits */ #define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} #define NEEDBYTE {if(n)r=Z_OK;else LEAVE} #define NEXTBYTE (n--,*p++) #define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}} #define DUMPBITS(j) {b>>=(j);k-=(j);} /* output bytes */ #define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q) #define LOADOUT {q=s->write;m=(uInt)WAVAIL;} #define WWRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}} #define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} #define NEEDOUT {if(m==0){WWRAP if(m==0){FLUSH WWRAP if(m==0) LEAVE}}r=Z_OK;} #define OUTBYTE(a) {*q++=(Byte)(a);m--;} /* load local pointers */ #define LOAD {LOADIN LOADOUT} /* masks for lower bits (size given to avoid silly warnings with Visual C++) */ extern uInt inflate_mask[17]; /* copy as much as possible from the sliding window to the output area */ extern int inflate_flush OF(( inflate_blocks_statef *, z_streamp , int)); #ifndef NO_DUMMY_DECL
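/*
 * The output-window macros above treat window..end as a ring buffer:
 * WAVAIL is how many bytes may be written at q before overtaking the
 * read pointer (one byte is kept free so that the full and empty states
 * stay distinguishable).  A standalone sketch of the same arithmetic,
 * with hypothetical names:
 *
 *	unsigned ring_avail(unsigned char *q, unsigned char *rd,
 *	    unsigned char *end)
 *	{
 *		if (q < rd)		// writer wrapped below reader
 *			return (unsigned)(rd - q - 1);
 *		// otherwise run to the physical end; WWRAP moves q back
 *		// to the start of the window on the next NEEDOUT
 *		return (unsigned)(end - q);
 *	}
 */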
struct internal_state {int dummy;}; /* for buggy compilers */ #endif #endif /* --- infutil.h */ #ifndef NO_DUMMY_DECL struct inflate_codes_state {int dummy;}; /* for buggy compilers */ #endif /* Table for deflate from PKZIP's appnote.txt. */ local const uInt border[] = { /* Order of the bit length code lengths */ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; /* Notes beyond the 1.93a appnote.txt: 1. Distance pointers never point before the beginning of the output stream. 2. Distance pointers can point back across blocks, up to 32k away. 3. There is an implied maximum of 7 bits for the bit length table and 15 bits for the actual data. 4. If only one code exists, then it is encoded using one bit. (Zero would be more efficient, but perhaps a little confusing.) If two codes exist, they are coded using one bit each (0 and 1). 5. There is no way of sending zero distance codes--a dummy must be sent if there are none. (History: a pre 2.0 version of PKZIP would store blocks with no distance codes, but this was discovered to be too harsh a criterion.) Valid only for 1.93a. 2.04c does allow zero distance codes, which is sent as one code of zero bits in length. 6. There are up to 286 literal/length codes. Code 256 represents the end-of-block. Note however that the static length tree defines 288 codes just to fill out the Huffman codes. Codes 286 and 287 cannot be used though, since there is no length base or extra bits defined for them. Similarly, there are up to 30 distance codes. However, static trees define 32 codes (all 5 bits) to fill out the Huffman codes, but the last two had better not show up in the data. 7. Unzip can check dynamic Huffman blocks for complete code sets. The exception is that a single code would not be complete (see #4). 8. The five bits following the block type are really the number of literal codes sent minus 257. 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits (1+6+6). Therefore, to output three times the length, you output three codes (1+1+1), whereas to output four times the same length, you only need two codes (1+3). Hmm. 10. In the tree reconstruction algorithm, Code = Code + Increment only if BitLength(i) is not zero. (Pretty obvious.) 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) 12. Note: length code 284 can represent 227-258, but length code 285 really is 258. The last length deserves its own, short code since it gets used a lot in very redundant files. The length 258 is special since 258 - 3 (the min match length) is 255. 13. The literal/length and distance code bit lengths are read as a single stream of lengths. It is possible (and advantageous) for a repeat code (16, 17, or 18) to go across the boundary between the two sets of lengths.
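      A worked instance of note 9: repeat code 16 copies the previous
      length 3 to 6 times, the count coming from two extra bits.  So the
      sequence 8, 16 (extra bits 11 binary, repeat 6), 16 (extra bits 11
      binary, repeat 6) expands to thirteen code lengths of 8, the
      1+6+6 = 13 referred to in the note.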
*/ void inflate_blocks_reset(s, z, c) inflate_blocks_statef *s; z_streamp z; uLongf *c; { if (s->checkfn != Z_NULL) *c = s->check; if (s->mode == BTREE || s->mode == DTREE) ZFREE(z, s->sub.trees.blens); if (s->mode == CODES) { inflate_codes_free(s->sub.decode.codes, z); inflate_trees_free(s->sub.decode.td, z); inflate_trees_free(s->sub.decode.tl, z); } s->mode = TYPE; s->bitk = 0; s->bitb = 0; s->read = s->write = s->window; if (s->checkfn != Z_NULL) z->adler = s->check = (*s->checkfn)(0L, Z_NULL, 0); Trace((stderr, "inflate: blocks reset\n")); } inflate_blocks_statef *inflate_blocks_new(z, c, w) z_streamp z; check_func c; uInt w; { inflate_blocks_statef *s; if ((s = (inflate_blocks_statef *)ZALLOC (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) return s; if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) { ZFREE(z, s); return Z_NULL; } s->end = s->window + w; s->checkfn = c; s->mode = TYPE; Trace((stderr, "inflate: blocks allocated\n")); inflate_blocks_reset(s, z, &s->check); return s; } #ifdef DEBUG_ZLIB extern uInt inflate_hufts; #endif int inflate_blocks(s, z, r) inflate_blocks_statef *s; z_streamp z; int r; { uInt t; /* temporary storage */ uLong b; /* bit buffer */ uInt k; /* bits in bit buffer */ Bytef *p; /* input data pointer */ uInt n; /* bytes available there */ Bytef *q; /* output window write pointer */ uInt m; /* bytes to end of window or read pointer */ /* copy input/output information to locals (UPDATE macro restores) */ LOAD /* process input based on current state */ while (1) switch (s->mode) { case TYPE: NEEDBITS(3) t = (uInt)b & 7; s->last = t & 1; switch (t >> 1) { case 0: /* stored */ Trace((stderr, "inflate: stored block%s\n", s->last ? " (last)" : "")); DUMPBITS(3) t = k & 7; /* go to byte boundary */ DUMPBITS(t) s->mode = LENS; /* get length of stored block */ break; case 1: /* fixed */ Trace((stderr, "inflate: fixed codes block%s\n", s->last ? " (last)" : "")); { uInt bl, bd; inflate_huft *tl, *td; inflate_trees_fixed(&bl, &bd, &tl, &td); s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); if (s->sub.decode.codes == Z_NULL) { r = Z_MEM_ERROR; LEAVE } s->sub.decode.tl = Z_NULL; /* don't try to free these */ s->sub.decode.td = Z_NULL; } DUMPBITS(3) s->mode = CODES; break; case 2: /* dynamic */ Trace((stderr, "inflate: dynamic codes block%s\n", s->last ? " (last)" : "")); DUMPBITS(3) s->mode = TABLE; break; case 3: /* illegal */ DUMPBITS(3) s->mode = BADB; z->msg = (char*)"invalid block type"; r = Z_DATA_ERROR; LEAVE } break; case LENS: NEEDBITS(32) if ((((~b) >> 16) & 0xffff) != (b & 0xffff)) { s->mode = BADB; z->msg = (char*)"invalid stored block lengths"; r = Z_DATA_ERROR; LEAVE } s->sub.left = (uInt)b & 0xffff; b = k = 0; /* dump bits */ Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE); break; case STORED: if (n == 0) LEAVE NEEDOUT t = s->sub.left; if (t > n) t = n; if (t > m) t = m; zmemcpy(q, p, t); p += t; n -= t; q += t; m -= t; if ((s->sub.left -= t) != 0) break; Tracev((stderr, "inflate: stored end, %lu total out\n", z->total_out + (q >= s->read ? q - s->read : (s->end - s->read) + (q - s->window)))); s->mode = s->last ? 
DRY : TYPE; break; case TABLE: NEEDBITS(14) s->sub.trees.table = t = (uInt)b & 0x3fff; #ifndef PKZIP_BUG_WORKAROUND if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) { s->mode = BADB; z->msg = (char*)"too many length or distance symbols"; r = Z_DATA_ERROR; LEAVE } #endif t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); if (t < 19) t = 19; if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) { r = Z_MEM_ERROR; LEAVE } DUMPBITS(14) s->sub.trees.index = 0; Tracev((stderr, "inflate: table sizes ok\n")); s->mode = BTREE; case BTREE: while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) { NEEDBITS(3) s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; DUMPBITS(3) } while (s->sub.trees.index < 19) s->sub.trees.blens[border[s->sub.trees.index++]] = 0; s->sub.trees.bb = 7; t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, &s->sub.trees.tb, z); if (t != Z_OK) { r = t; if (r == Z_DATA_ERROR) { ZFREE(z, s->sub.trees.blens); s->mode = BADB; } LEAVE } s->sub.trees.index = 0; Tracev((stderr, "inflate: bits tree ok\n")); s->mode = DTREE; case DTREE: while (t = s->sub.trees.table, s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) { inflate_huft *h; uInt i, j, c; t = s->sub.trees.bb; NEEDBITS(t) h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); t = h->word.what.Bits; c = h->more.Base; if (c < 16) { DUMPBITS(t) s->sub.trees.blens[s->sub.trees.index++] = c; } else /* c == 16..18 */ { i = c == 18 ? 7 : c - 14; j = c == 18 ? 11 : 3; NEEDBITS(t + i) DUMPBITS(t) j += (uInt)b & inflate_mask[i]; DUMPBITS(i) i = s->sub.trees.index; t = s->sub.trees.table; if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || (c == 16 && i < 1)) { inflate_trees_free(s->sub.trees.tb, z); ZFREE(z, s->sub.trees.blens); s->mode = BADB; z->msg = (char*)"invalid bit length repeat"; r = Z_DATA_ERROR; LEAVE } c = c == 16 ? s->sub.trees.blens[i - 1] : 0; do { s->sub.trees.blens[i++] = c; } while (--j); s->sub.trees.index = i; } } inflate_trees_free(s->sub.trees.tb, z); s->sub.trees.tb = Z_NULL; { uInt bl, bd; inflate_huft *tl, *td; inflate_codes_statef *c; bl = 9; /* must be <= 9 for lookahead assumptions */ bd = 6; /* must be <= 9 for lookahead assumptions */ t = s->sub.trees.table; #ifdef DEBUG_ZLIB inflate_hufts = 0; #endif t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), s->sub.trees.blens, &bl, &bd, &tl, &td, z); if (t != Z_OK) { if (t == (uInt)Z_DATA_ERROR) { ZFREE(z, s->sub.trees.blens); s->mode = BADB; } r = t; LEAVE } Tracev((stderr, "inflate: trees ok, %d * %d bytes used\n", inflate_hufts, sizeof(inflate_huft))); if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) { inflate_trees_free(td, z); inflate_trees_free(tl, z); r = Z_MEM_ERROR; LEAVE } /* * this ZFREE must occur *BEFORE* we mess with sub.decode, because * sub.trees is union'd with sub.decode. */ ZFREE(z, s->sub.trees.blens); s->sub.decode.codes = c; s->sub.decode.tl = tl; s->sub.decode.td = td; } s->mode = CODES; case CODES: UPDATE if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) return inflate_flush(s, z, r); r = Z_OK; inflate_codes_free(s->sub.decode.codes, z); inflate_trees_free(s->sub.decode.td, z); inflate_trees_free(s->sub.decode.tl, z); LOAD Tracev((stderr, "inflate: codes end, %lu total out\n", z->total_out + (q >= s->read ? 
q - s->read : (s->end - s->read) + (q - s->window)))); if (!s->last) { s->mode = TYPE; break; } if (k > 7) /* return unused byte, if any */ { Assert(k < 16, "inflate_codes grabbed too many bytes") k -= 8; n++; p--; /* can always return one */ } s->mode = DRY; case DRY: FLUSH if (s->read != s->write) LEAVE s->mode = DONEB; case DONEB: r = Z_STREAM_END; LEAVE case BADB: r = Z_DATA_ERROR; LEAVE default: r = Z_STREAM_ERROR; LEAVE } } int inflate_blocks_free(s, z, c) inflate_blocks_statef *s; z_streamp z; uLongf *c; { inflate_blocks_reset(s, z, c); ZFREE(z, s->window); ZFREE(z, s); Trace((stderr, "inflate: blocks freed\n")); return Z_OK; } void inflate_set_dictionary(s, d, n) inflate_blocks_statef *s; const Bytef *d; uInt n; { zmemcpy((charf *)s->window, d, n); s->read = s->write = s->window + n; } /* * This subroutine adds the data at next_in/avail_in to the output history * without performing any output. The output buffer must be "caught up"; * i.e. no pending output (hence s->read equals s->write), and the state must * be BLOCKS (i.e. we should be willing to see the start of a series of * BLOCKS). On exit, the output will also be caught up, and the checksum * will have been updated if need be. */ int inflate_addhistory(s, z) inflate_blocks_statef *s; z_stream *z; { uLong b; /* bit buffer */ /* NOT USED HERE */ uInt k; /* bits in bit buffer */ /* NOT USED HERE */ uInt t; /* temporary storage */ Bytef *p; /* input data pointer */ uInt n; /* bytes available there */ Bytef *q; /* output window write pointer */ uInt m; /* bytes to end of window or read pointer */ if (s->read != s->write) return Z_STREAM_ERROR; if (s->mode != TYPE) return Z_DATA_ERROR; /* we're ready to rock */ LOAD /* while there is input ready, copy to output buffer, moving * pointers as needed. */ while (n) { t = n; /* how many to do */ /* is there room until end of buffer? */ if (t > m) t = m; /* update check information */ if (s->checkfn != Z_NULL) s->check = (*s->checkfn)(s->check, q, t); zmemcpy(q, p, t); q += t; p += t; n -= t; z->total_out += t; s->read = q; /* drag read pointer forward */ /* WWRAP */ /* expand WWRAP macro by hand to handle s->read */ if (q == s->end) { s->read = q = s->window; m = WAVAIL; } } UPDATE return Z_OK; } /* * At the end of a Deflate-compressed PPP packet, we expect to have seen * a `stored' block type value but not the (zero) length bytes. */ int inflate_packet_flush(s) inflate_blocks_statef *s; { if (s->mode != LENS) return Z_DATA_ERROR; s->mode = TYPE; return Z_OK; } /* --- infblock.c */ /* +++ inftrees.c */ /* inftrees.c -- generate Huffman trees for efficient decoding * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* #include "zutil.h" */ /* #include "inftrees.h" */ char inflate_copyright[] = " inflate 1.0.4 Copyright 1995-1996 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot include such an acknowledgment, I would appreciate that you keep this copyright string in the executable of your product. 
*/ #ifndef NO_DUMMY_DECL struct internal_state {int dummy;}; /* for buggy compilers */ #endif /* simplify the use of the inflate_huft type with some defines */ #define base more.Base #define next more.Next #define exop word.what.Exop #define bits word.what.Bits local int huft_build OF(( uIntf *, /* code lengths in bits */ uInt, /* number of codes */ uInt, /* number of "simple" codes */ const uIntf *, /* list of base values for non-simple codes */ const uIntf *, /* list of extra bits for non-simple codes */ inflate_huft * FAR*,/* result: starting table */ uIntf *, /* maximum lookup bits (returns actual) */ z_streamp )); /* for zalloc function */ local voidpf falloc OF(( voidpf, /* opaque pointer (not used) */ uInt, /* number of items */ uInt)); /* size of item */ /* Tables for deflate from PKZIP's appnote.txt. */ local const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; /* see note #13 above about 258 */ local const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */ local const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; local const uInt cpdext[30] = { /* Extra bits for distance codes */ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; /* Huffman code decoding is performed using a multi-level table lookup. The fastest way to decode is to simply build a lookup table whose size is determined by the longest code. However, the time it takes to build this table can also be a factor if the data being decoded is not very long. The most common codes are necessarily the shortest codes, so those codes dominate the decoding time, and hence the speed. The idea is you can have a shorter table that decodes the shorter, more probable codes, and then point to subsidiary tables for the longer codes. The time it costs to decode the longer codes is then traded against the time it takes to make longer tables. The results of this trade are in the variables lbits and dbits below. lbits is the number of bits the first level table for literal/ length codes can decode in one step, and dbits is the same thing for the distance codes. Subsequent tables are also less than or equal to those sizes. These values may be adjusted either when all of the codes are shorter than that, in which case the longest code length in bits is used, or when the shortest code is *longer* than the requested table size, in which case the length of the shortest code in bits is used. There are two different values for the two tables, since they code a different number of possibilities each. The literal/length table codes 286 possible values, or in a flat code, a little over eight bits. The distance table codes 30 possible values, or a little less than five bits, flat. The optimum values for speed end up being about one bit more than those, so lbits is 8+1 and dbits is 5+1. The optimum values may differ though from machine to machine, and possibly even between compilers. Your mileage may vary. */ /* If BMAX needs to be larger than 16, then h and x[] should be uLong.
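   For example, with lbits == 9 any literal/length code of nine or fewer
   bits is decoded by one index into the root table using the low nine
   bits of the bit buffer, while a 13-bit code costs one extra step: its
   root entry points, via the Next/Exop fields, at a sub-table indexed
   by the following four bits.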
*/ #define BMAX 15 /* maximum bit length of any code */ #define N_MAX 288 /* maximum number of codes in any set */ #ifdef DEBUG_ZLIB uInt inflate_hufts; #endif local int huft_build(b, n, s, d, e, t, m, zs) uIntf *b; /* code lengths in bits (all assumed <= BMAX) */ uInt n; /* number of codes (assumed <= N_MAX) */ uInt s; /* number of simple-valued codes (0..s-1) */ const uIntf *d; /* list of base values for non-simple codes */ const uIntf *e; /* list of extra bits for non-simple codes */ inflate_huft * FAR *t; /* result: starting table */ uIntf *m; /* maximum lookup bits, returns actual */ z_streamp zs; /* for zalloc function */ /* Given a list of code lengths and a maximum table size, make a set of tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR if the given code set is incomplete (the tables are still built in this case), Z_DATA_ERROR if the input is invalid (an over-subscribed set of lengths), or Z_MEM_ERROR if not enough memory. */ { uInt a; /* counter for codes of length k */ uInt c[BMAX+1]; /* bit length count table */ uInt f; /* i repeats in table every f entries */ int g; /* maximum code length */ int h; /* table level */ - register uInt i; /* counter, current code */ - register uInt j; /* counter */ - register int k; /* number of bits in current code */ + uInt i; /* counter, current code */ + uInt j; /* counter */ + int k; /* number of bits in current code */ int l; /* bits per table (returned in m) */ - register uIntf *p; /* pointer into c[], b[], or v[] */ + uIntf *p; /* pointer into c[], b[], or v[] */ inflate_huft *q; /* points to current table */ struct inflate_huft_s r; /* table entry for structure assignment */ inflate_huft *u[BMAX]; /* table stack */ uInt v[N_MAX]; /* values in order of bit length */ - register int w; /* bits before this table == (l * h) */ + int w; /* bits before this table == (l * h) */ uInt x[BMAX+1]; /* bit offsets, then code stack */ uIntf *xp; /* pointer into x */ int y; /* number of dummy codes added */ uInt z; /* number of entries in current table */ /* Generate counts for each bit length */ p = c; #define C0 *p++ = 0; #define C2 C0 C0 C0 C0 #define C4 C2 C2 C2 C2 C4 /* clear c[]--assume BMAX+1 is 16 */ p = b; i = n; do { c[*p++]++; /* assume all entries <= BMAX */ } while (--i); if (c[0] == n) /* null input--all zero length codes */ { *t = (inflate_huft *)Z_NULL; *m = 0; return Z_OK; } /* Find minimum and maximum length, bound *m by those */ l = *m; for (j = 1; j <= BMAX; j++) if (c[j]) break; k = j; /* minimum code length */ if ((uInt)l < j) l = j; for (i = BMAX; i; i--) if (c[i]) break; g = i; /* maximum code length */ if ((uInt)l > i) l = i; *m = l; /* Adjust last length count to fill out codes, if needed */ for (y = 1 << j; j < i; j++, y <<= 1) if ((y -= c[j]) < 0) return Z_DATA_ERROR; if ((y -= c[i]) < 0) return Z_DATA_ERROR; c[i] += y; /* Generate starting offsets into the value table for each length */ x[1] = j = 0; p = c + 1; xp = x + 2; while (--i) { /* note that i == g from above */ *xp++ = (j += *p++); } /* Make a table of values in order of bit lengths */ p = b; i = 0; do { if ((j = *p++) != 0) v[x[j]++] = i; } while (++i < n); n = x[g]; /* set n to length of v */ /* Generate the Huffman codes and for each, make the table entries */ x[0] = i = 0; /* first Huffman code is zero */ p = v; /* grab values in bit order */ h = -1; /* no tables yet--level -1 */ w = -l; /* bits decoded == (l * h) */ u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ q = (inflate_huft *)Z_NULL; /* ditto */ z = 0; /* ditto */ 
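/*
 * Gloss on the over-subscription test a few lines up: it is a Kraft
 * inequality check.  Walking j from the shortest to the longest code
 * length with y = 1 << j and then y = (y - c[j]) << 1 per step leaves y
 * negative exactly when the lengths claim more code space than exists.
 * E.g. three codes of length 1 fail (2 - 3 < 0), while lengths
 * {1, 2, 2} end at y == 0, a complete code; a nonzero final y is the
 * incomplete case reported as Z_BUF_ERROR at the bottom of huft_build.
 */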
/* go through the bit lengths (k already is bits in shortest code) */ for (; k <= g; k++) { a = c[k]; while (a--) { /* here i is the Huffman code of length k bits for value *p */ /* make tables up to required level */ while (k > w + l) { h++; w += l; /* previous table always l bits */ /* compute minimum size table less than or equal to l bits */ z = g - w; z = z > (uInt)l ? l : z; /* table size upper limit */ if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ { /* too few codes for k-w bit table */ f -= a + 1; /* deduct codes from patterns left */ xp = c + k; if (j < z) while (++j < z) /* try smaller tables up to z bits */ { if ((f <<= 1) <= *++xp) break; /* enough codes to use up j bits */ f -= *xp; /* else deduct codes from patterns */ } } z = 1 << j; /* table entries for j-bit table */ /* allocate and link in new table */ if ((q = (inflate_huft *)ZALLOC (zs,z + 1,sizeof(inflate_huft))) == Z_NULL) { if (h) inflate_trees_free(u[0], zs); return Z_MEM_ERROR; /* not enough memory */ } #ifdef DEBUG_ZLIB inflate_hufts += z + 1; #endif *t = q + 1; /* link to list for huft_free() */ *(t = &(q->next)) = Z_NULL; u[h] = ++q; /* table starts after link */ /* connect to last table, if there is one */ if (h) { x[h] = i; /* save pattern for backing up */ r.bits = (Byte)l; /* bits to dump before this table */ r.exop = (Byte)j; /* bits in this table */ r.next = q; /* pointer to this table */ j = i >> (w - l); /* (get around Turbo C bug) */ u[h-1][j] = r; /* connect to last table */ } } /* set up table entry in r */ r.bits = (Byte)(k - w); if (p >= v + n) r.exop = 128 + 64; /* out of values--invalid code */ else if (*p < s) { r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ r.base = *p++; /* simple code is just the value */ } else { r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */ r.base = d[*p++ - s]; } /* fill code-like entries with r */ f = 1 << (k - w); for (j = i >> w; j < z; j += f) q[j] = r; /* backwards increment the k-bit code i */ for (j = 1 << (k - 1); i & j; j >>= 1) i ^= j; i ^= j; /* backup over finished tables */ while ((i & ((1 << w) - 1)) != x[h]) { h--; /* don't need to update q */ w -= l; } } } /* Return Z_BUF_ERROR if we were given an incomplete table */ return y != 0 && g != 1 ? 
Z_BUF_ERROR : Z_OK; } int inflate_trees_bits(c, bb, tb, z) uIntf *c; /* 19 code lengths */ uIntf *bb; /* bits tree desired/actual depth */ inflate_huft * FAR *tb; /* bits tree result */ z_streamp z; /* for zfree function */ { int r; r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, tb, bb, z); if (r == Z_DATA_ERROR) z->msg = (char*)"oversubscribed dynamic bit lengths tree"; else if (r == Z_BUF_ERROR || *bb == 0) { inflate_trees_free(*tb, z); z->msg = (char*)"incomplete dynamic bit lengths tree"; r = Z_DATA_ERROR; } return r; } int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, z) uInt nl; /* number of literal/length codes */ uInt nd; /* number of distance codes */ uIntf *c; /* that many (total) code lengths */ uIntf *bl; /* literal desired/actual bit depth */ uIntf *bd; /* distance desired/actual bit depth */ inflate_huft * FAR *tl; /* literal/length tree result */ inflate_huft * FAR *td; /* distance tree result */ z_streamp z; /* for zfree function */ { int r; /* build literal/length tree */ r = huft_build(c, nl, 257, cplens, cplext, tl, bl, z); if (r != Z_OK || *bl == 0) { if (r == Z_DATA_ERROR) z->msg = (char*)"oversubscribed literal/length tree"; else if (r != Z_MEM_ERROR) { inflate_trees_free(*tl, z); z->msg = (char*)"incomplete literal/length tree"; r = Z_DATA_ERROR; } return r; } /* build distance tree */ r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, z); if (r != Z_OK || (*bd == 0 && nl > 257)) { if (r == Z_DATA_ERROR) z->msg = (char*)"oversubscribed distance tree"; else if (r == Z_BUF_ERROR) { #ifdef PKZIP_BUG_WORKAROUND r = Z_OK; } #else inflate_trees_free(*td, z); z->msg = (char*)"incomplete distance tree"; r = Z_DATA_ERROR; } else if (r != Z_MEM_ERROR) { z->msg = (char*)"empty distance tree with lengths"; r = Z_DATA_ERROR; } inflate_trees_free(*tl, z); return r; #endif } /* done */ return Z_OK; } /* build fixed tables only once--keep them here */ local int fixed_built = 0; #define FIXEDH 530 /* number of hufts used by fixed tables */ local inflate_huft fixed_mem[FIXEDH]; local uInt fixed_bl; local uInt fixed_bd; local inflate_huft *fixed_tl; local inflate_huft *fixed_td; local voidpf falloc(q, n, s) voidpf q; /* opaque pointer */ uInt n; /* number of items */ uInt s; /* size of item */ { Assert(s == sizeof(inflate_huft) && n <= *(intf *)q, "inflate_trees falloc overflow"); *(intf *)q -= n+s-s; /* s-s to avoid warning */ return (voidpf)(fixed_mem + *(intf *)q); } int inflate_trees_fixed(bl, bd, tl, td) uIntf *bl; /* literal desired/actual bit depth */ uIntf *bd; /* distance desired/actual bit depth */ inflate_huft * FAR *tl; /* literal/length tree result */ inflate_huft * FAR *td; /* distance tree result */ { /* build fixed tables if not already (multiple overlapped executions ok) */ if (!fixed_built) { int k; /* temporary variable */ unsigned c[288]; /* length list for huft_build */ z_stream z; /* for falloc function */ int f = FIXEDH; /* number of hufts left in fixed_mem */ /* set up fake z_stream for memory routines */ z.zalloc = falloc; z.zfree = Z_NULL; z.opaque = (voidpf)&f; /* literal table */ for (k = 0; k < 144; k++) c[k] = 8; for (; k < 256; k++) c[k] = 9; for (; k < 280; k++) c[k] = 7; for (; k < 288; k++) c[k] = 8; fixed_bl = 7; huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, &z); /* distance table */ for (k = 0; k < 30; k++) c[k] = 5; fixed_bd = 5; huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, &z); /* done */ Assert(f == 0, "invalid build of fixed tables"); fixed_built = 1; } *bl = fixed_bl; *bd = fixed_bd; *tl = fixed_tl; 
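/*
 * Gloss: the length lists built above are the fixed deflate codes:
 * literals 0-143 get 8-bit codes, 144-255 get 9, 256-279 get 7 and
 * 280-287 get 8, while all 30 distance codes are 5 bits.  The set is
 * exactly complete (144/2^8 + 112/2^9 + 24/2^7 + 8/2^8 == 1), so
 * huft_build returns Z_OK rather than Z_BUF_ERROR, and the two tables
 * together use up all FIXEDH hufts, which the Assert on f verifies.
 */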
*td = fixed_td; return Z_OK; } int inflate_trees_free(t, z) inflate_huft *t; /* table to free */ z_streamp z; /* for zfree function */ /* Free the malloc'ed tables built by huft_build(), which makes a linked list of the tables it made, with the links in a dummy first entry of each table. */ { - register inflate_huft *p, *q, *r; + inflate_huft *p, *q, *r; /* Reverse linked list */ p = Z_NULL; q = t; while (q != Z_NULL) { r = (q - 1)->next; (q - 1)->next = p; p = q; q = r; } /* Go through linked list, freeing from the malloced (t[-1]) address. */ while (p != Z_NULL) { q = (--p)->next; ZFREE(z,p); p = q; } return Z_OK; } /* --- inftrees.c */ /* +++ infcodes.c */ /* infcodes.c -- process literals and length/distance pairs * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* #include "zutil.h" */ /* #include "inftrees.h" */ /* #include "infblock.h" */ /* #include "infcodes.h" */ /* #include "infutil.h" */ /* +++ inffast.h */ /* inffast.h -- header to use inffast.c * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. */ extern int inflate_fast OF(( uInt, uInt, inflate_huft *, inflate_huft *, inflate_blocks_statef *, z_streamp )); /* --- inffast.h */ /* simplify the use of the inflate_huft type with some defines */ #define base more.Base #define next more.Next #define exop word.what.Exop #define bits word.what.Bits /* inflate codes private state */ struct inflate_codes_state { /* mode */ enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ START, /* x: set up for LEN */ LEN, /* i: get length/literal/eob next */ LENEXT, /* i: getting length extra (have base) */ DIST, /* i: get distance next */ DISTEXT, /* i: getting distance extra */ COPY, /* o: copying bytes in window, waiting for space */ LIT, /* o: got literal, waiting for output space */ WASH, /* o: got eob, possibly still output waiting */ END, /* x: got eob and all data flushed */ BADCODE} /* x: got error */ mode; /* current inflate_codes mode */ /* mode dependent information */ uInt len; union { struct { inflate_huft *tree; /* pointer into tree */ uInt need; /* bits needed */ } code; /* if LEN or DIST, where in tree */ uInt lit; /* if LIT, literal */ struct { uInt get; /* bits to get for extra */ uInt dist; /* distance back to copy from */ } copy; /* if EXT or COPY, where and how much */ } sub; /* submode */ /* mode independent information */ Byte lbits; /* ltree bits decoded per branch */ Byte dbits; /* dtree bits decoder per branch */ inflate_huft *ltree; /* literal/length/eob tree */ inflate_huft *dtree; /* distance tree */ }; inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z) uInt bl, bd; inflate_huft *tl; inflate_huft *td; /* need separate declaration for Borland C++ */ z_streamp z; { inflate_codes_statef *c; if ((c = (inflate_codes_statef *) ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) { c->mode = START; c->lbits = (Byte)bl; c->dbits = (Byte)bd; c->ltree = tl; c->dtree = td; Tracev((stderr, "inflate: codes new\n")); } return c; } int inflate_codes(s, z, r) inflate_blocks_statef *s; z_streamp z; int r; { uInt j; /* temporary storage */ inflate_huft *t; /* temporary pointer */ uInt e; /* extra bits or operation */ uLong b; /* bit buffer */ uInt k; /* bits in bit buffer */ Bytef *p; /* input data pointer 
*/ uInt n; /* bytes available there */ Bytef *q; /* output window write pointer */ uInt m; /* bytes to end of window or read pointer */ Bytef *f; /* pointer to copy strings from */ inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ /* copy input/output information to locals (UPDATE macro restores) */ LOAD /* process input and output based on current state */ while (1) switch (c->mode) { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ case START: /* x: set up for LEN */ #ifndef SLOW if (m >= 258 && n >= 10) { UPDATE r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); LOAD if (r != Z_OK) { c->mode = r == Z_STREAM_END ? WASH : BADCODE; break; } } #endif /* !SLOW */ c->sub.code.need = c->lbits; c->sub.code.tree = c->ltree; c->mode = LEN; case LEN: /* i: get length/literal/eob next */ j = c->sub.code.need; NEEDBITS(j) t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); DUMPBITS(t->bits) e = (uInt)(t->exop); if (e == 0) /* literal */ { c->sub.lit = t->base; Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? "inflate: literal '%c'\n" : "inflate: literal 0x%02x\n", t->base)); c->mode = LIT; break; } if (e & 16) /* length */ { c->sub.copy.get = e & 15; c->len = t->base; c->mode = LENEXT; break; } if ((e & 64) == 0) /* next table */ { c->sub.code.need = e; c->sub.code.tree = t->next; break; } if (e & 32) /* end of block */ { Tracevv((stderr, "inflate: end of block\n")); c->mode = WASH; break; } c->mode = BADCODE; /* invalid code */ z->msg = (char*)"invalid literal/length code"; r = Z_DATA_ERROR; LEAVE case LENEXT: /* i: getting length extra (have base) */ j = c->sub.copy.get; NEEDBITS(j) c->len += (uInt)b & inflate_mask[j]; DUMPBITS(j) c->sub.code.need = c->dbits; c->sub.code.tree = c->dtree; Tracevv((stderr, "inflate: length %u\n", c->len)); c->mode = DIST; case DIST: /* i: get distance next */ j = c->sub.code.need; NEEDBITS(j) t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); DUMPBITS(t->bits) e = (uInt)(t->exop); if (e & 16) /* distance */ { c->sub.copy.get = e & 15; c->sub.copy.dist = t->base; c->mode = DISTEXT; break; } if ((e & 64) == 0) /* next table */ { c->sub.code.need = e; c->sub.code.tree = t->next; break; } c->mode = BADCODE; /* invalid code */ z->msg = (char*)"invalid distance code"; r = Z_DATA_ERROR; LEAVE case DISTEXT: /* i: getting distance extra */ j = c->sub.copy.get; NEEDBITS(j) c->sub.copy.dist += (uInt)b & inflate_mask[j]; DUMPBITS(j) Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist)); c->mode = COPY; case COPY: /* o: copying bytes in window, waiting for space */ #ifndef __TURBOC__ /* Turbo C bug for following expression */ f = (uInt)(q - s->window) < c->sub.copy.dist ? 
s->end - (c->sub.copy.dist - (q - s->window)) : q - c->sub.copy.dist; #else f = q - c->sub.copy.dist; if ((uInt)(q - s->window) < c->sub.copy.dist) f = s->end - (c->sub.copy.dist - (uInt)(q - s->window)); #endif while (c->len) { NEEDOUT OUTBYTE(*f++) if (f == s->end) f = s->window; c->len--; } c->mode = START; break; case LIT: /* o: got literal, waiting for output space */ NEEDOUT OUTBYTE(c->sub.lit) c->mode = START; break; case WASH: /* o: got eob, possibly more output */ FLUSH if (s->read != s->write) LEAVE c->mode = END; case END: r = Z_STREAM_END; LEAVE case BADCODE: /* x: got error */ r = Z_DATA_ERROR; LEAVE default: r = Z_STREAM_ERROR; LEAVE } } void inflate_codes_free(c, z) inflate_codes_statef *c; z_streamp z; { ZFREE(z, c); Tracev((stderr, "inflate: codes free\n")); } /* --- infcodes.c */ /* +++ infutil.c */ /* inflate_util.c -- data and routines common to blocks and codes * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* #include "zutil.h" */ /* #include "infblock.h" */ /* #include "inftrees.h" */ /* #include "infcodes.h" */ /* #include "infutil.h" */ #ifndef NO_DUMMY_DECL struct inflate_codes_state {int dummy;}; /* for buggy compilers */ #endif /* And'ing with mask[n] masks the lower n bits */ uInt inflate_mask[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff }; /* copy as much as possible from the sliding window to the output area */ int inflate_flush(s, z, r) inflate_blocks_statef *s; z_streamp z; int r; { uInt n; Bytef *p; Bytef *q; /* local copies of source and destination pointers */ p = z->next_out; q = s->read; /* compute number of bytes to copy as far as end of window */ n = (uInt)((q <= s->write ? 
s->write : s->end) - q); if (n > z->avail_out) n = z->avail_out; if (n && r == Z_BUF_ERROR) r = Z_OK; /* update counters */ z->avail_out -= n; z->total_out += n; /* update check information */ if (s->checkfn != Z_NULL) z->adler = s->check = (*s->checkfn)(s->check, q, n); /* copy as far as end of window */ if (p != Z_NULL) { zmemcpy(p, q, n); p += n; } q += n; /* see if more to copy at beginning of window */ if (q == s->end) { /* wrap pointers */ q = s->window; if (s->write == s->end) s->write = s->window; /* compute bytes to copy */ n = (uInt)(s->write - q); if (n > z->avail_out) n = z->avail_out; if (n && r == Z_BUF_ERROR) r = Z_OK; /* update counters */ z->avail_out -= n; z->total_out += n; /* update check information */ if (s->checkfn != Z_NULL) z->adler = s->check = (*s->checkfn)(s->check, q, n); /* copy */ if (p != Z_NULL) { zmemcpy(p, q, n); p += n; } q += n; } /* update pointers */ z->next_out = p; s->read = q; /* done */ return r; } /* --- infutil.c */ /* +++ inffast.c */ /* inffast.c -- process literals and length/distance pairs fast * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* #include "zutil.h" */ /* #include "inftrees.h" */ /* #include "infblock.h" */ /* #include "infcodes.h" */ /* #include "infutil.h" */ /* #include "inffast.h" */ #ifndef NO_DUMMY_DECL struct inflate_codes_state {int dummy;}; /* for buggy compilers */ #endif /* simplify the use of the inflate_huft type with some defines */ #define base more.Base #define next more.Next #define exop word.what.Exop #define bits word.what.Bits /* macros for bit input with no checking and for returning unused bytes */ #define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}} #define UNGRAB {n+=(c=k>>3);p-=c;k&=7;} /* Called with number of bytes left to write in window at least 258 (the maximum string length) and number of input bytes available at least ten. The ten bytes are six bytes for the longest length/ distance pair plus four bytes for overloading the bit buffer. */ int inflate_fast(bl, bd, tl, td, s, z) uInt bl, bd; inflate_huft *tl; inflate_huft *td; /* need separate declaration for Borland C++ */ inflate_blocks_statef *s; z_streamp z; { inflate_huft *t; /* temporary pointer */ uInt e; /* extra bits or operation */ uLong b; /* bit buffer */ uInt k; /* bits in bit buffer */ Bytef *p; /* input data pointer */ uInt n; /* bytes available there */ Bytef *q; /* output window write pointer */ uInt m; /* bytes to end of window or read pointer */ uInt ml; /* mask for literal/length tree */ uInt md; /* mask for distance tree */ uInt c; /* bytes to copy */ uInt d; /* distance back to copy from */ Bytef *r; /* copy source pointer */ /* load input, output, bit values */ LOAD /* initialize masks */ ml = inflate_mask[bl]; md = inflate_mask[bd]; /* do until not enough input or output space for fast loop */ do { /* assume called with m >= 258 && n >= 10 */ /* get literal/length code */ GRABBITS(20) /* max bits for literal/length code */ if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) { DUMPBITS(t->bits) Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
"inflate: * literal '%c'\n" : "inflate: * literal 0x%02x\n", t->base)); *q++ = (Byte)t->base; m--; continue; } do { DUMPBITS(t->bits) if (e & 16) { /* get extra bits for length */ e &= 15; c = t->base + ((uInt)b & inflate_mask[e]); DUMPBITS(e) Tracevv((stderr, "inflate: * length %u\n", c)); /* decode distance base of block to copy */ GRABBITS(15); /* max bits for distance code */ e = (t = td + ((uInt)b & md))->exop; do { DUMPBITS(t->bits) if (e & 16) { /* get extra bits to add to distance base */ e &= 15; GRABBITS(e) /* get extra bits (up to 13) */ d = t->base + ((uInt)b & inflate_mask[e]); DUMPBITS(e) Tracevv((stderr, "inflate: * distance %u\n", d)); /* do the copy */ m -= c; if ((uInt)(q - s->window) >= d) /* offset before dest */ { /* just copy */ r = q - d; *q++ = *r++; c--; /* minimum count is three, */ *q++ = *r++; c--; /* so unroll loop a little */ } else /* else offset after destination */ { e = d - (uInt)(q - s->window); /* bytes from offset to end */ r = s->end - e; /* pointer to offset */ if (c > e) /* if source crosses, */ { c -= e; /* copy to end of window */ do { *q++ = *r++; } while (--e); r = s->window; /* copy rest from start of window */ } } do { /* copy all or what's left */ *q++ = *r++; } while (--c); break; } else if ((e & 64) == 0) e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop; else { z->msg = (char*)"invalid distance code"; UNGRAB UPDATE return Z_DATA_ERROR; } } while (1); break; } if ((e & 64) == 0) { if ((e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop) == 0) { DUMPBITS(t->bits) Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? "inflate: * literal '%c'\n" : "inflate: * literal 0x%02x\n", t->base)); *q++ = (Byte)t->base; m--; break; } } else if (e & 32) { Tracevv((stderr, "inflate: * end of block\n")); UNGRAB UPDATE return Z_STREAM_END; } else { z->msg = (char*)"invalid literal/length code"; UNGRAB UPDATE return Z_DATA_ERROR; } } while (1); } while (m >= 258 && n >= 10); /* not enough input or output--restore pointers and return */ UNGRAB UPDATE return Z_OK; } /* --- inffast.c */ /* +++ zutil.c */ /* zutil.c -- target dependent utility functions for the compression library * Copyright (C) 1995-1996 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ /* From: zutil.c,v 1.17 1996/07/24 13:41:12 me Exp $ */ #ifdef DEBUG_ZLIB #include #endif /* #include "zutil.h" */ #ifndef NO_DUMMY_DECL struct internal_state {int dummy;}; /* for buggy compilers */ #endif #ifndef STDC extern void exit OF((int)); #endif static const char *z_errmsg[10] = { "need dictionary", /* Z_NEED_DICT 2 */ "stream end", /* Z_STREAM_END 1 */ "", /* Z_OK 0 */ "file error", /* Z_ERRNO (-1) */ "stream error", /* Z_STREAM_ERROR (-2) */ "data error", /* Z_DATA_ERROR (-3) */ "insufficient memory", /* Z_MEM_ERROR (-4) */ "buffer error", /* Z_BUF_ERROR (-5) */ "incompatible version",/* Z_VERSION_ERROR (-6) */ ""}; const char *zlibVersion() { return ZLIB_VERSION; } #ifdef DEBUG_ZLIB void z_error (m) char *m; { fprintf(stderr, "%s\n", m); exit(1); } #endif #ifndef HAVE_MEMCPY void zmemcpy(dest, source, len) Bytef* dest; Bytef* source; uInt len; { if (len == 0) return; do { *dest++ = *source++; /* ??? to be unrolled */ } while (--len != 0); } int zmemcmp(s1, s2, len) Bytef* s1; Bytef* s2; uInt len; { uInt j; for (j = 0; j < len; j++) { if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; } return 0; } void zmemzero(dest, len) Bytef* dest; uInt len; { if (len == 0) return; do { *dest++ = 0; /* ??? 
to be unrolled */ } while (--len != 0); } #endif #ifdef __TURBOC__ #if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__) /* Small and medium model in Turbo C are for now limited to near allocation * with reduced MAX_WBITS and MAX_MEM_LEVEL */ # define MY_ZCALLOC /* Turbo C malloc() does not allow dynamic allocation of 64K bytes * and farmalloc(64K) returns a pointer with an offset of 8, so we * must fix the pointer. Warning: the pointer must be put back to its * original form in order to free it, use zcfree(). */ #define MAX_PTR 10 /* 10*64K = 640K */ local int next_ptr = 0; typedef struct ptr_table_s { voidpf org_ptr; voidpf new_ptr; } ptr_table; local ptr_table table[MAX_PTR]; /* This table is used to remember the original form of pointers * to large buffers (64K). Such pointers are normalized with a zero offset. * Since MSDOS is not a preemptive multitasking OS, this table is not * protected from concurrent access. This hack doesn't work anyway on * a protected system like OS/2. Use Microsoft C instead. */ voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) { voidpf buf = opaque; /* just to make some compilers happy */ ulg bsize = (ulg)items*size; /* If we allocate less than 65520 bytes, we assume that farmalloc * will return a usable pointer which doesn't have to be normalized. */ if (bsize < 65520L) { buf = farmalloc(bsize); if (*(ush*)&buf != 0) return buf; } else { buf = farmalloc(bsize + 16L); } if (buf == NULL || next_ptr >= MAX_PTR) return NULL; table[next_ptr].org_ptr = buf; /* Normalize the pointer to seg:0 */ *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; *(ush*)&buf = 0; table[next_ptr++].new_ptr = buf; return buf; } void zcfree (voidpf opaque, voidpf ptr) { int n; if (*(ush*)&ptr != 0) { /* object < 64K */ farfree(ptr); return; } /* Find the original pointer */ for (n = 0; n < next_ptr; n++) { if (ptr != table[n].new_ptr) continue; farfree(table[n].org_ptr); while (++n < next_ptr) { table[n-1] = table[n]; } next_ptr--; return; } ptr = opaque; /* just to make some compilers happy */ Assert(0, "zcfree: ptr not found"); } #endif #endif /* __TURBOC__ */ #if defined(M_I86) && !defined(__32BIT__) /* Microsoft C in 16-bit mode */ # define MY_ZCALLOC #if (!defined(_MSC_VER) || (_MSC_VER < 600)) # define _halloc halloc # define _hfree hfree #endif voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) { if (opaque) opaque = 0; /* to make compiler happy */ return _halloc((long)items, size); } void zcfree (voidpf opaque, voidpf ptr) { if (opaque) opaque = 0; /* to make compiler happy */ _hfree(ptr); } #endif /* MSC */ #ifndef MY_ZCALLOC /* Any system without a special alloc function */ #ifndef STDC extern voidp calloc OF((uInt items, uInt size)); extern void free OF((voidpf ptr)); #endif voidpf zcalloc (opaque, items, size) voidpf opaque; unsigned items; unsigned size; { if (opaque) items += size - size; /* make compiler happy */ return (voidpf)calloc(items, size); } void zcfree (opaque, ptr) voidpf opaque; voidpf ptr; { free(ptr); if (opaque) return; /* make compiler happy */ } #endif /* MY_ZCALLOC */ /* --- zutil.c */ /* +++ adler32.c */ /* adler32.c -- compute the Adler-32 checksum of a data stream * Copyright (C) 1995-1996 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* From: adler32.c,v 1.10 1996/05/22 11:52:18 me Exp $ */ /* #include "zlib.h" */ #define BASE 65521L /* largest prime smaller than 65536 */ #define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 
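   The bound is the worst case of s1 and s2 both starting at BASE-1 with
   every input byte equal to 255: after n further bytes s2 can reach
   255*n*(n+1)/2 + (n+1)*(BASE-1).  For n == 5552 that is 4294690200,
   just under 2^32-1 == 4294967295, while n == 5553 would give
   4296171735 and overflow; hence the loop below defers the expensive
   %= BASE reductions for up to NMAX bytes at a time.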
*/ #define DO1(buf,i) {s1 += buf[(i)]; s2 += s1;} #define DO2(buf,i) DO1(buf,i); DO1(buf,(i)+1); #define DO4(buf,i) DO2(buf,i); DO2(buf,(i)+2); #define DO8(buf,i) DO4(buf,i); DO4(buf,(i)+4); #define DO16(buf) DO8(buf,0); DO8(buf,8); /* ========================================================================= */ uLong adler32(adler, buf, len) uLong adler; const Bytef *buf; uInt len; { unsigned long s1 = adler & 0xffff; unsigned long s2 = (adler >> 16) & 0xffff; int k; if (buf == Z_NULL) return 1L; while (len > 0) { k = len < NMAX ? len : NMAX; len -= k; while (k >= 16) { DO16(buf); buf += 16; k -= 16; } if (k != 0) do { s1 += *buf++; s2 += s1; } while (--k); s1 %= BASE; s2 %= BASE; } return (s2 << 16) | s1; } /* --- adler32.c */ #ifdef _KERNEL static int zlib_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: return 0; case MOD_UNLOAD: return 0; } return EINVAL; } static moduledata_t zlib_mod = { "zlib", zlib_modevent, 0 }; DECLARE_MODULE(zlib, zlib_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); MODULE_VERSION(zlib, 1); #endif /* _KERNEL */ Index: stable/11/sys/mips/cavium/cryptocteon/cavium_crypto.c =================================================================== --- stable/11/sys/mips/cavium/cryptocteon/cavium_crypto.c (revision 331642) +++ stable/11/sys/mips/cavium/cryptocteon/cavium_crypto.c (revision 331643) @@ -1,2138 +1,2138 @@ /* * vim:sw=4 ts=8 */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2009 David McCullough * * Copyright (c) 2003-2007 Cavium Networks (support@cavium.com). All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Cavium Networks * 4. Cavium Networks' name may not be used to endorse or promote products * derived from this software without specific prior written permission. * * This Software, including technical data, may be subject to U.S. export * control laws, including the U.S. Export Administration Act and its * associated regulations, and may be subject to export or import regulations * in other countries. You warrant that You will comply strictly in all * respects with all such regulations and acknowledge that you have the * responsibility to obtain licenses to export, re-export or import the * Software. * * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" AND * WITH ALL FAULTS AND CAVIUM MAKES NO PROMISES, REPRESENTATIONS OR WARRANTIES, * EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO THE * SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE, * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR * CORRESPONDENCE TO DESCRIPTION. 
THE ENTIRE RISK ARISING OUT OF USE OR * PERFORMANCE OF THE SOFTWARE LIES WITH YOU. */ /****************************************************************************/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include /****************************************************************************/ #define IOV_INIT(iov, ptr, idx, len) \ do { \ (idx) = 0; \ (ptr) = (iov)[(idx)].iov_base; \ (len) = (iov)[(idx)].iov_len; \ } while (0) /* * XXX * It would be better if this were an IOV_READ/IOV_WRITE macro instead so * that we could detect overflow before it happens rather than right after, * which is especially bad since there is usually no IOV_CONSUME after the * final read or write. */ #define IOV_CONSUME(iov, ptr, idx, len) \ do { \ if ((len) > sizeof *(ptr)) { \ (len) -= sizeof *(ptr); \ (ptr)++; \ } else { \ if ((len) != sizeof *(ptr)) \ panic("%s: went past end of iovec.", __func__); \ (idx)++; \ (ptr) = (iov)[(idx)].iov_base; \ (len) = (iov)[(idx)].iov_len; \ } \ } while (0) #define ESP_HEADER_LENGTH 8 #define DES_CBC_IV_LENGTH 8 #define AES_CBC_IV_LENGTH 16 #define ESP_HMAC_LEN 12 #define ESP_HEADER_LENGTH 8 #define DES_CBC_IV_LENGTH 8 /****************************************************************************/ #define CVM_LOAD_SHA_UNIT(dat, next) { \ if (next == 0) { \ next = 1; \ CVMX_MT_HSH_DAT (dat, 0); \ } else if (next == 1) { \ next = 2; \ CVMX_MT_HSH_DAT (dat, 1); \ } else if (next == 2) { \ next = 3; \ CVMX_MT_HSH_DAT (dat, 2); \ } else if (next == 3) { \ next = 4; \ CVMX_MT_HSH_DAT (dat, 3); \ } else if (next == 4) { \ next = 5; \ CVMX_MT_HSH_DAT (dat, 4); \ } else if (next == 5) { \ next = 6; \ CVMX_MT_HSH_DAT (dat, 5); \ } else if (next == 6) { \ next = 7; \ CVMX_MT_HSH_DAT (dat, 6); \ } else { \ CVMX_MT_HSH_STARTSHA (dat); \ next = 0; \ } \ } #define CVM_LOAD2_SHA_UNIT(dat1, dat2, next) { \ if (next == 0) { \ CVMX_MT_HSH_DAT (dat1, 0); \ CVMX_MT_HSH_DAT (dat2, 1); \ next = 2; \ } else if (next == 1) { \ CVMX_MT_HSH_DAT (dat1, 1); \ CVMX_MT_HSH_DAT (dat2, 2); \ next = 3; \ } else if (next == 2) { \ CVMX_MT_HSH_DAT (dat1, 2); \ CVMX_MT_HSH_DAT (dat2, 3); \ next = 4; \ } else if (next == 3) { \ CVMX_MT_HSH_DAT (dat1, 3); \ CVMX_MT_HSH_DAT (dat2, 4); \ next = 5; \ } else if (next == 4) { \ CVMX_MT_HSH_DAT (dat1, 4); \ CVMX_MT_HSH_DAT (dat2, 5); \ next = 6; \ } else if (next == 5) { \ CVMX_MT_HSH_DAT (dat1, 5); \ CVMX_MT_HSH_DAT (dat2, 6); \ next = 7; \ } else if (next == 6) { \ CVMX_MT_HSH_DAT (dat1, 6); \ CVMX_MT_HSH_STARTSHA (dat2); \ next = 0; \ } else { \ CVMX_MT_HSH_STARTSHA (dat1); \ CVMX_MT_HSH_DAT (dat2, 0); \ next = 1; \ } \ } /****************************************************************************/ #define CVM_LOAD_MD5_UNIT(dat, next) { \ if (next == 0) { \ next = 1; \ CVMX_MT_HSH_DAT (dat, 0); \ } else if (next == 1) { \ next = 2; \ CVMX_MT_HSH_DAT (dat, 1); \ } else if (next == 2) { \ next = 3; \ CVMX_MT_HSH_DAT (dat, 2); \ } else if (next == 3) { \ next = 4; \ CVMX_MT_HSH_DAT (dat, 3); \ } else if (next == 4) { \ next = 5; \ CVMX_MT_HSH_DAT (dat, 4); \ } else if (next == 5) { \ next = 6; \ CVMX_MT_HSH_DAT (dat, 5); \ } else if (next == 6) { \ next = 7; \ CVMX_MT_HSH_DAT (dat, 6); \ } else { \ CVMX_MT_HSH_STARTMD5 (dat); \ next = 0; \ } \ } #define CVM_LOAD2_MD5_UNIT(dat1, dat2, next) { \ if (next == 0) { \ CVMX_MT_HSH_DAT (dat1, 0); \ CVMX_MT_HSH_DAT (dat2, 1); \ next = 2; \ } else if (next == 1) { \ CVMX_MT_HSH_DAT (dat1, 1); \ CVMX_MT_HSH_DAT (dat2, 2); \ next = 3; \ } else if (next == 2) { \ 
CVMX_MT_HSH_DAT (dat1, 2); \ CVMX_MT_HSH_DAT (dat2, 3); \ next = 4; \ } else if (next == 3) { \ CVMX_MT_HSH_DAT (dat1, 3); \ CVMX_MT_HSH_DAT (dat2, 4); \ next = 5; \ } else if (next == 4) { \ CVMX_MT_HSH_DAT (dat1, 4); \ CVMX_MT_HSH_DAT (dat2, 5); \ next = 6; \ } else if (next == 5) { \ CVMX_MT_HSH_DAT (dat1, 5); \ CVMX_MT_HSH_DAT (dat2, 6); \ next = 7; \ } else if (next == 6) { \ CVMX_MT_HSH_DAT (dat1, 6); \ CVMX_MT_HSH_STARTMD5 (dat2); \ next = 0; \ } else { \ CVMX_MT_HSH_STARTMD5 (dat1); \ CVMX_MT_HSH_DAT (dat2, 0); \ next = 1; \ } \ } /****************************************************************************/ void octo_calc_hash(uint8_t auth, unsigned char *key, uint64_t *inner, uint64_t *outer) { uint8_t hash_key[64]; uint64_t *key1; register uint64_t xor1 = 0x3636363636363636ULL; register uint64_t xor2 = 0x5c5c5c5c5c5c5c5cULL; dprintf("%s()\n", __func__); memset(hash_key, 0, sizeof(hash_key)); memcpy(hash_key, (uint8_t *) key, (auth ? 20 : 16)); key1 = (uint64_t *) hash_key; if (auth) { CVMX_MT_HSH_IV(0x67452301EFCDAB89ULL, 0); CVMX_MT_HSH_IV(0x98BADCFE10325476ULL, 1); CVMX_MT_HSH_IV(0xC3D2E1F000000000ULL, 2); } else { CVMX_MT_HSH_IV(0x0123456789ABCDEFULL, 0); CVMX_MT_HSH_IV(0xFEDCBA9876543210ULL, 1); } CVMX_MT_HSH_DAT((*key1 ^ xor1), 0); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor1), 1); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor1), 2); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor1), 3); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor1), 4); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor1), 5); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor1), 6); key1++; if (auth) CVMX_MT_HSH_STARTSHA((*key1 ^ xor1)); else CVMX_MT_HSH_STARTMD5((*key1 ^ xor1)); CVMX_MF_HSH_IV(inner[0], 0); CVMX_MF_HSH_IV(inner[1], 1); if (auth) { inner[2] = 0; CVMX_MF_HSH_IV(((uint64_t *) inner)[2], 2); } memset(hash_key, 0, sizeof(hash_key)); memcpy(hash_key, (uint8_t *) key, (auth ? 
20 : 16)); key1 = (uint64_t *) hash_key; if (auth) { CVMX_MT_HSH_IV(0x67452301EFCDAB89ULL, 0); CVMX_MT_HSH_IV(0x98BADCFE10325476ULL, 1); CVMX_MT_HSH_IV(0xC3D2E1F000000000ULL, 2); } else { CVMX_MT_HSH_IV(0x0123456789ABCDEFULL, 0); CVMX_MT_HSH_IV(0xFEDCBA9876543210ULL, 1); } CVMX_MT_HSH_DAT((*key1 ^ xor2), 0); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor2), 1); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor2), 2); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor2), 3); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor2), 4); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor2), 5); key1++; CVMX_MT_HSH_DAT((*key1 ^ xor2), 6); key1++; if (auth) CVMX_MT_HSH_STARTSHA((*key1 ^ xor2)); else CVMX_MT_HSH_STARTMD5((*key1 ^ xor2)); CVMX_MF_HSH_IV(outer[0], 0); CVMX_MF_HSH_IV(outer[1], 1); if (auth) { outer[2] = 0; CVMX_MF_HSH_IV(outer[2], 2); } return; } /****************************************************************************/ /* DES functions */ int octo_des_cbc_encrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { uint64_t *data; int data_i, data_l; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x7) || (crypt_off + crypt_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load 3DES Key */ CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 0); if (od->octo_encklen == 24) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[1], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[2], 2); } else if (od->octo_encklen == 8) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 2); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_3DES_IV(* (uint64_t *) ivp); while (crypt_off > 0) { IOV_CONSUME(iov, data, data_i, data_l); crypt_off -= 8; } while (crypt_len > 0) { CVMX_MT_3DES_ENC_CBC(*data); CVMX_MF_3DES_RESULT(*data); IOV_CONSUME(iov, data, data_i, data_l); crypt_len -= 8; } return 0; } int octo_des_cbc_decrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { uint64_t *data; int data_i, data_l; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x7) || (crypt_off + crypt_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load 3DES Key */ CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 0); if (od->octo_encklen == 24) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[1], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[2], 2); } else if (od->octo_encklen == 8) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 2); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_3DES_IV(* (uint64_t *) ivp); while (crypt_off > 0) { 
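/*
 * Illustrative sketch, not driver code: a portable model of the
 * IOV_INIT/IOV_CONSUME walk that every routine here performs.  The
 * offset is skipped and the payload transformed in place, 8 bytes at
 * a time.  xform64() is a hypothetical stand-in for the coprocessor
 * register moves, and iovec segments are assumed to be multiples of
 * 8 bytes (IOV_CONSUME panics otherwise).
 */
#if 0
#include <sys/uio.h>
#include <stdint.h>

static void
iov_walk64(const struct iovec *iov, size_t off, size_t len,
    uint64_t (*xform64)(uint64_t))
{
	uint64_t *p;
	size_t idx, left;

	idx = 0;				/* IOV_INIT */
	p = iov[idx].iov_base;
	left = iov[idx].iov_len;
	while (off > 0) {			/* skip crypt_off bytes */
		if (left > sizeof(*p)) {	/* IOV_CONSUME */
			left -= sizeof(*p);
			p++;
		} else {
			idx++;
			p = iov[idx].iov_base;
			left = iov[idx].iov_len;
		}
		off -= 8;
	}
	while (len > 0) {			/* transform in place */
		*p = xform64(*p);
		if (left > sizeof(*p)) {	/* IOV_CONSUME */
			left -= sizeof(*p);
			p++;
		} else {
			idx++;
			p = iov[idx].iov_base;
			left = iov[idx].iov_len;
		}
		len -= 8;
	}
}
#endif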
IOV_CONSUME(iov, data, data_i, data_l); crypt_off -= 8; } while (crypt_len > 0) { CVMX_MT_3DES_DEC_CBC(*data); CVMX_MF_3DES_RESULT(*data); IOV_CONSUME(iov, data, data_i, data_l); crypt_len -= 8; } return 0; } /****************************************************************************/ /* AES functions */ int octo_aes_cbc_encrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { uint64_t *data, *pdata; int data_i, data_l; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x7) || (crypt_off + crypt_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load AES Key */ CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[0], 0); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[1], 1); if (od->octo_encklen == 16) { CVMX_MT_AES_KEY(0x0, 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 24) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 32) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[3], 3); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_AES_KEYLENGTH(od->octo_encklen / 8 - 1); CVMX_MT_AES_IV(((uint64_t *) ivp)[0], 0); CVMX_MT_AES_IV(((uint64_t *) ivp)[1], 1); while (crypt_off > 0) { IOV_CONSUME(iov, data, data_i, data_l); crypt_off -= 8; } while (crypt_len > 0) { pdata = data; CVMX_MT_AES_ENC_CBC0(*data); IOV_CONSUME(iov, data, data_i, data_l); CVMX_MT_AES_ENC_CBC1(*data); CVMX_MF_AES_RESULT(*pdata, 0); CVMX_MF_AES_RESULT(*data, 1); IOV_CONSUME(iov, data, data_i, data_l); crypt_len -= 16; } return 0; } int octo_aes_cbc_decrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { uint64_t *data, *pdata; int data_i, data_l; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x7) || (crypt_off + crypt_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load AES Key */ CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[0], 0); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[1], 1); if (od->octo_encklen == 16) { CVMX_MT_AES_KEY(0x0, 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 24) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 32) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[3], 3); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_AES_KEYLENGTH(od->octo_encklen / 8 - 1); CVMX_MT_AES_IV(((uint64_t *) ivp)[0], 0); CVMX_MT_AES_IV(((uint64_t *) ivp)[1], 1); while (crypt_off > 0) { IOV_CONSUME(iov, data, data_i, data_l); crypt_off -= 8; 
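/*
 * Illustrative sketch, not driver code: the shape of the 16-byte AES
 * step used by the loops here.  One 128-bit CBC block is fed to the
 * unit as two 64-bit words (CVMX_MT_AES_*_CBC0/CBC1) and read back
 * the same way, which is why crypt_len moves in steps of 16 while
 * the iovec cursor moves in steps of 8.  aes_cbc128() is a
 * hypothetical software stand-in for the coprocessor.
 */
#if 0
#include <stdint.h>

struct blk128 {
	uint64_t w0, w1;
};

static struct blk128
aes_cbc128(struct blk128 b)
{
	/* stand-in: the real transform happens inside the AES unit */
	b.w0 ^= 0x0123456789abcdefULL;
	b.w1 ^= 0xfedcba9876543210ULL;
	return (b);
}

static void
aes_cbc_step(uint64_t *first, uint64_t *second)
{
	struct blk128 b;

	b.w0 = *first;		/* CVMX_MT_AES_*_CBC0 */
	b.w1 = *second;		/* CVMX_MT_AES_*_CBC1 */
	b = aes_cbc128(b);
	*first = b.w0;		/* CVMX_MF_AES_RESULT(..., 0) */
	*second = b.w1;		/* CVMX_MF_AES_RESULT(..., 1) */
}
#endif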
} while (crypt_len > 0) { pdata = data; CVMX_MT_AES_DEC_CBC0(*data); IOV_CONSUME(iov, data, data_i, data_l); CVMX_MT_AES_DEC_CBC1(*data); CVMX_MF_AES_RESULT(*pdata, 0); CVMX_MF_AES_RESULT(*data, 1); IOV_CONSUME(iov, data, data_i, data_l); crypt_len -= 16; } return 0; } /****************************************************************************/ /* MD5 */ int octo_null_md5_encrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; uint64_t *data; uint64_t tmp1, tmp2; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || (auth_off & 0x7) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data, data_i, data_l); /* Load MD5 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); while (auth_off > 0) { IOV_CONSUME(iov, data, data_i, data_l); auth_off -= 8; } while (auth_len > 0) { CVM_LOAD_MD5_UNIT(*data, next); auth_len -= 8; IOV_CONSUME(iov, data, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_MD5_UNIT(tmp, next); } else { CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_MD5_UNIT(((uint64_t) 0x0ULL), next); } CVMX_ES64(tmp1, ((alen + 64) << 3)); CVM_LOAD_MD5_UNIT(tmp1, next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); CVMX_MT_HSH_DAT(0x8000000000000000ULL, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_ES64(tmp1, ((64 + 16) << 3)); CVMX_MT_HSH_STARTMD5(tmp1); /* save the HMAC */ IOV_INIT(iov, data, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data, data_i, data_l); icv_off -= 8; } CVMX_MF_HSH_IV(*data, 0); IOV_CONSUME(iov, data, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *(uint32_t *)data = (uint32_t) (tmp1 >> 32); return 0; } /****************************************************************************/ /* SHA1 */ int octo_null_sha1_encrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; uint64_t *data; uint64_t tmp1, tmp2, tmp3; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || (auth_off & 0x7) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data, data_i, data_l); /* Load SHA1 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); 
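/*
 * Illustrative sketch, not driver code: what octo_calc_hash() above
 * computes.  The HMAC inner/outer chaining values being loaded here
 * (octo_hminner/octo_hmouter) come from compressing one 64-byte block
 * of key ^ ipad (0x36...) and one of key ^ opad (0x5c...).  The data
 * pass then finishes with 0x80 padding and a bit count of
 * (alen + 64) << 3, counting the ipad block; the outer pass uses
 * (64 + 16) << 3 for MD5 or (64 + 20) << 3 for SHA-1.  compress64()
 * is a hypothetical stand-in that starts from the algorithm's
 * standard IV and returns the chaining value.
 */
#if 0
#include <stdint.h>
#include <string.h>

extern void compress64(uint64_t iv[3], const uint64_t blk[8]);

static void
hmac_precompute(const uint8_t *key, size_t klen,	/* klen <= 64 */
    uint64_t inner[3], uint64_t outer[3])
{
	uint64_t blk[8];
	int i;

	memset(blk, 0, sizeof(blk));
	memcpy(blk, key, klen);
	for (i = 0; i < 8; i++)
		blk[i] ^= 0x3636363636363636ULL;	/* ipad */
	compress64(inner, blk);		/* inner = H(K ^ ipad) */
	for (i = 0; i < 8; i++)
		blk[i] ^= 0x3636363636363636ULL ^ 0x5c5c5c5c5c5c5c5cULL;
	compress64(outer, blk);		/* outer = H(K ^ opad) */
}
#endif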
CVMX_MT_HSH_IV(od->octo_hminner[2], 2); while (auth_off > 0) { IOV_CONSUME(iov, data, data_i, data_l); auth_off -= 8; } while (auth_len > 0) { CVM_LOAD_SHA_UNIT(*data, next); auth_len -= 8; IOV_CONSUME(iov, data, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_MD5_UNIT(tmp, next); } else { CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_SHA_UNIT(((uint64_t) 0x0ULL), next); } CVM_LOAD_SHA_UNIT((uint64_t) ((alen + 64) << 3), next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); tmp3 = 0; CVMX_MF_HSH_IV(tmp3, 2); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_IV(od->octo_hmouter[2], 2); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); tmp3 |= 0x0000000080000000; CVMX_MT_HSH_DAT(tmp3, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_MT_HSH_STARTSHA((uint64_t) ((64 + 20) << 3)); /* save the HMAC */ IOV_INIT(iov, data, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data, data_i, data_l); icv_off -= 8; } CVMX_MF_HSH_IV(*data, 0); IOV_CONSUME(iov, data, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *(uint32_t *)data = (uint32_t) (tmp1 >> 32); return 0; } /****************************************************************************/ /* DES MD5 */ int octo_des_cbc_md5_encrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; union { uint32_t data32[2]; uint64_t data64[1]; } mydata; uint64_t *data = &mydata.data64[0]; uint32_t *data32; uint64_t tmp1, tmp2; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x3) || (crypt_off + crypt_len > iovlen) || (crypt_len & 0x7) || (auth_len & 0x7) || (auth_off & 0x3) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data32, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load 3DES Key */ CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 0); if (od->octo_encklen == 24) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[1], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[2], 2); } else if (od->octo_encklen == 8) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 2); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_3DES_IV(* (uint64_t *) ivp); /* Load MD5 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); while (crypt_off > 0 && auth_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); crypt_off -= 4; auth_off -= 4; } while (crypt_len > 0 || auth_len > 0) { uint32_t *first = data32; mydata.data32[0] = *first; IOV_CONSUME(iov, data32, data_i, data_l); mydata.data32[1] = *data32; if (crypt_off <= 0) { if (crypt_len > 0) { 
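/*
 * Illustrative sketch, not driver code: the staging pattern the
 * combined cipher+auth loops use.  Their iovec data is only
 * guaranteed 32-bit aligned, so each 64-bit unit (mydata above) is
 * gathered from two 32-bit words, handed to the cipher and hash
 * units as one uint64_t, then scattered back.
 */
#if 0
#include <stdint.h>

union stage {
	uint32_t w32[2];
	uint64_t w64;
};

static void
process_unit64(uint32_t *first, uint32_t *second,
    uint64_t (*xform64)(uint64_t))
{
	union stage s;

	s.w32[0] = *first;	/* mydata.data32[0] = *first */
	s.w32[1] = *second;	/* mydata.data32[1] = *data32 */
	s.w64 = xform64(s.w64);	/* cipher and/or hash step */
	*first = s.w32[0];
	*second = s.w32[1];
}
#endif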
CVMX_MT_3DES_ENC_CBC(*data); CVMX_MF_3DES_RESULT(*data); crypt_len -= 8; } } else crypt_off -= 8; if (auth_off <= 0) { if (auth_len > 0) { CVM_LOAD_MD5_UNIT(*data, next); auth_len -= 8; } } else auth_off -= 8; *first = mydata.data32[0]; *data32 = mydata.data32[1]; IOV_CONSUME(iov, data32, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_MD5_UNIT(tmp, next); } else { CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_MD5_UNIT(((uint64_t) 0x0ULL), next); } CVMX_ES64(tmp1, ((alen + 64) << 3)); CVM_LOAD_MD5_UNIT(tmp1, next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); CVMX_MT_HSH_DAT(0x8000000000000000ULL, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_ES64(tmp1, ((64 + 16) << 3)); CVMX_MT_HSH_STARTMD5(tmp1); /* save the HMAC */ IOV_INIT(iov, data32, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); icv_off -= 4; } CVMX_MF_HSH_IV(tmp1, 0); *data32 = (uint32_t) (tmp1 >> 32); IOV_CONSUME(iov, data32, data_i, data_l); *data32 = (uint32_t) tmp1; IOV_CONSUME(iov, data32, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *data32 = (uint32_t) (tmp1 >> 32); return 0; } int octo_des_cbc_md5_decrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; union { uint32_t data32[2]; uint64_t data64[1]; } mydata; uint64_t *data = &mydata.data64[0]; uint32_t *data32; uint64_t tmp1, tmp2; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x3) || (crypt_off + crypt_len > iovlen) || (crypt_len & 0x7) || (auth_len & 0x7) || (auth_off & 0x3) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data32, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load 3DES Key */ CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 0); if (od->octo_encklen == 24) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[1], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[2], 2); } else if (od->octo_encklen == 8) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 2); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_3DES_IV(* (uint64_t *) ivp); /* Load MD5 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); while (crypt_off > 0 && auth_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); crypt_off -= 4; auth_off -= 4; } while (crypt_len > 0 || auth_len > 0) { uint32_t *first = data32; mydata.data32[0] = *first; IOV_CONSUME(iov, data32, data_i, data_l); mydata.data32[1] = *data32; if (auth_off <= 0) { if (auth_len > 0) 
{ CVM_LOAD_MD5_UNIT(*data, next); auth_len -= 8; } } else auth_off -= 8; if (crypt_off <= 0) { if (crypt_len > 0) { CVMX_MT_3DES_DEC_CBC(*data); CVMX_MF_3DES_RESULT(*data); crypt_len -= 8; } } else crypt_off -= 8; *first = mydata.data32[0]; *data32 = mydata.data32[1]; IOV_CONSUME(iov, data32, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_MD5_UNIT(tmp, next); } else { CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_MD5_UNIT(((uint64_t) 0x0ULL), next); } CVMX_ES64(tmp1, ((alen + 64) << 3)); CVM_LOAD_MD5_UNIT(tmp1, next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); CVMX_MT_HSH_DAT(0x8000000000000000ULL, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_ES64(tmp1, ((64 + 16) << 3)); CVMX_MT_HSH_STARTMD5(tmp1); /* save the HMAC */ IOV_INIT(iov, data32, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); icv_off -= 4; } CVMX_MF_HSH_IV(tmp1, 0); *data32 = (uint32_t) (tmp1 >> 32); IOV_CONSUME(iov, data32, data_i, data_l); *data32 = (uint32_t) tmp1; IOV_CONSUME(iov, data32, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *data32 = (uint32_t) (tmp1 >> 32); return 0; } /****************************************************************************/ /* DES SHA */ int octo_des_cbc_sha1_encrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; union { uint32_t data32[2]; uint64_t data64[1]; } mydata; uint64_t *data = &mydata.data64[0]; uint32_t *data32; uint64_t tmp1, tmp2, tmp3; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x3) || (crypt_off + crypt_len > iovlen) || (crypt_len & 0x7) || (auth_len & 0x7) || (auth_off & 0x3) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data32, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load 3DES Key */ CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 0); if (od->octo_encklen == 24) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[1], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[2], 2); } else if (od->octo_encklen == 8) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 2); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_3DES_IV(* (uint64_t *) ivp); /* Load SHA1 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); CVMX_MT_HSH_IV(od->octo_hminner[2], 2); while (crypt_off > 0 && auth_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); crypt_off -= 4; auth_off -= 4; } while (crypt_len > 0 || auth_len > 0) { uint32_t *first 
= data32; mydata.data32[0] = *first; IOV_CONSUME(iov, data32, data_i, data_l); mydata.data32[1] = *data32; if (crypt_off <= 0) { if (crypt_len > 0) { CVMX_MT_3DES_ENC_CBC(*data); CVMX_MF_3DES_RESULT(*data); crypt_len -= 8; } } else crypt_off -= 8; if (auth_off <= 0) { if (auth_len > 0) { CVM_LOAD_SHA_UNIT(*data, next); auth_len -= 8; } } else auth_off -= 8; *first = mydata.data32[0]; *data32 = mydata.data32[1]; IOV_CONSUME(iov, data32, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_SHA_UNIT(tmp, next); } else { CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_SHA_UNIT(((uint64_t) 0x0ULL), next); } CVM_LOAD_SHA_UNIT((uint64_t) ((alen + 64) << 3), next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); tmp3 = 0; CVMX_MF_HSH_IV(tmp3, 2); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_IV(od->octo_hmouter[2], 2); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); tmp3 |= 0x0000000080000000; CVMX_MT_HSH_DAT(tmp3, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_MT_HSH_STARTSHA((uint64_t) ((64 + 20) << 3)); /* save the HMAC */ IOV_INIT(iov, data32, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); icv_off -= 4; } CVMX_MF_HSH_IV(tmp1, 0); *data32 = (uint32_t) (tmp1 >> 32); IOV_CONSUME(iov, data32, data_i, data_l); *data32 = (uint32_t) tmp1; IOV_CONSUME(iov, data32, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *data32 = (uint32_t) (tmp1 >> 32); return 0; } int octo_des_cbc_sha1_decrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; union { uint32_t data32[2]; uint64_t data64[1]; } mydata; uint64_t *data = &mydata.data64[0]; uint32_t *data32; uint64_t tmp1, tmp2, tmp3; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x3) || (crypt_off + crypt_len > iovlen) || (crypt_len & 0x7) || (auth_len & 0x7) || (auth_off & 0x3) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data32, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load 3DES Key */ CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 0); if (od->octo_encklen == 24) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[1], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[2], 2); } else if (od->octo_encklen == 8) { CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 1); CVMX_MT_3DES_KEY(((uint64_t *) od->octo_enckey)[0], 2); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_3DES_IV(* (uint64_t *) ivp); /* Load SHA1 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); CVMX_MT_HSH_IV(od->octo_hminner[2], 2); while (crypt_off > 0 && auth_off > 0) { IOV_CONSUME(iov, 
data32, data_i, data_l); crypt_off -= 4; auth_off -= 4; } while (crypt_len > 0 || auth_len > 0) { uint32_t *first = data32; mydata.data32[0] = *first; IOV_CONSUME(iov, data32, data_i, data_l); mydata.data32[1] = *data32; if (auth_off <= 0) { if (auth_len > 0) { CVM_LOAD_SHA_UNIT(*data, next); auth_len -= 8; } } else auth_off -= 8; if (crypt_off <= 0) { if (crypt_len > 0) { CVMX_MT_3DES_DEC_CBC(*data); CVMX_MF_3DES_RESULT(*data); crypt_len -= 8; } } else crypt_off -= 8; *first = mydata.data32[0]; *data32 = mydata.data32[1]; IOV_CONSUME(iov, data32, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_SHA_UNIT(tmp, next); } else { CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_SHA_UNIT(((uint64_t) 0x0ULL), next); } CVM_LOAD_SHA_UNIT((uint64_t) ((alen + 64) << 3), next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); tmp3 = 0; CVMX_MF_HSH_IV(tmp3, 2); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_IV(od->octo_hmouter[2], 2); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); tmp3 |= 0x0000000080000000; CVMX_MT_HSH_DAT(tmp3, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_MT_HSH_STARTSHA((uint64_t) ((64 + 20) << 3)); /* save the HMAC */ IOV_INIT(iov, data32, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); icv_off -= 4; } CVMX_MF_HSH_IV(tmp1, 0); *data32 = (uint32_t) (tmp1 >> 32); IOV_CONSUME(iov, data32, data_i, data_l); *data32 = (uint32_t) tmp1; IOV_CONSUME(iov, data32, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *data32 = (uint32_t) (tmp1 >> 32); return 0; } /****************************************************************************/ /* AES MD5 */ int octo_aes_cbc_md5_encrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; union { uint32_t data32[2]; uint64_t data64[1]; } mydata[2]; uint64_t *pdata = &mydata[0].data64[0]; uint64_t *data = &mydata[1].data64[0]; uint32_t *data32; uint64_t tmp1, tmp2; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x3) || (crypt_off + crypt_len > iovlen) || (crypt_len & 0x7) || (auth_len & 0x7) || (auth_off & 0x3) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data32, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load AES Key */ CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[0], 0); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[1], 1); if (od->octo_encklen == 16) { CVMX_MT_AES_KEY(0x0, 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 24) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 32) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); 
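/*
 * Illustrative sketch, not driver code: the AES key-load dispatch
 * repeated in each AES routine here.  Unused key words are zeroed,
 * and CVMX_MT_AES_KEYLENGTH takes octo_encklen / 8 - 1, i.e. 1, 2 or
 * 3 for AES-128/192/256.
 */
#if 0
#include <stdint.h>

static int
aes_key_words(const uint64_t *key, int klen, uint64_t w[4])
{
	if (klen != 16 && klen != 24 && klen != 32)
		return (-1);		/* -EINVAL in the driver */
	w[0] = key[0];
	w[1] = key[1];
	w[2] = (klen >= 24) ? key[2] : 0;
	w[3] = (klen == 32) ? key[3] : 0;
	/*
	 * The driver then does CVMX_MT_AES_KEY(w[i], i) for i = 0..3
	 * and CVMX_MT_AES_KEYLENGTH(klen / 8 - 1).
	 */
	return (0);
}
#endif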
CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[3], 3); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_AES_KEYLENGTH(od->octo_encklen / 8 - 1); CVMX_MT_AES_IV(((uint64_t *) ivp)[0], 0); CVMX_MT_AES_IV(((uint64_t *) ivp)[1], 1); /* Load MD5 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); while (crypt_off > 0 && auth_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); crypt_off -= 4; auth_off -= 4; } while (crypt_len > 0 || auth_len > 0) { uint32_t *pdata32[3]; pdata32[0] = data32; mydata[0].data32[0] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); pdata32[1] = data32; mydata[0].data32[1] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); pdata32[2] = data32; mydata[1].data32[0] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); mydata[1].data32[1] = *data32; if (crypt_off <= 0) { if (crypt_len > 0) { CVMX_MT_AES_ENC_CBC0(*pdata); CVMX_MT_AES_ENC_CBC1(*data); CVMX_MF_AES_RESULT(*pdata, 0); CVMX_MF_AES_RESULT(*data, 1); crypt_len -= 16; } } else crypt_off -= 16; if (auth_off <= 0) { if (auth_len > 0) { CVM_LOAD_MD5_UNIT(*pdata, next); CVM_LOAD_MD5_UNIT(*data, next); auth_len -= 16; } } else auth_off -= 16; *pdata32[0] = mydata[0].data32[0]; *pdata32[1] = mydata[0].data32[1]; *pdata32[2] = mydata[1].data32[0]; *data32 = mydata[1].data32[1]; IOV_CONSUME(iov, data32, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_MD5_UNIT(tmp, next); } else { CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_MD5_UNIT(((uint64_t) 0x0ULL), next); } CVMX_ES64(tmp1, ((alen + 64) << 3)); CVM_LOAD_MD5_UNIT(tmp1, next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); CVMX_MT_HSH_DAT(0x8000000000000000ULL, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_ES64(tmp1, ((64 + 16) << 3)); CVMX_MT_HSH_STARTMD5(tmp1); /* save the HMAC */ IOV_INIT(iov, data32, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); icv_off -= 4; } CVMX_MF_HSH_IV(tmp1, 0); *data32 = (uint32_t) (tmp1 >> 32); IOV_CONSUME(iov, data32, data_i, data_l); *data32 = (uint32_t) tmp1; IOV_CONSUME(iov, data32, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *data32 = (uint32_t) (tmp1 >> 32); return 0; } int octo_aes_cbc_md5_decrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; union { uint32_t data32[2]; uint64_t data64[1]; } mydata[2]; uint64_t *pdata = &mydata[0].data64[0]; uint64_t *data = &mydata[1].data64[0]; uint32_t *data32; uint64_t tmp1, tmp2; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x3) || (crypt_off + crypt_len > iovlen) || (crypt_len & 0x7) || (auth_len & 0x7) || (auth_off & 0x3) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d 
crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data32, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load AES Key */ CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[0], 0); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[1], 1); if (od->octo_encklen == 16) { CVMX_MT_AES_KEY(0x0, 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 24) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 32) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[3], 3); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_AES_KEYLENGTH(od->octo_encklen / 8 - 1); CVMX_MT_AES_IV(((uint64_t *) ivp)[0], 0); CVMX_MT_AES_IV(((uint64_t *) ivp)[1], 1); /* Load MD5 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); while (crypt_off > 0 && auth_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); crypt_off -= 4; auth_off -= 4; } while (crypt_len > 0 || auth_len > 0) { uint32_t *pdata32[3]; pdata32[0] = data32; mydata[0].data32[0] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); pdata32[1] = data32; mydata[0].data32[1] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); pdata32[2] = data32; mydata[1].data32[0] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); mydata[1].data32[1] = *data32; if (auth_off <= 0) { if (auth_len > 0) { CVM_LOAD_MD5_UNIT(*pdata, next); CVM_LOAD_MD5_UNIT(*data, next); auth_len -= 16; } } else auth_off -= 16; if (crypt_off <= 0) { if (crypt_len > 0) { CVMX_MT_AES_DEC_CBC0(*pdata); CVMX_MT_AES_DEC_CBC1(*data); CVMX_MF_AES_RESULT(*pdata, 0); CVMX_MF_AES_RESULT(*data, 1); crypt_len -= 16; } } else crypt_off -= 16; *pdata32[0] = mydata[0].data32[0]; *pdata32[1] = mydata[0].data32[1]; *pdata32[2] = mydata[1].data32[0]; *data32 = mydata[1].data32[1]; IOV_CONSUME(iov, data32, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_MD5_UNIT(tmp, next); } else { CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_MD5_UNIT(((uint64_t) 0x0ULL), next); } CVMX_ES64(tmp1, ((alen + 64) << 3)); CVM_LOAD_MD5_UNIT(tmp1, next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); CVMX_MT_HSH_DAT(0x8000000000000000ULL, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_ES64(tmp1, ((64 + 16) << 3)); CVMX_MT_HSH_STARTMD5(tmp1); /* save the HMAC */ IOV_INIT(iov, data32, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); icv_off -= 4; } CVMX_MF_HSH_IV(tmp1, 0); *data32 = (uint32_t) (tmp1 >> 32); IOV_CONSUME(iov, data32, data_i, data_l); *data32 = (uint32_t) tmp1; IOV_CONSUME(iov, data32, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *data32 = (uint32_t) (tmp1 >> 32); return 0; } /****************************************************************************/ /* AES SHA1 */ int octo_aes_cbc_sha1_encrypt( struct octo_sess 
*od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; union { uint32_t data32[2]; uint64_t data64[1]; } mydata[2]; uint64_t *pdata = &mydata[0].data64[0]; uint64_t *data = &mydata[1].data64[0]; uint32_t *data32; uint64_t tmp1, tmp2, tmp3; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x3) || (crypt_off + crypt_len > iovlen) || (crypt_len & 0x7) || (auth_len & 0x7) || (auth_off & 0x3) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data32, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load AES Key */ CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[0], 0); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[1], 1); if (od->octo_encklen == 16) { CVMX_MT_AES_KEY(0x0, 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 24) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 32) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[3], 3); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_AES_KEYLENGTH(od->octo_encklen / 8 - 1); CVMX_MT_AES_IV(((uint64_t *) ivp)[0], 0); CVMX_MT_AES_IV(((uint64_t *) ivp)[1], 1); /* Load SHA IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); CVMX_MT_HSH_IV(od->octo_hminner[2], 2); while (crypt_off > 0 && auth_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); crypt_off -= 4; auth_off -= 4; } while (crypt_len > 0 || auth_len > 0) { uint32_t *pdata32[3]; pdata32[0] = data32; mydata[0].data32[0] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); pdata32[1] = data32; mydata[0].data32[1] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); pdata32[2] = data32; mydata[1].data32[0] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); mydata[1].data32[1] = *data32; if (crypt_off <= 0) { if (crypt_len > 0) { CVMX_MT_AES_ENC_CBC0(*pdata); CVMX_MT_AES_ENC_CBC1(*data); CVMX_MF_AES_RESULT(*pdata, 0); CVMX_MF_AES_RESULT(*data, 1); crypt_len -= 16; } } else crypt_off -= 16; if (auth_off <= 0) { if (auth_len > 0) { CVM_LOAD_SHA_UNIT(*pdata, next); CVM_LOAD_SHA_UNIT(*data, next); auth_len -= 16; } } else auth_off -= 16; *pdata32[0] = mydata[0].data32[0]; *pdata32[1] = mydata[0].data32[1]; *pdata32[2] = mydata[1].data32[0]; *data32 = mydata[1].data32[1]; IOV_CONSUME(iov, data32, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_SHA_UNIT(tmp, next); } else { CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_SHA_UNIT(((uint64_t) 0x0ULL), next); } CVM_LOAD_SHA_UNIT((uint64_t) ((alen + 64) << 3), next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); tmp3 = 0; CVMX_MF_HSH_IV(tmp3, 2); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 
0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_IV(od->octo_hmouter[2], 2); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); tmp3 |= 0x0000000080000000; CVMX_MT_HSH_DAT(tmp3, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_MT_HSH_STARTSHA((uint64_t) ((64 + 20) << 3)); /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_MD5_UNIT(tmp, next); } else { CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); #endif /* save the HMAC */ IOV_INIT(iov, data32, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); icv_off -= 4; } CVMX_MF_HSH_IV(tmp1, 0); *data32 = (uint32_t) (tmp1 >> 32); IOV_CONSUME(iov, data32, data_i, data_l); *data32 = (uint32_t) tmp1; IOV_CONSUME(iov, data32, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *data32 = (uint32_t) (tmp1 >> 32); return 0; } int octo_aes_cbc_sha1_decrypt( struct octo_sess *od, struct iovec *iov, size_t iovcnt, size_t iovlen, int auth_off, int auth_len, int crypt_off, int crypt_len, int icv_off, uint8_t *ivp) { - register int next = 0; + int next = 0; union { uint32_t data32[2]; uint64_t data64[1]; } mydata[2]; uint64_t *pdata = &mydata[0].data64[0]; uint64_t *data = &mydata[1].data64[0]; uint32_t *data32; uint64_t tmp1, tmp2, tmp3; int data_i, data_l, alen = auth_len; dprintf("%s()\n", __func__); if (__predict_false(od == NULL || iov==NULL || iovlen==0 || ivp==NULL || (crypt_off & 0x3) || (crypt_off + crypt_len > iovlen) || (crypt_len & 0x7) || (auth_len & 0x7) || (auth_off & 0x3) || (auth_off + auth_len > iovlen))) { dprintf("%s: Bad parameters od=%p iov=%p iovlen=%jd " "auth_off=%d auth_len=%d crypt_off=%d crypt_len=%d " "icv_off=%d ivp=%p\n", __func__, od, iov, iovlen, auth_off, auth_len, crypt_off, crypt_len, icv_off, ivp); return -EINVAL; } IOV_INIT(iov, data32, data_i, data_l); CVMX_PREFETCH0(ivp); CVMX_PREFETCH0(od->octo_enckey); /* load AES Key */ CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[0], 0); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[1], 1); if (od->octo_encklen == 16) { CVMX_MT_AES_KEY(0x0, 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 24) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(0x0, 3); } else if (od->octo_encklen == 32) { CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[2], 2); CVMX_MT_AES_KEY(((uint64_t *) od->octo_enckey)[3], 3); } else { dprintf("%s: Bad key length %d\n", __func__, od->octo_encklen); return -EINVAL; } CVMX_MT_AES_KEYLENGTH(od->octo_encklen / 8 - 1); CVMX_MT_AES_IV(((uint64_t *) ivp)[0], 0); CVMX_MT_AES_IV(((uint64_t *) ivp)[1], 1); /* Load MD5 IV */ CVMX_MT_HSH_IV(od->octo_hminner[0], 0); CVMX_MT_HSH_IV(od->octo_hminner[1], 1); CVMX_MT_HSH_IV(od->octo_hminner[2], 2); while (crypt_off > 0 && auth_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); crypt_off -= 4; auth_off -= 4; } while (crypt_len > 0 || auth_len > 0) { uint32_t *pdata32[3]; pdata32[0] = data32; mydata[0].data32[0] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); pdata32[1] = data32; mydata[0].data32[1] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); pdata32[2] = data32; mydata[1].data32[0] = *data32; IOV_CONSUME(iov, data32, data_i, data_l); mydata[1].data32[1] = *data32; if (auth_off <= 0) { if (auth_len > 0) { CVM_LOAD_SHA_UNIT(*pdata, next); CVM_LOAD_SHA_UNIT(*data, next); 
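/*
 * Illustrative sketch, not driver code: what the CVM_LOAD_SHA_UNIT
 * and CVM_LOAD_MD5_UNIT macros keep track of.  The hash unit consumes
 * a 64-byte block as eight 64-bit stores; the eighth store also kicks
 * off the compression, and "next" is simply the slot index modulo 8.
 * put()/start() are hypothetical stand-ins for CVMX_MT_HSH_DAT and
 * CVMX_MT_HSH_STARTSHA/STARTMD5.
 */
#if 0
#include <stdint.h>

static void
load_hash_word(uint64_t dat, int *next,
    void (*put)(uint64_t, int), void (*start)(uint64_t))
{
	if (*next == 7) {
		start(dat);		/* eighth word: run the block */
		*next = 0;
	} else {
		put(dat, *next);	/* stage word in slot *next */
		(*next)++;
	}
}
#endif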
auth_len -= 16; } } else auth_off -= 16; if (crypt_off <= 0) { if (crypt_len > 0) { CVMX_MT_AES_DEC_CBC0(*pdata); CVMX_MT_AES_DEC_CBC1(*data); CVMX_MF_AES_RESULT(*pdata, 0); CVMX_MF_AES_RESULT(*data, 1); crypt_len -= 16; } } else crypt_off -= 16; *pdata32[0] = mydata[0].data32[0]; *pdata32[1] = mydata[0].data32[1]; *pdata32[2] = mydata[1].data32[0]; *data32 = mydata[1].data32[1]; IOV_CONSUME(iov, data32, data_i, data_l); } /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_SHA_UNIT(tmp, next); } else { CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_SHA_UNIT(0x8000000000000000ULL, next); #endif /* Finish Inner hash */ while (next != 7) { CVM_LOAD_SHA_UNIT(((uint64_t) 0x0ULL), next); } CVM_LOAD_SHA_UNIT((uint64_t) ((alen + 64) << 3), next); /* Get the inner hash of HMAC */ CVMX_MF_HSH_IV(tmp1, 0); CVMX_MF_HSH_IV(tmp2, 1); tmp3 = 0; CVMX_MF_HSH_IV(tmp3, 2); /* Initialize hash unit */ CVMX_MT_HSH_IV(od->octo_hmouter[0], 0); CVMX_MT_HSH_IV(od->octo_hmouter[1], 1); CVMX_MT_HSH_IV(od->octo_hmouter[2], 2); CVMX_MT_HSH_DAT(tmp1, 0); CVMX_MT_HSH_DAT(tmp2, 1); tmp3 |= 0x0000000080000000; CVMX_MT_HSH_DAT(tmp3, 2); CVMX_MT_HSH_DATZ(3); CVMX_MT_HSH_DATZ(4); CVMX_MT_HSH_DATZ(5); CVMX_MT_HSH_DATZ(6); CVMX_MT_HSH_STARTSHA((uint64_t) ((64 + 20) << 3)); /* finish the hash */ CVMX_PREFETCH0(od->octo_hmouter); #if 0 if (__predict_false(inplen)) { uint64_t tmp = 0; uint8_t *p = (uint8_t *) & tmp; p[inplen] = 0x80; do { inplen--; p[inplen] = ((uint8_t *) data)[inplen]; } while (inplen); CVM_LOAD_MD5_UNIT(tmp, next); } else { CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); } #else CVM_LOAD_MD5_UNIT(0x8000000000000000ULL, next); #endif /* save the HMAC */ IOV_INIT(iov, data32, data_i, data_l); while (icv_off > 0) { IOV_CONSUME(iov, data32, data_i, data_l); icv_off -= 4; } CVMX_MF_HSH_IV(tmp1, 0); *data32 = (uint32_t) (tmp1 >> 32); IOV_CONSUME(iov, data32, data_i, data_l); *data32 = (uint32_t) tmp1; IOV_CONSUME(iov, data32, data_i, data_l); CVMX_MF_HSH_IV(tmp1, 1); *data32 = (uint32_t) (tmp1 >> 32); return 0; } /****************************************************************************/ Index: stable/11/sys/mips/mips/vm_machdep.c =================================================================== --- stable/11/sys/mips/mips/vm_machdep.c (revision 331642) +++ stable/11/sys/mips/mips/vm_machdep.c (revision 331643) @@ -1,626 +1,625 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * from: src/sys/i386/i386/vm_machdep.c,v 1.132.2.2 2000/08/26 04:19:26 yokota * JNPR: vm_machdep.c,v 1.8.2.2 2007/08/16 15:59:17 girish */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Duplicated from asm.h */ #if defined(__mips_o32) #define SZREG 4 #else #define SZREG 8 #endif #if defined(__mips_o32) || defined(__mips_o64) #define CALLFRAME_SIZ (SZREG * (4 + 2)) #elif defined(__mips_n32) || defined(__mips_n64) #define CALLFRAME_SIZ (SZREG * 4) #endif /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child is * ready to run and return to user mode. */ void -cpu_fork(register struct thread *td1,register struct proc *p2, - struct thread *td2,int flags) +cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { - register struct proc *p1; + struct proc *p1; struct pcb *pcb2; p1 = td1->td_proc; if ((flags & RFPROC) == 0) return; /* It is assumed that vm_thread_alloc() called * cpu_thread_alloc() before cpu_fork is called. */ /* Point the pcb to the top of the stack */ pcb2 = td2->td_pcb; /* Copy p1's pcb, note that in this case * our pcb also includes the td_frame being copied * too. The older mips2 code did an additional copy * of the td_frame, for us that's not needed any * longer (this copy does them both) */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point mdproc and then copy over td1's contents; * md_proc is empty for MIPS */ td2->td_md.md_flags = td1->td_md.md_flags & MDTD_FPUSED; /* * Set up return-value registers as the fork() libc stub expects.
*/ td2->td_frame->v0 = 0; td2->td_frame->v1 = 1; td2->td_frame->a3 = 0; if (td1 == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td1); pcb2->pcb_context[PCB_REG_RA] = (register_t)(intptr_t)fork_trampoline; /* Make sp 64-bit aligned */ pcb2->pcb_context[PCB_REG_SP] = (register_t)(((vm_offset_t)td2->td_pcb & ~(sizeof(__int64_t) - 1)) - CALLFRAME_SIZ); pcb2->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)fork_return; pcb2->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)td2; pcb2->pcb_context[PCB_REG_S2] = (register_t)(intptr_t)td2->td_frame; pcb2->pcb_context[PCB_REG_SR] = mips_rd_status() & (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_INT_MASK); /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) * and/or bits in other standard MIPS registers (if CPU-Specific) * that are needed. */ td2->td_md.md_tls = td1->td_md.md_tls; td2->td_md.md_saved_intr = MIPS_SR_INT_IE; td2->td_md.md_spinlock_count = 1; #ifdef CPU_CNMIPS if (td1->td_md.md_flags & MDTD_COP2USED) { if (td1->td_md.md_cop2owner == COP2_OWNER_USERLAND) { if (td1->td_md.md_ucop2) octeon_cop2_save(td1->td_md.md_ucop2); else panic("cpu_fork: ucop2 is NULL but COP2 is enabled"); } else { if (td1->td_md.md_cop2) octeon_cop2_save(td1->td_md.md_cop2); else panic("cpu_fork: cop2 is NULL but COP2 is enabled"); } } if (td1->td_md.md_cop2) { td2->td_md.md_cop2 = octeon_cop2_alloc_ctx(); memcpy(td2->td_md.md_cop2, td1->td_md.md_cop2, sizeof(*td1->td_md.md_cop2)); } if (td1->td_md.md_ucop2) { td2->td_md.md_ucop2 = octeon_cop2_alloc_ctx(); memcpy(td2->td_md.md_ucop2, td1->td_md.md_ucop2, sizeof(*td1->td_md.md_ucop2)); } td2->td_md.md_cop2owner = td1->td_md.md_cop2owner; pcb2->pcb_context[PCB_REG_SR] |= MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX | MIPS_SR_SX; /* Clear COP2 bits for userland & kernel */ td2->td_frame->sr &= ~MIPS_SR_COP_2_BIT; pcb2->pcb_context[PCB_REG_SR] &= ~MIPS_SR_COP_2_BIT; #endif } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)func; td->td_pcb->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)arg; } void cpu_exit(struct thread *td) { } void cpu_thread_exit(struct thread *td) { if (PCPU_GET(fpcurthread) == td) PCPU_GET(fpcurthread) = (struct thread *)0; #ifdef CPU_CNMIPS if (td->td_md.md_cop2) memset(td->td_md.md_cop2, 0, sizeof(*td->td_md.md_cop2)); if (td->td_md.md_ucop2) memset(td->td_md.md_ucop2, 0, sizeof(*td->td_md.md_ucop2)); #endif } void cpu_thread_free(struct thread *td) { #ifdef CPU_CNMIPS if (td->td_md.md_cop2) octeon_cop2_free_ctx(td->td_md.md_cop2); if (td->td_md.md_ucop2) octeon_cop2_free_ctx(td->td_md.md_ucop2); td->td_md.md_cop2 = NULL; td->td_md.md_ucop2 = NULL; #endif } void cpu_thread_clean(struct thread *td) { } void cpu_thread_swapin(struct thread *td) { pt_entry_t *pte; int i; /* * The kstack may be at a different physical address now. * Cache the PTEs for the Kernel stack in the machine dependent * part of the thread struct so cpu_switch() can quickly map in * the pcb struct and kernel stack. 
*/ for (i = 0; i < KSTACK_PAGES; i++) { pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE); td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK; } } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { pt_entry_t *pte; int i; KASSERT((td->td_kstack & (1 << PAGE_SHIFT)) == 0, ("kernel stack must be aligned.")); td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = &td->td_pcb->pcb_regs; for (i = 0; i < KSTACK_PAGES; i++) { pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE); td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK; } } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *locr0 = td->td_frame; unsigned int code; int quad_syscall; code = locr0->v0; quad_syscall = 0; #if defined(__mips_n32) || defined(__mips_n64) #ifdef COMPAT_FREEBSD32 if (code == SYS___syscall && SV_PROC_FLAG(td->td_proc, SV_ILP32)) quad_syscall = 1; #endif #else if (code == SYS___syscall) quad_syscall = 1; #endif if (code == SYS_syscall) code = locr0->a0; else if (code == SYS___syscall) { if (quad_syscall) code = _QUAD_LOWWORD ? locr0->a1 : locr0->a0; else code = locr0->a0; } switch (error) { case 0: if (quad_syscall && code != SYS_lseek) { /* * System call invoked through the * SYS___syscall interface but the * return value is really just 32 * bits. */ locr0->v0 = td->td_retval[0]; if (_QUAD_LOWWORD) locr0->v1 = td->td_retval[0]; locr0->a3 = 0; } else { locr0->v0 = td->td_retval[0]; locr0->v1 = td->td_retval[1]; locr0->a3 = 0; } break; case ERESTART: locr0->pc = td->td_pcb->pcb_tpc; break; case EJUSTRETURN: break; /* nothing to do */ default: if (quad_syscall && code != SYS_lseek) { locr0->v0 = error; if (_QUAD_LOWWORD) locr0->v1 = error; locr0->a3 = 1; } else { locr0->v0 = error; locr0->a3 = 1; } } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. * * XXXKSE It might be a good idea to simply skip this as * the values of the other registers may be unimportant. * This would remove any requirement for knowing the KSE * at this time (see the matching comment below for * more analysis) (need a good safe default). * In MIPS, the trapframe is the first element of the PCB * and gets copied when we copy the PCB. No separate copy * is needed. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); /* * Set registers for trampoline to user mode. */ pcb2->pcb_context[PCB_REG_RA] = (register_t)(intptr_t)fork_trampoline; /* Make sp 64-bit aligned */ pcb2->pcb_context[PCB_REG_SP] = (register_t)(((vm_offset_t)td->td_pcb & ~(sizeof(__int64_t) - 1)) - CALLFRAME_SIZ); pcb2->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)fork_return; pcb2->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)td; pcb2->pcb_context[PCB_REG_S2] = (register_t)(intptr_t)td->td_frame; /* Don't set the IE bit in SR;
sched lock release will take care of it */ pcb2->pcb_context[PCB_REG_SR] = mips_rd_status() & (MIPS_SR_PX | MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_INT_MASK); /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) * that are needed. */ /* Setup to release spin count in in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_intr = MIPS_SR_INT_IE; #if 0 /* Maybe we need to fix this? */ td->td_md.md_saved_sr = ( (MIPS_SR_COP_2_BIT | MIPS_SR_COP_0_BIT) | (MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX | MIPS_SR_SX) | (MIPS_SR_INT_IE | MIPS_HARD_INT_MASK)); #endif } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf; register_t sp; /* * At the point where a function is called, sp must be 8 * byte aligned[for compatibility with 64-bit CPUs] * in ``See MIPS Run'' by D. Sweetman, p. 269 * align stack */ sp = (((intptr_t)stack->ss_sp + stack->ss_size) & ~0x7) - CALLFRAME_SIZ; /* * Set the trap frame to point at the beginning of the uts * function. */ tf = td->td_frame; bzero(tf, sizeof(struct trapframe)); tf->sp = sp; tf->pc = (register_t)(intptr_t)entry; /* * MIPS ABI requires T9 to be the same as PC * in subroutine entry point */ tf->t9 = (register_t)(intptr_t)entry; tf->a0 = (register_t)(intptr_t)arg; /* * Keep interrupt mask */ td->td_frame->sr = MIPS_SR_KSU_USER | MIPS_SR_EXL | MIPS_SR_INT_IE | (mips_rd_status() & MIPS_SR_INT_MASK); #if defined(__mips_n32) td->td_frame->sr |= MIPS_SR_PX; #elif defined(__mips_n64) td->td_frame->sr |= MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX; #endif /* tf->sr |= (ALL_INT_MASK & idle_mask) | SR_INT_ENAB; */ /**XXX the above may now be wrong -- mips2 implements this as panic */ /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) * that are needed. */ } /* * Implement the pre-zeroed page mechanism. * This routine is called from the idle loop. */ #define ZIDLE_LO(v) ((v) * 2 / 3) #define ZIDLE_HI(v) ((v) * 4 / 5) /* * Software interrupt handler for queued VM system processing. 
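 * Today the only queued work is busdma bounce-buffer processing:
 * swi_vm() below simply runs busdma_swi() when busdma_swi_pending is set.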
*/ void swi_vm(void *dummy) { if (busdma_swi_pending) busdma_swi(); } int cpu_set_user_tls(struct thread *td, void *tls_base) { td->td_md.md_tls = (char*)tls_base; return (0); } #ifdef DDB #include #define DB_PRINT_REG(ptr, regname) \ db_printf(" %-12s %p\n", #regname, (void *)(intptr_t)((ptr)->regname)) #define DB_PRINT_REG_ARRAY(ptr, arrname, regname) \ db_printf(" %-12s %p\n", #regname, (void *)(intptr_t)((ptr)->arrname[regname])) static void dump_trapframe(struct trapframe *trapframe) { db_printf("Trapframe at %p\n", trapframe); DB_PRINT_REG(trapframe, zero); DB_PRINT_REG(trapframe, ast); DB_PRINT_REG(trapframe, v0); DB_PRINT_REG(trapframe, v1); DB_PRINT_REG(trapframe, a0); DB_PRINT_REG(trapframe, a1); DB_PRINT_REG(trapframe, a2); DB_PRINT_REG(trapframe, a3); #if defined(__mips_n32) || defined(__mips_n64) DB_PRINT_REG(trapframe, a4); DB_PRINT_REG(trapframe, a5); DB_PRINT_REG(trapframe, a6); DB_PRINT_REG(trapframe, a7); DB_PRINT_REG(trapframe, t0); DB_PRINT_REG(trapframe, t1); DB_PRINT_REG(trapframe, t2); DB_PRINT_REG(trapframe, t3); #else DB_PRINT_REG(trapframe, t0); DB_PRINT_REG(trapframe, t1); DB_PRINT_REG(trapframe, t2); DB_PRINT_REG(trapframe, t3); DB_PRINT_REG(trapframe, t4); DB_PRINT_REG(trapframe, t5); DB_PRINT_REG(trapframe, t6); DB_PRINT_REG(trapframe, t7); #endif DB_PRINT_REG(trapframe, s0); DB_PRINT_REG(trapframe, s1); DB_PRINT_REG(trapframe, s2); DB_PRINT_REG(trapframe, s3); DB_PRINT_REG(trapframe, s4); DB_PRINT_REG(trapframe, s5); DB_PRINT_REG(trapframe, s6); DB_PRINT_REG(trapframe, s7); DB_PRINT_REG(trapframe, t8); DB_PRINT_REG(trapframe, t9); DB_PRINT_REG(trapframe, k0); DB_PRINT_REG(trapframe, k1); DB_PRINT_REG(trapframe, gp); DB_PRINT_REG(trapframe, sp); DB_PRINT_REG(trapframe, s8); DB_PRINT_REG(trapframe, ra); DB_PRINT_REG(trapframe, sr); DB_PRINT_REG(trapframe, mullo); DB_PRINT_REG(trapframe, mulhi); DB_PRINT_REG(trapframe, badvaddr); DB_PRINT_REG(trapframe, cause); DB_PRINT_REG(trapframe, pc); } DB_SHOW_COMMAND(pcb, ddb_dump_pcb) { struct thread *td; struct pcb *pcb; struct trapframe *trapframe; /* Determine which thread to examine. */ if (have_addr) td = db_lookup_thread(addr, true); else td = curthread; pcb = td->td_pcb; db_printf("Thread %d at %p\n", td->td_tid, td); db_printf("PCB at %p\n", pcb); trapframe = &pcb->pcb_regs; dump_trapframe(trapframe); db_printf("PCB Context:\n"); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S0); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S1); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S2); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S3); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S4); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S5); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S6); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S7); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_SP); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S8); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_RA); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_SR); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_GP); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_PC); db_printf("PCB onfault = %p\n", pcb->pcb_onfault); db_printf("md_saved_intr = 0x%0lx\n", (long)td->td_md.md_saved_intr); db_printf("md_spinlock_count = %d\n", td->td_md.md_spinlock_count); if (td->td_frame != trapframe) { db_printf("td->td_frame %p is not the same as pcb_regs %p\n", td->td_frame, trapframe); } } /* * Dump the trapframe beginning at address specified by first argument. 
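 *
 * Typical usage from the ddb prompt (the address shown is illustrative):
 *
 *	db> show trapframe 0xffffffff80e7a000
 *
 * With no address the command silently returns, as the code below shows.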
*/ DB_SHOW_COMMAND(trapframe, ddb_dump_trapframe) { if (!have_addr) return; dump_trapframe((struct trapframe *)addr); } #endif /* DDB */ Index: stable/11/sys/net/altq/altq_rio.c =================================================================== --- stable/11/sys/net/altq/altq_rio.c (revision 331642) +++ stable/11/sys/net/altq/altq_rio.c (revision 331643) @@ -1,844 +1,844 @@ /*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1990-1994 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Computer Systems * Engineering Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* * RIO: RED with IN/OUT bit * described in * "Explicit Allocation of Best Effort Packet Delivery Service" * David D. Clark and Wenjia Fang, MIT Lab for Computer Science * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} * * this implementation is extended to support more than 2 drop precedence * values as described in RFC2597 (Assured Forwarding PHB Group). * */ /* * AF DS (differentiated service) codepoints. * (classes can be mapped to CBQ or H-FSC classes.) * * 0 1 2 3 4 5 6 7 * +---+---+---+---+---+---+---+---+ * | CLASS |DropPre| 0 | CU | * +---+---+---+---+---+---+---+---+ * * class 1: 001 * class 2: 010 * class 3: 011 * class 4: 100 * * low drop prec: 01 * medium drop prec: 10 * high drop prec: 11 */ /* normal red parameters */ #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ /* q_weight = 0.00195 */ /* red parameters for a slow link */ #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ /* q_weight = 0.0078125 */ /* red parameters for a very slow link (e.g., dialup) */ #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ /* q_weight = 0.015625 */ /* fixed-point uses a 12-bit fractional part */ #define FP_SHIFT 12 /* fixed-point shift */ /* red parameters for drop probability */ #define INV_P_MAX 10 /* inverse of max drop probability */ #define TH_MIN 5 /* min threshold */ #define TH_MAX 15 /* max threshold */ #define RIO_LIMIT 60 /* default max queue length */ #define RIO_STATS /* collect statistics */ #define TV_DELTA(a, b, delta) { \ - register int xxs; \ + int xxs; \ \ delta = (a)->tv_usec - (b)->tv_usec; \ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \ if (xxs < 0) { \ delta = 60000000; \ } else if (xxs > 4) { \ if (xxs > 60) \ delta = 60000000; \ else \ delta += xxs * 1000000; \ } else while (xxs > 0) { \ delta += 1000000; \ xxs--; \ } \ } \ } #ifdef ALTQ3_COMPAT /* rio_list keeps all rio_queue_t's allocated.
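 * They are chained through rq_next: RIO_IF_ATTACH pushes a new queue onto
 * the head of the list, rio_detach() unlinks a single entry, and
 * rioclose() tears the whole list down.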
*/ static rio_queue_t *rio_list = NULL; #endif /* default rio parameter values */ static struct redparams default_rio_params[RIO_NDROPPREC] = { /* th_min, th_max, inv_pmax */ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */ }; /* internal function prototypes */ static int dscp2index(u_int8_t); #ifdef ALTQ3_COMPAT static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *rio_dequeue(struct ifaltq *, int); static int rio_request(struct ifaltq *, int, void *); static int rio_detach(rio_queue_t *); /* * rio device interface */ altqdev_decl(rio); #endif /* ALTQ3_COMPAT */ rio_t * rio_alloc(int weight, struct redparams *params, int flags, int pkttime) { rio_t *rp; int w, i; int npkts_per_sec; rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (rp == NULL) return (NULL); rp->rio_flags = flags; if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ rp->rio_pkttime = 800; else rp->rio_pkttime = pkttime; if (weight != 0) rp->rio_weight = weight; else { /* use default */ rp->rio_weight = W_WEIGHT; /* when the link is very slow, adjust red parameters */ npkts_per_sec = 1000000 / rp->rio_pkttime; if (npkts_per_sec < 50) { /* up to about 400Kbps */ rp->rio_weight = W_WEIGHT_2; } else if (npkts_per_sec < 300) { /* up to about 2.4Mbps */ rp->rio_weight = W_WEIGHT_1; } } /* calculate wshift. weight must be power of 2 */ w = rp->rio_weight; for (i = 0; w > 1; i++) w = w >> 1; rp->rio_wshift = i; w = 1 << rp->rio_wshift; if (w != rp->rio_weight) { printf("invalid weight value %d for red! use %d\n", rp->rio_weight, w); rp->rio_weight = w; } /* allocate weight table */ rp->rio_wtab = wtab_alloc(rp->rio_weight); for (i = 0; i < RIO_NDROPPREC; i++) { struct dropprec_state *prec = &rp->rio_precstate[i]; prec->avg = 0; prec->idle = 1; if (params == NULL || params[i].inv_pmax == 0) prec->inv_pmax = default_rio_params[i].inv_pmax; else prec->inv_pmax = params[i].inv_pmax; if (params == NULL || params[i].th_min == 0) prec->th_min = default_rio_params[i].th_min; else prec->th_min = params[i].th_min; if (params == NULL || params[i].th_max == 0) prec->th_max = default_rio_params[i].th_max; else prec->th_max = params[i].th_max; /* * th_min_s and th_max_s are scaled versions of th_min * and th_max to be compared with avg. */ prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT); prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT); /* * precompute probability denominator * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ prec->probd = (2 * (prec->th_max - prec->th_min) * prec->inv_pmax) << FP_SHIFT; microtime(&prec->last); } return (rp); } void rio_destroy(rio_t *rp) { wtab_destroy(rp->rio_wtab); free(rp, M_DEVBUF); } void rio_getstats(rio_t *rp, struct redstats *sp) { int i; for (i = 0; i < RIO_NDROPPREC; i++) { bcopy(&rp->q_stats[i], sp, sizeof(struct redstats)); sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; sp++; } } #if (RIO_NDROPPREC == 3) /* * internally, a drop precedence value is converted to an index * starting from 0. */ static int dscp2index(u_int8_t dscp) { int dpindex = dscp & AF_DROPPRECMASK; if (dpindex == 0) return (0); return ((dpindex >> 3) - 1); } #endif #if 1 /* * kludge: when a packet is dequeued, we need to know its drop precedence * in order to keep the queue length of each drop precedence. * use m_pkthdr.rcvif to pass this info. 
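 * The index is simply cast to a pointer on enqueue and cast back (and
 * cleared) on dequeue; e.g., with the macros defined below,
 *
 *	RIOM_SET_PRECINDEX(m, 2);	leaves m->m_pkthdr.rcvif == (void *)2
 *	idx = RIOM_GET_PRECINDEX(m);	yields idx == 2, rcvif reset to NULL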
*/ #define RIOM_SET_PRECINDEX(m, idx) \ do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0) #define RIOM_GET_PRECINDEX(m) \ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \ (m)->m_pkthdr.rcvif = NULL; idx; }) #endif int rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) { int avg, droptype; u_int8_t dsfield, odsfield; int dpindex, i, n, t; struct timeval now; struct dropprec_state *prec; dsfield = odsfield = read_dsfield(m, pktattr); dpindex = dscp2index(dsfield); /* * update avg of the precedence states whose drop precedence * is larger than or equal to the drop precedence of the packet */ now.tv_sec = 0; for (i = dpindex; i < RIO_NDROPPREC; i++) { prec = &rp->rio_precstate[i]; avg = prec->avg; if (prec->idle) { prec->idle = 0; if (now.tv_sec == 0) microtime(&now); t = (now.tv_sec - prec->last.tv_sec); if (t > 60) avg = 0; else { t = t * 1000000 + (now.tv_usec - prec->last.tv_usec); n = t / rp->rio_pkttime; /* calculate (avg = (1 - Wq)^n * avg) */ if (n > 0) avg = (avg >> FP_SHIFT) * pow_w(rp->rio_wtab, n); } } /* run estimator. (avg is scaled by WEIGHT in fixed-point) */ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift); prec->avg = avg; /* save the new value */ /* * count keeps a tally of arriving traffic that has not * been dropped. */ prec->count++; } prec = &rp->rio_precstate[dpindex]; avg = prec->avg; /* see if we drop early */ droptype = DTYPE_NODROP; if (avg >= prec->th_min_s && prec->qlen > 1) { if (avg >= prec->th_max_s) { /* avg >= th_max: forced drop */ droptype = DTYPE_FORCED; } else if (prec->old == 0) { /* first exceeds th_min */ prec->count = 1; prec->old = 1; } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift, prec->probd, prec->count)) { /* unforced drop by red */ droptype = DTYPE_EARLY; } } else { /* avg < th_min */ prec->old = 0; } /* * if the queue length hits the hard limit, it's a forced drop. */ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) droptype = DTYPE_FORCED; if (droptype != DTYPE_NODROP) { /* always drop incoming packet (as opposed to randomdrop) */ for (i = dpindex; i < RIO_NDROPPREC; i++) rp->rio_precstate[i].count = 0; #ifdef RIO_STATS if (droptype == DTYPE_EARLY) rp->q_stats[dpindex].drop_unforced++; else rp->q_stats[dpindex].drop_forced++; PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m)); #endif m_freem(m); return (-1); } for (i = dpindex; i < RIO_NDROPPREC; i++) rp->rio_precstate[i].qlen++; /* save drop precedence index in mbuf hdr */ RIOM_SET_PRECINDEX(m, dpindex); if (rp->rio_flags & RIOF_CLEARDSCP) dsfield &= ~DSCP_MASK; if (dsfield != odsfield) write_dsfield(m, pktattr, dsfield); _addq(q, m); #ifdef RIO_STATS PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m)); #endif return (0); } struct mbuf * rio_getq(rio_t *rp, class_queue_t *q) { struct mbuf *m; int dpindex, i; if ((m = _getq(q)) == NULL) return NULL; dpindex = RIOM_GET_PRECINDEX(m); for (i = dpindex; i < RIO_NDROPPREC; i++) { if (--rp->rio_precstate[i].qlen == 0) { if (rp->rio_precstate[i].idle == 0) { rp->rio_precstate[i].idle = 1; microtime(&rp->rio_precstate[i].last); } } } return (m); } #ifdef ALTQ3_COMPAT int rioopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { /* everything will be done when the queueing scheme is attached. 
*/ return 0; } int rioclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { rio_queue_t *rqp; int err, error = 0; while ((rqp = rio_list) != NULL) { /* destroy all */ err = rio_detach(rqp); if (err != 0 && error == 0) error = err; } return error; } int rioioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { rio_queue_t *rqp; struct rio_interface *ifacep; struct ifnet *ifp; int error = 0; /* check super-user privilege */ switch (cmd) { case RIO_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case RIO_ENABLE: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = altq_enable(rqp->rq_ifq); break; case RIO_DISABLE: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = altq_disable(rqp->rq_ifq); break; case RIO_IF_ATTACH: ifp = ifunit(((struct rio_interface *)addr)->rio_ifname); if (ifp == NULL) { error = ENXIO; break; } /* allocate and initialize rio_queue_t */ rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK); if (rqp == NULL) { error = ENOMEM; break; } bzero(rqp, sizeof(rio_queue_t)); rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK); if (rqp->rq_q == NULL) { free(rqp, M_DEVBUF); error = ENOMEM; break; } bzero(rqp->rq_q, sizeof(class_queue_t)); rqp->rq_rio = rio_alloc(0, NULL, 0, 0); if (rqp->rq_rio == NULL) { free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); error = ENOMEM; break; } rqp->rq_ifq = &ifp->if_snd; qtail(rqp->rq_q) = NULL; qlen(rqp->rq_q) = 0; qlimit(rqp->rq_q) = RIO_LIMIT; qtype(rqp->rq_q) = Q_RIO; /* * set RIO to this ifnet structure.
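 * altq_attach() below registers the rio enqueue/dequeue/request handlers
 * on the interface's send queue (rq_ifq points at &ifp->if_snd).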
*/ error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp, rio_enqueue, rio_dequeue, rio_request, NULL, NULL); if (error) { rio_destroy(rqp->rq_rio); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); break; } /* add this state to the rio list */ rqp->rq_next = rio_list; rio_list = rqp; break; case RIO_IF_DETACH: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = rio_detach(rqp); break; case RIO_GETSTATS: do { struct rio_stats *q_stats; rio_t *rp; int i; q_stats = (struct rio_stats *)addr; if ((rqp = altq_lookup(q_stats->iface.rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } rp = rqp->rq_rio; q_stats->q_limit = qlimit(rqp->rq_q); q_stats->weight = rp->rio_weight; q_stats->flags = rp->rio_flags; for (i = 0; i < RIO_NDROPPREC; i++) { q_stats->q_len[i] = rp->rio_precstate[i].qlen; bcopy(&rp->q_stats[i], &q_stats->q_stats[i], sizeof(struct redstats)); q_stats->q_stats[i].q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; q_stats->q_params[i].inv_pmax = rp->rio_precstate[i].inv_pmax; q_stats->q_params[i].th_min = rp->rio_precstate[i].th_min; q_stats->q_params[i].th_max = rp->rio_precstate[i].th_max; } } while (/*CONSTCOND*/ 0); break; case RIO_CONFIG: do { struct rio_conf *fc; rio_t *new; int s, limit, i; fc = (struct rio_conf *)addr; if ((rqp = altq_lookup(fc->iface.rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } new = rio_alloc(fc->rio_weight, &fc->q_params[0], fc->rio_flags, fc->rio_pkttime); if (new == NULL) { error = ENOMEM; break; } s = splnet(); _flushq(rqp->rq_q); limit = fc->rio_limit; if (limit < fc->q_params[RIO_NDROPPREC-1].th_max) limit = fc->q_params[RIO_NDROPPREC-1].th_max; qlimit(rqp->rq_q) = limit; rio_destroy(rqp->rq_rio); rqp->rq_rio = new; splx(s); /* write back new values */ fc->rio_limit = limit; for (i = 0; i < RIO_NDROPPREC; i++) { fc->q_params[i].inv_pmax = rqp->rq_rio->rio_precstate[i].inv_pmax; fc->q_params[i].th_min = rqp->rq_rio->rio_precstate[i].th_min; fc->q_params[i].th_max = rqp->rq_rio->rio_precstate[i].th_max; } } while (/*CONSTCOND*/ 0); break; case RIO_SETDEFAULTS: do { struct redparams *rp; int i; rp = (struct redparams *)addr; for (i = 0; i < RIO_NDROPPREC; i++) default_rio_params[i] = rp[i]; } while (/*CONSTCOND*/ 0); break; default: error = EINVAL; break; } return error; } static int rio_detach(rqp) rio_queue_t *rqp; { rio_queue_t *tmp; int error = 0; if (ALTQ_IS_ENABLED(rqp->rq_ifq)) altq_disable(rqp->rq_ifq); if ((error = altq_detach(rqp->rq_ifq))) return (error); if (rio_list == rqp) rio_list = rqp->rq_next; else { for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next) if (tmp->rq_next == rqp) { tmp->rq_next = rqp->rq_next; break; } if (tmp == NULL) printf("rio_detach: no state found in rio_list!\n"); } rio_destroy(rqp->rq_rio); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); return (error); } /* * rio support routines */ static int rio_request(ifq, req, arg) struct ifaltq *ifq; int req; void *arg; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: _flushq(rqp->rq_q); if (ALTQ_IS_ENABLED(ifq)) ifq->ifq_len = 0; break; } return (0); } /* * enqueue routine: * * returns: 0 when successfully queued. * ENOBUFS when drop occurs. 
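 * (In the drop case rio_addq() has already freed the mbuf, so the
 * caller must not touch it again.)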
*/ static int rio_enqueue(ifq, m, pktattr) struct ifaltq *ifq; struct mbuf *m; struct altq_pktattr *pktattr; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; int error = 0; IFQ_LOCK_ASSERT(ifq); if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0) ifq->ifq_len++; else error = ENOBUFS; return error; } /* * dequeue routine: * must be called in splimp. * * returns: mbuf dequeued. * NULL when no packet is available in the queue. */ static struct mbuf * rio_dequeue(ifq, op) struct ifaltq *ifq; int op; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; struct mbuf *m = NULL; IFQ_LOCK_ASSERT(ifq); if (op == ALTDQ_POLL) return qhead(rqp->rq_q); m = rio_getq(rqp->rq_rio, rqp->rq_q); if (m != NULL) ifq->ifq_len--; return m; } #ifdef KLD_MODULE static struct altqsw rio_sw = {"rio", rioopen, rioclose, rioioctl}; ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw); MODULE_VERSION(altq_rio, 1); MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_RIO */ Index: stable/11/sys/net/altq/altq_rmclass.h =================================================================== --- stable/11/sys/net/altq/altq_rmclass.h (revision 331642) +++ stable/11/sys/net/altq/altq_rmclass.h (revision 331643) @@ -1,273 +1,273 @@ /*- * Copyright (c) 1991-1997 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Network Research * Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_RMCLASS_H_ #define _ALTQ_ALTQ_RMCLASS_H_ #include /* #pragma ident "@(#)rm_class.h 1.20 97/10/23 SMI" */ #ifdef __cplusplus extern "C" { #endif #define RM_MAXPRIO 8 /* Max priority */ #ifdef _KERNEL typedef struct mbuf mbuf_t; typedef struct rm_ifdat rm_ifdat_t; typedef struct rm_class rm_class_t; struct red; /* * Macros for dealing with time values. We assume all times are * 'timevals'. 
`microtime' is used to get the best available clock * resolution. If `microtime' *doesn't* return a value that's about * ten times smaller than the average packet time on the fastest * link that will use these routines, a slightly different clock * scheme than this one should be used. * (Bias due to truncation error in this scheme will overestimate utilization * and discriminate against high bandwidth classes. To remove this bias an * integrator needs to be added. The simplest integrator uses a history of * 10 * avg.packet.time / min.tick.time packet completion entries. This is * straight forward to add but we don't want to pay the extra memory * traffic to maintain it if it's not necessary (occasionally a vendor * accidentally builds a workstation with a decent clock - e.g., Sun & HP).) */ #define RM_GETTIME(now) microtime(&now) #define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \ (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec))) #define TV_DELTA(a, b, delta) { \ - register int xxs; \ + int xxs; \ \ delta = (a)->tv_usec - (b)->tv_usec; \ if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \ switch (xxs) { \ default: \ /* if (xxs < 0) \ printf("rm_class: bogus time values\n"); */ \ delta = 0; \ /* fall through */ \ case 2: \ delta += 1000000; \ /* fall through */ \ case 1: \ delta += 1000000; \ break; \ } \ } \ } #define TV_ADD_DELTA(a, delta, res) { \ - register int xxus = (a)->tv_usec + (delta); \ + int xxus = (a)->tv_usec + (delta); \ \ (res)->tv_sec = (a)->tv_sec; \ while (xxus >= 1000000) { \ ++((res)->tv_sec); \ xxus -= 1000000; \ } \ (res)->tv_usec = xxus; \ } #define RM_TIMEOUT 2 /* 1 Clock tick. */ #if 1 #define RM_MAXQUEUED 1 /* this isn't used in ALTQ/CBQ */ #else #define RM_MAXQUEUED 16 /* Max number of packets downstream of CBQ */ #endif #define RM_MAXQUEUE 64 /* Max queue length */ #define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */ #define RM_POWER (1 << RM_FILTER_GAIN) #define RM_MAXDEPTH 32 #define RM_NS_PER_SEC (1000000000) typedef struct _rm_class_stats_ { u_int handle; u_int depth; struct pktcntr xmit_cnt; /* packets sent in this class */ struct pktcntr drop_cnt; /* dropped packets */ u_int over; /* # times went over limit */ u_int borrows; /* # times tried to borrow */ u_int overactions; /* # times invoked overlimit action */ u_int delays; /* # times invoked delay actions */ } rm_class_stats_t; /* * CBQ Class state structure */ struct rm_class { class_queue_t *q_; /* Queue of packets */ rm_ifdat_t *ifdat_; int pri_; /* Class priority. */ int depth_; /* Class depth */ u_int ns_per_byte_; /* NanoSeconds per byte. */ u_int maxrate_; /* Bytes per second for this class. */ u_int allotment_; /* Fraction of link bandwidth. */ u_int w_allotment_; /* Weighted allotment for WRR */ int bytes_alloc_; /* Allocation for round of WRR */ int avgidle_; int maxidle_; int minidle_; int offtime_; int sleeping_; /* != 0 if delaying */ int qthresh_; /* Queue threshold for formal link sharing */ int leaf_; /* Note whether leaf class or not.*/ rm_class_t *children_; /* Children of this class */ rm_class_t *next_; /* Next pointer, used if child */ rm_class_t *peer_; /* Peer class */ rm_class_t *borrow_; /* Borrow class */ rm_class_t *parent_; /* Parent class */ void (*overlimit)(struct rm_class *, struct rm_class *); void (*drop)(struct rm_class *); /* Class drop action. 
*/ union { struct red *red_; /* RED state pointer */ struct codel *codel_; /* codel state pointer */ } cl_aqm_; #define red_ cl_aqm_.red_ #define codel_ cl_aqm_.codel_ struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */ int flags_; int last_pkttime_; /* saved pkt_time */ struct timeval undertime_; /* time can next send */ struct timeval last_; /* time last packet sent */ struct timeval overtime_; struct callout callout_; /* for timeout() calls */ rm_class_stats_t stats_; /* Class Statistics */ }; /* * CBQ Interface state */ struct rm_ifdat { int queued_; /* # pkts queued downstream */ int efficient_; /* Link Efficiency bit */ int wrr_; /* Enable Weighted Round-Robin */ u_long ns_per_byte_; /* Link byte speed. */ int maxqueued_; /* Max packets to queue */ int maxpkt_; /* Max packet size. */ int qi_; /* In/out pointers for downstream */ int qo_; /* packets */ /* * Active class state and WRR state. */ rm_class_t *active_[RM_MAXPRIO]; /* Active cl's in each pri */ int na_[RM_MAXPRIO]; /* # of active cl's in a pri */ int num_[RM_MAXPRIO]; /* # of cl's per pri */ int alloc_[RM_MAXPRIO]; /* Byte Allocation */ u_long M_[RM_MAXPRIO]; /* WRR weights. */ /* * Network Interface/Solaris Queue state pointer. */ struct ifaltq *ifq_; rm_class_t *default_; /* Default Pkt class, BE */ rm_class_t *root_; /* Root Link class. */ rm_class_t *ctl_; /* Control Traffic class. */ void (*restart)(struct ifaltq *); /* Restart routine. */ /* * Current packet downstream packet state and dynamic state. */ rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */ rm_class_t *class_[RM_MAXQUEUED]; /* class sending */ int curlen_[RM_MAXQUEUED]; /* Current pktlen */ struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */ int is_overlimit_[RM_MAXQUEUED];/* Current packet time. 
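 * (more accurately: whether the packet in this slot was sent while the
 * class was over limit)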
*/ int cutoff_; /* Cut-off depth for borrowing */ struct timeval ifnow_; /* expected xmit completion time */ #if 1 /* ALTQ4PPP */ int maxiftime_; /* max delay inside interface */ #endif rm_class_t *pollcache_; /* cached rm_class by poll operation */ }; /* flags for rmc_init and rmc_newclass */ /* class flags */ #define RMCF_RED 0x0001 #define RMCF_ECN 0x0002 #define RMCF_RIO 0x0004 #define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ #define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define RMCF_CODEL 0x0020 /* flags for rmc_init */ #define RMCF_WRR 0x0100 #define RMCF_EFFICIENT 0x0200 #define is_a_parent_class(cl) ((cl)->children_ != NULL) extern rm_class_t *rmc_newclass(int, struct rm_ifdat *, u_int, void (*)(struct rm_class *, struct rm_class *), int, struct rm_class *, struct rm_class *, u_int, int, u_int, int, int); extern void rmc_delete_class(struct rm_ifdat *, struct rm_class *); extern int rmc_modclass(struct rm_class *, u_int, int, u_int, int, u_int, int); extern void rmc_init(struct ifaltq *, struct rm_ifdat *, u_int, void (*)(struct ifaltq *), int, int, u_int, int, u_int, int); extern int rmc_queue_packet(struct rm_class *, mbuf_t *); extern mbuf_t *rmc_dequeue_next(struct rm_ifdat *, int); extern void rmc_update_class_util(struct rm_ifdat *); extern void rmc_delay_action(struct rm_class *, struct rm_class *); extern void rmc_dropall(struct rm_class *); extern int rmc_get_weight(struct rm_ifdat *, int); #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_RMCLASS_H_ */ Index: stable/11/sys/net/bpf_filter.c =================================================================== --- stable/11/sys/net/bpf_filter.c (revision 331642) +++ stable/11/sys/net/bpf_filter.c (revision 331643) @@ -1,587 +1,587 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)bpf_filter.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include #if !defined(_KERNEL) #include #endif #if !defined(_KERNEL) || defined(sun) #include #endif #ifndef __i386__ #define BPF_ALIGN #endif #ifndef BPF_ALIGN #define EXTRACT_SHORT(p) ((u_int16_t)ntohs(*(u_int16_t *)p)) #define EXTRACT_LONG(p) (ntohl(*(u_int32_t *)p)) #else #define EXTRACT_SHORT(p)\ ((u_int16_t)\ ((u_int16_t)*((u_char *)p+0)<<8|\ (u_int16_t)*((u_char *)p+1)<<0)) #define EXTRACT_LONG(p)\ ((u_int32_t)*((u_char *)p+0)<<24|\ (u_int32_t)*((u_char *)p+1)<<16|\ (u_int32_t)*((u_char *)p+2)<<8|\ (u_int32_t)*((u_char *)p+3)<<0) #endif #ifdef _KERNEL #include #else #include #endif #include #ifdef _KERNEL #define MINDEX(m, k) \ { \ - register int len = m->m_len; \ + int len = m->m_len; \ \ while (k >= len) { \ k -= len; \ m = m->m_next; \ if (m == 0) \ return (0); \ len = m->m_len; \ } \ } static u_int16_t m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err); static u_int32_t m_xword(struct mbuf *m, bpf_u_int32 k, int *err); static u_int32_t m_xword(struct mbuf *m, bpf_u_int32 k, int *err) { size_t len; u_char *cp, *np; struct mbuf *m0; len = m->m_len; while (k >= len) { k -= len; m = m->m_next; if (m == NULL) goto bad; len = m->m_len; } cp = mtod(m, u_char *) + k; if (len - k >= 4) { *err = 0; return (EXTRACT_LONG(cp)); } m0 = m->m_next; if (m0 == NULL || m0->m_len + len - k < 4) goto bad; *err = 0; np = mtod(m0, u_char *); switch (len - k) { case 1: return (((u_int32_t)cp[0] << 24) | ((u_int32_t)np[0] << 16) | ((u_int32_t)np[1] << 8) | (u_int32_t)np[2]); case 2: return (((u_int32_t)cp[0] << 24) | ((u_int32_t)cp[1] << 16) | ((u_int32_t)np[0] << 8) | (u_int32_t)np[1]); default: return (((u_int32_t)cp[0] << 24) | ((u_int32_t)cp[1] << 16) | ((u_int32_t)cp[2] << 8) | (u_int32_t)np[0]); } bad: *err = 1; return (0); } static u_int16_t m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err) { size_t len; u_char *cp; struct mbuf *m0; len = m->m_len; while (k >= len) { k -= len; m = m->m_next; if (m == NULL) goto bad; len = m->m_len; } cp = mtod(m, u_char *) + k; if (len - k >= 2) { *err = 0; return (EXTRACT_SHORT(cp)); } m0 = m->m_next; if (m0 == NULL) goto bad; *err = 0; return ((cp[0] << 8) | mtod(m0, u_char *)[0]); bad: *err = 1; return (0); } #endif /* * Execute the filter program starting at pc on the packet p * wirelen is the length of the original packet * buflen is the amount of data present */ u_int bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) { u_int32_t A = 0, X = 0; bpf_u_int32 k; u_int32_t mem[BPF_MEMWORDS]; bzero(mem, sizeof(mem)); if (pc == NULL) /* * No filter means accept all. 
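 * ((u_int)-1 is the largest possible snapshot length, so the entire
 * packet is accepted.)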
*/ return ((u_int)-1); --pc; while (1) { ++pc; switch (pc->code) { default: #ifdef _KERNEL return (0); #else abort(); #endif case BPF_RET|BPF_K: return ((u_int)pc->k); case BPF_RET|BPF_A: return ((u_int)A); case BPF_LD|BPF_W|BPF_ABS: k = pc->k; if (k > buflen || sizeof(int32_t) > buflen - k) { #ifdef _KERNEL int merr; if (buflen != 0) return (0); A = m_xword((struct mbuf *)p, k, &merr); if (merr != 0) return (0); continue; #else return (0); #endif } #ifdef BPF_ALIGN if (((intptr_t)(p + k) & 3) != 0) A = EXTRACT_LONG(&p[k]); else #endif A = ntohl(*(int32_t *)(p + k)); continue; case BPF_LD|BPF_H|BPF_ABS: k = pc->k; if (k > buflen || sizeof(int16_t) > buflen - k) { #ifdef _KERNEL int merr; if (buflen != 0) return (0); A = m_xhalf((struct mbuf *)p, k, &merr); continue; #else return (0); #endif } A = EXTRACT_SHORT(&p[k]); continue; case BPF_LD|BPF_B|BPF_ABS: k = pc->k; if (k >= buflen) { #ifdef _KERNEL struct mbuf *m; if (buflen != 0) return (0); m = (struct mbuf *)p; MINDEX(m, k); A = mtod(m, u_char *)[k]; continue; #else return (0); #endif } A = p[k]; continue; case BPF_LD|BPF_W|BPF_LEN: A = wirelen; continue; case BPF_LDX|BPF_W|BPF_LEN: X = wirelen; continue; case BPF_LD|BPF_W|BPF_IND: k = X + pc->k; if (pc->k > buflen || X > buflen - pc->k || sizeof(int32_t) > buflen - k) { #ifdef _KERNEL int merr; if (buflen != 0) return (0); A = m_xword((struct mbuf *)p, k, &merr); if (merr != 0) return (0); continue; #else return (0); #endif } #ifdef BPF_ALIGN if (((intptr_t)(p + k) & 3) != 0) A = EXTRACT_LONG(&p[k]); else #endif A = ntohl(*(int32_t *)(p + k)); continue; case BPF_LD|BPF_H|BPF_IND: k = X + pc->k; if (X > buflen || pc->k > buflen - X || sizeof(int16_t) > buflen - k) { #ifdef _KERNEL int merr; if (buflen != 0) return (0); A = m_xhalf((struct mbuf *)p, k, &merr); if (merr != 0) return (0); continue; #else return (0); #endif } A = EXTRACT_SHORT(&p[k]); continue; case BPF_LD|BPF_B|BPF_IND: k = X + pc->k; if (pc->k >= buflen || X >= buflen - pc->k) { #ifdef _KERNEL struct mbuf *m; if (buflen != 0) return (0); m = (struct mbuf *)p; MINDEX(m, k); A = mtod(m, u_char *)[k]; continue; #else return (0); #endif } A = p[k]; continue; case BPF_LDX|BPF_MSH|BPF_B: k = pc->k; if (k >= buflen) { #ifdef _KERNEL - register struct mbuf *m; + struct mbuf *m; if (buflen != 0) return (0); m = (struct mbuf *)p; MINDEX(m, k); X = (mtod(m, u_char *)[k] & 0xf) << 2; continue; #else return (0); #endif } X = (p[pc->k] & 0xf) << 2; continue; case BPF_LD|BPF_IMM: A = pc->k; continue; case BPF_LDX|BPF_IMM: X = pc->k; continue; case BPF_LD|BPF_MEM: A = mem[pc->k]; continue; case BPF_LDX|BPF_MEM: X = mem[pc->k]; continue; case BPF_ST: mem[pc->k] = A; continue; case BPF_STX: mem[pc->k] = X; continue; case BPF_JMP|BPF_JA: pc += pc->k; continue; case BPF_JMP|BPF_JGT|BPF_K: pc += (A > pc->k) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JGE|BPF_K: pc += (A >= pc->k) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JEQ|BPF_K: pc += (A == pc->k) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JSET|BPF_K: pc += (A & pc->k) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JGT|BPF_X: pc += (A > X) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JGE|BPF_X: pc += (A >= X) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JEQ|BPF_X: pc += (A == X) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JSET|BPF_X: pc += (A & X) ? 
pc->jt : pc->jf; continue; case BPF_ALU|BPF_ADD|BPF_X: A += X; continue; case BPF_ALU|BPF_SUB|BPF_X: A -= X; continue; case BPF_ALU|BPF_MUL|BPF_X: A *= X; continue; case BPF_ALU|BPF_DIV|BPF_X: if (X == 0) return (0); A /= X; continue; case BPF_ALU|BPF_AND|BPF_X: A &= X; continue; case BPF_ALU|BPF_OR|BPF_X: A |= X; continue; case BPF_ALU|BPF_LSH|BPF_X: A <<= X; continue; case BPF_ALU|BPF_RSH|BPF_X: A >>= X; continue; case BPF_ALU|BPF_ADD|BPF_K: A += pc->k; continue; case BPF_ALU|BPF_SUB|BPF_K: A -= pc->k; continue; case BPF_ALU|BPF_MUL|BPF_K: A *= pc->k; continue; case BPF_ALU|BPF_DIV|BPF_K: A /= pc->k; continue; case BPF_ALU|BPF_AND|BPF_K: A &= pc->k; continue; case BPF_ALU|BPF_OR|BPF_K: A |= pc->k; continue; case BPF_ALU|BPF_LSH|BPF_K: A <<= pc->k; continue; case BPF_ALU|BPF_RSH|BPF_K: A >>= pc->k; continue; case BPF_ALU|BPF_NEG: A = -A; continue; case BPF_MISC|BPF_TAX: X = A; continue; case BPF_MISC|BPF_TXA: A = X; continue; } } } #ifdef _KERNEL static const u_short bpf_code_map[] = { 0x10ff, /* 0x00-0x0f: 1111111100001000 */ 0x3070, /* 0x10-0x1f: 0000111000001100 */ 0x3131, /* 0x20-0x2f: 1000110010001100 */ 0x3031, /* 0x30-0x3f: 1000110000001100 */ 0x3131, /* 0x40-0x4f: 1000110010001100 */ 0x1011, /* 0x50-0x5f: 1000100000001000 */ 0x1013, /* 0x60-0x6f: 1100100000001000 */ 0x1010, /* 0x70-0x7f: 0000100000001000 */ 0x0093, /* 0x80-0x8f: 1100100100000000 */ 0x0000, /* 0x90-0x9f: 0000000000000000 */ 0x0000, /* 0xa0-0xaf: 0000000000000000 */ 0x0002, /* 0xb0-0xbf: 0100000000000000 */ 0x0000, /* 0xc0-0xcf: 0000000000000000 */ 0x0000, /* 0xd0-0xdf: 0000000000000000 */ 0x0000, /* 0xe0-0xef: 0000000000000000 */ 0x0000 /* 0xf0-0xff: 0000000000000000 */ }; #define BPF_VALIDATE_CODE(c) \ ((c) <= 0xff && (bpf_code_map[(c) >> 4] & (1 << ((c) & 0xf))) != 0) /* * Return true if the 'fcode' is a valid filter program. * The constraints are that each jump be forward and to a valid * code. The code must terminate with either an accept or reject. * * The kernel needs to be able to verify an application's filter code. * Otherwise, a bogus program could easily crash the system. */ int bpf_validate(const struct bpf_insn *f, int len) { - register int i; - register const struct bpf_insn *p; + int i; + const struct bpf_insn *p; /* Do not accept negative length filter. */ if (len < 0) return (0); /* An empty filter means accept all. */ if (len == 0) return (1); for (i = 0; i < len; ++i) { p = &f[i]; /* * Check that the code is valid. */ if (!BPF_VALIDATE_CODE(p->code)) return (0); /* * Check that that jumps are forward, and within * the code block. */ if (BPF_CLASS(p->code) == BPF_JMP) { - register u_int offset; + u_int offset; if (p->code == (BPF_JMP|BPF_JA)) offset = p->k; else offset = p->jt > p->jf ? p->jt : p->jf; if (offset >= (u_int)(len - i) - 1) return (0); continue; } /* * Check that memory operations use valid addresses. */ if (p->code == BPF_ST || p->code == BPF_STX || p->code == (BPF_LD|BPF_MEM) || p->code == (BPF_LDX|BPF_MEM)) { if (p->k >= BPF_MEMWORDS) return (0); continue; } /* * Check for constant division by 0. */ if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0) return (0); } return (BPF_CLASS(f[len - 1].code) == BPF_RET); } #endif Index: stable/11/sys/net/if_llatbl.c =================================================================== --- stable/11/sys/net/if_llatbl.c (revision 331642) +++ stable/11/sys/net/if_llatbl.c (revision 331643) @@ -1,958 +1,958 @@ /* * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved. * Copyright (c) 2004-2008 Qing Li. All rights reserved. 
* Copyright (c) 2008 Kip Macy. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables"); static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) = SLIST_HEAD_INITIALIZER(lltables); #define V_lltables VNET(lltables) struct rwlock lltable_rwlock; RW_SYSINIT(lltable_rwlock, &lltable_rwlock, "lltable_rwlock"); static void lltable_unlink(struct lltable *llt); static void llentries_unlink(struct lltable *llt, struct llentries *head); static void htable_unlink_entry(struct llentry *lle); static void htable_link_entry(struct lltable *llt, struct llentry *lle); static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg); /* * Dump lle state for a specific address family. */ static int lltable_dump_af(struct lltable *llt, struct sysctl_req *wr) { int error; LLTABLE_LOCK_ASSERT(); if (llt->llt_ifp->if_flags & IFF_LOOPBACK) return (0); error = 0; IF_AFDATA_RLOCK(llt->llt_ifp); error = lltable_foreach_lle(llt, (llt_foreach_cb_t *)llt->llt_dump_entry, wr); IF_AFDATA_RUNLOCK(llt->llt_ifp); return (error); } /* * Dump arp state for a specific address family. */ int lltable_sysctl_dumparp(int af, struct sysctl_req *wr) { struct lltable *llt; int error = 0; LLTABLE_RLOCK(); SLIST_FOREACH(llt, &V_lltables, llt_link) { if (llt->llt_af == af) { error = lltable_dump_af(llt, wr); if (error != 0) goto done; } } done: LLTABLE_RUNLOCK(); return (error); } /* * Common function helpers for chained hash table. */ /* * Runs specified callback for each entry in @llt. * Caller does the locking. 
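 *
 * A minimal callback sketch (the counter argument here is hypothetical):
 *
 *	static int
 *	count_lle(struct lltable *llt, struct llentry *lle, void *farg)
 *	{
 *		(*(int *)farg)++;
 *		return (0);	(a nonzero return stops the current chain)
 *	}
 *
 * invoked as lltable_foreach_lle(llt, count_lle, &count).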
* */ static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg) { struct llentry *lle, *next; int i, error; error = 0; for (i = 0; i < llt->llt_hsize; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { error = f(llt, lle, farg); if (error != 0) break; } } return (error); } static void htable_link_entry(struct lltable *llt, struct llentry *lle) { struct llentries *lleh; uint32_t hashidx; if ((lle->la_flags & LLE_LINKED) != 0) return; IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp); hashidx = llt->llt_hash(lle, llt->llt_hsize); lleh = &llt->lle_head[hashidx]; lle->lle_tbl = llt; lle->lle_head = lleh; lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); } static void htable_unlink_entry(struct llentry *lle) { if ((lle->la_flags & LLE_LINKED) != 0) { IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp); LIST_REMOVE(lle, lle_next); lle->la_flags &= ~(LLE_VALID | LLE_LINKED); #if 0 lle->lle_tbl = NULL; lle->lle_head = NULL; #endif } } struct prefix_match_data { const struct sockaddr *addr; const struct sockaddr *mask; struct llentries dchain; u_int flags; }; static int htable_prefix_free_cb(struct lltable *llt, struct llentry *lle, void *farg) { struct prefix_match_data *pmd; pmd = (struct prefix_match_data *)farg; if (llt->llt_match_prefix(pmd->addr, pmd->mask, pmd->flags, lle)) { LLE_WLOCK(lle); LIST_INSERT_HEAD(&pmd->dchain, lle, lle_chain); } return (0); } static void htable_prefix_free(struct lltable *llt, const struct sockaddr *addr, const struct sockaddr *mask, u_int flags) { struct llentry *lle, *next; struct prefix_match_data pmd; bzero(&pmd, sizeof(pmd)); pmd.addr = addr; pmd.mask = mask; pmd.flags = flags; LIST_INIT(&pmd.dchain); IF_AFDATA_WLOCK(llt->llt_ifp); /* Push matching lles to chain */ lltable_foreach_lle(llt, htable_prefix_free_cb, &pmd); llentries_unlink(llt, &pmd.dchain); IF_AFDATA_WUNLOCK(llt->llt_ifp); LIST_FOREACH_SAFE(lle, &pmd.dchain, lle_chain, next) lltable_free_entry(llt, lle); } static void htable_free_tbl(struct lltable *llt) { free(llt->lle_head, M_LLTABLE); free(llt, M_LLTABLE); } static void llentries_unlink(struct lltable *llt, struct llentries *head) { struct llentry *lle, *next; LIST_FOREACH_SAFE(lle, head, lle_chain, next) llt->llt_unlink_entry(lle); } /* * Helper function used to drop all mbufs in hold queue. * * Returns the number of held packets, if any, that were dropped. */ size_t lltable_drop_entry_queue(struct llentry *lle) { size_t pkts_dropped; struct mbuf *next; LLE_WLOCK_ASSERT(lle); pkts_dropped = 0; while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) { next = lle->la_hold->m_nextpkt; m_freem(lle->la_hold); lle->la_hold = next; lle->la_numheld--; pkts_dropped++; } KASSERT(lle->la_numheld == 0, ("%s: la_numheld %d > 0, pkts_droped %zd", __func__, lle->la_numheld, pkts_dropped)); return (pkts_dropped); } void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle, const char *linkhdr, size_t linkhdrsize, int lladdr_off) { memcpy(lle->r_linkdata, linkhdr, linkhdrsize); lle->r_hdrlen = linkhdrsize; lle->ll_addr = &lle->r_linkdata[lladdr_off]; lle->la_flags |= LLE_VALID; lle->r_flags |= RLLE_VALID; } /* * Tries to update @lle link-level address. * Since update requires AFDATA WLOCK, function * drops @lle lock, acquires AFDATA lock and then acquires * @lle lock to maintain lock order. * * Returns 1 on success. 
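 * Returns 0 if the entry was deleted while the lock was dropped; the
 * caller then typically restarts its lookup.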
*/ int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, const char *linkhdr, size_t linkhdrsize, int lladdr_off) { /* Perform real LLE update */ /* use afdata WLOCK to update fields */ LLE_WLOCK_ASSERT(lle); LLE_ADDREF(lle); LLE_WUNLOCK(lle); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(lle); /* * Since we dropped the LLE lock, another thread might have deleted * this lle. Check and return */ if ((lle->la_flags & LLE_DELETED) != 0) { IF_AFDATA_WUNLOCK(ifp); LLE_FREE_LOCKED(lle); return (0); } /* Update data */ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off); IF_AFDATA_WUNLOCK(ifp); LLE_REMREF(lle); return (1); } /* * Helper function used to pre-compute full/partial link-layer * header data suitable for feeding into if_output(). */ int lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr, char *buf, size_t *bufsize, int *lladdr_off) { struct if_encap_req ereq; int error; bzero(buf, *bufsize); bzero(&ereq, sizeof(ereq)); ereq.buf = buf; ereq.bufsize = *bufsize; ereq.rtype = IFENCAP_LL; ereq.family = family; ereq.lladdr = lladdr; ereq.lladdr_len = ifp->if_addrlen; error = ifp->if_requestencap(ifp, &ereq); if (error == 0) { *bufsize = ereq.bufsize; *lladdr_off = ereq.lladdr_off; } return (error); } /* * Update link-layer header for given @lle after * interface lladdr was changed. */ static int llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg) { struct ifnet *ifp; u_char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; u_char *lladdr; int lladdr_off; ifp = (struct ifnet *)farg; lladdr = lle->ll_addr; LLE_WLOCK(lle); if ((lle->la_flags & LLE_VALID) == 0) { LLE_WUNLOCK(lle); return (0); } if ((lle->la_flags & LLE_IFADDR) != 0) lladdr = IF_LLADDR(ifp); linkhdrsize = sizeof(linkhdr); lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, &linkhdrsize, &lladdr_off); memcpy(lle->r_linkdata, linkhdr, linkhdrsize); LLE_WUNLOCK(lle); return (0); } /* * Update all calculated headers for given @llt */ void lltable_update_ifaddr(struct lltable *llt) { if (llt->llt_ifp->if_flags & IFF_LOOPBACK) return; IF_AFDATA_WLOCK(llt->llt_ifp); lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp); IF_AFDATA_WUNLOCK(llt->llt_ifp); } /* * * Performs generic cleanup routines and frees lle. * * Called for non-linked entries, with callouts and * other AF-specific cleanups performed. * * @lle must be passed WLOCK'ed * * Returns the number of held packets, if any, that were dropped. */ size_t llentry_free(struct llentry *lle) { size_t pkts_dropped; LLE_WLOCK_ASSERT(lle); KASSERT((lle->la_flags & LLE_LINKED) == 0, ("freeing linked lle")); pkts_dropped = lltable_drop_entry_queue(lle); LLE_FREE_LOCKED(lle); return (pkts_dropped); } /* * (al)locate an llentry for address dst (equivalent to rtalloc for new-arp). * * If found the llentry * is returned referenced and unlocked.
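 * The reference is the caller's to release (LLE_REMREF/LLE_FREE) once
 * the entry is no longer needed.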
*/ struct llentry * llentry_alloc(struct ifnet *ifp, struct lltable *lt, struct sockaddr_storage *dst) { struct llentry *la, *la_tmp; IF_AFDATA_RLOCK(ifp); la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst); IF_AFDATA_RUNLOCK(ifp); if (la != NULL) { LLE_ADDREF(la); LLE_WUNLOCK(la); return (la); } if ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { la = lltable_alloc_entry(lt, 0, (struct sockaddr *)dst); if (la == NULL) return (NULL); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(la); /* Prefer any existing LLE over newly-created one */ la_tmp = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst); if (la_tmp == NULL) lltable_link_entry(lt, la); IF_AFDATA_WUNLOCK(ifp); if (la_tmp != NULL) { lltable_free_entry(lt, la); la = la_tmp; } LLE_ADDREF(la); LLE_WUNLOCK(la); } return (la); } /* * Free all entries from given table and free itself. */ static int lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg) { struct llentries *dchain; dchain = (struct llentries *)farg; LLE_WLOCK(lle); LIST_INSERT_HEAD(dchain, lle, lle_chain); return (0); } /* * Free all entries from given table and free itself. */ void lltable_free(struct lltable *llt) { struct llentry *lle, *next; struct llentries dchain; KASSERT(llt != NULL, ("%s: llt is NULL", __func__)); lltable_unlink(llt); LIST_INIT(&dchain); IF_AFDATA_WLOCK(llt->llt_ifp); /* Push all lles to @dchain */ lltable_foreach_lle(llt, lltable_free_cb, &dchain); llentries_unlink(llt, &dchain); IF_AFDATA_WUNLOCK(llt->llt_ifp); LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) { if (callout_stop(&lle->lle_timer) > 0) LLE_REMREF(lle); llentry_free(lle); } llt->llt_free_tbl(llt); } #if 0 void lltable_drain(int af) { struct lltable *llt; struct llentry *lle; - register int i; + int i; LLTABLE_RLOCK(); SLIST_FOREACH(llt, &V_lltables, llt_link) { if (llt->llt_af != af) continue; for (i=0; i < llt->llt_hsize; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { LLE_WLOCK(lle); if (lle->la_hold) { m_freem(lle->la_hold); lle->la_hold = NULL; } LLE_WUNLOCK(lle); } } } LLTABLE_RUNLOCK(); } #endif /* * Deletes an address from given lltable. * Used for userland interaction to remove * individual entries. Skips entries added by OS. 
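 * Returns ENOENT if no entry matches, and EPERM for interface-address
 * (LLE_IFADDR) entries unless LLE_IFADDR is also set in @flags.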
*/ int lltable_delete_addr(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { struct llentry *lle; struct ifnet *ifp; ifp = llt->llt_ifp; IF_AFDATA_WLOCK(ifp); lle = lla_lookup(llt, LLE_EXCLUSIVE, l3addr); if (lle == NULL) { IF_AFDATA_WUNLOCK(ifp); return (ENOENT); } if ((lle->la_flags & LLE_IFADDR) != 0 && (flags & LLE_IFADDR) == 0) { IF_AFDATA_WUNLOCK(ifp); LLE_WUNLOCK(lle); return (EPERM); } lltable_unlink_entry(llt, lle); IF_AFDATA_WUNLOCK(ifp); llt->llt_delete_entry(llt, lle); return (0); } void lltable_prefix_free(int af, struct sockaddr *addr, struct sockaddr *mask, u_int flags) { struct lltable *llt; LLTABLE_RLOCK(); SLIST_FOREACH(llt, &V_lltables, llt_link) { if (llt->llt_af != af) continue; llt->llt_prefix_free(llt, addr, mask, flags); } LLTABLE_RUNLOCK(); } struct lltable * lltable_allocate_htbl(uint32_t hsize) { struct lltable *llt; int i; llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK | M_ZERO); llt->llt_hsize = hsize; llt->lle_head = malloc(sizeof(struct llentries) * hsize, M_LLTABLE, M_WAITOK | M_ZERO); for (i = 0; i < llt->llt_hsize; i++) LIST_INIT(&llt->lle_head[i]); /* Set some default callbacks */ llt->llt_link_entry = htable_link_entry; llt->llt_unlink_entry = htable_unlink_entry; llt->llt_prefix_free = htable_prefix_free; llt->llt_foreach_entry = htable_foreach_lle; llt->llt_free_tbl = htable_free_tbl; return (llt); } /* * Links lltable to global llt list. */ void lltable_link(struct lltable *llt) { LLTABLE_WLOCK(); SLIST_INSERT_HEAD(&V_lltables, llt, llt_link); LLTABLE_WUNLOCK(); } static void lltable_unlink(struct lltable *llt) { LLTABLE_WLOCK(); SLIST_REMOVE(&V_lltables, llt, lltable, llt_link); LLTABLE_WUNLOCK(); } /* * External methods used by lltable consumers */ int lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg) { return (llt->llt_foreach_entry(llt, f, farg)); } struct llentry * lltable_alloc_entry(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { return (llt->llt_alloc_entry(llt, flags, l3addr)); } void lltable_free_entry(struct lltable *llt, struct llentry *lle) { llt->llt_free_entry(llt, lle); } void lltable_link_entry(struct lltable *llt, struct llentry *lle) { llt->llt_link_entry(llt, lle); } void lltable_unlink_entry(struct lltable *llt, struct llentry *lle) { llt->llt_unlink_entry(lle); } void lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) { struct lltable *llt; llt = lle->lle_tbl; llt->llt_fill_sa_entry(lle, sa); } struct ifnet * lltable_get_ifp(const struct lltable *llt) { return (llt->llt_ifp); } int lltable_get_af(const struct lltable *llt) { return (llt->llt_af); } /* * Called in route_output when rtm_flags contains RTF_LLDATA. 
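*/

/*
 * Illustrative sketch, not part of this change, of the dispatch scheme
 * above: lltable_allocate_htbl() installs default method pointers and
 * the thin lltable_*() wrappers call through them, so each address
 * family can override individual operations.  A minimal analogue; all
 * names are hypothetical.
 */
#include <stdlib.h>

struct tbl;
typedef int tbl_foreach_cb_t(struct tbl *, int, void *);

struct tbl {
	int	 t_items[8];
	int	 t_nitems;
	int	(*t_foreach)(struct tbl *, tbl_foreach_cb_t *, void *);
};

static int
tbl_foreach_default(struct tbl *t, tbl_foreach_cb_t *f, void *farg)
{
	int i, error;

	for (i = 0; i < t->t_nitems; i++)
		if ((error = f(t, t->t_items[i], farg)) != 0)
			return (error);
	return (0);
}

static struct tbl *
tbl_alloc(void)
{
	struct tbl *t;

	if ((t = calloc(1, sizeof(*t))) == NULL)
		return (NULL);
	/* Set some default callbacks, as lltable_allocate_htbl() does. */
	t->t_foreach = tbl_foreach_default;
	return (t);
}

/* Consumer-facing wrapper, mirroring lltable_foreach_lle(). */
static int
tbl_foreach(struct tbl *t, tbl_foreach_cb_t *f, void *farg)
{
	return (t->t_foreach(t, f, farg));
}

/*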
*/ int lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info) { struct sockaddr_dl *dl = (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY]; struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST]; struct ifnet *ifp; struct lltable *llt; struct llentry *lle, *lle_tmp; uint8_t linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; u_int laflags = 0; int error; KASSERT(dl != NULL && dl->sdl_family == AF_LINK, ("%s: invalid dl\n", __func__)); ifp = ifnet_byindex(dl->sdl_index); if (ifp == NULL) { log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n", __func__, dl->sdl_index); return EINVAL; } /* XXX linked list may be too expensive */ LLTABLE_RLOCK(); SLIST_FOREACH(llt, &V_lltables, llt_link) { if (llt->llt_af == dst->sa_family && llt->llt_ifp == ifp) break; } LLTABLE_RUNLOCK(); KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n")); error = 0; switch (rtm->rtm_type) { case RTM_ADD: /* Add static LLE */ laflags = 0; if (rtm->rtm_rmx.rmx_expire == 0) laflags = LLE_STATIC; lle = lltable_alloc_entry(llt, laflags, dst); if (lle == NULL) return (ENOMEM); linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl), linkhdr, &linkhdrsize, &lladdr_off) != 0) return (EINVAL); lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off); if ((rtm->rtm_flags & RTF_ANNOUNCE)) lle->la_flags |= LLE_PUB; lle->la_expire = rtm->rtm_rmx.rmx_expire; laflags = lle->la_flags; /* Try to link new entry */ lle_tmp = NULL; IF_AFDATA_WLOCK(ifp); LLE_WLOCK(lle); lle_tmp = lla_lookup(llt, LLE_EXCLUSIVE, dst); if (lle_tmp != NULL) { /* Check if we are trying to replace immutable entry */ if ((lle_tmp->la_flags & LLE_IFADDR) != 0) { IF_AFDATA_WUNLOCK(ifp); LLE_WUNLOCK(lle_tmp); lltable_free_entry(llt, lle); return (EPERM); } /* Unlink existing entry from table */ lltable_unlink_entry(llt, lle_tmp); } lltable_link_entry(llt, lle); IF_AFDATA_WUNLOCK(ifp); if (lle_tmp != NULL) { EVENTHANDLER_INVOKE(lle_event, lle_tmp,LLENTRY_EXPIRED); lltable_free_entry(llt, lle_tmp); } /* * By invoking LLE handler here we might get * two events on static LLE entry insertion * in routing socket. However, since we might have * other subscribers we need to generate this event. 
*/ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED); LLE_WUNLOCK(lle); #ifdef INET /* gratuitous ARP */ if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) arprequest(ifp, &((struct sockaddr_in *)dst)->sin_addr, &((struct sockaddr_in *)dst)->sin_addr, (u_char *)LLADDR(dl)); #endif break; case RTM_DELETE: return (lltable_delete_addr(llt, 0, dst)); default: error = EINVAL; } return (error); } #ifdef DDB struct llentry_sa { struct llentry base; struct sockaddr l3_addr; }; static void llatbl_lle_show(struct llentry_sa *la) { struct llentry *lle; uint8_t octet[6]; lle = &la->base; db_printf("lle=%p\n", lle); db_printf(" lle_next=%p\n", lle->lle_next.le_next); db_printf(" lle_lock=%p\n", &lle->lle_lock); db_printf(" lle_tbl=%p\n", lle->lle_tbl); db_printf(" lle_head=%p\n", lle->lle_head); db_printf(" la_hold=%p\n", lle->la_hold); db_printf(" la_numheld=%d\n", lle->la_numheld); db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire); db_printf(" la_flags=0x%04x\n", lle->la_flags); db_printf(" la_asked=%u\n", lle->la_asked); db_printf(" la_preempt=%u\n", lle->la_preempt); db_printf(" ln_state=%d\n", lle->ln_state); db_printf(" ln_router=%u\n", lle->ln_router); db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick); db_printf(" lle_refcnt=%d\n", lle->lle_refcnt); bcopy(lle->ll_addr, octet, sizeof(octet)); db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n", octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]); db_printf(" lle_timer=%p\n", &lle->lle_timer); switch (la->l3_addr.sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin; char l3s[INET_ADDRSTRLEN]; sin = (struct sockaddr_in *)&la->l3_addr; inet_ntoa_r(sin->sin_addr, l3s); db_printf(" l3_addr=%s\n", l3s); break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6; char l3s[INET6_ADDRSTRLEN]; sin6 = (struct sockaddr_in6 *)&la->l3_addr; ip6_sprintf(l3s, &sin6->sin6_addr); db_printf(" l3_addr=%s\n", l3s); break; } #endif default: db_printf(" l3_addr=N/A (af=%d)\n", la->l3_addr.sa_family); break; } } DB_SHOW_COMMAND(llentry, db_show_llentry) { if (!have_addr) { db_printf("usage: show llentry \n"); return; } llatbl_lle_show((struct llentry_sa *)addr); } static void llatbl_llt_show(struct lltable *llt) { int i; struct llentry *lle; db_printf("llt=%p llt_af=%d llt_ifp=%p\n", llt, llt->llt_af, llt->llt_ifp); for (i = 0; i < llt->llt_hsize; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { llatbl_lle_show((struct llentry_sa *)lle); if (db_pager_quit) return; } } } DB_SHOW_COMMAND(lltable, db_show_lltable) { if (!have_addr) { db_printf("usage: show lltable \n"); return; } llatbl_llt_show((struct lltable *)addr); } DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables) { VNET_ITERATOR_DECL(vnet_iter); struct lltable *llt; VNET_FOREACH(vnet_iter) { CURVNET_SET_QUIET(vnet_iter); #ifdef VIMAGE db_printf("vnet=%p\n", curvnet); #endif SLIST_FOREACH(llt, &V_lltables, llt_link) { db_printf("llt=%p llt_af=%d llt_ifp=%p(%s)\n", llt, llt->llt_af, llt->llt_ifp, (llt->llt_ifp != NULL) ? llt->llt_ifp->if_xname : "?"); if (have_addr && addr != 0) /* verbose */ llatbl_llt_show(llt); if (db_pager_quit) { CURVNET_RESTORE(); return; } } CURVNET_RESTORE(); } } #endif Index: stable/11/sys/net/if_media.c =================================================================== --- stable/11/sys/net/if_media.c (revision 331642) +++ stable/11/sys/net/if_media.c (revision 331643) @@ -1,544 +1,544 @@ /* $NetBSD: if_media.c,v 1.1 1997/03/17 02:55:15 thorpej Exp $ */ /* $FreeBSD$ */ /*- * Copyright (c) 1997 * Jonathan Stone and Jason R. 
Thorpe. All rights reserved. * * This software is derived from information provided by Matt Thomas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Jonathan Stone * and Jason R. Thorpe for the NetBSD Project. * 4. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * BSD/OS-compatible network interface media selection. * * Where it is safe to do so, this code strays slightly from the BSD/OS * design. Software which uses the API (device drivers, basically) * shouldn't notice any difference. * * Many thanks to Matt Thomas for providing the information necessary * to implement this interface. */ #include "opt_ifmedia.h" #include #include #include #include #include #include #include #include #include /* * Compile-time options: * IFMEDIA_DEBUG: * turn on implementation-level debug printfs. * Useful for debugging newly-ported drivers. */ static struct ifmedia_entry *ifmedia_match(struct ifmedia *ifm, int flags, int mask); #ifdef IFMEDIA_DEBUG #include int ifmedia_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ifmedia, CTLFLAG_RW, &ifmedia_debug, 0, "if_media debugging msgs"); static void ifmedia_printword(int); #endif /* * Initialize if_media struct for a specific interface instance. */ void ifmedia_init(ifm, dontcare_mask, change_callback, status_callback) struct ifmedia *ifm; int dontcare_mask; ifm_change_cb_t change_callback; ifm_stat_cb_t status_callback; { LIST_INIT(&ifm->ifm_list); ifm->ifm_cur = NULL; ifm->ifm_media = 0; ifm->ifm_mask = dontcare_mask; /* IF don't-care bits */ ifm->ifm_change = change_callback; ifm->ifm_status = status_callback; } void ifmedia_removeall(ifm) struct ifmedia *ifm; { struct ifmedia_entry *entry; for (entry = LIST_FIRST(&ifm->ifm_list); entry; entry = LIST_FIRST(&ifm->ifm_list)) { LIST_REMOVE(entry, ifm_list); free(entry, M_IFADDR); } ifm->ifm_cur = NULL; } /* * Add a media configuration to the list of supported media * for a specific interface instance. 
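*/

/*
 * Illustrative sketch, not part of this change, of the <sys/queue.h>
 * drain idiom used by ifmedia_removeall() above: repeatedly take
 * LIST_FIRST() and free it, which stays valid while entries are being
 * unlinked.  Compiles as plain userland C; the names are made up.
 */
#include <sys/queue.h>
#include <stdlib.h>

struct media_ent {
	int			me_word;
	LIST_ENTRY(media_ent)	me_list;
};

LIST_HEAD(media_head, media_ent);

static void
media_removeall(struct media_head *head)
{
	struct media_ent *me;

	while ((me = LIST_FIRST(head)) != NULL) {
		LIST_REMOVE(me, me_list);
		free(me);
	}
}

/*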
*/ void ifmedia_add(ifm, mword, data, aux) struct ifmedia *ifm; int mword; int data; void *aux; { - register struct ifmedia_entry *entry; + struct ifmedia_entry *entry; #ifdef IFMEDIA_DEBUG if (ifmedia_debug) { if (ifm == NULL) { printf("ifmedia_add: null ifm\n"); return; } printf("Adding entry for "); ifmedia_printword(mword); } #endif entry = malloc(sizeof(*entry), M_IFADDR, M_NOWAIT); if (entry == NULL) panic("ifmedia_add: can't malloc entry"); entry->ifm_media = mword; entry->ifm_data = data; entry->ifm_aux = aux; LIST_INSERT_HEAD(&ifm->ifm_list, entry, ifm_list); } /* * Add an array of media configurations to the list of * supported media for a specific interface instance. */ void ifmedia_list_add(ifm, lp, count) struct ifmedia *ifm; struct ifmedia_entry *lp; int count; { int i; for (i = 0; i < count; i++) ifmedia_add(ifm, lp[i].ifm_media, lp[i].ifm_data, lp[i].ifm_aux); } /* * Set the default active media. * * Called by device-specific code which is assumed to have already * selected the default media in hardware. We do _not_ call the * media-change callback. */ void ifmedia_set(ifm, target) struct ifmedia *ifm; int target; { struct ifmedia_entry *match; match = ifmedia_match(ifm, target, ifm->ifm_mask); if (match == NULL) { printf("ifmedia_set: no match for 0x%x/0x%x\n", target, ~ifm->ifm_mask); panic("ifmedia_set"); } ifm->ifm_cur = match; #ifdef IFMEDIA_DEBUG if (ifmedia_debug) { printf("ifmedia_set: target "); ifmedia_printword(target); printf("ifmedia_set: setting to "); ifmedia_printword(ifm->ifm_cur->ifm_media); } #endif } /* * Given a media word, return one suitable for an application * using the original encoding. */ static int compat_media(int media) { if (IFM_TYPE(media) == IFM_ETHER && IFM_SUBTYPE(media) > IFM_OTHER) { media &= ~(IFM_ETH_XTYPE|IFM_TMASK); media |= IFM_OTHER; } return (media); } /* * Device-independent media ioctl support function. */ int ifmedia_ioctl(ifp, ifr, ifm, cmd) struct ifnet *ifp; struct ifreq *ifr; struct ifmedia *ifm; u_long cmd; { struct ifmedia_entry *match; struct ifmediareq *ifmr = (struct ifmediareq *) ifr; int error = 0; if (ifp == NULL || ifr == NULL || ifm == NULL) return(EINVAL); switch (cmd) { /* * Set the current media. */ case SIOCSIFMEDIA: { struct ifmedia_entry *oldentry; int oldmedia; int newmedia = ifr->ifr_media; match = ifmedia_match(ifm, newmedia, ifm->ifm_mask); if (match == NULL) { #ifdef IFMEDIA_DEBUG if (ifmedia_debug) { printf( "ifmedia_ioctl: no media found for 0x%x\n", newmedia); } #endif return (ENXIO); } /* * If no change, we're done. * XXX Automedia may invole software intervention. * Keep going in case the connected media changed. * Similarly, if best match changed (kernel debugger?). */ if ((IFM_SUBTYPE(newmedia) != IFM_AUTO) && (newmedia == ifm->ifm_media) && (match == ifm->ifm_cur)) return 0; /* * We found a match, now make the driver switch to it. * Make sure to preserve our old media type in case the * driver can't switch. */ #ifdef IFMEDIA_DEBUG if (ifmedia_debug) { printf("ifmedia_ioctl: switching %s to ", ifp->if_xname); ifmedia_printword(match->ifm_media); } #endif oldentry = ifm->ifm_cur; oldmedia = ifm->ifm_media; ifm->ifm_cur = match; ifm->ifm_media = newmedia; error = (*ifm->ifm_change)(ifp); if (error) { ifm->ifm_cur = oldentry; ifm->ifm_media = oldmedia; } break; } /* * Get list of available media and current media on interface. 
*/ case SIOCGIFMEDIA: case SIOCGIFXMEDIA: { struct ifmedia_entry *ep; int i; if (ifmr->ifm_count < 0) return (EINVAL); if (cmd == SIOCGIFMEDIA) { ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ? compat_media(ifm->ifm_cur->ifm_media) : IFM_NONE; } else { ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ? ifm->ifm_cur->ifm_media : IFM_NONE; } ifmr->ifm_mask = ifm->ifm_mask; ifmr->ifm_status = 0; (*ifm->ifm_status)(ifp, ifmr); /* * If there are more interfaces on the list, count * them. This allows the caller to set ifmr->ifm_count * to 0 on the first call to know how much space to * allocate. */ i = 0; LIST_FOREACH(ep, &ifm->ifm_list, ifm_list) if (i++ < ifmr->ifm_count) { error = copyout(&ep->ifm_media, ifmr->ifm_ulist + i - 1, sizeof(int)); if (error) break; } if (error == 0 && i > ifmr->ifm_count) error = ifmr->ifm_count ? E2BIG : 0; ifmr->ifm_count = i; break; } default: return (EINVAL); } return (error); } /* * Find media entry matching a given ifm word. * */ static struct ifmedia_entry * ifmedia_match(ifm, target, mask) struct ifmedia *ifm; int target; int mask; { struct ifmedia_entry *match, *next; match = NULL; mask = ~mask; LIST_FOREACH(next, &ifm->ifm_list, ifm_list) { if ((next->ifm_media & mask) == (target & mask)) { #if defined(IFMEDIA_DEBUG) || defined(DIAGNOSTIC) if (match) { printf("ifmedia_match: multiple match for " "0x%x/0x%x\n", target, mask); } #endif match = next; } } return match; } /* * Compute the interface `baudrate' from the media, for the interface * metrics (used by routing daemons). */ static const struct ifmedia_baudrate ifmedia_baudrate_descriptions[] = IFM_BAUDRATE_DESCRIPTIONS; uint64_t ifmedia_baudrate(int mword) { int i; for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) { if (IFM_TYPE_MATCH(mword, ifmedia_baudrate_descriptions[i].ifmb_word)) return (ifmedia_baudrate_descriptions[i].ifmb_baudrate); } /* Not known. 
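*/

#if 0	/* Illustrative sketch, not part of this change. */
	/*
	 * Userland view of the SIOCGIFMEDIA two-call protocol handled
	 * above (with <sys/sockio.h> and <net/if.h>): the first ioctl()
	 * is issued with ifm_count == 0 so the kernel only reports how
	 * many media words exist; the caller then allocates ifm_ulist
	 * and repeats the call.  "em0" is an example name.
	 */
	struct ifmediareq ifmr;
	int s, *mwords;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	memset(&ifmr, 0, sizeof(ifmr));
	strlcpy(ifmr.ifm_name, "em0", sizeof(ifmr.ifm_name));
	if (ioctl(s, SIOCGIFMEDIA, &ifmr) == -1)
		err(1, "SIOCGIFMEDIA");
	if (ifmr.ifm_count > 0) {
		mwords = calloc(ifmr.ifm_count, sizeof(int));
		ifmr.ifm_ulist = mwords;
		if (ioctl(s, SIOCGIFMEDIA, &ifmr) == -1)
			err(1, "SIOCGIFMEDIA");
	}
#endif

/*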
*/ return (0); } #ifdef IFMEDIA_DEBUG struct ifmedia_description ifm_type_descriptions[] = IFM_TYPE_DESCRIPTIONS; struct ifmedia_description ifm_subtype_ethernet_descriptions[] = IFM_SUBTYPE_ETHERNET_DESCRIPTIONS; struct ifmedia_description ifm_subtype_ethernet_option_descriptions[] = IFM_SUBTYPE_ETHERNET_OPTION_DESCRIPTIONS; struct ifmedia_description ifm_subtype_tokenring_descriptions[] = IFM_SUBTYPE_TOKENRING_DESCRIPTIONS; struct ifmedia_description ifm_subtype_tokenring_option_descriptions[] = IFM_SUBTYPE_TOKENRING_OPTION_DESCRIPTIONS; struct ifmedia_description ifm_subtype_fddi_descriptions[] = IFM_SUBTYPE_FDDI_DESCRIPTIONS; struct ifmedia_description ifm_subtype_fddi_option_descriptions[] = IFM_SUBTYPE_FDDI_OPTION_DESCRIPTIONS; struct ifmedia_description ifm_subtype_ieee80211_descriptions[] = IFM_SUBTYPE_IEEE80211_DESCRIPTIONS; struct ifmedia_description ifm_subtype_ieee80211_option_descriptions[] = IFM_SUBTYPE_IEEE80211_OPTION_DESCRIPTIONS; struct ifmedia_description ifm_subtype_ieee80211_mode_descriptions[] = IFM_SUBTYPE_IEEE80211_MODE_DESCRIPTIONS; struct ifmedia_description ifm_subtype_atm_descriptions[] = IFM_SUBTYPE_ATM_DESCRIPTIONS; struct ifmedia_description ifm_subtype_atm_option_descriptions[] = IFM_SUBTYPE_ATM_OPTION_DESCRIPTIONS; struct ifmedia_description ifm_subtype_shared_descriptions[] = IFM_SUBTYPE_SHARED_DESCRIPTIONS; struct ifmedia_description ifm_shared_option_descriptions[] = IFM_SHARED_OPTION_DESCRIPTIONS; struct ifmedia_type_to_subtype { struct ifmedia_description *subtypes; struct ifmedia_description *options; struct ifmedia_description *modes; }; /* must be in the same order as IFM_TYPE_DESCRIPTIONS */ struct ifmedia_type_to_subtype ifmedia_types_to_subtypes[] = { { &ifm_subtype_ethernet_descriptions[0], &ifm_subtype_ethernet_option_descriptions[0], NULL, }, { &ifm_subtype_tokenring_descriptions[0], &ifm_subtype_tokenring_option_descriptions[0], NULL, }, { &ifm_subtype_fddi_descriptions[0], &ifm_subtype_fddi_option_descriptions[0], NULL, }, { &ifm_subtype_ieee80211_descriptions[0], &ifm_subtype_ieee80211_option_descriptions[0], &ifm_subtype_ieee80211_mode_descriptions[0] }, { &ifm_subtype_atm_descriptions[0], &ifm_subtype_atm_option_descriptions[0], NULL, }, }; /* * print a media word. */ static void ifmedia_printword(ifmw) int ifmw; { struct ifmedia_description *desc; struct ifmedia_type_to_subtype *ttos; int seen_option = 0; /* Find the top-level interface type. */ for (desc = ifm_type_descriptions, ttos = ifmedia_types_to_subtypes; desc->ifmt_string != NULL; desc++, ttos++) if (IFM_TYPE(ifmw) == desc->ifmt_word) break; if (desc->ifmt_string == NULL) { printf("\n"); return; } printf("%s", desc->ifmt_string); /* Any mode. */ for (desc = ttos->modes; desc && desc->ifmt_string != NULL; desc++) if (IFM_MODE(ifmw) == desc->ifmt_word) { if (desc->ifmt_string != NULL) printf(" mode %s", desc->ifmt_string); break; } /* * Check for the shared subtype descriptions first, then the * type-specific ones. */ for (desc = ifm_subtype_shared_descriptions; desc->ifmt_string != NULL; desc++) if (IFM_SUBTYPE(ifmw) == desc->ifmt_word) goto got_subtype; for (desc = ttos->subtypes; desc->ifmt_string != NULL; desc++) if (IFM_SUBTYPE(ifmw) == desc->ifmt_word) break; if (desc->ifmt_string == NULL) { printf(" \n"); return; } got_subtype: printf(" %s", desc->ifmt_string); /* * Look for shared options. */ for (desc = ifm_shared_option_descriptions; desc->ifmt_string != NULL; desc++) { if (ifmw & desc->ifmt_word) { if (seen_option == 0) printf(" <"); printf("%s%s", seen_option++ ? 
"," : "", desc->ifmt_string); } } /* * Look for subtype-specific options. */ for (desc = ttos->options; desc->ifmt_string != NULL; desc++) { if (ifmw & desc->ifmt_word) { if (seen_option == 0) printf(" <"); printf("%s%s", seen_option++ ? "," : "", desc->ifmt_string); } } printf("%s\n", seen_option ? ">" : ""); } #endif /* IFMEDIA_DEBUG */ Index: stable/11/sys/net/slcompress.c =================================================================== --- stable/11/sys/net/slcompress.c (revision 331642) +++ stable/11/sys/net/slcompress.c (revision 331643) @@ -1,609 +1,595 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)slcompress.c 8.2 (Berkeley) 4/16/94 * $FreeBSD$ */ /* * Routines to compress and uncompess tcp packets (for transmission * over low speed serial lines. * * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989: * - Initial distribution. * */ #include #include #include #include #include #include #include #include #ifndef SL_NO_STATS #define INCR(counter) ++comp->counter; #else #define INCR(counter) #endif #define BCMP(p1, p2, n) bcmp((void *)(p1), (void *)(p2), (int)(n)) #define BCOPY(p1, p2, n) bcopy((void *)(p1), (void *)(p2), (int)(n)) void -sl_compress_init(comp, max_state) - struct slcompress *comp; - int max_state; +sl_compress_init(struct slcompress *comp, int max_state) { - register u_int i; - register struct cstate *tstate = comp->tstate; + u_int i; + struct cstate *tstate = comp->tstate; if (max_state == -1) { max_state = MAX_STATES - 1; bzero((char *)comp, sizeof(*comp)); } else { /* Don't reset statistics */ bzero((char *)comp->tstate, sizeof(comp->tstate)); bzero((char *)comp->rstate, sizeof(comp->rstate)); } for (i = max_state; i > 0; --i) { tstate[i].cs_id = i; tstate[i].cs_next = &tstate[i - 1]; } tstate[0].cs_next = &tstate[max_state]; tstate[0].cs_id = 0; comp->last_cs = &tstate[0]; comp->last_recv = 255; comp->last_xmit = 255; comp->flags = SLF_TOSS; } /* ENCODE encodes a number that is known to be non-zero. 
ENCODEZ * checks for zero (since zero has to be encoded in the long, 3 byte * form). */ #define ENCODE(n) { \ if ((u_int16_t)(n) >= 256) { \ *cp++ = 0; \ cp[1] = (n); \ cp[0] = (n) >> 8; \ cp += 2; \ } else { \ *cp++ = (n); \ } \ } #define ENCODEZ(n) { \ if ((u_int16_t)(n) >= 256 || (u_int16_t)(n) == 0) { \ *cp++ = 0; \ cp[1] = (n); \ cp[0] = (n) >> 8; \ cp += 2; \ } else { \ *cp++ = (n); \ } \ } #define DECODEL(f) { \ if (*cp == 0) {\ (f) = htonl(ntohl(f) + ((cp[1] << 8) | cp[2])); \ cp += 3; \ } else { \ (f) = htonl(ntohl(f) + (u_int32_t)*cp++); \ } \ } #define DECODES(f) { \ if (*cp == 0) {\ (f) = htons(ntohs(f) + ((cp[1] << 8) | cp[2])); \ cp += 3; \ } else { \ (f) = htons(ntohs(f) + (u_int32_t)*cp++); \ } \ } #define DECODEU(f) { \ if (*cp == 0) {\ (f) = htons((cp[1] << 8) | cp[2]); \ cp += 3; \ } else { \ (f) = htons((u_int32_t)*cp++); \ } \ } /* * Attempt to compress an outgoing TCP packet and return the type of * the result. The caller must have already verified that the protocol * is TCP. The first mbuf must contain the complete IP and TCP headers, * and "ip" must be == mtod(m, struct ip *). "comp" supplies the * compression state, and "compress_cid" tells us whether it is OK * to leave out the CID field when feasible. * * The caller is responsible for adjusting m->m_pkthdr.len upon return, * if m is an M_PKTHDR mbuf. */ u_int -sl_compress_tcp(m, ip, comp, compress_cid) - struct mbuf *m; - register struct ip *ip; - struct slcompress *comp; - int compress_cid; +sl_compress_tcp(struct mbuf *m, struct ip *ip, struct slcompress *comp, + int compress_cid) { - register struct cstate *cs = comp->last_cs->cs_next; - register u_int hlen = ip->ip_hl; - register struct tcphdr *oth; - register struct tcphdr *th; - register u_int deltaS, deltaA; - register u_int changes = 0; + struct cstate *cs = comp->last_cs->cs_next; + u_int hlen = ip->ip_hl; + struct tcphdr *oth; + struct tcphdr *th; + u_int deltaS, deltaA; + u_int changes = 0; u_char new_seq[16]; - register u_char *cp = new_seq; + u_char *cp = new_seq; /* * Bail if this is an IP fragment or if the TCP packet isn't * `compressible' (i.e., ACK isn't set or some other control bit is * set). (We assume that the caller has already made sure the * packet is IP proto TCP). */ if ((ip->ip_off & htons(0x3fff)) || m->m_len < 40) return (TYPE_IP); th = (struct tcphdr *)&((int32_t *)ip)[hlen]; if ((th->th_flags & (TH_SYN|TH_FIN|TH_RST|TH_ACK)) != TH_ACK) return (TYPE_IP); /* * Packet is compressible -- we're going to send either a * COMPRESSED_TCP or UNCOMPRESSED_TCP packet. Either way we need * to locate (or create) the connection state. Special case the * most recently used connection since it's most likely to be used * again & we don't have to do any reordering if it's used. */ INCR(sls_packets) if (ip->ip_src.s_addr != cs->cs_ip.ip_src.s_addr || ip->ip_dst.s_addr != cs->cs_ip.ip_dst.s_addr || *(int32_t *)th != ((int32_t *)&cs->cs_ip)[cs->cs_ip.ip_hl]) { /* * Wasn't the first -- search for it. * * States are kept in a circularly linked list with * last_cs pointing to the end of the list. The * list is kept in lru order by moving a state to the * head of the list whenever it is referenced. Since * the list is short and, empirically, the connection * we want is almost always near the front, we locate * states via linear search. If we don't find a state * for the datagram, the oldest state is (re-)used. 
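*/

#if 0	/* Illustrative sketch, not part of this change. */
	/*
	 * The circular LRU list described above, in isolation: "last"
	 * points at the tail, last->next is the head, and a hit is
	 * promoted to the head by relinking three pointers.  "prev"
	 * must be the node preceding "hit" on the ring; the function
	 * returns the new tail.  All names are hypothetical.
	 */
	struct ring { struct ring *r_next; };

	static struct ring *
	lru_promote(struct ring *last, struct ring *prev, struct ring *hit)
	{
		if (hit == last)	/* tail hit: old tail becomes head */
			return (prev);
		prev->r_next = hit->r_next;	/* unlink hit */
		hit->r_next = last->r_next;	/* splice in as new head */
		last->r_next = hit;
		return (last);			/* tail unchanged */
	}
#endif

/*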
*/ - register struct cstate *lcs; - register struct cstate *lastcs = comp->last_cs; + struct cstate *lcs; + struct cstate *lastcs = comp->last_cs; do { lcs = cs; cs = cs->cs_next; INCR(sls_searches) if (ip->ip_src.s_addr == cs->cs_ip.ip_src.s_addr && ip->ip_dst.s_addr == cs->cs_ip.ip_dst.s_addr && *(int32_t *)th == ((int32_t *)&cs->cs_ip)[cs->cs_ip.ip_hl]) goto found; } while (cs != lastcs); /* * Didn't find it -- re-use oldest cstate. Send an * uncompressed packet that tells the other side what * connection number we're using for this conversation. * Note that since the state list is circular, the oldest * state points to the newest and we only need to set * last_cs to update the lru linkage. */ INCR(sls_misses) comp->last_cs = lcs; hlen += th->th_off; hlen <<= 2; if (hlen > m->m_len) return TYPE_IP; goto uncompressed; found: /* * Found it -- move to the front on the connection list. */ if (cs == lastcs) comp->last_cs = lcs; else { lcs->cs_next = cs->cs_next; cs->cs_next = lastcs->cs_next; lastcs->cs_next = cs; } } /* * Make sure that only what we expect to change changed. The first * line of the `if' checks the IP protocol version, header length & * type of service. The 2nd line checks the "Don't fragment" bit. * The 3rd line checks the time-to-live and protocol (the protocol * check is unnecessary but costless). The 4th line checks the TCP * header length. The 5th line checks IP options, if any. The 6th * line checks TCP options, if any. If any of these things are * different between the previous & current datagram, we send the * current datagram `uncompressed'. */ oth = (struct tcphdr *)&((int32_t *)&cs->cs_ip)[hlen]; deltaS = hlen; hlen += th->th_off; hlen <<= 2; if (hlen > m->m_len) return TYPE_IP; if (((u_int16_t *)ip)[0] != ((u_int16_t *)&cs->cs_ip)[0] || ((u_int16_t *)ip)[3] != ((u_int16_t *)&cs->cs_ip)[3] || ((u_int16_t *)ip)[4] != ((u_int16_t *)&cs->cs_ip)[4] || th->th_off != oth->th_off || (deltaS > 5 && BCMP(ip + 1, &cs->cs_ip + 1, (deltaS - 5) << 2)) || (th->th_off > 5 && BCMP(th + 1, oth + 1, (th->th_off - 5) << 2))) goto uncompressed; /* * Figure out which of the changing fields changed. The * receiver expects changes in the order: urgent, window, * ack, seq (the order minimizes the number of temporaries * needed in this section of code). */ if (th->th_flags & TH_URG) { deltaS = ntohs(th->th_urp); ENCODEZ(deltaS); changes |= NEW_U; } else if (th->th_urp != oth->th_urp) /* argh! URG not set but urp changed -- a sensible * implementation should never do this but RFC793 * doesn't prohibit the change so we have to deal * with it. */ goto uncompressed; deltaS = (u_int16_t)(ntohs(th->th_win) - ntohs(oth->th_win)); if (deltaS) { ENCODE(deltaS); changes |= NEW_W; } deltaA = ntohl(th->th_ack) - ntohl(oth->th_ack); if (deltaA) { if (deltaA > 0xffff) goto uncompressed; ENCODE(deltaA); changes |= NEW_A; } deltaS = ntohl(th->th_seq) - ntohl(oth->th_seq); if (deltaS) { if (deltaS > 0xffff) goto uncompressed; ENCODE(deltaS); changes |= NEW_S; } switch(changes) { case 0: /* * Nothing changed. If this packet contains data and the * last one didn't, this is probably a data packet following * an ack (normal on an interactive connection) and we send * it compressed. Otherwise it's probably a retransmit, * retransmitted ack or window probe. Send it uncompressed * in case the other side missed the compressed version. 
*/ if (ip->ip_len != cs->cs_ip.ip_len && ntohs(cs->cs_ip.ip_len) == hlen) break; /* FALLTHROUGH */ case SPECIAL_I: case SPECIAL_D: /* * actual changes match one of our special case encodings -- * send packet uncompressed. */ goto uncompressed; case NEW_S|NEW_A: if (deltaS == deltaA && deltaS == ntohs(cs->cs_ip.ip_len) - hlen) { /* special case for echoed terminal traffic */ changes = SPECIAL_I; cp = new_seq; } break; case NEW_S: if (deltaS == ntohs(cs->cs_ip.ip_len) - hlen) { /* special case for data xfer */ changes = SPECIAL_D; cp = new_seq; } break; } deltaS = ntohs(ip->ip_id) - ntohs(cs->cs_ip.ip_id); if (deltaS != 1) { ENCODEZ(deltaS); changes |= NEW_I; } if (th->th_flags & TH_PUSH) changes |= TCP_PUSH_BIT; /* * Grab the cksum before we overwrite it below. Then update our * state with this packet's header. */ deltaA = ntohs(th->th_sum); BCOPY(ip, &cs->cs_ip, hlen); /* * We want to use the original packet as our compressed packet. * (cp - new_seq) is the number of bytes we need for compressed * sequence numbers. In addition we need one byte for the change * mask, one for the connection id and two for the tcp checksum. * So, (cp - new_seq) + 4 bytes of header are needed. hlen is how * many bytes of the original packet to toss so subtract the two to * get the new packet size. */ deltaS = cp - new_seq; cp = (u_char *)ip; if (compress_cid == 0 || comp->last_xmit != cs->cs_id) { comp->last_xmit = cs->cs_id; hlen -= deltaS + 4; cp += hlen; *cp++ = changes | NEW_C; *cp++ = cs->cs_id; } else { hlen -= deltaS + 3; cp += hlen; *cp++ = changes; } m->m_len -= hlen; m->m_data += hlen; *cp++ = deltaA >> 8; *cp++ = deltaA; BCOPY(new_seq, cp, deltaS); INCR(sls_compressed) return (TYPE_COMPRESSED_TCP); /* * Update connection state cs & send uncompressed packet ('uncompressed' * means a regular ip/tcp packet but with the 'conversation id' we hope * to use on future compressed packets in the protocol field). */ uncompressed: BCOPY(ip, &cs->cs_ip, hlen); ip->ip_p = cs->cs_id; comp->last_xmit = cs->cs_id; return (TYPE_UNCOMPRESSED_TCP); } int -sl_uncompress_tcp(bufp, len, type, comp) - u_char **bufp; - int len; - u_int type; - struct slcompress *comp; +sl_uncompress_tcp(u_char **bufp, int len, u_int type, struct slcompress *comp) { u_char *hdr, *cp; int hlen, vjlen; cp = bufp? *bufp: NULL; vjlen = sl_uncompress_tcp_core(cp, len, len, type, comp, &hdr, &hlen); if (vjlen < 0) return (0); /* error */ if (vjlen == 0) return (len); /* was uncompressed already */ cp += vjlen; len -= vjlen; /* * At this point, cp points to the first byte of data in the * packet. If we're not aligned on a 4-byte boundary, copy the * data down so the ip & tcp headers will be aligned. Then back up * cp by the tcp/ip header length to make room for the reconstructed * header (we assume the packet we were handed has enough space to * prepend 128 bytes of header). */ if ((intptr_t)cp & 3) { if (len > 0) BCOPY(cp, ((intptr_t)cp &~ 3), len); cp = (u_char *)((intptr_t)cp &~ 3); } cp -= hlen; len += hlen; BCOPY(hdr, cp, hlen); *bufp = cp; return (len); } /* * Uncompress a packet of total length total_len. The first buflen * bytes are at buf; this must include the entire (compressed or * uncompressed) TCP/IP header. This procedure returns the length * of the VJ header, with a pointer to the uncompressed IP header * in *hdrp and its length in *hlenp. 
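*/

/*
 * Illustrative sketch, not part of this change: the byte-oriented
 * delta encoding implemented by the ENCODEZ and DECODE* macros above,
 * written as plain functions.  A value of 1..255 costs one byte; zero
 * (which would collide with the escape byte) or anything larger is
 * sent as 0x00 followed by a 16-bit big-endian value.  The vj_* names
 * are made up.
 */
#include <stdint.h>

static uint8_t *
vj_encode(uint8_t *cp, uint16_t n)
{

	if (n >= 256 || n == 0) {
		*cp++ = 0;		/* escape byte */
		*cp++ = n >> 8;		/* high byte first */
		*cp++ = n & 0xff;
	} else
		*cp++ = n & 0xff;	/* common case: one byte */
	return (cp);
}

static const uint8_t *
vj_decode(const uint8_t *cp, uint16_t *n)
{

	if (*cp == 0) {			/* escaped: 16-bit big-endian */
		*n = (uint16_t)((cp[1] << 8) | cp[2]);
		return (cp + 3);
	}
	*n = *cp;
	return (cp + 1);
}

/*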
*/ int -sl_uncompress_tcp_core(buf, buflen, total_len, type, comp, hdrp, hlenp) - u_char *buf; - int buflen, total_len; - u_int type; - struct slcompress *comp; - u_char **hdrp; - u_int *hlenp; +sl_uncompress_tcp_core(u_char *buf, int buflen, int total_len, u_int type, + struct slcompress *comp, u_char **hdrp, u_int *hlenp) { - register u_char *cp; - register u_int hlen, changes; - register struct tcphdr *th; - register struct cstate *cs; - register struct ip *ip; - register u_int16_t *bp; - register u_int vjlen; + u_char *cp; + u_int hlen, changes; + struct tcphdr *th; + struct cstate *cs; + struct ip *ip; + u_int16_t *bp; + u_int vjlen; switch (type) { case TYPE_UNCOMPRESSED_TCP: ip = (struct ip *) buf; if (ip->ip_p >= MAX_STATES) goto bad; cs = &comp->rstate[comp->last_recv = ip->ip_p]; comp->flags &=~ SLF_TOSS; ip->ip_p = IPPROTO_TCP; /* * Calculate the size of the TCP/IP header and make sure that * we don't overflow the space we have available for it. */ hlen = ip->ip_hl << 2; if (hlen + sizeof(struct tcphdr) > buflen) goto bad; hlen += ((struct tcphdr *)&((char *)ip)[hlen])->th_off << 2; if (hlen > MAX_HDR || hlen > buflen) goto bad; BCOPY(ip, &cs->cs_ip, hlen); cs->cs_hlen = hlen; INCR(sls_uncompressedin) *hdrp = (u_char *) &cs->cs_ip; *hlenp = hlen; return (0); default: goto bad; case TYPE_COMPRESSED_TCP: break; } /* We've got a compressed packet. */ INCR(sls_compressedin) cp = buf; changes = *cp++; if (changes & NEW_C) { /* Make sure the state index is in range, then grab the state. * If we have a good state index, clear the 'discard' flag. */ if (*cp >= MAX_STATES) goto bad; comp->flags &=~ SLF_TOSS; comp->last_recv = *cp++; } else { /* this packet has an implicit state index. If we've * had a line error since the last time we got an * explicit state index, we have to toss the packet. */ if (comp->flags & SLF_TOSS) { INCR(sls_tossed) return (-1); } } cs = &comp->rstate[comp->last_recv]; hlen = cs->cs_ip.ip_hl << 2; th = (struct tcphdr *)&((u_char *)&cs->cs_ip)[hlen]; th->th_sum = htons((*cp << 8) | cp[1]); cp += 2; if (changes & TCP_PUSH_BIT) th->th_flags |= TH_PUSH; else th->th_flags &=~ TH_PUSH; switch (changes & SPECIALS_MASK) { case SPECIAL_I: { - register u_int i = ntohs(cs->cs_ip.ip_len) - cs->cs_hlen; + u_int i = ntohs(cs->cs_ip.ip_len) - cs->cs_hlen; th->th_ack = htonl(ntohl(th->th_ack) + i); th->th_seq = htonl(ntohl(th->th_seq) + i); } break; case SPECIAL_D: th->th_seq = htonl(ntohl(th->th_seq) + ntohs(cs->cs_ip.ip_len) - cs->cs_hlen); break; default: if (changes & NEW_U) { th->th_flags |= TH_URG; DECODEU(th->th_urp) } else th->th_flags &=~ TH_URG; if (changes & NEW_W) DECODES(th->th_win) if (changes & NEW_A) DECODEL(th->th_ack) if (changes & NEW_S) DECODEL(th->th_seq) break; } if (changes & NEW_I) { DECODES(cs->cs_ip.ip_id) } else cs->cs_ip.ip_id = htons(ntohs(cs->cs_ip.ip_id) + 1); /* * At this point, cp points to the first byte of data in the * packet. Fill in the IP total length and update the IP * header checksum. 
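*/

#if 0	/* Illustrative sketch, not part of this change. */
	/*
	 * The checksum recomputation below, in isolation: sum the
	 * header as 16-bit words into a 32-bit accumulator, fold the
	 * carries back in twice, and complement.  For example, a
	 * running sum of 0x2fffe folds to 0xfffe + 0x2 = 0x10000, and
	 * again to 0x0000 + 0x1 = 0x0001.  The helper name is made up.
	 */
	static uint16_t
	ip_cksum(const uint16_t *words, int hlen)	/* hlen in bytes */
	{
		uint32_t sum = 0;

		for (; hlen > 0; hlen -= 2)
			sum += *words++;
		sum = (sum & 0xffff) + (sum >> 16);
		sum = (sum & 0xffff) + (sum >> 16);
		return (~sum & 0xffff);
	}
#endif

/*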
*/ vjlen = cp - buf; buflen -= vjlen; if (buflen < 0) /* we must have dropped some characters (crc should detect * this but the old slip framing won't) */ goto bad; total_len += cs->cs_hlen - vjlen; cs->cs_ip.ip_len = htons(total_len); /* recompute the ip header checksum */ bp = (u_int16_t *) &cs->cs_ip; cs->cs_ip.ip_sum = 0; for (changes = 0; hlen > 0; hlen -= 2) changes += *bp++; changes = (changes & 0xffff) + (changes >> 16); changes = (changes & 0xffff) + (changes >> 16); cs->cs_ip.ip_sum = ~ changes; *hdrp = (u_char *) &cs->cs_ip; *hlenp = cs->cs_hlen; return vjlen; bad: comp->flags |= SLF_TOSS; INCR(sls_errorin) return (-1); } Index: stable/11/sys/netinet/in.c =================================================================== --- stable/11/sys/netinet/in.c (revision 331642) +++ stable/11/sys/netinet/in.c (revision 331643) @@ -1,1489 +1,1489 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (C) 2001 WIDE Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)in.c 8.4 (Berkeley) 1/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *); static int in_difaddr_ioctl(caddr_t, struct ifnet *, struct thread *); static void in_socktrim(struct sockaddr_in *); static void in_purgemaddrs(struct ifnet *); static VNET_DEFINE(int, nosameprefix); #define V_nosameprefix VNET(nosameprefix) SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nosameprefix), 0, "Refuse to create same prefixes on different interfaces"); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcbinfo VNET(ripcbinfo) static struct sx in_control_sx; SX_SYSINIT(in_control_sx, &in_control_sx, "in_control"); /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). */ int in_localaddr(struct in_addr in) { struct rm_priotracker in_ifa_tracker; - register u_long i = ntohl(in.s_addr); - register struct in_ifaddr *ia; + u_long i = ntohl(in.s_addr); + struct in_ifaddr *ia; IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if ((i & ia->ia_subnetmask) == ia->ia_subnet) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (1); } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (0); } /* * Return 1 if an internet address is for the local host and configured * on one of its interfaces. */ int in_localip(struct in_addr in) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) { if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (1); } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (0); } /* * Return 1 if an internet address is configured on an interface. */ int in_ifhasaddr(struct ifnet *ifp, struct in_addr in) { struct ifaddr *ifa; struct in_ifaddr *ia; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = (struct in_ifaddr *)ifa; if (ia->ia_addr.sin_addr.s_addr == in.s_addr) { IF_ADDR_RUNLOCK(ifp); return (1); } } IF_ADDR_RUNLOCK(ifp); return (0); } /* * Return a reference to the interface address which is different to * the supplied one but with same IP address value. */ static struct in_ifaddr * in_localip_more(struct in_ifaddr *ia) { struct rm_priotracker in_ifa_tracker; in_addr_t in = IA_SIN(ia)->sin_addr.s_addr; struct in_ifaddr *it; IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(it, INADDR_HASH(in), ia_hash) { if (it != ia && IA_SIN(it)->sin_addr.s_addr == in) { ifa_ref(&it->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (it); } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (NULL); } /* * Determine whether an IP address is in a reserved set of addresses * that may not be forwarded, or whether datagrams to that destination * may be forwarded. 
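*/

/*
 * Illustrative sketch, not part of this change: the checks that
 * in_canforward() below performs, restated over a host-order 32-bit
 * address so they can be unit-tested in userland.  The constants
 * mirror the IN_MULTICAST/IN_EXPERIMENTAL/IN_LINKLOCAL/IN_CLASSA
 * macros from <netinet/in.h>; the helper name is made up.
 */
#include <stdint.h>

static int
addr_is_forwardable(uint32_t ip)	/* host byte order */
{

	if ((ip & 0xf0000000) == 0xe0000000)	/* 224/4: multicast */
		return (0);
	if ((ip & 0xf0000000) == 0xf0000000)	/* 240/4: experimental */
		return (0);
	if ((ip & 0xffff0000) == 0xa9fe0000)	/* 169.254/16: link-local */
		return (0);
	if ((ip & 0x80000000) == 0) {		/* class A */
		uint32_t net = ip & 0xff000000;

		if (net == 0 || net == 0x7f000000)	/* net 0, loopback */
			return (0);
	}
	return (1);
}

/*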
*/ int in_canforward(struct in_addr in) { - register u_long i = ntohl(in.s_addr); - register u_long net; + u_long i = ntohl(in.s_addr); + u_long net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i)) return (0); if (IN_CLASSA(i)) { net = i & IN_CLASSA_NET; if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT)) return (0); } return (1); } /* * Trim a mask in a sockaddr */ static void in_socktrim(struct sockaddr_in *ap) { - register char *cplim = (char *) &ap->sin_addr; - register char *cp = (char *) (&ap->sin_addr + 1); + char *cplim = (char *) &ap->sin_addr; + char *cp = (char *) (&ap->sin_addr + 1); ap->sin_len = 0; while (--cp >= cplim) if (*cp) { (ap)->sin_len = cp - (char *) (ap) + 1; break; } } /* * Generic internet control operations (ioctl's). */ int in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr; struct ifaddr *ifa; struct in_ifaddr *ia; int error; if (ifp == NULL) return (EADDRNOTAVAIL); /* * Filter out 4 ioctls we implement directly. Forward the rest * to specific functions and ifp->if_ioctl(). */ switch (cmd) { case SIOCGIFADDR: case SIOCGIFBRDADDR: case SIOCGIFDSTADDR: case SIOCGIFNETMASK: break; case SIOCDIFADDR: sx_xlock(&in_control_sx); error = in_difaddr_ioctl(data, ifp, td); sx_xunlock(&in_control_sx); return (error); case OSIOCAIFADDR: /* 9.x compat */ case SIOCAIFADDR: sx_xlock(&in_control_sx); error = in_aifaddr_ioctl(cmd, data, ifp, td); sx_xunlock(&in_control_sx); return (error); case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFDSTADDR: case SIOCSIFNETMASK: /* We no longer support that old commands. */ return (EINVAL); default: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); return ((*ifp->if_ioctl)(ifp, cmd, data)); } if (addr->sin_addr.s_addr != INADDR_ANY && prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0) return (EADDRNOTAVAIL); /* * Find address for this interface, if it exists. If an * address was specified, find that one instead of the * first one on the interface, if possible. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = (struct in_ifaddr *)ifa; if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr) break; } if (ifa == NULL) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET) { ia = (struct in_ifaddr *)ifa; if (prison_check_ip4(td->td_ucred, &ia->ia_addr.sin_addr) == 0) break; } if (ifa == NULL) { IF_ADDR_RUNLOCK(ifp); return (EADDRNOTAVAIL); } error = 0; switch (cmd) { case SIOCGIFADDR: *addr = ia->ia_addr; break; case SIOCGIFBRDADDR: if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EINVAL; break; } *addr = ia->ia_broadaddr; break; case SIOCGIFDSTADDR: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { error = EINVAL; break; } *addr = ia->ia_dstaddr; break; case SIOCGIFNETMASK: *addr = ia->ia_sockmask; break; } IF_ADDR_RUNLOCK(ifp); return (error); } static int in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { const struct in_aliasreq *ifra = (struct in_aliasreq *)data; const struct sockaddr_in *addr = &ifra->ifra_addr; const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr; const struct sockaddr_in *mask = &ifra->ifra_mask; const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr; const int vhid = (cmd == SIOCAIFADDR) ? 
ifra->ifra_vhid : 0; struct ifaddr *ifa; struct in_ifaddr *ia; bool iaIsFirst; int error = 0; error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); /* * ifra_addr must be present and be of INET family. * ifra_broadaddr/ifra_dstaddr and ifra_mask are optional. */ if (addr->sin_len != sizeof(struct sockaddr_in) || addr->sin_family != AF_INET) return (EINVAL); if (broadaddr->sin_len != 0 && (broadaddr->sin_len != sizeof(struct sockaddr_in) || broadaddr->sin_family != AF_INET)) return (EINVAL); if (mask->sin_len != 0 && (mask->sin_len != sizeof(struct sockaddr_in) || mask->sin_family != AF_INET)) return (EINVAL); if ((ifp->if_flags & IFF_POINTOPOINT) && (dstaddr->sin_len != sizeof(struct sockaddr_in) || dstaddr->sin_addr.s_addr == INADDR_ANY)) return (EDESTADDRREQ); if (vhid > 0 && carp_attach_p == NULL) return (EPROTONOSUPPORT); /* * See whether address already exist. */ iaIsFirst = true; ia = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in_ifaddr *it; if (ifa->ifa_addr->sa_family != AF_INET) continue; it = (struct in_ifaddr *)ifa; iaIsFirst = false; if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0) ia = it; } IF_ADDR_RUNLOCK(ifp); if (ia != NULL) (void )in_difaddr_ioctl(data, ifp, td); ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK); ia = (struct in_ifaddr *)ifa; ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; callout_init_rw(&ia->ia_garp_timer, &ifp->if_addr_lock, CALLOUT_RETURNUNLOCKED); ia->ia_ifp = ifp; ia->ia_addr = *addr; if (mask->sin_len != 0) { ia->ia_sockmask = *mask; ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr); } else { in_addr_t i = ntohl(addr->sin_addr.s_addr); /* * Be compatible with network classes, if netmask isn't * supplied, guess it based on classes. */ if (IN_CLASSA(i)) ia->ia_subnetmask = IN_CLASSA_NET; else if (IN_CLASSB(i)) ia->ia_subnetmask = IN_CLASSB_NET; else ia->ia_subnetmask = IN_CLASSC_NET; ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask); } ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask; in_socktrim(&ia->ia_sockmask); if (ifp->if_flags & IFF_BROADCAST) { if (broadaddr->sin_len != 0) { ia->ia_broadaddr = *broadaddr; } else if (ia->ia_subnetmask == IN_RFC3021_MASK) { ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST; ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in); ia->ia_broadaddr.sin_family = AF_INET; } else { ia->ia_broadaddr.sin_addr.s_addr = htonl(ia->ia_subnet | ~ia->ia_subnetmask); ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in); ia->ia_broadaddr.sin_family = AF_INET; } } if (ifp->if_flags & IFF_POINTOPOINT) ia->ia_dstaddr = *dstaddr; /* XXXGL: rtinit() needs this strange assignment. */ if (ifp->if_flags & IFF_LOOPBACK) ia->ia_dstaddr = ia->ia_addr; if (vhid != 0) { error = (*carp_attach_p)(&ia->ia_ifa, vhid); if (error) return (error); } /* if_addrhead is already referenced by ifa_alloc() */ IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_ref(ifa); /* in_ifaddrhead */ IN_IFADDR_WLOCK(); TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link); LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); IN_IFADDR_WUNLOCK(); /* * Give the interface a chance to initialize * if this is its first address, * and to validate the address if necessary. 
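*/

/*
 * Worked example, not part of this change, of the derivation above
 * for 192.0.2.1 with no netmask supplied: 192 is 0xc0, its leading
 * bits are 110, so the address is class C and the guessed mask is
 * IN_CLASSC_NET:
 *
 *	addr   = 0xc0000201                  (192.0.2.1)
 *	mask   = 0xffffff00                  (255.255.255.0)
 *	subnet = addr & mask    = 0xc0000200 (192.0.2.0)
 *	bcast  = subnet | ~mask = 0xc00002ff (192.0.2.255)
 *
 * A /31 (IN_RFC3021_MASK) has no directed broadcast, so
 * INADDR_BROADCAST is used for it instead, as coded above.
 */

/*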
*/ if (ifp->if_ioctl != NULL) { error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); if (error) goto fail1; } /* * Add route for the network. */ if (vhid == 0) { int flags = RTF_UP; if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) flags |= RTF_HOST; error = in_addprefix(ia, flags); if (error) goto fail1; } /* * Add a loopback route to self. */ if (vhid == 0 && (ifp->if_flags & IFF_LOOPBACK) == 0 && ia->ia_addr.sin_addr.s_addr != INADDR_ANY && !((ifp->if_flags & IFF_POINTOPOINT) && ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)) { struct in_ifaddr *eia; eia = in_localip_more(ia); if (eia == NULL) { error = ifa_add_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error) goto fail2; } else ifa_free(&eia->ia_ifa); } if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) { struct in_addr allhosts_addr; struct in_ifinfo *ii; ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); error = in_joingroup(ifp, &allhosts_addr, NULL, &ii->ii_allhosts); } EVENTHANDLER_INVOKE(ifaddr_event, ifp); return (error); fail2: if (vhid == 0) (void )in_scrubprefix(ia, LLE_STATIC); fail1: if (ia->ia_ifa.ifa_carp) (*carp_detach_p)(&ia->ia_ifa); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link); LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ return (error); } static int in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td) { const struct ifreq *ifr = (struct ifreq *)data; const struct sockaddr_in *addr = (const struct sockaddr_in *) &ifr->ifr_addr; struct ifaddr *ifa; struct in_ifaddr *ia; bool deleteAny, iaIsLast; int error; if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } if (addr->sin_len != sizeof(struct sockaddr_in) || addr->sin_family != AF_INET) deleteAny = true; else deleteAny = false; iaIsLast = true; ia = NULL; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in_ifaddr *it; if (ifa->ifa_addr->sa_family != AF_INET) continue; it = (struct in_ifaddr *)ifa; if (deleteAny && ia == NULL && (td == NULL || prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0)) ia = it; if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && (td == NULL || prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)) ia = it; if (it != ia) iaIsLast = false; } if (ia == NULL) { IF_ADDR_WUNLOCK(ifp); return (EADDRNOTAVAIL); } TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link); LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, LLE_STATIC); /* * in_ifadown gets rid of all the rest of * the routes. This is not quite the right * thing to do, but at least if we are running * a routing process they will come back. */ in_ifadown(&ia->ia_ifa, 1); if (ia->ia_ifa.ifa_carp) (*carp_detach_p)(&ia->ia_ifa); /* * If this is the last IPv4 address configured on this * interface, leave the all-hosts group. * No state-change report need be transmitted. 
*/ if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) { struct in_ifinfo *ii; ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); IN_MULTI_LOCK(); if (ii->ii_allhosts) { (void)in_leavegroup_locked(ii->ii_allhosts, NULL); ii->ii_allhosts = NULL; } IN_MULTI_UNLOCK(); } IF_ADDR_WLOCK(ifp); if (callout_stop(&ia->ia_garp_timer) == 1) { ifa_free(&ia->ia_ifa); } IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifaddr_event, ifp); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ return (0); } #define rtinitflags(x) \ ((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \ ? RTF_HOST : 0) /* * Check if we have a route for the given prefix already or add one accordingly. */ int in_addprefix(struct in_ifaddr *target, int flags) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error; if ((flags & RTF_HOST) != 0) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } IN_IFADDR_RLOCK(&in_ifa_tracker); /* Look for an existing address with the same prefix, mask, and fib */ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib) continue; /* * If we got a matching prefix route inserted by other * interface address, we are done here. */ if (ia->ia_flags & IFA_ROUTE) { #ifdef RADIX_MPATH if (ia->ia_addr.sin_addr.s_addr == target->ia_addr.sin_addr.s_addr) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (EEXIST); } else break; #endif if (V_nosameprefix) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (EEXIST); } else { int fibnum; fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib; rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum); IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (0); } } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * No-one seem to have this prefix route, so we try to insert it. */ error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags); if (!error) target->ia_flags |= IFA_ROUTE; return (error); } /* * Removes either all lle entries for given @ia, or lle * corresponding to @ia address. */ static void in_scrubprefixlle(struct in_ifaddr *ia, int all, u_int flags) { struct sockaddr_in addr, mask; struct sockaddr *saddr, *smask; struct ifnet *ifp; saddr = (struct sockaddr *)&addr; bzero(&addr, sizeof(addr)); addr.sin_len = sizeof(addr); addr.sin_family = AF_INET; smask = (struct sockaddr *)&mask; bzero(&mask, sizeof(mask)); mask.sin_len = sizeof(mask); mask.sin_family = AF_INET; mask.sin_addr.s_addr = ia->ia_subnetmask; ifp = ia->ia_ifp; if (all) { /* * Remove all L2 entries matching given prefix. * Convert address to host representation to avoid * doing this on every callback. ia_subnetmask is already * stored in host representation. */ addr.sin_addr.s_addr = ntohl(ia->ia_addr.sin_addr.s_addr); lltable_prefix_free(AF_INET, saddr, smask, flags); } else { /* Remove interface address only */ addr.sin_addr.s_addr = ia->ia_addr.sin_addr.s_addr; lltable_delete_addr(LLTABLE(ifp), LLE_IFADDR, saddr); } } /* * If there is no other address in the system that can serve a route to the * same prefix, remove the route. Hand over the route to the new address * otherwise. 
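*/

/*
 * Illustrative sketch, not part of this change: the prefix key
 * comparison that in_addprefix() above and in_scrubprefix() below
 * share -- a host route is keyed by the peer address with an empty
 * mask, a network route by (addr & mask, mask), and two keys match
 * only when both halves are equal.  Helper names are made up.
 */
#include <stdint.h>

struct pfx_key {
	uint32_t	prefix;	/* network byte order, like in_addr */
	uint32_t	mask;
};

static struct pfx_key
pfx_key_make(uint32_t addr, uint32_t mask, int is_host, uint32_t peer)
{
	struct pfx_key k;

	if (is_host) {		/* IFF_LOOPBACK/IFF_POINTOPOINT case */
		k.prefix = peer;
		k.mask = 0;
	} else {
		k.prefix = addr & mask;
		k.mask = mask;
	}
	return (k);
}

static int
pfx_key_equal(struct pfx_key a, struct pfx_key b)
{
	return (a.prefix == b.prefix && a.mask == b.mask);
}

/*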
*/ int in_scrubprefix(struct in_ifaddr *target, u_int flags) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error = 0; /* * Remove the loopback route to the interface address. */ if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) && !(target->ia_ifp->if_flags & IFF_LOOPBACK) && (flags & LLE_STATIC)) { struct in_ifaddr *eia; /* * XXXME: add fib-aware in_localip. * We definitely don't want to switch between * prefixes in different fibs. */ eia = in_localip_more(target); if (eia != NULL) { error = ifa_switch_loopback_route((struct ifaddr *)eia, (struct sockaddr *)&target->ia_addr); ifa_free(&eia->ia_ifa); } else { error = ifa_del_loopback_route((struct ifaddr *)target, (struct sockaddr *)&target->ia_addr); } } if (rtinitflags(target)) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } if ((target->ia_flags & IFA_ROUTE) == 0) { int fibnum; fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib; rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum); /* * Removing address from !IFF_UP interface or * prefix which exists on other interface (along with route). * No entries should exist here except target addr. * Given that, delete this entry only. */ in_scrubprefixlle(target, 0, flags); return (0); } IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } if ((ia->ia_ifp->if_flags & IFF_UP) == 0) continue; /* * If we got a matching prefix address, move IFA_ROUTE and * the route itself to it. Make sure that routing daemons * get a heads-up. */ if ((ia->ia_flags & IFA_ROUTE) == 0) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n", error); /* Scrub all entries IFF interface is different */ in_scrubprefixlle(target, target->ia_ifp != ia->ia_ifp, flags); error = rtinit(&ia->ia_ifa, (int)RTM_ADD, rtinitflags(ia) | RTF_UP); if (error == 0) ia->ia_flags |= IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n", error); ifa_free(&ia->ia_ifa); return (error); } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * remove all L2 entries on the given prefix */ in_scrubprefixlle(target, 1, flags); /* * As no-one seem to have this prefix, we can remove the route. */ error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error); return (error); } #undef rtinitflags void in_ifscrub_all(void) { struct ifnet *ifp; struct ifaddr *ifa, *nifa; struct ifaliasreq ifr; IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { /* Cannot lock here - lock recursion. */ /* IF_ADDR_RLOCK(ifp); */ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { if (ifa->ifa_addr->sa_family != AF_INET) continue; /* * This is ugly but the only way for legacy IP to * cleanly remove addresses and everything attached. 
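*/

#if 0	/* Illustrative sketch, not part of this change. */
	/*
	 * The same SIOCDIFADDR path as driven from userland (with
	 * <sys/sockio.h>, <net/if.h>, <netinet/in.h>, <arpa/inet.h>):
	 * fill an ifreq with the interface name and the address to
	 * delete, then issue the ioctl on an AF_INET socket.  "em0"
	 * and 192.0.2.1 are examples only.
	 */
	struct ifreq ifr_u;
	struct sockaddr_in *sin;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	memset(&ifr_u, 0, sizeof(ifr_u));
	strlcpy(ifr_u.ifr_name, "em0", sizeof(ifr_u.ifr_name));
	sin = (struct sockaddr_in *)&ifr_u.ifr_addr;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	inet_pton(AF_INET, "192.0.2.1", &sin->sin_addr);
	if (ioctl(s, SIOCDIFADDR, &ifr_u) == -1)
		err(1, "SIOCDIFADDR");
#endif

/*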
*/ bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; (void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL); } /* IF_ADDR_RUNLOCK(ifp); */ in_purgemaddrs(ifp); igmp_domifdetach(ifp); } IFNET_RUNLOCK(); } /* * Return 1 if the address might be a local broadcast address. */ int in_broadcast(struct in_addr in, struct ifnet *ifp) { - register struct ifaddr *ifa; + struct ifaddr *ifa; u_long t; if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY) return (1); if ((ifp->if_flags & IFF_BROADCAST) == 0) return (0); t = ntohl(in.s_addr); /* * Look through the list of addresses for a match * with a broadcast address. */ #define ia ((struct in_ifaddr *)ifa) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET && (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr || /* * Check for old-style (host 0) broadcast, but * taking into account that RFC 3021 obsoletes it. */ (ia->ia_subnetmask != IN_RFC3021_MASK && t == ia->ia_subnet)) && /* * Check for an all-ones subnetmask. These * only exist when an interface gets a secondary * address. */ ia->ia_subnetmask != (u_long)0xffffffff) return (1); return (0); #undef ia } /* * On interface removal, clean up IPv4 data structures hung off of the ifnet. */ void in_ifdetach(struct ifnet *ifp) { in_pcbpurgeif0(&V_ripcbinfo, ifp); in_pcbpurgeif0(&V_udbinfo, ifp); in_pcbpurgeif0(&V_ulitecbinfo, ifp); in_purgemaddrs(ifp); } /* * Delete all IPv4 multicast address records, and the associated * link-layer multicast address records, associated with ifp. * XXX It looks like domifdetach runs AFTER the link layer cleanup. * XXX This should not race with ifma_protospec being set during * a new allocation; if it does, we have bigger problems. */ static void in_purgemaddrs(struct ifnet *ifp) { LIST_HEAD(,in_multi) purgeinms; struct in_multi *inm, *tinm; struct ifmultiaddr *ifma; LIST_INIT(&purgeinms); IN_MULTI_LOCK(); /* * Extract list of in_multi associated with the detaching ifp * which the PF_INET layer is about to release. * We need to do this as IF_ADDR_LOCK() may be re-acquired * by code further down. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; #if 0 KASSERT(ifma->ifma_protospec != NULL, ("%s: ifma_protospec is NULL", __func__)); #endif inm = (struct in_multi *)ifma->ifma_protospec; LIST_INSERT_HEAD(&purgeinms, inm, inm_link); } IF_ADDR_RUNLOCK(ifp); LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) { LIST_REMOVE(inm, inm_link); inm_release_locked(inm); } igmp_ifdetach(ifp); IN_MULTI_UNLOCK(); } struct in_llentry { struct llentry base; }; #define IN_LLTBL_DEFAULT_HSIZE 32 #define IN_LLTBL_HASH(k, h) \ (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) /* * Do actual deallocation of @lle. */ static void in_lltable_destroy_lle_unlocked(struct llentry *lle) { LLE_LOCK_DESTROY(lle); LLE_REQ_DESTROY(lle); free(lle, M_LLTABLE); } /* * Called by LLE_FREE_LOCKED when the number of references * drops to zero. */ static void in_lltable_destroy_lle(struct llentry *lle) { LLE_WUNLOCK(lle); in_lltable_destroy_lle_unlocked(lle); } static struct llentry * in_lltable_new(struct in_addr addr4, u_int flags) { struct in_llentry *lle; lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); if (lle == NULL) /* NB: caller generates msg */ return NULL; /* * For IPv4 this will trigger "arpresolve" to generate * an ARP request.
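* Setting la_expire to the current time_uptime below marks the fresh entry as already expired, so the first packet sent through it makes arpresolve() start a real ARP exchange instead of trusting the still-zeroed link-layer address.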
*/ lle->base.la_expire = time_uptime; /* mark expired */ lle->base.r_l3addr.addr4 = addr4; lle->base.lle_refcnt = 1; lle->base.lle_free = in_lltable_destroy_lle; LLE_LOCK_INIT(&lle->base); LLE_REQ_INIT(&lle->base); callout_init(&lle->base.lle_timer, 1); return (&lle->base); } #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ ((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 ) static int in_lltable_match_prefix(const struct sockaddr *saddr, const struct sockaddr *smask, u_int flags, struct llentry *lle) { struct in_addr addr, mask, lle_addr; addr = ((const struct sockaddr_in *)saddr)->sin_addr; mask = ((const struct sockaddr_in *)smask)->sin_addr; lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr); if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0) return (0); if (lle->la_flags & LLE_IFADDR) { /* * Delete LLE_IFADDR records iff both the address and the flags match. * Note that addr is the interface address within the prefix * being matched. * Note also we should handle 'ifdown' cases without removing * ifaddr macs. */ if (addr.s_addr == lle_addr.s_addr && (flags & LLE_STATIC) != 0) return (1); return (0); } /* flags & LLE_STATIC means deleting both dynamic and static entries */ if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)) return (1); return (0); } static void in_lltable_free_entry(struct lltable *llt, struct llentry *lle) { struct ifnet *ifp; size_t pkts_dropped; LLE_WLOCK_ASSERT(lle); KASSERT(llt != NULL, ("lltable is NULL")); /* Unlink entry from table if not already done */ if ((lle->la_flags & LLE_LINKED) != 0) { ifp = llt->llt_ifp; IF_AFDATA_WLOCK_ASSERT(ifp); lltable_unlink_entry(llt, lle); } /* cancel timer */ if (callout_stop(&lle->lle_timer) > 0) LLE_REMREF(lle); /* Drop hold queue */ pkts_dropped = llentry_free(lle); ARPSTAT_ADD(dropped, pkts_dropped); } static int in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) { struct rt_addrinfo info; struct sockaddr_in rt_key, rt_mask; struct sockaddr rt_gateway; int rt_flags; KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); bzero(&rt_key, sizeof(rt_key)); rt_key.sin_len = sizeof(rt_key); bzero(&rt_mask, sizeof(rt_mask)); rt_mask.sin_len = sizeof(rt_mask); bzero(&rt_gateway, sizeof(rt_gateway)); rt_gateway.sa_len = sizeof(rt_gateway); bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key; info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&rt_mask; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; if (rib_lookup_info(ifp->if_fib, l3addr, NHR_REF, 0, &info) != 0) return (EINVAL); rt_flags = info.rti_flags; /* * A host route whose gateway matches the target L3 address is a * special route inserted by some implementations such as MANET; * if such a route exists and the interface is of the correct type, * then allow ARP to proceed. */ if (rt_flags & RTF_GATEWAY) { if (!(rt_flags & RTF_HOST) || !info.rti_ifp || info.rti_ifp->if_type != IFT_ETHER || (info.rti_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || memcmp(rt_gateway.sa_data, l3addr->sa_data, sizeof(in_addr_t)) != 0) { rib_free_info(&info); return (EINVAL); } } rib_free_info(&info); /* * Make sure that at least the destination address is covered * by the route. This is for handling the case where 2 or more * interfaces have the same prefix. An incoming packet arrives * on one interface and the corresponding outgoing packet leaves * another interface.
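* Concretely, the byte-wise loop below walks the route key, netmask and target address in parallel and fails with EINVAL as soon as (key ^ addr) & mask is non-zero for any byte, i.e. as soon as the target falls outside the prefix covered by the matched route.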
*/ if (!(rt_flags & RTF_HOST) && info.rti_ifp != ifp) { const char *sa, *mask, *addr, *lim; const struct sockaddr_in *l3sin; mask = (const char *)&rt_mask; /* * Just being extra cautious to avoid some custom * code getting into trouble. */ if ((info.rti_addrs & RTA_NETMASK) == 0) return (EINVAL); sa = (const char *)&rt_key; addr = (const char *)l3addr; l3sin = (const struct sockaddr_in *)l3addr; lim = addr + l3sin->sin_len; for ( ; addr < lim; sa++, mask++, addr++) { if ((*sa ^ *addr) & *mask) { #ifdef DIAGNOSTIC char addrbuf[INET_ADDRSTRLEN]; log(LOG_INFO, "IPv4 address: \"%s\" " "is not on the network\n", inet_ntoa_r(l3sin->sin_addr, addrbuf)); #endif return (EINVAL); } } } return (0); } static inline uint32_t in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize) { return (IN_LLTBL_HASH(dst.s_addr, hsize)); } static uint32_t in_lltable_hash(const struct llentry *lle, uint32_t hsize) { return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize)); } static void in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)sa; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = lle->r_l3addr.addr4; } static inline struct llentry * in_lltable_find_dst(struct lltable *llt, struct in_addr dst) { struct llentry *lle; struct llentries *lleh; u_int hashidx; hashidx = in_lltable_hash_dst(dst, llt->llt_hsize); lleh = &llt->lle_head[hashidx]; LIST_FOREACH(lle, lleh, lle_next) { if (lle->la_flags & LLE_DELETED) continue; if (lle->r_l3addr.addr4.s_addr == dst.s_addr) break; } return (lle); } static void in_lltable_delete_entry(struct lltable *llt, struct llentry *lle) { lle->la_flags |= LLE_DELETED; EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif llentry_free(lle); } static struct llentry * in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); /* * A route that covers the given address must have * been installed first, because we are doing a resolution; * verify this. */ if (!(flags & LLE_IFADDR) && in_lltable_rtcheck(ifp, flags, l3addr) != 0) return (NULL); lle = in_lltable_new(sin->sin_addr, flags); if (lle == NULL) { log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); return (NULL); } lle->la_flags = flags; if (flags & LLE_STATIC) lle->r_flags |= RLLE_VALID; if ((flags & LLE_IFADDR) == LLE_IFADDR) { linkhdrsize = LLE_MAX_LINKHDR; if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp), linkhdr, &linkhdrsize, &lladdr_off) != 0) { in_lltable_destroy_lle_unlocked(lle); return (NULL); } lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off); lle->la_flags |= LLE_STATIC; lle->r_flags |= (RLLE_VALID | RLLE_IFADDR); } return (lle); } /* * Return NULL if not found or marked for deletion. * If found, return the lle read-locked.
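* Callers may instead pass LLE_EXCLUSIVE to have the entry returned write-locked, or LLE_UNLOCKED (mutually exclusive with LLE_EXCLUSIVE, as the KASSERT below enforces) to get it with no lock held at all.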
*/ static struct llentry * in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; struct llentry *lle; IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); lle = in_lltable_find_dst(llt, sin->sin_addr); if (lle == NULL) return (NULL); KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", flags)); if (flags & LLE_UNLOCKED) return (lle); if (flags & LLE_EXCLUSIVE) LLE_WLOCK(lle); else LLE_RLOCK(lle); return (lle); } static int in_lltable_dump_entry(struct lltable *llt, struct llentry *lle, struct sysctl_req *wr) { struct ifnet *ifp = llt->llt_ifp; /* XXX stack use */ struct { struct rt_msghdr rtm; struct sockaddr_in sin; struct sockaddr_dl sdl; } arpc; struct sockaddr_dl *sdl; int error; bzero(&arpc, sizeof(arpc)); /* skip deleted entries */ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) return (0); /* Skip if jailed and not a valid IP of the prison. */ lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin); if (prison_if(wr->td->td_ucred, (struct sockaddr *)&arpc.sin) != 0) return (0); /* * produce a msg made of: * struct rt_msghdr; * struct sockaddr_in; (IPv4) * struct sockaddr_dl; */ arpc.rtm.rtm_msglen = sizeof(arpc); arpc.rtm.rtm_version = RTM_VERSION; arpc.rtm.rtm_type = RTM_GET; arpc.rtm.rtm_flags = RTF_UP; arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; /* publish */ if (lle->la_flags & LLE_PUB) arpc.rtm.rtm_flags |= RTF_ANNOUNCE; sdl = &arpc.sdl; sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(*sdl); sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; if ((lle->la_flags & LLE_VALID) == LLE_VALID) { sdl->sdl_alen = ifp->if_addrlen; bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); } else { sdl->sdl_alen = 0; bzero(LLADDR(sdl), ifp->if_addrlen); } arpc.rtm.rtm_rmx.rmx_expire = lle->la_flags & LLE_STATIC ? 
0 : lle->la_expire; arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) arpc.rtm.rtm_flags |= RTF_STATIC; if (lle->la_flags & LLE_IFADDR) arpc.rtm.rtm_flags |= RTF_PINNED; arpc.rtm.rtm_index = ifp->if_index; error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); return (error); } static struct lltable * in_lltattach(struct ifnet *ifp) { struct lltable *llt; llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE); llt->llt_af = AF_INET; llt->llt_ifp = ifp; llt->llt_lookup = in_lltable_lookup; llt->llt_alloc_entry = in_lltable_alloc; llt->llt_delete_entry = in_lltable_delete_entry; llt->llt_dump_entry = in_lltable_dump_entry; llt->llt_hash = in_lltable_hash; llt->llt_fill_sa_entry = in_lltable_fill_sa_entry; llt->llt_free_entry = in_lltable_free_entry; llt->llt_match_prefix = in_lltable_match_prefix; lltable_link(llt); return (llt); } void * in_domifattach(struct ifnet *ifp) { struct in_ifinfo *ii; ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO); ii->ii_llt = in_lltattach(ifp); ii->ii_igmp = igmp_domifattach(ifp); return (ii); } void in_domifdetach(struct ifnet *ifp, void *aux) { struct in_ifinfo *ii = (struct in_ifinfo *)aux; igmp_domifdetach(ifp); lltable_free(ii->ii_llt); free(ii, M_IFADDR); } Index: stable/11/sys/netinet/in_cksum.c =================================================================== --- stable/11/sys/netinet/in_cksum.c (revision 331642) +++ stable/11/sys/netinet/in_cksum.c (revision 331643) @@ -1,148 +1,148 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include #include /* * Checksum routine for Internet Protocol family headers (Portable Version). * * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. */ #define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) #define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} int in_cksum(struct mbuf *m, int len) { - register u_short *w; - register int sum = 0; - register int mlen = 0; + u_short *w; + int sum = 0; + int mlen = 0; int byte_swapped = 0; union { char c[2]; u_short s; } s_util; union { u_short s[2]; long l; } l_util; for (;m && len; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, u_short *); if (mlen == -1) { /* * The first byte of this mbuf is the continuation * of a word spanning between this mbuf and the * last mbuf. * * s_util.c[0] is already saved when scanning previous * mbuf. */ s_util.c[1] = *(char *)w; sum += s_util.s; w = (u_short *)((char *)w + 1); mlen = m->m_len - 1; len--; } else mlen = m->m_len; if (len < mlen) mlen = len; len -= mlen; /* * Force to even boundary. */ if ((1 & (uintptr_t) w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(u_char *)w; w = (u_short *)((char *)w + 1); mlen--; byte_swapped = 1; } /* * Unroll the loop to make overhead from * branches &c small. */ while ((mlen -= 32) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; w += 16; } mlen += 32; while ((mlen -= 8) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; w += 4; } mlen += 8; if (mlen == 0 && byte_swapped == 0) continue; REDUCE; while ((mlen -= 2) >= 0) { sum += *w++; } if (byte_swapped) { REDUCE; sum <<= 8; byte_swapped = 0; if (mlen == -1) { s_util.c[1] = *(char *)w; sum += s_util.s; mlen = 0; } else mlen = -1; } else if (mlen == -1) s_util.c[0] = *(char *)w; } if (len) printf("cksum: out of data\n"); if (mlen == -1) { /* The last mbuf has odd # of bytes. Follow the standard (the odd byte may be shifted left by 8 bits or not as determined by endian-ness of the machine) */ s_util.c[1] = 0; sum += s_util.s; } REDUCE; return (~sum & 0xffff); } Index: stable/11/sys/netinet/ip_icmp.c =================================================================== --- stable/11/sys/netinet/ip_icmp.c (revision 331642) +++ stable/11/sys/netinet/ip_icmp.c (revision 331643) @@ -1,1038 +1,1038 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ /* * ICMP routines: error generation, receive packet processing, and * routines to turnaround packets back to the originator, and * host table maintenance routines. */ static VNET_DEFINE(int, icmplim) = 200; #define V_icmplim VNET(icmplim) SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmplim), 0, "Maximum number of ICMP responses per second"); static VNET_DEFINE(int, icmplim_output) = 1; #define V_icmplim_output VNET(icmplim_output) SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmplim_output), 0, "Enable logging of ICMP response rate limiting"); #ifdef INET VNET_PCPUSTAT_DEFINE(struct icmpstat, icmpstat); VNET_PCPUSTAT_SYSINIT(icmpstat); SYSCTL_VNET_PCPUSTAT(_net_inet_icmp, ICMPCTL_STATS, stats, struct icmpstat, icmpstat, "ICMP statistics (struct icmpstat, netinet/icmp_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(icmpstat); #endif /* VIMAGE */ static VNET_DEFINE(int, icmpmaskrepl) = 0; #define V_icmpmaskrepl VNET(icmpmaskrepl) SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmpmaskrepl), 0, "Reply to ICMP Address Mask Request packets"); static VNET_DEFINE(u_int, icmpmaskfake) = 0; #define V_icmpmaskfake VNET(icmpmaskfake) SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmpmaskfake), 0, "Fake reply to ICMP Address Mask Request packets"); VNET_DEFINE(int, drop_redirect) = 0; #define V_drop_redirect VNET(drop_redirect) SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(drop_redirect), 0, "Ignore ICMP redirects"); static VNET_DEFINE(int, log_redirect) = 0; #define V_log_redirect VNET(log_redirect) SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(log_redirect), 0, "Log ICMP redirects to the console"); static VNET_DEFINE(char, reply_src[IFNAMSIZ]); #define V_reply_src VNET(reply_src) SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(reply_src), IFNAMSIZ, "ICMP reply source for non-local packets"); static VNET_DEFINE(int, icmp_rfi) = 0; #define V_icmp_rfi VNET(icmp_rfi) SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_rfi), 0, "ICMP reply from incoming interface for non-local packets"); static VNET_DEFINE(int, icmp_quotelen) = 8; #define V_icmp_quotelen VNET(icmp_quotelen) SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_quotelen), 0, "Number of bytes from original packet to quote in ICMP 
reply"); static VNET_DEFINE(int, icmpbmcastecho) = 0; #define V_icmpbmcastecho VNET(icmpbmcastecho) SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmpbmcastecho), 0, "Reply to multicast ICMP Echo Request and Timestamp packets"); static VNET_DEFINE(int, icmptstamprepl) = 1; #define V_icmptstamprepl VNET(icmptstamprepl) SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW, &VNET_NAME(icmptstamprepl), 0, "Respond to ICMP Timestamp packets"); #ifdef ICMPPRINTFS int icmpprintfs = 0; #endif static void icmp_reflect(struct mbuf *); static void icmp_send(struct mbuf *, struct mbuf *); extern struct protosw inetsw[]; /* * Kernel module interface for updating icmpstat. The argument is an index * into icmpstat treated as an array of u_long. While this encodes the * general layout of icmpstat into the caller, it doesn't encode its * location, so that future changes to add, for example, per-CPU stats * support won't cause binary compatibility problems for kernel modules. */ void kmod_icmpstat_inc(int statnum) { counter_u64_add(VNET(icmpstat)[statnum], 1); } /* * Generate an error packet of type error * in response to bad packet ip. */ void icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu) { struct ip *oip, *nip; struct icmp *icp; struct mbuf *m; unsigned icmplen, icmpelen, nlen, oiphlen; KASSERT((u_int)type <= ICMP_MAXTYPE, ("%s: illegal ICMP type", __func__)); if (type != ICMP_REDIRECT) ICMPSTAT_INC(icps_error); /* * Don't send error: * if the original packet was encrypted. * if not the first fragment of message. * in response to a multicast or broadcast packet. * if the old packet protocol was an ICMP error message. */ if (n->m_flags & M_DECRYPTED) goto freeit; if (n->m_flags & (M_BCAST|M_MCAST)) goto freeit; /* Drop if IP header plus 8 bytes is not contiguous in first mbuf. */ if (n->m_len < sizeof(struct ip) + ICMP_MINLEN) goto freeit; oip = mtod(n, struct ip *); oiphlen = oip->ip_hl << 2; if (n->m_len < oiphlen + ICMP_MINLEN) goto freeit; #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_error(%p, %x, %d)\n", oip, type, code); #endif if (oip->ip_off & htons(~(IP_MF|IP_DF))) goto freeit; if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiphlen))->icmp_type)) { ICMPSTAT_INC(icps_oldicmp); goto freeit; } /* * Calculate length to quote from original packet and * prevent the ICMP mbuf from overflowing. * Unfortunately this is non-trivial since ip_forward() * sends us truncated packets. 
*/ nlen = m_length(n, NULL); if (oip->ip_p == IPPROTO_TCP) { struct tcphdr *th; int tcphlen; if (oiphlen + sizeof(struct tcphdr) > n->m_len && n->m_next == NULL) goto stdreply; if (n->m_len < oiphlen + sizeof(struct tcphdr) && (n = m_pullup(n, oiphlen + sizeof(struct tcphdr))) == NULL) goto freeit; oip = mtod(n, struct ip *); th = mtodo(n, oiphlen); tcphlen = th->th_off << 2; if (tcphlen < sizeof(struct tcphdr)) goto freeit; if (ntohs(oip->ip_len) < oiphlen + tcphlen) goto freeit; if (oiphlen + tcphlen > n->m_len && n->m_next == NULL) goto stdreply; if (n->m_len < oiphlen + tcphlen && (n = m_pullup(n, oiphlen + tcphlen)) == NULL) goto freeit; icmpelen = max(tcphlen, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); } else if (oip->ip_p == IPPROTO_SCTP) { struct sctphdr *sh; struct sctp_chunkhdr *ch; if (ntohs(oip->ip_len) < oiphlen + sizeof(struct sctphdr)) goto stdreply; if (oiphlen + sizeof(struct sctphdr) > n->m_len && n->m_next == NULL) goto stdreply; if (n->m_len < oiphlen + sizeof(struct sctphdr) && (n = m_pullup(n, oiphlen + sizeof(struct sctphdr))) == NULL) goto freeit; oip = mtod(n, struct ip *); icmpelen = max(sizeof(struct sctphdr), min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); sh = mtodo(n, oiphlen); if (ntohl(sh->v_tag) == 0 && ntohs(oip->ip_len) >= oiphlen + sizeof(struct sctphdr) + 8 && (n->m_len >= oiphlen + sizeof(struct sctphdr) + 8 || n->m_next != NULL)) { if (n->m_len < oiphlen + sizeof(struct sctphdr) + 8 && (n = m_pullup(n, oiphlen + sizeof(struct sctphdr) + 8)) == NULL) goto freeit; oip = mtod(n, struct ip *); sh = mtodo(n, oiphlen); ch = (struct sctp_chunkhdr *)(sh + 1); if (ch->chunk_type == SCTP_INITIATION) { icmpelen = max(sizeof(struct sctphdr) + 8, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); } } } else stdreply: icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); icmplen = min(oiphlen + icmpelen, nlen); if (icmplen < sizeof(struct ip)) goto freeit; if (MHLEN > sizeof(struct ip) + ICMP_MINLEN + icmplen) m = m_gethdr(M_NOWAIT, MT_DATA); else m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto freeit; #ifdef MAC mac_netinet_icmp_reply(n, m); #endif icmplen = min(icmplen, M_TRAILINGSPACE(m) - sizeof(struct ip) - ICMP_MINLEN); m_align(m, ICMP_MINLEN + icmplen); m->m_len = ICMP_MINLEN + icmplen; /* XXX MRT make the outgoing packet use the same FIB * that was associated with the incoming packet */ M_SETFIB(m, M_GETFIB(n)); icp = mtod(m, struct icmp *); ICMPSTAT_INC(icps_outhist[type]); icp->icmp_type = type; if (type == ICMP_REDIRECT) icp->icmp_gwaddr.s_addr = dest; else { icp->icmp_void = 0; /* * The following assignments assume an overlay with the * just zeroed icmp_void field. */ if (type == ICMP_PARAMPROB) { icp->icmp_pptr = code; code = 0; } else if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG && mtu) { icp->icmp_nextmtu = htons(mtu); } } icp->icmp_code = code; /* * Copy the quotation into ICMP message and * convert quoted IP header back to network representation. */ m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip); nip = &icp->icmp_ip; /* * Set up ICMP message mbuf and copy old IP header (without options) * in front of ICMP message. * If the original mbuf was meant to bypass the firewall, the error * reply should bypass as well.
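* M_SKIP_FIREWALL is copied over from the offending packet just below, so an error generated for traffic that a firewall rule had already let bypass filtering is not itself eaten by that same firewall.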
*/ m->m_flags |= n->m_flags & M_SKIP_FIREWALL; m->m_data -= sizeof(struct ip); m->m_len += sizeof(struct ip); m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = n->m_pkthdr.rcvif; nip = mtod(m, struct ip *); bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip)); nip->ip_len = htons(m->m_len); nip->ip_v = IPVERSION; nip->ip_hl = 5; nip->ip_p = IPPROTO_ICMP; nip->ip_tos = 0; nip->ip_off = 0; icmp_reflect(m); freeit: m_freem(n); } /* * Process a received ICMP message. */ int icmp_input(struct mbuf **mp, int *offp, int proto) { struct icmp *icp; struct in_ifaddr *ia; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct sockaddr_in icmpsrc, icmpdst, icmpgw; int hlen = *offp; int icmplen = ntohs(ip->ip_len) - *offp; int i, code; void (*ctlfunc)(int, struct sockaddr *, void *); int fibnum; *mp = NULL; /* * Locate the icmp structure in the mbuf, and check * that it is not corrupted and is of at least minimum length. */ #ifdef ICMPPRINTFS if (icmpprintfs) { char srcbuf[INET_ADDRSTRLEN]; char dstbuf[INET_ADDRSTRLEN]; printf("icmp_input from %s to %s, len %d\n", inet_ntoa_r(ip->ip_src, srcbuf), inet_ntoa_r(ip->ip_dst, dstbuf), icmplen); } #endif if (icmplen < ICMP_MINLEN) { ICMPSTAT_INC(icps_tooshort); goto freeit; } i = hlen + min(icmplen, ICMP_ADVLENMIN); if (m->m_len < i && (m = m_pullup(m, i)) == NULL) { ICMPSTAT_INC(icps_tooshort); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); m->m_len -= hlen; m->m_data += hlen; icp = mtod(m, struct icmp *); if (in_cksum(m, icmplen)) { ICMPSTAT_INC(icps_checksum); goto freeit; } m->m_len += hlen; m->m_data -= hlen; #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_input, type %d code %d\n", icp->icmp_type, icp->icmp_code); #endif /* * Message type specific processing. */ if (icp->icmp_type > ICMP_MAXTYPE) goto raw; /* Initialize */ bzero(&icmpsrc, sizeof(icmpsrc)); icmpsrc.sin_len = sizeof(struct sockaddr_in); icmpsrc.sin_family = AF_INET; bzero(&icmpdst, sizeof(icmpdst)); icmpdst.sin_len = sizeof(struct sockaddr_in); icmpdst.sin_family = AF_INET; bzero(&icmpgw, sizeof(icmpgw)); icmpgw.sin_len = sizeof(struct sockaddr_in); icmpgw.sin_family = AF_INET; ICMPSTAT_INC(icps_inhist[icp->icmp_type]); code = icp->icmp_code; switch (icp->icmp_type) { case ICMP_UNREACH: switch (code) { case ICMP_UNREACH_NET: case ICMP_UNREACH_HOST: case ICMP_UNREACH_SRCFAIL: case ICMP_UNREACH_NET_UNKNOWN: case ICMP_UNREACH_HOST_UNKNOWN: case ICMP_UNREACH_ISOLATED: case ICMP_UNREACH_TOSNET: case ICMP_UNREACH_TOSHOST: case ICMP_UNREACH_HOST_PRECEDENCE: case ICMP_UNREACH_PRECEDENCE_CUTOFF: code = PRC_UNREACH_NET; break; case ICMP_UNREACH_NEEDFRAG: code = PRC_MSGSIZE; break; /* * RFC 1122, Sections 3.2.2.1 and 4.2.3.9. * Treat subcodes 2,3 as immediate RST */ case ICMP_UNREACH_PROTOCOL: code = PRC_UNREACH_PROTOCOL; break; case ICMP_UNREACH_PORT: code = PRC_UNREACH_PORT; break; case ICMP_UNREACH_NET_PROHIB: case ICMP_UNREACH_HOST_PROHIB: case ICMP_UNREACH_FILTER_PROHIB: code = PRC_UNREACH_ADMIN_PROHIB; break; default: goto badcode; } goto deliver; case ICMP_TIMXCEED: if (code > 1) goto badcode; code += PRC_TIMXCEED_INTRANS; goto deliver; case ICMP_PARAMPROB: if (code > 1) goto badcode; code = PRC_PARAMPROB; deliver: /* * Problem with datagram; advise higher level routines.
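* The ICMP type/code pair has been folded into a generic PRC_* value above; it is handed to the transport's pr_ctlinput hook below, e.g. tcp_ctlinput() receives PRC_MSGSIZE and drives path-MTU discovery from the quoted header.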
*/ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { ICMPSTAT_INC(icps_badlen); goto freeit; } /* Discard ICMP's in response to multicast packets */ if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) goto badcode; #ifdef ICMPPRINTFS if (icmpprintfs) printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; /* * XXX if the packet contains [IPv4 AH TCP], we can't make a * notification to TCP layer. */ i = sizeof(struct ip) + min(icmplen, ICMP_ADVLENPREF(icp)); ip_stripoptions(m); if (m->m_len < i && (m = m_pullup(m, i)) == NULL) { /* This should actually not happen */ ICMPSTAT_INC(icps_tooshort); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); icp = (struct icmp *)(ip + 1); /* * The upper layer handler can rely on: * - The outer IP header has no options. * - The outer IP header, the ICMP header, the inner IP header, * and the first n bytes of the inner payload are contiguous. * n is at least 8, but might be larger based on * ICMP_ADVLENPREF. See its definition in ip_icmp.h. */ ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; if (ctlfunc) (*ctlfunc)(code, (struct sockaddr *)&icmpsrc, (void *)&icp->icmp_ip); break; badcode: ICMPSTAT_INC(icps_badcode); break; case ICMP_ECHO: if (!V_icmpbmcastecho && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { ICMPSTAT_INC(icps_bmcastecho); break; } icp->icmp_type = ICMP_ECHOREPLY; if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0) goto freeit; else goto reflect; case ICMP_TSTAMP: if (V_icmptstamprepl == 0) break; if (!V_icmpbmcastecho && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { ICMPSTAT_INC(icps_bmcasttstamp); break; } if (icmplen < ICMP_TSLEN) { ICMPSTAT_INC(icps_badlen); break; } icp->icmp_type = ICMP_TSTAMPREPLY; icp->icmp_rtime = iptime(); icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) goto freeit; else goto reflect; case ICMP_MASKREQ: if (V_icmpmaskrepl == 0) break; /* * We are not able to respond with all ones broadcast * unless we receive it over a point-to-point interface. 
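* Hence the switch below: a request addressed to INADDR_BROADCAST or INADDR_ANY is answered using the interface the request arrived on, looked up via the packet's source address rather than its destination.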
*/ if (icmplen < ICMP_MASKLEN) break; switch (ip->ip_dst.s_addr) { case INADDR_BROADCAST: case INADDR_ANY: icmpdst.sin_addr = ip->ip_src; break; default: icmpdst.sin_addr = ip->ip_dst; } ia = (struct in_ifaddr *)ifaof_ifpforaddr( (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); if (ia == NULL) break; if (ia->ia_ifp == NULL) { ifa_free(&ia->ia_ifa); break; } icp->icmp_type = ICMP_MASKREPLY; if (V_icmpmaskfake == 0) icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; else icp->icmp_mask = V_icmpmaskfake; if (ip->ip_src.s_addr == 0) { if (ia->ia_ifp->if_flags & IFF_BROADCAST) ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr; else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr; } ifa_free(&ia->ia_ifa); reflect: ICMPSTAT_INC(icps_reflect); ICMPSTAT_INC(icps_outhist[icp->icmp_type]); icmp_reflect(m); return (IPPROTO_DONE); case ICMP_REDIRECT: if (V_log_redirect) { u_long src, dst, gw; src = ntohl(ip->ip_src.s_addr); dst = ntohl(icp->icmp_ip.ip_dst.s_addr); gw = ntohl(icp->icmp_gwaddr.s_addr); printf("icmp redirect from %d.%d.%d.%d: " "%d.%d.%d.%d => %d.%d.%d.%d\n", (int)(src >> 24), (int)((src >> 16) & 0xff), (int)((src >> 8) & 0xff), (int)(src & 0xff), (int)(dst >> 24), (int)((dst >> 16) & 0xff), (int)((dst >> 8) & 0xff), (int)(dst & 0xff), (int)(gw >> 24), (int)((gw >> 16) & 0xff), (int)((gw >> 8) & 0xff), (int)(gw & 0xff)); } /* * RFC1812 says we must ignore ICMP redirects if we * are acting as router. */ if (V_drop_redirect || V_ipforwarding) break; if (code > 3) goto badcode; if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { ICMPSTAT_INC(icps_badlen); break; } /* * Short circuit routing redirects to force * immediate change in the kernel's routing * tables. The message is also handed to anyone * listening on a raw socket (e.g. the routing * daemon for use in updating its tables). */ icmpgw.sin_addr = ip->ip_src; icmpdst.sin_addr = icp->icmp_gwaddr; #ifdef ICMPPRINTFS if (icmpprintfs) { char dstbuf[INET_ADDRSTRLEN]; char gwbuf[INET_ADDRSTRLEN]; printf("redirect dst %s to %s\n", inet_ntoa_r(icp->icmp_ip.ip_dst, dstbuf), inet_ntoa_r(icp->icmp_gwaddr, gwbuf)); } #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { in_rtredirect((struct sockaddr *)&icmpsrc, (struct sockaddr *)&icmpdst, (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&icmpgw, fibnum); } pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc); break; /* * No kernel processing for the following; * just fall through to send to raw listener. 
*/ case ICMP_ECHOREPLY: case ICMP_ROUTERADVERT: case ICMP_ROUTERSOLICIT: case ICMP_TSTAMPREPLY: case ICMP_IREQREPLY: case ICMP_MASKREPLY: case ICMP_SOURCEQUENCH: default: break; } raw: *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); freeit: m_freem(m); return (IPPROTO_DONE); } /* * Reflect the ip packet back to the source */ static void icmp_reflect(struct mbuf *m) { struct rm_priotracker in_ifa_tracker; struct ip *ip = mtod(m, struct ip *); struct ifaddr *ifa; struct ifnet *ifp; struct in_ifaddr *ia; struct in_addr t; struct nhop4_extended nh_ext; struct mbuf *opts = NULL; int optlen = (ip->ip_hl << 2) - sizeof(struct ip); if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || IN_EXPERIMENTAL(ntohl(ip->ip_src.s_addr)) || IN_ZERONET(ntohl(ip->ip_src.s_addr)) ) { m_freem(m); /* Bad return address */ ICMPSTAT_INC(icps_badaddr); goto done; /* Ip_output() will check for broadcast */ } t = ip->ip_dst; ip->ip_dst = ip->ip_src; /* * Source selection for ICMP replies: * * If the incoming packet was addressed directly to one of our * own addresses, use dst as the src for the reply. */ IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) { if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) { t = IA_SIN(ia)->sin_addr; IN_IFADDR_RUNLOCK(&in_ifa_tracker); goto match; } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * If the incoming packet was addressed to one of our broadcast * addresses, use the first non-broadcast address which corresponds * to the incoming interface. */ ifp = m->m_pkthdr.rcvif; if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == t.s_addr) { t = IA_SIN(ia)->sin_addr; IF_ADDR_RUNLOCK(ifp); goto match; } } IF_ADDR_RUNLOCK(ifp); } /* * If the packet was transiting through us, use the address of * the interface the packet came through in. If that interface * doesn't have a suitable IP address, the normal selection * criteria apply. */ if (V_icmp_rfi && ifp != NULL) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); t = IA_SIN(ia)->sin_addr; IF_ADDR_RUNLOCK(ifp); goto match; } IF_ADDR_RUNLOCK(ifp); } /* * If the incoming packet was not addressed directly to us, use * designated interface for icmp replies specified by sysctl * net.inet.icmp.reply_src (default not set). Otherwise continue * with normal source selection. */ if (V_reply_src[0] != '\0' && (ifp = ifunit(V_reply_src))) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); t = IA_SIN(ia)->sin_addr; IF_ADDR_RUNLOCK(ifp); goto match; } IF_ADDR_RUNLOCK(ifp); } /* * If the packet was transiting through us, use the address of * the interface that is the closest to the packet source. * When we don't have a route back to the packet source, stop here * and drop the packet. */ if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh_ext) != 0) { m_freem(m); ICMPSTAT_INC(icps_noroute); goto done; } t = nh_ext.nh_src; match: #ifdef MAC mac_netinet_icmp_replyinplace(m); #endif ip->ip_src = t; ip->ip_ttl = V_ip_defttl; if (optlen > 0) { - register u_char *cp; + u_char *cp; int opt, cnt; u_int len; /* * Retrieve any source routing from the incoming packet; * add on any record-route or timestamp options. 
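* ip_srcroute() hands back the recorded source route already reversed for the reply, so the response retraces the sender's path; any RR, TS and SECURITY options are then copied over verbatim in the loop below.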
*/ cp = (u_char *) (ip + 1); if ((opts = ip_srcroute(m)) == NULL && (opts = m_gethdr(M_NOWAIT, MT_DATA))) { opts->m_len = sizeof(struct in_addr); mtod(opts, struct in_addr *)->s_addr = 0; } if (opts) { #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_reflect optlen %d rt %d => ", optlen, opts->m_len); #endif for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) len = 1; else { if (cnt < IPOPT_OLEN + sizeof(*cp)) break; len = cp[IPOPT_OLEN]; if (len < IPOPT_OLEN + sizeof(*cp) || len > cnt) break; } /* * Should check for overflow, but it "can't happen" */ if (opt == IPOPT_RR || opt == IPOPT_TS || opt == IPOPT_SECURITY) { bcopy((caddr_t)cp, mtod(opts, caddr_t) + opts->m_len, len); opts->m_len += len; } } /* Terminate & pad, if necessary */ cnt = opts->m_len % 4; if (cnt) { for (; cnt < 4; cnt++) { *(mtod(opts, caddr_t) + opts->m_len) = IPOPT_EOL; opts->m_len++; } } #ifdef ICMPPRINTFS if (icmpprintfs) printf("%d\n", opts->m_len); #endif } ip_stripoptions(m); } m_tag_delete_nonpersistent(m); m->m_flags &= ~(M_BCAST|M_MCAST); icmp_send(m, opts); done: if (opts) (void)m_free(opts); } /* * Send an icmp packet back to the ip level, * after supplying a checksum. */ static void icmp_send(struct mbuf *m, struct mbuf *opts) { - register struct ip *ip = mtod(m, struct ip *); - register int hlen; - register struct icmp *icp; + struct ip *ip = mtod(m, struct ip *); + int hlen; + struct icmp *icp; hlen = ip->ip_hl << 2; m->m_data += hlen; m->m_len -= hlen; icp = mtod(m, struct icmp *); icp->icmp_cksum = 0; icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen); m->m_data -= hlen; m->m_len += hlen; m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef ICMPPRINTFS if (icmpprintfs) { char dstbuf[INET_ADDRSTRLEN]; char srcbuf[INET_ADDRSTRLEN]; printf("icmp_send dst %s src %s\n", inet_ntoa_r(ip->ip_dst, dstbuf), inet_ntoa_r(ip->ip_src, srcbuf)); } #endif (void) ip_output(m, opts, NULL, 0, NULL, NULL); } /* * Return milliseconds since 00:00 UTC in network format. */ uint32_t iptime(void) { struct timeval atv; u_long t; getmicrotime(&atv); t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; return (htonl(t)); } /* * Return the next larger or smaller MTU plateau (table from RFC 1191) * given current value MTU. If DIR is less than zero, a larger plateau * is returned; otherwise, a smaller value is returned. */ int ip_next_mtu(int mtu, int dir) { static int mtutab[] = { 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508, 296, 68, 0 }; int i, size; size = (sizeof mtutab) / (sizeof mtutab[0]); if (dir >= 0) { for (i = 0; i < size; i++) if (mtu > mtutab[i]) return mtutab[i]; } else { for (i = size - 1; i >= 0; i--) if (mtu < mtutab[i]) return mtutab[i]; if (mtu == mtutab[0]) return mtutab[0]; } return 0; } #endif /* INET */ /* * badport_bandlim() - check for ICMP bandwidth limit * * Return 0 if it is ok to send an ICMP error response, -1 if we have * hit our bandwidth limit and it is not ok. * * If icmplim is <= 0, the feature is disabled and 0 is returned. * * For now we separate the TCP and UDP subsystems w/ different 'which' * values. We may eventually remove this separation (and simplify the * code further). * * Note that the printing of the error message is delayed so we can * properly print the icmp error rate that the system was trying to do * (i.e. 22000/100 pps, etc...). This can cause long delays in printing * the 'final' error, but it doesn't make sense to solve the printing * delay with more complex code. 
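* For example, with the default icmplim of 200 and a burst arriving at 22000 pps, ppsratecheck() suppresses responses for the duration of the burst, and a single "Limiting icmp ping response from 22000 to 200 packets/sec" line is logged once the rate falls back under the limit.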
*/ int badport_bandlim(int which) { #define N(a) (sizeof (a) / sizeof (a[0])) static struct rate { const char *type; struct timeval lasttime; int curpps; } rates[BANDLIM_MAX+1] = { { "icmp unreach response" }, { "icmp ping response" }, { "icmp tstamp response" }, { "closed port RST response" }, { "open port RST response" }, { "icmp6 unreach response" }, { "sctp ootb response" } }; /* * Return ok status if feature disabled or argument out of range. */ if (V_icmplim > 0 && (u_int) which < N(rates)) { struct rate *r = &rates[which]; int opps = r->curpps; if (!ppsratecheck(&r->lasttime, &r->curpps, V_icmplim)) return -1; /* discard packet */ /* * If we've dropped below the threshold after having * rate-limited traffic print the message. This preserves * the previous behaviour at the expense of added complexity. */ if (V_icmplim_output && opps > V_icmplim) log(LOG_NOTICE, "Limiting %s from %d to %d packets/sec\n", r->type, opps, V_icmplim); } return 0; /* okay to send packet */ #undef N } Index: stable/11/sys/netinet6/in6_pcb.c =================================================================== --- stable/11/sys/netinet6/in6_pcb.c (revision 331642) +++ stable/11/sys/netinet6/in6_pcb.c (revision 331643) @@ -1,1285 +1,1285 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pcbgroup.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *); int -in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, +in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { struct socket *so = inp->inp_socket; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int error, lookupflags = 0; int reuseport = (so->so_options & SO_REUSEPORT); INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (nam == NULL) { if ((error = prison_local_ip6(cred, &inp->in6p_laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); } else { sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof(*sin6)) return (EINVAL); /* * family check. */ if (nam->sa_family != AF_INET6) return (EAFNOSUPPORT); if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); if ((error = prison_local_ip6(cred, &sin6->sin6_addr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); lport = sin6->sin6_port; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ifa; sin6->sin6_port = 0; /* yech...
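* (the port is zeroed so that ifa_ifwithaddr() below compares against interface addresses, whose sin6_port is always 0, on the address alone)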
*/ if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == NULL && (inp->inp_flags & INP_BINDANY) == 0) { return (EADDRNOTAVAIL); } /* * XXX: binding to an anycast address might accidentally * cause sending a packet with an anycast source address. * We should allow binding to a deprecated address, since * the application dares to use it. */ if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { ifa_free(ifa); return (EADDRNOTAVAIL); } if (ifa != NULL) ifa_free(ifa); } if (lport) { struct inpcb *t; struct tcptw *tw; /* GROSS */ if (ntohs(lport) <= V_ipport_reservedhigh && ntohs(lport) >= V_ipport_reservedlow && priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) return (EACCES); if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT, 0) != 0) { t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD, cred); if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_flags2 & INP_REUSEPORT) == 0) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); /* * If the socket is a BINDMULTI socket, then * the credentials need to match and the * original socket also has to have been bound * with BINDMULTI. */ if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, INPLOOKUP_WILDCARD, cred); if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); } #endif } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, lookupflags, cred); if (t && (t->inp_flags & INP_TIMEWAIT)) { /* * XXXRW: If an inpcb has had its timewait * state recycled, we treat the address as * being in use (for now). This is better * than a panic, but not desirable. */ tw = intotw(t); if (tw == NULL || (reuseport & tw->tw_so_options) == 0) return (EADDRINUSE); } else if (t && (reuseport & inp_so_options(t)) == 0) { return (EADDRINUSE); } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, lookupflags, cred); if (t && t->inp_flags & INP_TIMEWAIT) { tw = intotw(t); if (tw == NULL) return (EADDRINUSE); if ((reuseport & tw->tw_so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || ((inp->inp_vflag & INP_IPV6PROTO) == (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && (reuseport & inp_so_options(t)) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_vflag & INP_IPV6PROTO) != 0)) return (EADDRINUSE); } #endif } inp->in6p_laddr = sin6->sin6_addr; } if (lport == 0) { if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) { /* Undo an address bind that may have occurred.
*/ inp->in6p_laddr = in6addr_any; return (error); } } else { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } } return (0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new * in6_pcbconnect(): Do some validity-checking on the remote * address (in 'nam') and then determine the local host address * (i.e., which interface) to use to access that remote host. * * This preserves the definition of in6_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ static int -in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, +in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in6_addr *plocal_addr6) { - register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error = 0; int scope_ambiguous = 0; struct in6_addr in6a; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */ if (nam->sa_len != sizeof (*sin6)) return (EINVAL); if (sin6->sin6_family != AF_INET6) return (EAFNOSUPPORT); if (sin6->sin6_port == 0) return (EADDRNOTAVAIL); if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); if (!TAILQ_EMPTY(&V_in6_ifaddrhead)) { /* * If the destination address is the UNSPECIFIED addr, * use the loopback addr, e.g., ::1. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) sin6->sin6_addr = in6addr_loopback; } if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) return (error); error = in6_selectsrc_socket(sin6, inp->in6p_outputopts, inp, inp->inp_cred, scope_ambiguous, &in6a, NULL); if (error) return (error); /* * Do not update this earlier, in case we return with an error. * * XXX: this in6_selectsrc_socket result might replace the bound local * address with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? */ *plocal_addr6 = in6a; /* * Don't do pcblookup call here; return interface in * plocal_addr6 * and exit to the caller, which will do the lookup. */ return (0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If we don't have a local address for this socket yet, * then pick one. */ int -in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam, +in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred, struct mbuf *m) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; - register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_addr addr6; int error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. */ if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return (error); if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ?
&addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, cred); if (error) return (error); } inp->in6p_laddr = addr6; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; if (inp->inp_flags & IN6P_AUTOFLOWLABEL) inp->inp_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); in_pcbrehash_mbuf(inp, m); return (0); } int in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { return (in6_pcbconnect_mbuf(inp, nam, cred, NULL)); } void in6_pcbdisconnect(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; in_pcbrehash(inp); } struct sockaddr * in6_sockaddr(in_port_t port, struct in6_addr *addr_p) { struct sockaddr_in6 *sin6; sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK); bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = port; sin6->sin6_addr = *addr_p; (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ return (struct sockaddr *)sin6; } struct sockaddr * in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p) { struct sockaddr_in sin; struct sockaddr_in6 *sin6_p; bzero(&sin, sizeof sin); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = port; sin.sin_addr = *addr_p; sin6_p = malloc(sizeof *sin6_p, M_SONAME, M_WAITOK); in6_sin_2_v4mapsin6(&sin, sin6_p); return (struct sockaddr *)sin6_p; } int in6_getsockaddr(struct socket *so, struct sockaddr **nam) { - register struct inpcb *inp; + struct inpcb *inp; struct in6_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_lport; addr = inp->in6p_laddr; INP_RUNLOCK(inp); *nam = in6_sockaddr(port, &addr); return 0; } int in6_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in6_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_fport; addr = inp->in6p_faddr; INP_RUNLOCK(inp); *nam = in6_sockaddr(port, &addr); return 0; } int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL")); #ifdef INET if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_getsockaddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else #endif { /* scope issues will be handled in in6_getsockaddr(). */ error = in6_getsockaddr(so, nam); } return error; } int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL")); #ifdef INET if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_getpeeraddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else #endif /* scope issues will be handled in in6_getpeeraddr(). */ error = in6_getpeeraddr(so, nam); return error; } /* * Pass some notification to all connections of a protocol * associated with address dst. 
The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. */ void in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, u_int fport_arg, const struct sockaddr *src, u_int lport_arg, int cmd, void *cmdarg, struct inpcb *(*notify)(struct inpcb *, int)) { struct inpcb *inp, *inp_temp; struct sockaddr_in6 sa6_src, *sa6_dst; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; int errno; if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) return; sa6_dst = (struct sockaddr_in6 *)dst; if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) return; /* * Note that src can be NULL when we are notified by local fragmentation. */ sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; flowinfo = sa6_src.sin6_flowinfo; /* * Redirects go to all references to the destination, * and use in6_rtchange to invalidate the route cache. * Dead host indications: also use in6_rtchange to invalidate * the cache, and deliver the error to all the sockets. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); if (cmd != PRC_HOSTDEAD) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); if ((inp->inp_vflag & INP_IPV6) == 0) { INP_WUNLOCK(inp); continue; } /* * If the error designates a new path MTU for a destination * and the application (associated with this socket) wanted to * know the value, notify. * XXX: should we avoid notifying TCP sockets of the value? */ if (cmd == PRC_MSGSIZE && cmdarg != NULL) ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, *(u_int32_t *)cmdarg); /* * Detect if we should notify the error. If no source and * destination ports are specified, but non-zero flowinfo and * local address match, notify the error. This is the case * when the error is delivered with an encrypted buffer * by ESP. Otherwise, just compare addresses and ports * as usual. */ if (lport == 0 && fport == 0 && flowinfo && inp->inp_socket != NULL && flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) goto do_notify; else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr) || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport)) { INP_WUNLOCK(inp); continue; } do_notify: if (notify) { if ((*notify)(inp, errno)) INP_WUNLOCK(inp); } else INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); } /* * Lookup a PCB based on the local address and port. Caller must hold the * hash lock. No inpcb locks or references are acquired.
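 *
 * A minimal usage sketch (hypothetical caller; the variable names are
 * illustrative only), assuming the caller already holds the hash write
 * lock as required:
 *
 *	struct inpcb *inp;
 *
 *	INP_HASH_WLOCK(pcbinfo);
 *	inp = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
 *	    sin6->sin6_port, INPLOOKUP_WILDCARD, cred);
 *	INP_HASH_WUNLOCK(pcbinfo);
 *	if (inp != NULL)
 *		; /* the local address/port pair is already in use */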
*/ struct inpcb * in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, u_short lport, int lookupflags, struct ucred *cred) { - register struct inpcb *inp; + struct inpcb *inp; int matchwild = 3, wildcard; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_WLOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_lport == lport) { /* Found. */ if (cred == NULL || prison_equal_ip6(cred->cr_prison, inp->inp_cred->cr_prison)) return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (cred != NULL && !prison_equal_ip6(cred->cr_prison, inp->inp_cred->cr_prison)) continue; /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) wildcard++; if (!IN6_IS_ADDR_UNSPECIFIED( &inp->in6p_laddr)) { if (IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; else if (!IN6_ARE_ADDR_EQUAL( &inp->in6p_laddr, laddr)) continue; } else { if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) break; } } } return (match); } } void in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb *in6p; struct ip6_moptions *im6o; int i, gap; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(in6p); im6o = in6p->in6p_moptions; if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { /* * Unselect the outgoing ifp for multicast if it * is being detached. */ if (im6o->im6o_multicast_ifp == ifp) im6o->im6o_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ gap = 0; for (i = 0; i < im6o->im6o_num_memberships; i++) { if (im6o->im6o_membership[i]->in6m_ifp == ifp) { in6_mc_leave(im6o->im6o_membership[i], NULL); gap++; } else if (gap != 0) { im6o->im6o_membership[i - gap] = im6o->im6o_membership[i]; } } im6o->im6o_num_memberships -= gap; } INP_WUNLOCK(in6p); } INP_INFO_WUNLOCK(pcbinfo); } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. 
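 *
 * An illustrative call site (a sketch, not taken verbatim from any
 * transport): a protocol that keeps hitting retransmit timeouts on a
 * connection may call
 *
 *	in6_losing(inp);
 *
 * so that the stale cached route is dropped and the next output pass
 * performs a fresh routing lookup.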
*/ void in6_losing(struct inpcb *in6p) { if (in6p->inp_route6.ro_rt) { RTFREE(in6p->inp_route6.ro_rt); in6p->inp_route6.ro_rt = (struct rtentry *)NULL; } if (in6p->inp_route.ro_lle) LLE_FREE(in6p->inp_route.ro_lle); /* zeros ro_lle */ return; } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ struct inpcb * in6_rtchange(struct inpcb *inp, int errno) { if (inp->inp_route6.ro_rt) { RTFREE(inp->inp_route6.ro_rt); inp->inp_route6.ro_rt = (struct rtentry *)NULL; } if (inp->inp_route.ro_lle) LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */ return inp; } #ifdef PCBGROUP /* * Lookup PCB in hash list, using pcbgroup tables. */ static struct inpcb * in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ tmpinp = NULL; INP_GROUP_LOCK(pcbgroup); head = &pcbgroup->ipg_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP6)) goto found; if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) { inp = tmpinp; goto found; } /* * Then look for a wildcard match in the pcbgroup. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbgroup->ipg_hashbase[ INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) goto found; else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; if (inp != NULL) goto found; } /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. 
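 *
 * A worked example (addresses are illustrative): with a jailed socket
 * bound to 2001:db8::1 and a non-jailed socket bound to the
 * unspecified address, both on the same port, a packet addressed to
 * 2001:db8::1 is delivered to the jailed, non-wild socket first.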
*/ head = &pcbinfo->ipi_wildbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_wildmask)]; LIST_FOREACH(inp, head, inp_pcbgroup_wild) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) goto found; else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; if (inp != NULL) goto found; } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ INP_GROUP_UNLOCK(pcbgroup); return (NULL); found: in_pcbref(inp); INP_GROUP_UNLOCK(pcbgroup); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking buf", __func__); return (inp); } #endif /* PCBGROUP */ /* * Lookup PCB in hash list. */ static struct inpcb * in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); /* * First look for an exact match. */ tmpinp = NULL; head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP6)) return (inp); if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) return (tmpinp); /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. 
*/ head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) return (inp); else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ if (jail_wild != NULL) return (jail_wild); if (local_exact != NULL) return (local_exact); if (local_wild != NULL) return (local_wild); } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ /* * Not found. */ return (NULL); } /* * Lookup PCB in hash list, using pcbinfo tables. This variation locks the * hash list lock, and will return the inpcb locked (i.e., requires * INPLOOKUP_LOCKPCB). */ static struct inpcb * in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) { struct inpcb *inp; INP_HASH_RLOCK(pcbinfo); inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); if (inp != NULL) { in_pcbref(inp); INP_HASH_RUNLOCK(pcbinfo); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking bug", __func__); } else INP_HASH_RUNLOCK(pcbinfo); return (inp); } /* * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf * from which a pre-calculated hash value may be extracted. * * Possibly more of this logic should be in in6_pcbgroup.c. */ struct inpcb * in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) { #if defined(PCBGROUP) && !defined(RSS) struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); /* * When not using RSS, use connection groups in preference to the * reservation table when looking up 4-tuples. When using RSS, just * use the reservation table, due to the cost of the Toeplitz hash * in software. * * XXXRW: This policy belongs in the pcbgroup code, as in principle * we could be doing RSS with a non-Toeplitz hash that is affordable * in software. 
*/ #if defined(PCBGROUP) && !defined(RSS) if (in_pcbgroup_enabled(pcbinfo)) { pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); } #endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } struct inpcb * in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp, struct mbuf *m) { #ifdef PCBGROUP struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); #ifdef PCBGROUP /* * If we can use a hardware-generated hash to look up the connection * group, use that connection group to find the inpcb. Otherwise * fall back on a software hash -- or the reservation table if we're * using RSS. * * XXXRW: As above, that policy belongs in the pcbgroup code. */ if (in_pcbgroup_enabled(pcbinfo) && M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) { pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), m->m_pkthdr.flowid); if (pcbgroup != NULL) return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #ifndef RSS pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #endif } #endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) { struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = ip->ip6_src; (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */ return; } Index: stable/11/sys/netinet6/raw_ip6.c =================================================================== --- stable/11/sys/netinet6/raw_ip6.c (revision 331642) +++ stable/11/sys/netinet6/raw_ip6.c (revision 331643) @@ -1,907 +1,907 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) /* * Raw interface to IP6 protocol. */ VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) extern u_long rip_sendspace; extern u_long rip_recvspace; VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat); VNET_PCPUSTAT_SYSINIT(rip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(rip6stat); #endif /* VIMAGE */ /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip6_mrouter); /* * The various mrouter functions. 
*/ int (*ip6_mrouter_set)(struct socket *, struct sockopt *); int (*ip6_mrouter_get)(struct socket *, struct sockopt *); int (*ip6_mrouter_done)(void); int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); int (*mrt6_ioctl)(u_long, caddr_t); /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip6_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; - register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - register struct inpcb *in6p; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct inpcb *in6p; struct inpcb *last = NULL; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; RIP6STAT_INC(rip6s_ipackets); init_sin6(&fromsa, m); /* general init */ ifp = m->m_pkthdr.rcvif; INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { /* XXX inp locking */ if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p && in6p->inp_ip_p != proto) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; if (jailed_without_vnet(in6p->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && prison_check_ip6(in6p->inp_cred, &ip6->ip6_dst) != 0) continue; } INP_RLOCK(in6p); if (in6p->in6p_cksum != -1) { RIP6STAT_INC(rip6s_isum); if (in6_cksum(m, proto, *offp, m->m_pkthdr.len - *offp)) { INP_RUNLOCK(in6p); RIP6STAT_INC(rip6s_badsum); continue; } } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (in6p->in6p_moptions && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If the incoming datagram is for MLD, allow it * through unconditionally to the raw socket. * * Use the M_RTALERT_MLD flag to check for MLD * traffic without having to inspect the mbuf chain * more deeply, as all MLDv1/v2 host messages MUST * contain the Router Alert option. * * In the case of MLDv1, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. im6o_mc_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if ((m->m_flags & M_RTALERT_MLD) == 0) { struct sockaddr_in6 mcaddr; bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(in6p->in6p_moptions, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); } if (blocked != MCAST_PASS) { IP6STAT_INC(ip6s_notmember); INP_RUNLOCK(in6p); continue; } } if (last != NULL) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv6)) { if (n != NULL && IPSEC_CHECK_POLICY(ipv6, n, last) != 0) { m_freem(n); /* Do not inject data into pcb. 
*/ n = NULL; } } #endif /* IPSEC */ if (n) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv6) && last != NULL && IPSEC_CHECK_POLICY(ipv6, m, last) != 0) { m_freem(m); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. */ INP_RUNLOCK(last); } else #endif /* IPSEC */ if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); /* Strip intermediate headers. */ m_adj(m, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); INP_RUNLOCK(last); } else { RIP6STAT_INC(rip6s_nosock); if (m->m_flags & M_MCAST) RIP6STAT_INC(rip6s_nosockmcast); if (proto == IPPROTO_NONE) m_freem(m); else icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, ip6_get_prevhdr(m, *offp)); IP6STAT_DEC(ip6s_delivered); } return (IPPROTO_DONE); } void rip6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* * If the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } (void) in6_pcbnotify(&V_ripcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } /* * Generate IPv6 header and pass packet to ip6_output. Tack on options user * may have setup with control call. */ int rip6_output(struct mbuf *m, struct socket *so, ...) { struct mbuf *control; struct m_tag *mtag; struct sockaddr_in6 *dstsock; struct in6_addr *dst; struct ip6_hdr *ip6; struct inpcb *in6p; u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *optp; struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; int hlim = 0; struct in6_addr in6a; va_list ap; va_start(ap, so); dstsock = va_arg(ap, struct sockaddr_in6 *); control = va_arg(ap, struct mbuf *); va_end(ap); in6p = sotoinpcb(so); INP_WLOCK(in6p); dst = &dstsock->sin6_addr; if (control != NULL) { if ((error = ip6_setpktopts(control, &opt, in6p->in6p_outputopts, so->so_cred, so->so_proto->pr_protocol)) != 0) { goto bad; } optp = &opt; } else optp = in6p->in6p_outputopts; /* * Check and convert scope zone ID into internal form. * * XXX: we may still need to determine the zone later. 
*/ if (!(so->so_state & SS_ISCONNECTED)) { if (!optp || !optp->ip6po_pktinfo || !optp->ip6po_pktinfo->ipi6_ifindex) use_defzone = V_ip6_use_defzone; if (dstsock->sin6_scope_id == 0 && !use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(dstsock, use_defzone)) != 0) goto bad; } /* * For an ICMPv6 packet, we should know its type and code to update * statistics. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp6; if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { error = ENOBUFS; goto bad; } icmp6 = mtod(m, struct icmp6_hdr *); type = icmp6->icmp6_type; code = icmp6->icmp6_code; } M_PREPEND(m, sizeof(*ip6), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } ip6 = mtod(m, struct ip6_hdr *); /* * Source address selection. */ error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred, scope_ambiguous, &in6a, &hlim); if (error) goto bad; error = prison_check_ip6(in6p->inp_cred, &in6a); if (error != 0) goto bad; ip6->ip6_src = in6a; ip6->ip6_dst = dstsock->sin6_addr; /* * Fill in the rest of the IPv6 header fields. */ ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (in6p->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* * ip6_plen will be filled in ip6_output, so not fill it here. */ ip6->ip6_nxt = in6p->inp_ip_p; ip6->ip6_hlim = hlim; if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; /* Compute checksum. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else off = in6p->in6p_cksum; if (plen < off + 1) { error = EINVAL; goto bad; } off += sizeof(struct ip6_hdr); n = m; while (n && n->m_len <= off) { off -= n->m_len; n = n->m_next; } if (!n) goto bad; p = (u_int16_t *)(mtod(n, caddr_t) + off); *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } /* * Send RA/RS messages to user land for protection, before sending * them to rtadvd/rtsol. */ if ((send_sendso_input_hook != NULL) && so->so_proto->pr_protocol == IPPROTO_ICMPV6) { switch (type) { case ND_ROUTER_ADVERT: case ND_ROUTER_SOLICIT: mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; m_tag_prepend(m, mtag); } } error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); ICMP6STAT_INC(icp6s_outhist[type]); } else RIP6STAT_INC(rip6s_opackets); goto freectl; bad: if (m) m_freem(m); freectl: if (control != NULL) { ip6_clearpktopts(&opt, -1); m_freem(control); } INP_WUNLOCK(in6p); return (error); } /* * Raw IPv6 socket option processing. */ int rip6_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; int error; if (sopt->sopt_level == IPPROTO_ICMPV6) /* * XXX: is it better to call icmp6_ctloutput() directly * from protosw? */ return (icmp6_ctloutput(so, sopt)); else if (sopt->sopt_level != IPPROTO_IPV6) { if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_SETFIB) { inp = sotoinpcb(so); INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_get ? 
ip6_mrouter_get(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; } return (error); } static int rip6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct icmp6_filter *filter; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip6_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT); if (filter == NULL) return (ENOMEM); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); free(filter, M_PCB); return (error); } inp = (struct inpcb *)so->so_pcb; INP_INFO_WUNLOCK(&V_ripcbinfo); inp->inp_vflag |= INP_IPV6; inp->inp_ip_p = (long)proto; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; inp->in6p_icmp6filt = filter; ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt); INP_WUNLOCK(inp); return (0); } static void rip6_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_detach: inp == NULL")); if (so == V_ip6_mrouter && ip6_mrouter_done) ip6_mrouter_done(); /* xxx: RSVP */ INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); free(inp->in6p_icmp6filt, M_PCB); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } /* XXXRW: This can't ever be called. 
*/ static void rip6_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_abort: inp == NULL")); soisdisconnected(so); } static void rip6_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_close: inp == NULL")); soisdisconnected(so); } static int rip6_disconnect(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL")); if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp->in6p_faddr = in6addr_any; rip6_abort(so); return (0); } static int rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct ifaddr *ifa = NULL; int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_bind: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0) return (error); if (TAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6) return (EADDRNOTAVAIL); if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) return (EADDRNOTAVAIL); if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { ifa_free(ifa); return (EADDRNOTAVAIL); } if (ifa != NULL) ifa_free(ifa); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); inp->in6p_laddr = addr->sin6_addr; INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_connect: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (TAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin6_family != AF_INET6) return (EAFNOSUPPORT); /* * Application should provide a proper zone ID or the use of default * zone IDs should be enabled. Unfortunately, some applications do * not behave as it should, so we need a workaround. Even if an * appropriate ID is not determined, we'll see if we can determine * the outgoing interface. If we can, determine the zone ID based on * the interface below. */ if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup? 
*/ error = in6_selectsrc_socket(addr, inp->in6p_outputopts, inp, so->so_cred, scope_ambiguous, &in6a, NULL); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 tmp; struct sockaddr_in6 *dst; int ret; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_send: inp == NULL")); /* Always copy sockaddr to avoid overwrites. */ /* Unlocked read. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } /* XXX */ bzero(&tmp, sizeof(tmp)); tmp.sin6_family = AF_INET6; tmp.sin6_len = sizeof(struct sockaddr_in6); INP_RLOCK(inp); bcopy(&inp->in6p_faddr, &tmp.sin6_addr, sizeof(struct in6_addr)); INP_RUNLOCK(inp); dst = &tmp; } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } if (nam->sa_len != sizeof(struct sockaddr_in6)) { m_freem(m); return (EINVAL); } tmp = *(struct sockaddr_in6 *)nam; dst = &tmp; if (dst->sin6_family == AF_UNSPEC) { /* * XXX: we allow this case for backward * compatibility to buggy applications that * rely on old (and wrong) kernel behavior. */ log(LOG_INFO, "rip6 SEND: address family is " "unspec. Assume AF_INET6\n"); dst->sin6_family = AF_INET6; } else if (dst->sin6_family != AF_INET6) { m_freem(m); return(EAFNOSUPPORT); } } ret = rip6_output(m, so, dst, control); return (ret); } struct pr_usrreqs rip6_usrreqs = { .pru_abort = rip6_abort, .pru_attach = rip6_attach, .pru_bind = rip6_bind, .pru_connect = rip6_connect, .pru_control = in6_control, .pru_detach = rip6_detach, .pru_disconnect = rip6_disconnect, .pru_peeraddr = in6_getpeeraddr, .pru_send = rip6_send, .pru_shutdown = rip6_shutdown, .pru_sockaddr = in6_getsockaddr, .pru_close = rip6_close, }; Index: stable/11/sys/netipsec/ipsec_mbuf.c =================================================================== --- stable/11/sys/netipsec/ipsec_mbuf.c (revision 331642) +++ stable/11/sys/netipsec/ipsec_mbuf.c (revision 331643) @@ -1,337 +1,337 @@ /*- * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * IPsec-specific mbuf routines. */ #include #include #include #include #include #include #include #include /* * Make space for a new header of length hlen at skip bytes * into the packet. When doing this we allocate new mbufs only * when absolutely necessary. The mbuf where the new header * is to go is returned together with an offset into the mbuf. * If NULL is returned then the mbuf chain may have been modified; * the caller is assumed to always free the chain. */ struct mbuf * m_makespace(struct mbuf *m0, int skip, int hlen, int *off) { struct mbuf *m; unsigned remain; IPSEC_ASSERT(m0 != NULL, ("null mbuf")); IPSEC_ASSERT(hlen < MHLEN, ("hlen too big: %u", hlen)); for (m = m0; m && skip > m->m_len; m = m->m_next) skip -= m->m_len; if (m == NULL) return (NULL); /* * At this point skip is the offset into the mbuf m * where the new header should be placed. Figure out * if there's space to insert the new header. If so, * and copying the remainder makes sense then do so. * Otherwise insert a new mbuf in the chain, splitting * the contents of m as needed. */ remain = m->m_len - skip; /* data to move */ if (remain > skip && hlen + max_linkhdr < M_LEADINGSPACE(m)) { /* * mbuf has enough free space at the beginning. * XXX: which operation is the most heavy - copying of * possible several hundred of bytes or allocation * of new mbuf? We can remove max_linkhdr check * here, but it is possible that this will lead * to allocation of new mbuf in Layer 2 code. */ m->m_data -= hlen; bcopy(mtodo(m, hlen), mtod(m, caddr_t), skip); m->m_len += hlen; *off = skip; } else if (hlen > M_TRAILINGSPACE(m)) { struct mbuf *n0, *n, **np; int todo, len, done, alloc; n0 = NULL; np = &n0; alloc = 0; done = 0; todo = remain; while (todo > 0) { if (todo > MHLEN) { n = m_getcl(M_NOWAIT, m->m_type, 0); len = MCLBYTES; } else { n = m_get(M_NOWAIT, m->m_type); len = MHLEN; } if (n == NULL) { m_freem(n0); return NULL; } *np = n; np = &n->m_next; alloc++; len = min(todo, len); memcpy(n->m_data, mtod(m, char *) + skip + done, len); n->m_len = len; done += len; todo -= len; } if (hlen <= M_TRAILINGSPACE(m) + remain) { m->m_len = skip + hlen; *off = skip; if (n0 != NULL) { *np = m->m_next; m->m_next = n0; } } else { n = m_get(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(n0); return NULL; } alloc++; if ((n->m_next = n0) == NULL) np = &n->m_next; n0 = n; *np = m->m_next; m->m_next = n0; n->m_len = hlen; m->m_len = skip; m = n; /* header is at front ... */ *off = 0; /* ... of new mbuf */ } IPSECSTAT_INC(ips_mbinserted); } else { /* * Copy the remainder to the back of the mbuf * so there's space to write the new header. */ bcopy(mtod(m, caddr_t) + skip, mtod(m, caddr_t) + skip + hlen, remain); m->m_len += hlen; *off = skip; } m0->m_pkthdr.len += hlen; /* adjust packet length */ return m; } /* * m_pad(m, n) pads with bytes at the end. The packet header * length is updated, and a pointer to the first byte of the padding * (which is guaranteed to be all in one mbuf) is returned. 
*/ caddr_t m_pad(struct mbuf *m, int n) { - register struct mbuf *m0, *m1; - register int len, pad; + struct mbuf *m0, *m1; + int len, pad; caddr_t retval; if (n <= 0) { /* No stupid arguments. */ DPRINTF(("%s: pad length invalid (%d)\n", __func__, n)); m_freem(m); return NULL; } len = m->m_pkthdr.len; pad = n; m0 = m; while (m0->m_len < len) { len -= m0->m_len; m0 = m0->m_next; } if (m0->m_len != len) { DPRINTF(("%s: length mismatch (should be %d instead of %d)\n", __func__, m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); m_freem(m); return NULL; } /* Check for zero-length trailing mbufs, and find the last one. */ for (m1 = m0; m1->m_next; m1 = m1->m_next) { if (m1->m_next->m_len != 0) { DPRINTF(("%s: length mismatch (should be %d instead " "of %d)\n", __func__, m->m_pkthdr.len, m->m_pkthdr.len + m1->m_next->m_len)); m_freem(m); return NULL; } m0 = m1->m_next; } if (pad > M_TRAILINGSPACE(m0)) { /* Add an mbuf to the chain. */ MGET(m1, M_NOWAIT, MT_DATA); if (m1 == NULL) { m_freem(m0); DPRINTF(("%s: unable to get extra mbuf\n", __func__)); return NULL; } m0->m_next = m1; m0 = m1; m0->m_len = 0; } retval = m0->m_data + m0->m_len; m0->m_len += pad; m->m_pkthdr.len += pad; return retval; } /* * Remove hlen data at offset skip in the packet. This is used by * the protocols strip protocol headers and associated data (e.g. IV, * authenticator) on input. */ int m_striphdr(struct mbuf *m, int skip, int hlen) { struct mbuf *m1; int roff; /* Find beginning of header */ m1 = m_getptr(m, skip, &roff); if (m1 == NULL) return (EINVAL); /* Remove the header and associated data from the mbuf. */ if (roff == 0) { /* The header was at the beginning of the mbuf */ IPSECSTAT_INC(ips_input_front); m_adj(m1, hlen); if ((m1->m_flags & M_PKTHDR) == 0) m->m_pkthdr.len -= hlen; } else if (roff + hlen >= m1->m_len) { struct mbuf *mo; /* * Part or all of the header is at the end of this mbuf, * so first let's remove the remainder of the header from * the beginning of the remainder of the mbuf chain, if any. */ IPSECSTAT_INC(ips_input_end); if (roff + hlen > m1->m_len) { /* Adjust the next mbuf by the remainder */ m_adj(m1->m_next, roff + hlen - m1->m_len); /* The second mbuf is guaranteed not to have a pkthdr... */ m->m_pkthdr.len -= (roff + hlen - m1->m_len); } /* Now, let's unlink the mbuf chain for a second...*/ mo = m1->m_next; m1->m_next = NULL; /* ...and trim the end of the first part of the chain...sick */ m_adj(m1, -(m1->m_len - roff)); if ((m1->m_flags & M_PKTHDR) == 0) m->m_pkthdr.len -= (m1->m_len - roff); /* Finally, let's relink */ m1->m_next = mo; } else { /* * The header lies in the "middle" of the mbuf; copy * the remainder of the mbuf down over the header. */ IPSECSTAT_INC(ips_input_middle); bcopy(mtod(m1, u_char *) + roff + hlen, mtod(m1, u_char *) + roff, m1->m_len - (roff + hlen)); m1->m_len -= hlen; m->m_pkthdr.len -= hlen; } return (0); } /* * Diagnostic routine to check mbuf alignment as required by the * crypto device drivers (that use DMA). */ void m_checkalignment(const char* where, struct mbuf *m0, int off, int len) { int roff; struct mbuf *m = m_getptr(m0, off, &roff); caddr_t addr; if (m == NULL) return; printf("%s (off %u len %u): ", where, off, len); addr = mtod(m, caddr_t) + roff; do { int mlen; if (((uintptr_t) addr) & 3) { printf("addr misaligned %p,", addr); break; } mlen = m->m_len; if (mlen > len) mlen = len; len -= mlen; if (len && (mlen & 3)) { printf("len mismatch %u,", mlen); break; } m = m->m_next; addr = m ? 
mtod(m, caddr_t) : NULL; } while (m && len > 0); for (m = m0; m; m = m->m_next) printf(" [%p:%u]", mtod(m, caddr_t), m->m_len); printf("\n"); } Index: stable/11/sys/rpc/clnt.h =================================================================== --- stable/11/sys/rpc/clnt.h (revision 331642) +++ stable/11/sys/rpc/clnt.h (revision 331643) @@ -1,716 +1,716 @@ /* $NetBSD: clnt.h,v 1.14 2000/06/02 22:57:55 fvdl Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2010, Oracle America, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the "Oracle America, Inc." nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * from: @(#)clnt.h 1.31 94/04/29 SMI * from: @(#)clnt.h 2.1 88/07/29 4.0 RPCSRC * $FreeBSD$ */ /* * clnt.h - Client side remote procedure call interface. * * Copyright (c) 1986-1991,1994-1999 by Sun Microsystems, Inc. * All rights reserved. */ #ifndef _RPC_CLNT_H_ #define _RPC_CLNT_H_ #include #include #ifdef _KERNEL #include #include #else #include #endif #include /* * Well-known IPV6 RPC broadcast address. */ #define RPCB_MULTICAST_ADDR "ff02::202" /* * the following errors are in general unrecoverable. The caller * should give up rather than retry. */ #define IS_UNRECOVERABLE_RPC(s) (((s) == RPC_AUTHERROR) || \ ((s) == RPC_CANTENCODEARGS) || \ ((s) == RPC_CANTDECODERES) || \ ((s) == RPC_VERSMISMATCH) || \ ((s) == RPC_PROCUNAVAIL) || \ ((s) == RPC_PROGUNAVAIL) || \ ((s) == RPC_PROGVERSMISMATCH) || \ ((s) == RPC_CANTDECODEARGS)) /* * Error info. */ struct rpc_err { enum clnt_stat re_status; union { int RE_errno; /* related system error */ enum auth_stat RE_why; /* why the auth error occurred */ struct { rpcvers_t low; /* lowest version supported */ rpcvers_t high; /* highest version supported */ } RE_vers; struct { /* maybe meaningful if RPC_FAILED */ int32_t s1; int32_t s2; } RE_lb; /* life boot & debugging only */ } ru; #define re_errno ru.RE_errno #define re_why ru.RE_why #define re_vers ru.RE_vers #define re_lb ru.RE_lb }; #ifdef _KERNEL /* * Functions of this type may be used to receive notification when RPC * calls have to be re-transmitted etc. 
*/ typedef void rpc_feedback(int cmd, int procnum, void *); /* * Timers used for the pseudo-transport protocol when using datagrams */ struct rpc_timers { u_short rt_srtt; /* smoothed round-trip time */ u_short rt_deviate; /* estimated deviation */ u_long rt_rtxcur; /* current (backed-off) rto */ }; /* * A structure used with CLNT_CALL_EXT to pass extra information used * while processing an RPC call. */ struct rpc_callextra { AUTH *rc_auth; /* auth handle to use for this call */ rpc_feedback *rc_feedback; /* callback for retransmits etc. */ void *rc_feedback_arg; /* argument for callback */ struct rpc_timers *rc_timers; /* optional RTT timers */ struct rpc_err rc_err; /* detailed call status */ }; #endif /* * Client rpc handle. * Created by individual implementations * Client is responsible for initializing auth, see e.g. auth_none.c. */ typedef struct __rpc_client { #ifdef _KERNEL volatile u_int cl_refs; /* reference count */ AUTH *cl_auth; /* authenticator */ struct clnt_ops { /* call remote procedure */ enum clnt_stat (*cl_call)(struct __rpc_client *, struct rpc_callextra *, rpcproc_t, struct mbuf *, struct mbuf **, struct timeval); /* abort a call */ void (*cl_abort)(struct __rpc_client *); /* get specific error code */ void (*cl_geterr)(struct __rpc_client *, struct rpc_err *); /* frees results */ bool_t (*cl_freeres)(struct __rpc_client *, xdrproc_t, void *); /* close the connection and terminate pending RPCs */ void (*cl_close)(struct __rpc_client *); /* destroy this structure */ void (*cl_destroy)(struct __rpc_client *); /* the ioctl() of rpc */ bool_t (*cl_control)(struct __rpc_client *, u_int, void *); } *cl_ops; #else AUTH *cl_auth; /* authenticator */ struct clnt_ops { /* call remote procedure */ enum clnt_stat (*cl_call)(struct __rpc_client *, rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval); /* abort a call */ void (*cl_abort)(struct __rpc_client *); /* get specific error code */ void (*cl_geterr)(struct __rpc_client *, struct rpc_err *); /* frees results */ bool_t (*cl_freeres)(struct __rpc_client *, xdrproc_t, void *); /* destroy this structure */ void (*cl_destroy)(struct __rpc_client *); /* the ioctl() of rpc */ bool_t (*cl_control)(struct __rpc_client *, u_int, void *); } *cl_ops; #endif void *cl_private; /* private stuff */ char *cl_netid; /* network token */ char *cl_tp; /* device name */ } CLIENT; /* * Feedback values used for possible congestion and rate control */ #define FEEDBACK_OK 1 /* no retransmits */ #define FEEDBACK_REXMIT1 2 /* first retransmit */ #define FEEDBACK_REXMIT2 3 /* second and subsequent retransmit */ #define FEEDBACK_RECONNECT 4 /* client reconnect */ /* Used to set version of portmapper used in broadcast */ #define CLCR_SET_LOWVERS 3 #define CLCR_GET_LOWVERS 4 #define RPCSMALLMSGSIZE 400 /* a more reasonable packet size */ /* * client side rpc interface ops * * Parameter types are: * */ #ifdef _KERNEL #define CLNT_ACQUIRE(rh) \ refcount_acquire(&(rh)->cl_refs) #define CLNT_RELEASE(rh) \ if (refcount_release(&(rh)->cl_refs)) \ CLNT_DESTROY(rh) /* * void * CLNT_CLOSE(rh); * CLIENT *rh; */ #define CLNT_CLOSE(rh) ((*(rh)->cl_ops->cl_close)(rh)) enum clnt_stat clnt_call_private(CLIENT *, struct rpc_callextra *, rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval); /* * enum clnt_stat * CLNT_CALL_MBUF(rh, ext, proc, mreq, mrepp, timeout) * CLIENT *rh; * struct rpc_callextra *ext; * rpcproc_t proc; * struct mbuf *mreq; * struct mbuf **mrepp; * struct timeval timeout; * * Call arguments in mreq which is consumed by the call 
(even if there * is an error). Results returned in *mrepp. */ #define CLNT_CALL_MBUF(rh, ext, proc, mreq, mrepp, secs) \ ((*(rh)->cl_ops->cl_call)(rh, ext, proc, mreq, mrepp, secs)) /* * enum clnt_stat * CLNT_CALL_EXT(rh, ext, proc, xargs, argsp, xres, resp, timeout) * CLIENT *rh; * struct rpc_callextra *ext; * rpcproc_t proc; * xdrproc_t xargs; * void *argsp; * xdrproc_t xres; * void *resp; * struct timeval timeout; */ #define CLNT_CALL_EXT(rh, ext, proc, xargs, argsp, xres, resp, secs) \ clnt_call_private(rh, ext, proc, xargs, \ argsp, xres, resp, secs) #endif /* * enum clnt_stat * CLNT_CALL(rh, proc, xargs, argsp, xres, resp, timeout) * CLIENT *rh; * rpcproc_t proc; * xdrproc_t xargs; * void *argsp; * xdrproc_t xres; * void *resp; * struct timeval timeout; */ #ifdef _KERNEL #define CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs) \ clnt_call_private(rh, NULL, proc, xargs, \ argsp, xres, resp, secs) #define clnt_call(rh, proc, xargs, argsp, xres, resp, secs) \ clnt_call_private(rh, NULL, proc, xargs, \ argsp, xres, resp, secs) #else #define CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs) \ ((*(rh)->cl_ops->cl_call)(rh, proc, xargs, \ argsp, xres, resp, secs)) #define clnt_call(rh, proc, xargs, argsp, xres, resp, secs) \ ((*(rh)->cl_ops->cl_call)(rh, proc, xargs, \ argsp, xres, resp, secs)) #endif /* * void * CLNT_ABORT(rh); * CLIENT *rh; */ #define CLNT_ABORT(rh) ((*(rh)->cl_ops->cl_abort)(rh)) #define clnt_abort(rh) ((*(rh)->cl_ops->cl_abort)(rh)) /* * struct rpc_err * CLNT_GETERR(rh); * CLIENT *rh; */ #define CLNT_GETERR(rh,errp) ((*(rh)->cl_ops->cl_geterr)(rh, errp)) #define clnt_geterr(rh,errp) ((*(rh)->cl_ops->cl_geterr)(rh, errp)) /* * bool_t * CLNT_FREERES(rh, xres, resp); * CLIENT *rh; * xdrproc_t xres; * void *resp; */ #define CLNT_FREERES(rh,xres,resp) ((*(rh)->cl_ops->cl_freeres)(rh,xres,resp)) #define clnt_freeres(rh,xres,resp) ((*(rh)->cl_ops->cl_freeres)(rh,xres,resp)) /* * bool_t * CLNT_CONTROL(cl, request, info) * CLIENT *cl; * u_int request; * char *info; */ #define CLNT_CONTROL(cl,rq,in) ((*(cl)->cl_ops->cl_control)(cl,rq,in)) #define clnt_control(cl,rq,in) ((*(cl)->cl_ops->cl_control)(cl,rq,in)) /* * control operations that apply to both udp and tcp transports */ #define CLSET_TIMEOUT 1 /* set timeout (timeval) */ #define CLGET_TIMEOUT 2 /* get timeout (timeval) */ #define CLGET_SERVER_ADDR 3 /* get server's address (sockaddr) */ #define CLGET_FD 6 /* get connections file descriptor */ #define CLGET_SVC_ADDR 7 /* get server's address (netbuf) */ #define CLSET_FD_CLOSE 8 /* close fd while clnt_destroy */ #define CLSET_FD_NCLOSE 9 /* Do not close fd while clnt_destroy */ #define CLGET_XID 10 /* Get xid */ #define CLSET_XID 11 /* Set xid */ #define CLGET_VERS 12 /* Get version number */ #define CLSET_VERS 13 /* Set version number */ #define CLGET_PROG 14 /* Get program number */ #define CLSET_PROG 15 /* Set program number */ #define CLSET_SVC_ADDR 16 /* get server's address (netbuf) */ #define CLSET_PUSH_TIMOD 17 /* push timod if not already present */ #define CLSET_POP_TIMOD 18 /* pop timod */ /* * Connectionless only control operations */ #define CLSET_RETRY_TIMEOUT 4 /* set retry timeout (timeval) */ #define CLGET_RETRY_TIMEOUT 5 /* get retry timeout (timeval) */ #define CLSET_ASYNC 19 #define CLSET_CONNECT 20 /* Use connect() for UDP. (int) */ #ifdef _KERNEL /* * Kernel control operations. The default msleep string is "rpcrecv", * and sleeps are non-interruptible by default. 
#ifdef _KERNEL
/*
 * Kernel control operations. The default msleep string is "rpcrecv",
 * and sleeps are non-interruptible by default.
 */
#define CLSET_WAITCHAN		21	/* set string to use in msleep call */
#define CLGET_WAITCHAN		22	/* get string used in msleep call */
#define CLSET_INTERRUPTIBLE	23	/* set interruptible flag */
#define CLGET_INTERRUPTIBLE	24	/* get interruptible flag */
#define CLSET_RETRIES		25	/* set retry count for reconnect */
#define CLGET_RETRIES		26	/* get retry count for reconnect */
#define CLSET_PRIVPORT		27	/* set privileged source port flag */
#define CLGET_PRIVPORT		28	/* get privileged source port flag */
#define CLSET_BACKCHANNEL	29	/* set backchannel for socket */
#endif

/*
 * void
 * CLNT_DESTROY(rh);
 *	CLIENT *rh;
 */
#define CLNT_DESTROY(rh)	((*(rh)->cl_ops->cl_destroy)(rh))
#define clnt_destroy(rh)	((*(rh)->cl_ops->cl_destroy)(rh))

/*
 * RPCTEST is a test program which is accessible on every rpc
 * transport/port. It is used for testing, performance evaluation,
 * and network administration.
 */
#define RPCTEST_PROGRAM		((rpcprog_t)1)
#define RPCTEST_VERSION		((rpcvers_t)1)
#define RPCTEST_NULL_PROC	((rpcproc_t)2)
#define RPCTEST_NULL_BATCH_PROC	((rpcproc_t)3)

/*
 * By convention, procedure 0 takes null arguments and returns them
 */
#define NULLPROC ((rpcproc_t)0)

/*
 * Below are the client handle creation routines for the various
 * implementations of client side rpc. They can return NULL if a
 * creation failure occurs.
 */

/*
 * Generic client creation routine. Supported protocols are those that
 * belong to the nettype namespace (/etc/netconfig).
 */
__BEGIN_DECLS
#ifdef _KERNEL

/*
 *	struct socket *so;		-- socket
 *	struct sockaddr *svcaddr;	-- server's address
 *	rpcprog_t prog;			-- program number
 *	rpcvers_t vers;			-- version number
 *	size_t sendsz;			-- buffer send size
 *	size_t recvsz;			-- buffer recv size
 */
extern CLIENT *clnt_dg_create(struct socket *so,
    struct sockaddr *svcaddr, rpcprog_t program, rpcvers_t version,
    size_t sendsz, size_t recvsz);

/*
 *	struct socket *so;		-- socket
 *	struct sockaddr *svcaddr;	-- server's address
 *	rpcprog_t prog;			-- program number
 *	rpcvers_t vers;			-- version number
 *	size_t sendsz;			-- buffer send size
 *	size_t recvsz;			-- buffer recv size
 *	int intrflag;			-- is it interruptible
 */
extern CLIENT *clnt_vc_create(struct socket *so,
    struct sockaddr *svcaddr, rpcprog_t program, rpcvers_t version,
    size_t sendsz, size_t recvsz, int intrflag);

/*
 *	struct netconfig *nconf;	-- network type
 *	struct sockaddr *svcaddr;	-- server's address
 *	rpcprog_t prog;			-- program number
 *	rpcvers_t vers;			-- version number
 *	size_t sendsz;			-- buffer send size
 *	size_t recvsz;			-- buffer recv size
 */
extern CLIENT *clnt_reconnect_create(struct netconfig *nconf,
    struct sockaddr *svcaddr, rpcprog_t program, rpcvers_t version,
    size_t sendsz, size_t recvsz);
#else
extern CLIENT *clnt_create(const char *, const rpcprog_t, const rpcvers_t,
    const char *);
/*
 *	const char *hostname;		-- hostname
 *	const rpcprog_t prog;		-- program number
 *	const rpcvers_t vers;		-- version number
 *	const char *nettype;		-- network type
 */

/*
 * Generic client creation routine. Just like clnt_create(), except
 * it takes an additional timeout parameter.
 */
extern CLIENT *clnt_create_timed(const char *, const rpcprog_t,
    const rpcvers_t, const char *, const struct timeval *);
/*
 *	const char *hostname;		-- hostname
 *	const rpcprog_t prog;		-- program number
 *	const rpcvers_t vers;		-- version number
 *	const char *nettype;		-- network type
 *	const struct timeval *tp;	-- timeout
 */
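In userland, clnt_create()/clnt_create_timed() plus clnt_call() is the
usual client skeleton. A minimal sketch under assumed values (the
hostname and the program/version numbers are placeholders, not anything
defined by this header):

#include <rpc/rpc.h>
#include <stdlib.h>

#define EX_PROG	((rpcprog_t)0x20000099)	/* placeholder program number */
#define EX_VERS	((rpcvers_t)1)		/* placeholder version */

int
main(void)
{
	struct timeval tv = { 10, 0 };
	CLIENT *clnt;

	clnt = clnt_create_timed("server.example.org", EX_PROG, EX_VERS,
	    "tcp", &tv);
	if (clnt == NULL) {
		clnt_pcreateerror("server.example.org");
		exit(1);
	}
	/* NULLPROC takes no arguments and returns none; a cheap ping. */
	if (clnt_call(clnt, NULLPROC, (xdrproc_t)xdr_void, NULL,
	    (xdrproc_t)xdr_void, NULL, tv) != RPC_SUCCESS)
		clnt_perror(clnt, "NULLPROC");
	clnt_destroy(clnt);
	return (0);
}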
/*
 * Generic client creation routine. Supported protocols are those which
 * belong to the nettype name space.
 */
extern CLIENT *clnt_create_vers(const char *, const rpcprog_t, rpcvers_t *,
    const rpcvers_t, const rpcvers_t, const char *);
/*
 *	const char *host;		-- hostname
 *	const rpcprog_t prog;		-- program number
 *	rpcvers_t *vers_out;		-- server's highest available version
 *	const rpcvers_t vers_low;	-- low version number
 *	const rpcvers_t vers_high;	-- high version number
 *	const char *nettype;		-- network type
 */

/*
 * Generic client creation routine. Supported protocols are those which
 * belong to the nettype name space.
 */
extern CLIENT *clnt_create_vers_timed(const char *, const rpcprog_t,
    rpcvers_t *, const rpcvers_t, const rpcvers_t, const char *,
    const struct timeval *);
/*
 *	const char *host;		-- hostname
 *	const rpcprog_t prog;		-- program number
 *	rpcvers_t *vers_out;		-- server's highest available version
 *	const rpcvers_t vers_low;	-- low version number
 *	const rpcvers_t vers_high;	-- high version number
 *	const char *nettype;		-- network type
 *	const struct timeval *tp;	-- timeout
 */

/*
 * Generic client creation routine. It takes a netconfig structure
 * instead of a nettype.
 */
extern CLIENT *clnt_tp_create(const char *, const rpcprog_t,
    const rpcvers_t, const struct netconfig *);
/*
 *	const char *hostname;			-- hostname
 *	const rpcprog_t prog;			-- program number
 *	const rpcvers_t vers;			-- version number
 *	const struct netconfig *netconf;	-- network config structure
 */

/*
 * Generic client creation routine. Just like clnt_tp_create(), except
 * it takes an additional timeout parameter.
 */
extern CLIENT *clnt_tp_create_timed(const char *, const rpcprog_t,
    const rpcvers_t, const struct netconfig *, const struct timeval *);
/*
 *	const char *hostname;			-- hostname
 *	const rpcprog_t prog;			-- program number
 *	const rpcvers_t vers;			-- version number
 *	const struct netconfig *netconf;	-- network config structure
 *	const struct timeval *tp;		-- timeout
 */

/*
 * Generic TLI create routine. Only provided for compatibility.
 */
extern CLIENT *clnt_tli_create(const int, const struct netconfig *,
    struct netbuf *, const rpcprog_t, const rpcvers_t, const u_int,
    const u_int);
/*
- *	const register int fd;		-- fd
+ *	const int fd;			-- fd
 *	const struct netconfig *nconf;	-- netconfig structure
 *	struct netbuf *svcaddr;		-- server's address
 *	const rpcprog_t prog;		-- program number
 *	const rpcvers_t vers;		-- version number
 *	const u_int sendsz;		-- send size
 *	const u_int recvsz;		-- recv size
 */

/*
 * Low level clnt create routine for connection-oriented transports,
 * e.g. tcp.
 */
extern CLIENT *clnt_vc_create(const int, const struct netbuf *,
    const rpcprog_t, const rpcvers_t, u_int, u_int);
/*
 * Added for compatibility with old rpc 4.0. Obsoleted by clnt_vc_create().
 */
extern CLIENT *clntunix_create(struct sockaddr_un *, u_long, u_long,
    int *, u_int, u_int);
/*
 *	const int fd;			-- open file descriptor
 *	const struct netbuf *svcaddr;	-- server's address
 *	const rpcprog_t prog;		-- program number
 *	const rpcvers_t vers;		-- version number
 *	const u_int sendsz;		-- buffer send size
 *	const u_int recvsz;		-- buffer recv size
 */
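clnt_vc_create() builds a handle directly on an already-connected stream
socket. A minimal sketch (wrap_tcp_socket is a hypothetical helper;
passing 0 for the buffer sizes asks the library to choose suitable
defaults):

#include <rpc/rpc.h>
#include <netinet/in.h>

/* Hypothetical helper: wrap a connected TCP socket in a CLIENT handle. */
static CLIENT *
wrap_tcp_socket(int fd, struct sockaddr_in *sin, rpcprog_t prog,
    rpcvers_t vers)
{
	struct netbuf svc;

	svc.buf = sin;
	svc.len = svc.maxlen = sizeof(*sin);
	return (clnt_vc_create(fd, &svc, prog, vers, 0, 0));
}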
/*
 * Low level clnt create routine for connectionless transports, e.g. udp.
 */
extern CLIENT *clnt_dg_create(const int, const struct netbuf *,
    const rpcprog_t, const rpcvers_t, const u_int, const u_int);
/*
 *	const int fd;			-- open file descriptor
 *	const struct netbuf *svcaddr;	-- server's address
 *	const rpcprog_t program;	-- program number
 *	const rpcvers_t version;	-- version number
 *	const u_int sendsz;		-- buffer send size
 *	const u_int recvsz;		-- buffer recv size
 */

/*
 * Memory based rpc (for speed check and testing)
 * CLIENT *
 * clnt_raw_create(prog, vers)
 *	u_long prog;
 *	u_long vers;
 */
extern CLIENT *clnt_raw_create(rpcprog_t, rpcvers_t);
#endif
__END_DECLS

/*
 * Print why creation failed
 */
__BEGIN_DECLS
extern void clnt_pcreateerror(const char *);		/* stderr */
extern char *clnt_spcreateerror(const char *);		/* string */
__END_DECLS

/*
 * Like clnt_perror(), but is more verbose in its output
 */
__BEGIN_DECLS
extern void clnt_perrno(enum clnt_stat);		/* stderr */
extern char *clnt_sperrno(enum clnt_stat);		/* string */
__END_DECLS

/*
 * Print an English error message, given the client error code
 */
__BEGIN_DECLS
extern void clnt_perror(CLIENT *, const char *);	/* stderr */
extern char *clnt_sperror(CLIENT *, const char *);	/* string */
__END_DECLS

/*
 * If a creation fails, the following allows the user to figure out why.
 */
struct rpc_createerr {
	enum clnt_stat cf_stat;
	struct rpc_err cf_error; /* useful when cf_stat == RPC_PMAPFAILURE */
};

#ifdef _KERNEL
extern struct rpc_createerr rpc_createerr;
#else
__BEGIN_DECLS
extern struct rpc_createerr *__rpc_createerr(void);
__END_DECLS
#define rpc_createerr (*(__rpc_createerr()))
#endif

#ifndef _KERNEL
/*
 * The simplified interface:
 * enum clnt_stat
 * rpc_call(host, prognum, versnum, procnum, inproc, in, outproc, out, nettype)
 *	const char *host;
 *	const rpcprog_t prognum;
 *	const rpcvers_t versnum;
 *	const rpcproc_t procnum;
 *	const xdrproc_t inproc, outproc;
 *	const char *in;
 *	char *out;
 *	const char *nettype;
 */
__BEGIN_DECLS
extern enum clnt_stat rpc_call(const char *, const rpcprog_t,
    const rpcvers_t, const rpcproc_t, const xdrproc_t, const char *,
    const xdrproc_t, char *, const char *);
__END_DECLS
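rpc_call() folds handle creation, the call, and teardown into a single
step, at the cost of re-creating the transport on every invocation. A
minimal sketch (ping_host and the program/version numbers are
placeholders):

#include <rpc/rpc.h>

/* Hypothetical one-shot ping via the simplified interface. */
static int
ping_host(const char *host)
{
	enum clnt_stat stat;

	stat = rpc_call(host, (rpcprog_t)0x20000099, (rpcvers_t)1, NULLPROC,
	    (xdrproc_t)xdr_void, NULL, (xdrproc_t)xdr_void, NULL, "udp");
	if (stat != RPC_SUCCESS)
		clnt_perrno(stat);
	return (stat == RPC_SUCCESS ? 0 : -1);
}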
/*
 * RPC broadcast interface
 * The call is broadcast to all locally connected nets.
 *
 * extern enum clnt_stat
 * rpc_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp,
 *			eachresult, nettype)
 *	const rpcprog_t		prog;		-- program number
 *	const rpcvers_t		vers;		-- version number
 *	const rpcproc_t		proc;		-- procedure number
 *	const xdrproc_t		xargs;		-- xdr routine for args
 *	caddr_t			argsp;		-- pointer to args
 *	const xdrproc_t		xresults;	-- xdr routine for results
 *	caddr_t			resultsp;	-- pointer to results
 *	const resultproc_t	eachresult;	-- call with each result
 *	const char		*nettype;	-- transport type
 *
 * For each valid response received, the procedure eachresult is called.
 * Its form is:
 *	done = eachresult(resp, raddr, nconf)
 *		bool_t done;
 *		caddr_t resp;
 *		struct netbuf *raddr;
 *		struct netconfig *nconf;
 * where resp points to the results of the call, raddr is the address
 * of the responder to the broadcast, and nconf is the transport on
 * which the response was received.
 *
 * extern enum clnt_stat
 * rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp,
 *			eachresult, inittime, waittime, nettype)
 *	const rpcprog_t		prog;		-- program number
 *	const rpcvers_t		vers;		-- version number
 *	const rpcproc_t		proc;		-- procedure number
 *	const xdrproc_t		xargs;		-- xdr routine for args
 *	caddr_t			argsp;		-- pointer to args
 *	const xdrproc_t		xresults;	-- xdr routine for results
 *	caddr_t			resultsp;	-- pointer to results
 *	const resultproc_t	eachresult;	-- call with each result
 *	const int		inittime;	-- how long to wait initially
 *	const int		waittime;	-- maximum time to wait
 *	const char		*nettype;	-- transport type
 */
typedef bool_t (*resultproc_t)(caddr_t, ...);

__BEGIN_DECLS
extern enum clnt_stat rpc_broadcast(const rpcprog_t, const rpcvers_t,
    const rpcproc_t, const xdrproc_t, caddr_t, const xdrproc_t, caddr_t,
    const resultproc_t, const char *);
extern enum clnt_stat rpc_broadcast_exp(const rpcprog_t, const rpcvers_t,
    const rpcproc_t, const xdrproc_t, caddr_t, const xdrproc_t, caddr_t,
    const resultproc_t, const int, const int, const char *);
__END_DECLS

/* For backward compatibility */
#include
#endif

#endif /* !_RPC_CLNT_H_ */
Index: stable/11/sys/sparc64/include/pcpu.h
===================================================================
--- stable/11/sys/sparc64/include/pcpu.h	(revision 331642)
+++ stable/11/sys/sparc64/include/pcpu.h	(revision 331643)
@@ -1,100 +1,100 @@
/*-
 * Copyright (c) 1999 Luoqi Chen
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: FreeBSD: src/sys/i386/include/globaldata.h,v 1.27 2001/04/27
 * $FreeBSD$
 */

#ifndef _MACHINE_PCPU_H_
#define _MACHINE_PCPU_H_

#include
#include
#include
#include

#define ALT_STACK_SIZE 128

struct pmap;

/*
 * Inside the kernel, the globally reserved register g7 is used to
 * point at the globaldata structure.
 */
#define PCPU_MD_FIELDS \
	struct cacheinfo pc_cache; \
	struct intr_request pc_irpool[IR_FREE]; \
	struct intr_request *pc_irhead; \
	struct intr_request **pc_irtail; \
	struct intr_request *pc_irfree; \
	struct pmap *pc_pmap; \
	vm_offset_t pc_addr; \
	vm_offset_t pc_qmap_addr; \
	u_long pc_tickref; \
	u_long pc_tickadj; \
	u_long pc_tickincrement; \
	u_int pc_clock; \
	u_int pc_impl; \
	u_int pc_mid; \
	u_int pc_node; \
	u_int pc_tlb_ctx; \
	u_int pc_tlb_ctx_max; \
	u_int pc_tlb_ctx_min; \
	char __pad[397]

#ifdef _KERNEL

extern void *dpcpu0;

struct pcb;
struct pcpu;

-register struct pcb *curpcb __asm__(__XSTRING(PCB_REG));
-register struct pcpu *pcpup __asm__(__XSTRING(PCPU_REG));
+struct pcb *curpcb __asm__(__XSTRING(PCB_REG));
+struct pcpu *pcpup __asm__(__XSTRING(PCPU_REG));

#define PCPU_GET(member) (pcpup->pc_ ## member)

static __inline __pure2 struct thread *
__curthread(void)
{
	struct thread *td;

	__asm("ldx [%" __XSTRING(PCPU_REG) "], %0" : "=r" (td));
	return (td);
}
#define curthread (__curthread())

/*
 * XXX The implementation of this operation should be made atomic
 * with respect to preemption.
 */
#define PCPU_ADD(member, value) (pcpup->pc_ ## member += (value))
#define PCPU_INC(member) PCPU_ADD(member, 1)

#define PCPU_PTR(member) (&pcpup->pc_ ## member)
#define PCPU_SET(member,value) (pcpup->pc_ ## member = (value))

#endif /* _KERNEL */

#endif /* !_MACHINE_PCPU_H_ */
Index: stable/11
===================================================================
--- stable/11	(revision 331642)
+++ stable/11	(revision 331643)

Property changes on: stable/11
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r314568,318389
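The PCPU accessors above rely on preprocessor token pasting: pc_ ## member
glues the fixed pc_ prefix onto the member name before the compiler sees
it. A standalone sketch of the same pattern (the struct and names are
invented for illustration, not taken from this tree):

#include <stdio.h>

struct pcpu_demo { unsigned pd_mid; };	/* stand-in for struct pcpu */
static struct pcpu_demo *pcpup_demo;	/* stand-in for the pcpu pointer */

#define DEMO_GET(member)	(pcpup_demo->pd_ ## member)
#define DEMO_SET(member, value)	(pcpup_demo->pd_ ## member = (value))

int
main(void)
{
	struct pcpu_demo pd;

	pcpup_demo = &pd;
	DEMO_SET(mid, 3);		/* expands to pcpup_demo->pd_mid = 3 */
	printf("%u\n", DEMO_GET(mid));	/* prints 3 */
	return (0);
}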