Index: projects/ifnet/sys/arm/arm/trap-v6.c =================================================================== --- projects/ifnet/sys/arm/arm/trap-v6.c (revision 281649) +++ projects/ifnet/sys/arm/arm/trap-v6.c (revision 281650) @@ -1,663 +1,668 @@ /*- * Copyright 2014 Olivier Houchard * Copyright 2014 Svatopluk Kraus * Copyright 2014 Michal Meloun * Copyright 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_ktrace.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #include #include #include #include #include #include +#include #include #include #include #include #include #include #ifdef KDB #include #include #endif extern char fusubailout[]; #ifdef DEBUG int last_fault_code; /* For the benefit of pmap_fault_fixup() */ #endif struct ksig { int sig; u_long code; vm_offset_t addr; }; typedef int abort_func_t(struct trapframe *, u_int, u_int, u_int, u_int, struct thread *, struct ksig *); static abort_func_t abort_fatal; static abort_func_t abort_align; static abort_func_t abort_icache; struct abort { abort_func_t *func; const char *desc; }; /* * How are the aborts handled? * * Undefined Code: * - Always fatal as we do not know what does it mean. * Imprecise External Abort: * - Always fatal, but can be handled somehow in the future. * Now, due to PCIe buggy harware, ignored. * Precise External Abort: * - Always fatal, but who knows in the future??? * Debug Event: * - Special handling. * External Translation Abort (L1 & L2) * - Always fatal as something is screwed up in page tables or harware. * Domain Fault (L1 & L2): * - Always fatal as we do not play game with domains. * Alignment Fault: * - Everything should be aligned in kernel including user to kernel and * vice versa data copying, so we ignore pcb_onfault, and it's always fatal. * We generate signal in case of abort from user mode. * Instruction cache maintenance: * - According to manual, this is translation fault during cache maintenance * operation. So, it could be really complex in SMP case and fuzzy too * for cache operations working on virtual addresses. For now, we will * consider this abort as fatal. In fact, no cache maintenance on * not mapped virtual addresses should be called. As cache maintenance * operation (except DMB, DSB, and Flush Prefetch Buffer) are priviledged, * the abort is fatal for user mode as well for now. (This is good place to * note that cache maintenance on virtual address fill TLB.) * Acces Bit (L1 & L2): * - Fast hardware emulation for kernel and user mode. * Translation Fault (L1 & L2): * - Standard fault mechanism is held including vm_fault(). * Permission Fault (L1 & L2): * - Fast harware emulation of modify bits and in other cases, standard * fault mechanism is held including vm_fault(). */ static const struct abort aborts[] = { {abort_fatal, "Undefined Code (0x000)"}, {abort_align, "Alignment Fault"}, {abort_fatal, "Debug Event"}, {NULL, "Access Bit (L1)"}, {abort_icache, "Instruction cache maintenance"}, {NULL, "Translation Fault (L1)"}, {NULL, "Access Bit (L2)"}, {NULL, "Translation Fault (L2)"}, {abort_fatal, "External Abort"}, {abort_fatal, "Domain Fault (L1)"}, {abort_fatal, "Undefined Code (0x00A)"}, {abort_fatal, "Domain Fault (L2)"}, {abort_fatal, "External Translation Abort (L1)"}, {NULL, "Permission Fault (L1)"}, {abort_fatal, "External Translation Abort (L2)"}, {NULL, "Permission Fault (L2)"}, {abort_fatal, "TLB Conflict Abort"}, {abort_fatal, "Undefined Code (0x401)"}, {abort_fatal, "Undefined Code (0x402)"}, {abort_fatal, "Undefined Code (0x403)"}, {abort_fatal, "Undefined Code (0x404)"}, {abort_fatal, "Undefined Code (0x405)"}, {abort_fatal, "Asynchronous External Abort"}, {abort_fatal, "Undefined Code (0x407)"}, {abort_fatal, "Asynchronous Parity Error on Memory Access"}, {abort_fatal, "Parity Error on Memory Access"}, {abort_fatal, "Undefined Code (0x40A)"}, {abort_fatal, "Undefined Code (0x40B)"}, {abort_fatal, "Parity Error on Translation (L1)"}, {abort_fatal, "Undefined Code (0x40D)"}, {abort_fatal, "Parity Error on Translation (L2)"}, {abort_fatal, "Undefined Code (0x40F)"} }; static __inline void call_trapsignal(struct thread *td, int sig, int code, vm_offset_t addr) { ksiginfo_t ksi; CTR4(KTR_TRAP, "%s: addr: %#x, sig: %d, code: %d", __func__, addr, sig, code); /* * TODO: some info would be nice to know * if we are serving data or prefetch abort. */ ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = (void *)addr; trapsignal(td, &ksi); } /* * abort_imprecise() handles the following abort: * * FAULT_EA_IMPREC - Imprecise External Abort * * The imprecise means that we don't know where the abort happened, * thus FAR is undefined. The abort should not never fire, but hot * plugging or accidental harware failure can be the cause of it. * If the abort happens, it can even be on different (thread) context. * Without any additional support, the abort is fatal, as we do not * know what really happened. * * QQQ: Some additional functionality, like pcb_onfault but global, * can be implemented. Imprecise handlers could be registered * which tell us if the abort is caused by something they know * about. They should return one of three codes like: * FAULT_IS_MINE, * FAULT_CAN_BE_MINE, * FAULT_IS_NOT_MINE. * The handlers should be called until some of them returns * FAULT_IS_MINE value or all was called. If all handlers return * FAULT_IS_NOT_MINE value, then the abort is fatal. */ static __inline void abort_imprecise(struct trapframe *tf, u_int fsr, u_int prefetch, u_int usermode) { /* XXXX We can got imprecise abort as result of access * to not-present PCI/PCIe configuration space. */ #if 0 goto out; #endif abort_fatal(tf, FAULT_EA_IMPREC, fsr, 0, prefetch, curthread, NULL); /* * Returning from this function means that we ignore * the abort for good reason. Note that imprecise abort * could fire any time even in user mode. */ #if 0 out: if (usermode) userret(curthread, tf); #endif } /* * abort_debug() handles the following abort: * * FAULT_DEBUG - Debug Event * */ static __inline void abort_debug(struct trapframe *tf, u_int fsr, u_int prefetch, u_int usermode, u_int far) { if (usermode) { struct thread *td; td = curthread; call_trapsignal(td, SIGTRAP, TRAP_BRKPT, far); userret(td, tf); } else { #ifdef KDB kdb_trap(T_BREAKPOINT, 0, tf); #else printf("No debugger in kernel.\n"); #endif } } /* * Abort handler. * * FAR, FSR, and everything what can be lost after enabling * interrupts must be grabbed before the interrupts will be * enabled. Note that when interrupts will be enabled, we * could even migrate to another CPU ... * * TODO: move quick cases to ASM */ void abort_handler(struct trapframe *tf, int prefetch) { struct thread *td; vm_offset_t far, va; int idx, usermode; uint32_t fsr; struct ksig ksig; struct proc *p; struct pcb *pcb; struct vm_map *map; struct vmspace *vm; vm_prot_t ftype; int rv; #ifdef INVARIANTS void *onfault; #endif td = curthread; fsr = (prefetch) ? cp15_ifsr_get(): cp15_dfsr_get(); +#if __ARM_ARCH >= 7 + far = (prefetch) ? cp15_ifar_get() : cp15_dfar_get(); +#else far = (prefetch) ? TRAPF_PC(tf) : cp15_dfar_get(); +#endif idx = FSR_TO_FAULT(fsr); usermode = TRAPF_USERMODE(tf); /* Abort came from user mode? */ if (usermode) td->td_frame = tf; CTR4(KTR_TRAP, "abort_handler: fsr %#x (idx %u) far %#x prefetch %u", fsr, idx, far, prefetch); /* * Firstly, handle aborts that are not directly related to mapping. */ if (__predict_false(idx == FAULT_EA_IMPREC)) { abort_imprecise(tf, fsr, prefetch, usermode); return; } if (__predict_false(idx == FAULT_DEBUG)) { abort_debug(tf, fsr, prefetch, usermode, far); return; } #ifdef ARM_NEW_PMAP rv = pmap_fault(PCPU_GET(curpmap), far, fsr, idx, usermode); if (rv == 0) { return; } else if (rv == EFAULT) { call_trapsignal(td, SIGSEGV, SEGV_MAPERR, far); userret(td, tf); return; } #endif /* * Now, when we handled imprecise and debug aborts, the rest of * aborts should be really related to mapping. * */ PCPU_INC(cnt.v_trap); #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != far || (td->td_pflags & TDP_RESETSPUR) != 0) { td->td_md.md_spurflt_addr = far; td->td_pflags &= ~TDP_RESETSPUR; tlb_flush_local(far & ~PAGE_MASK); return; } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig); return; } } /* Re-enable interrupts if they were enabled previously. */ if (td->td_md.md_spinlock_count == 0) { if (__predict_true(tf->tf_spsr & PSR_I) == 0) enable_interrupts(PSR_I); if (__predict_true(tf->tf_spsr & PSR_F) == 0) enable_interrupts(PSR_F); } p = td->td_proc; if (usermode) { td->td_pticks = 0; if (td->td_ucred != p->p_ucred) cred_update_thread(td); } /* Invoke the appropriate handler, if necessary. */ if (__predict_false(aborts[idx].func != NULL)) { if ((aborts[idx].func)(tf, idx, fsr, far, prefetch, td, &ksig)) goto do_trapsignal; goto out; } /* * At this point, we're dealing with one of the following aborts: * * FAULT_TRAN_xx - Translation * FAULT_PERM_xx - Permission * * These are the main virtual memory-related faults signalled by * the MMU. */ /* fusubailout is used by [fs]uswintr to avoid page faulting */ pcb = td->td_pcb; if (__predict_false(pcb->pcb_onfault == fusubailout)) { tf->tf_r0 = EFAULT; tf->tf_pc = (register_t)pcb->pcb_onfault; return; } /* * QQQ: ARM has a set of unprivileged load and store instructions * (LDRT/LDRBT/STRT/STRBT ...) which are supposed to be used * in other than user mode and OS should recognize their * aborts and behaved appropriately. However, there is no way * how to do that reasonably in general unless we restrict * the handling somehow. One way is to limit the handling for * aborts which come from undefined mode only. * * Anyhow, we do not use these instructions and do not implement * any special handling for them. */ va = trunc_page(far); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) goto nogo; map = kernel_map; } else { /* * This is a fault on non-kernel virtual memory. If curproc * is NULL or curproc->p_vmspace is NULL the fault is fatal. */ vm = (p != NULL) ? p->p_vmspace : NULL; if (vm == NULL) goto nogo; map = &vm->vm_map; if (!usermode && (td->td_intr_nesting_level != 0 || pcb->pcb_onfault == NULL)) { abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig); return; } } ftype = (fsr & FSR_WNR) ? VM_PROT_WRITE : VM_PROT_READ; if (prefetch) ftype |= VM_PROT_EXECUTE; #ifdef DEBUG last_fault_code = fsr; #endif #ifndef ARM_NEW_PMAP if (pmap_fault_fixup(vmspace_pmap(td->td_proc->p_vmspace), va, ftype, usermode)) { goto out; } #endif #ifdef INVARIANTS onfault = pcb->pcb_onfault; pcb->pcb_onfault = NULL; #endif if (map != kernel_map) { /* * Keep swapout from messing with us during this * critical time. */ PROC_LOCK(p); ++p->p_lock; PROC_UNLOCK(p); /* Fault in the user page: */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); PROC_LOCK(p); --p->p_lock; PROC_UNLOCK(p); } else { /* * Don't have to worry about process locking or stacks in the * kernel. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); } #ifdef INVARIANTS pcb->pcb_onfault = onfault; #endif if (__predict_true(rv == KERN_SUCCESS)) goto out; nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != NULL) { tf->tf_r0 = rv; tf->tf_pc = (int)pcb->pcb_onfault; return; } CTR2(KTR_TRAP, "%s: vm_fault() failed with %d", __func__, rv); abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig); return; } ksig.sig = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; ksig.code = 0; ksig.addr = far; do_trapsignal: call_trapsignal(td, ksig.sig, ksig.code, ksig.addr); out: if (usermode) userret(td, tf); } /* * abort_fatal() handles the following data aborts: * FAULT_DEBUG - Debug Event * FAULT_ACCESS_xx - Acces Bit * FAULT_EA_PREC - Precise External Abort * FAULT_DOMAIN_xx - Domain Fault * FAULT_EA_TRAN_xx - External Translation Abort * FAULT_EA_IMPREC - Imprecise External Abort * + all undefined codes for ABORT * * We should never see these on a properly functioning system. * * This function is also called by the other handlers if they * detect a fatal problem. * * Note: If 'l' is NULL, we assume we're dealing with a prefetch abort. */ static int abort_fatal(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch, struct thread *td, struct ksig *ksig) { u_int usermode; const char *mode; const char *rw_mode; usermode = TRAPF_USERMODE(tf); mode = usermode ? "user" : "kernel"; rw_mode = fsr & FSR_WNR ? "write" : "read"; disable_interrupts(PSR_I|PSR_F); if (td != NULL) { printf("Fatal %s mode data abort: '%s' on %s\n", mode, aborts[idx].desc, rw_mode); printf("trapframe: %p\nFSR=%08x, FAR=", tf, fsr); if (idx != FAULT_EA_IMPREC) printf("%08x, ", far); else printf("Invalid, "); printf("spsr=%08x\n", tf->tf_spsr); } else { printf("Fatal %s mode prefetch abort at 0x%08x\n", mode, tf->tf_pc); printf("trapframe: %p, spsr=%08x\n", tf, tf->tf_spsr); } printf("r0 =%08x, r1 =%08x, r2 =%08x, r3 =%08x\n", tf->tf_r0, tf->tf_r1, tf->tf_r2, tf->tf_r3); printf("r4 =%08x, r5 =%08x, r6 =%08x, r7 =%08x\n", tf->tf_r4, tf->tf_r5, tf->tf_r6, tf->tf_r7); printf("r8 =%08x, r9 =%08x, r10=%08x, r11=%08x\n", tf->tf_r8, tf->tf_r9, tf->tf_r10, tf->tf_r11); printf("r12=%08x, ", tf->tf_r12); if (usermode) printf("usp=%08x, ulr=%08x", tf->tf_usr_sp, tf->tf_usr_lr); else printf("ssp=%08x, slr=%08x", tf->tf_svc_sp, tf->tf_svc_lr); printf(", pc =%08x\n\n", tf->tf_pc); #ifdef KDB if (debugger_on_panic || kdb_active) kdb_trap(fsr, 0, tf); #endif panic("Fatal abort"); /*NOTREACHED*/ } /* * abort_align() handles the following data abort: * * FAULT_ALIGN - Alignment fault * * Every memory access should be correctly aligned in kernel including * user to kernel and vice versa data copying, so we ignore pcb_onfault, * and it's always fatal. We generate a signal in case of abort from user mode. */ static int abort_align(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch, struct thread *td, struct ksig *ksig) { u_int usermode; usermode = TRAPF_USERMODE(tf); /* * Alignment faults are always fatal if they occur in any but user mode. * * XXX The old trap code handles pcb fault even for alignment traps. * Unfortunately, we don't known why and if is this need. */ if (!usermode) { if (td->td_intr_nesting_level == 0 && td != NULL && td->td_pcb->pcb_onfault != NULL) { printf("%s: Got alignment fault with pcb_onfault set" ", please report this issue\n", __func__); tf->tf_r0 = EFAULT;; tf->tf_pc = (int)td->td_pcb->pcb_onfault; return (0); } abort_fatal(tf, idx, fsr, far, prefetch, td, ksig); } /* Deliver a bus error signal to the process */ ksig->code = 0; ksig->sig = SIGBUS; ksig->addr = far; return (1); } /* * abort_icache() handles the following data abort: * * FAULT_ICACHE - Instruction cache maintenance * * According to manual, FAULT_ICACHE is translation fault during cache * maintenance operation. In fact, no cache maintenance operation on * not mapped virtual addresses should be called. As cache maintenance * operation (except DMB, DSB, and Flush Prefetch Buffer) are priviledged, * the abort is concider as fatal for now. However, all the matter with * cache maintenance operation on virtual addresses could be really complex * and fuzzy in SMP case, so maybe in future standard fault mechanism * should be held here including vm_fault() calling. */ static int abort_icache(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch, struct thread *td, struct ksig *ksig) { abort_fatal(tf, idx, fsr, far, prefetch, td, ksig); return(0); } Index: projects/ifnet/sys/arm/arm/vm_machdep.c =================================================================== --- projects/ifnet/sys/arm/arm/vm_machdep.c (revision 281649) +++ projects/ifnet/sys/arm/arm/vm_machdep.c (revision 281650) @@ -1,346 +1,342 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary :forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * struct switchframe and trapframe must both be a multiple of 8 * for correct stack alignment. */ CTASSERT(sizeof(struct switchframe) == 48); CTASSERT(sizeof(struct trapframe) == 80); /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(register struct thread *td1, register struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; struct mdproc *mdp2; if ((flags & RFPROC) == 0) return; /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *) (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; #ifdef __XSCALE__ #ifndef CPU_XSCALE_CORE3 pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE); #endif #endif td2->td_pcb = pcb2; /* Clone td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point to mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2)); /* Point the frame to the stack in front of pcb and copy td1's frame */ td2->td_frame = (struct trapframe *)pcb2 - 1; *td2->td_frame = *td1->td_frame; /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. */ pmap_set_pcb_pagedir(vmspace_pmap(p2->p_vmspace), pcb2); pcb2->pcb_regs.sf_r4 = (register_t)fork_return; pcb2->pcb_regs.sf_r5 = (register_t)td2; pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline; pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame); pcb2->pcb_vfpcpu = -1; pcb2->pcb_vfpstate.fpscr = VFPSCR_DN | VFPSCR_FZ; tf = td2->td_frame; tf->tf_spsr &= ~PSR_C; tf->tf_r0 = 0; tf->tf_r1 = 0; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_cspr = PSR_SVC32_MODE;; #ifdef ARM_TP_ADDRESS td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS; #else td2->td_md.md_tp = td1->td_md.md_tp; #endif } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; int fixup; #ifdef __ARMEB__ u_int call; #endif frame = td->td_frame; fixup = 0; #ifdef __ARMEB__ /* * __syscall returns an off_t while most other syscalls return an * int. As an off_t is 64-bits and an int is 32-bits we need to * place the returned data into r1. As the lseek and frerebsd6_lseek * syscalls also return an off_t they do not need this fixup. */ -#ifdef __ARM_EABI__ call = frame->tf_r7; -#else - call = *(u_int32_t *)(frame->tf_pc - INSN_SIZE) & 0x000fffff; -#endif if (call == SYS___syscall) { register_t *ap = &frame->tf_r0; register_t code = ap[_QUAD_LOWWORD]; if (td->td_proc->p_sysent->sv_mask) code &= td->td_proc->p_sysent->sv_mask; fixup = (code != SYS_freebsd6_lseek && code != SYS_lseek) ? 1 : 0; } #endif switch (error) { case 0: if (fixup) { frame->tf_r0 = 0; frame->tf_r1 = td->td_retval[0]; } else { frame->tf_r0 = td->td_retval[0]; frame->tf_r1 = td->td_retval[1]; } frame->tf_spsr &= ~PSR_C; /* carry bit */ break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ frame->tf_pc -= INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_r0 = error; frame->tf_spsr |= PSR_C; /* carry bit */ break; } } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Put enough state in the new thread's PCB to get it to go back * userret(), where we can intercept it again to set the return (upcall) * Address and stack, along with those from upcals that are from other sources * such as those generated in thread_userret() itself. */ void cpu_set_upcall(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_regs.sf_r4 = (register_t)fork_return; td->td_pcb->pcb_regs.sf_r5 = (register_t)td; td->td_pcb->pcb_regs.sf_lr = (register_t)fork_trampoline; td->td_pcb->pcb_regs.sf_sp = STACKALIGN(td->td_frame); td->td_frame->tf_spsr &= ~PSR_C; td->td_frame->tf_r0 = 0; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = PSR_SVC32_MODE; } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; tf->tf_usr_sp = STACKALIGN((int)stack->ss_sp + stack->ss_size); tf->tf_pc = (int)entry; tf->tf_r0 = (int)arg; tf->tf_spsr = PSR_USR32_MODE; } int cpu_set_user_tls(struct thread *td, void *tls_base) { td->td_md.md_tp = (register_t)tls_base; if (td == curthread) { critical_enter(); #ifdef ARM_TP_ADDRESS *(register_t *)ARM_TP_ADDRESS = (register_t)tls_base; #else set_tls(tls_base); #endif critical_exit(); } return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; /* * Ensure td_frame is aligned to an 8 byte boundary as it will be * placed into the stack pointer which must be 8 byte aligned in * the ARM EABI. */ td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1; #ifdef __XSCALE__ #ifndef CPU_XSCALE_CORE3 pmap_use_minicache(td->td_kstack, td->td_kstack_pages * PAGE_SIZE); #endif #endif } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_regs.sf_r4 = (register_t)func; /* function */ td->td_pcb->pcb_regs.sf_r5 = (register_t)arg; /* first arg */ } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending) busdma_swi(); } void cpu_exit(struct thread *td) { } Index: projects/ifnet/sys/dev/cxgbe/tom/t4_listen.c =================================================================== --- projects/ifnet/sys/dev/cxgbe/tom/t4_listen.c (revision 281649) +++ projects/ifnet/sys/dev/cxgbe/tom/t4_listen.c (revision 281650) @@ -1,1627 +1,1575 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* stid services */ static int alloc_stid(struct adapter *, struct listen_ctx *, int); static struct listen_ctx *lookup_stid(struct adapter *, int); static void free_stid(struct adapter *, struct listen_ctx *); /* lctx services */ static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, struct port_info *); static int free_lctx(struct adapter *, struct listen_ctx *); static void hold_lctx(struct listen_ctx *); static void listen_hash_add(struct adapter *, struct listen_ctx *); static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *); static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *); static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *); static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *); static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); static void send_reset_synqe(struct toedev *, struct synq_entry *); static int alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6) { struct tid_info *t = &sc->tids; u_int stid, n, f, mask; struct stid_region *sr = &lctx->stid_region; /* * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in * the TCAM. The start of the stid region is properly aligned (the chip * requires each region to be 128-cell aligned). */ n = isipv6 ? 2 : 1; mask = n - 1; KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0, ("%s: stid region (%u, %u) not properly aligned. n = %u", __func__, t->stid_base, t->nstids, n)); mtx_lock(&t->stid_lock); if (n > t->nstids - t->stids_in_use) { mtx_unlock(&t->stid_lock); return (-1); } if (t->nstids_free_head >= n) { /* * This allocation will definitely succeed because the region * starts at a good alignment and we just checked we have enough * stids free. */ f = t->nstids_free_head & mask; t->nstids_free_head -= n + f; stid = t->nstids_free_head; TAILQ_INSERT_HEAD(&t->stids, sr, link); } else { struct stid_region *s; stid = t->nstids_free_head; TAILQ_FOREACH(s, &t->stids, link) { stid += s->used + s->free; f = stid & mask; if (s->free >= n + f) { stid -= n + f; s->free -= n + f; TAILQ_INSERT_AFTER(&t->stids, s, sr, link); goto allocated; } } if (__predict_false(stid != t->nstids)) { panic("%s: stids TAILQ (%p) corrupt." " At %d instead of %d at the end of the queue.", __func__, &t->stids, stid, t->nstids); } mtx_unlock(&t->stid_lock); return (-1); } allocated: sr->used = n; sr->free = f; t->stids_in_use += n; t->stid_tab[stid] = lctx; mtx_unlock(&t->stid_lock); KASSERT(((stid + t->stid_base) & mask) == 0, ("%s: EDOOFUS.", __func__)); return (stid + t->stid_base); } static struct listen_ctx * lookup_stid(struct adapter *sc, int stid) { struct tid_info *t = &sc->tids; return (t->stid_tab[stid - t->stid_base]); } static void free_stid(struct adapter *sc, struct listen_ctx *lctx) { struct tid_info *t = &sc->tids; struct stid_region *sr = &lctx->stid_region; struct stid_region *s; KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used)); mtx_lock(&t->stid_lock); s = TAILQ_PREV(sr, stid_head, link); if (s != NULL) s->free += sr->used + sr->free; else t->nstids_free_head += sr->used + sr->free; KASSERT(t->stids_in_use >= sr->used, ("%s: stids_in_use (%u) < stids being freed (%u)", __func__, t->stids_in_use, sr->used)); t->stids_in_use -= sr->used; TAILQ_REMOVE(&t->stids, sr, link); mtx_unlock(&t->stid_lock); } static struct listen_ctx * alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) { struct listen_ctx *lctx; INP_WLOCK_ASSERT(inp); lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO); if (lctx == NULL) return (NULL); lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6); if (lctx->stid < 0) { free(lctx, M_CXGBE); return (NULL); } if (inp->inp_vflag & INP_IPV6 && !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) { struct tom_data *td = sc->tom_softc; lctx->ce = hold_lip(td, &inp->in6p_laddr); if (lctx->ce == NULL) { free(lctx, M_CXGBE); return (NULL); } } lctx->ctrlq = &sc->sge.ctrlq[pi->port_id]; lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq]; refcount_init(&lctx->refcount, 1); TAILQ_INIT(&lctx->synq); lctx->inp = inp; in_pcbref(inp); return (lctx); } /* Don't call this directly, use release_lctx instead */ static int free_lctx(struct adapter *sc, struct listen_ctx *lctx) { struct inpcb *inp = lctx->inp; struct tom_data *td = sc->tom_softc; INP_WLOCK_ASSERT(inp); KASSERT(lctx->refcount == 0, ("%s: refcount %d", __func__, lctx->refcount)); KASSERT(TAILQ_EMPTY(&lctx->synq), ("%s: synq not empty.", __func__)); KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid)); CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p", __func__, lctx->stid, lctx, lctx->inp); if (lctx->ce) release_lip(td, lctx->ce); free_stid(sc, lctx); free(lctx, M_CXGBE); return (in_pcbrele_wlocked(inp)); } static void hold_lctx(struct listen_ctx *lctx) { refcount_acquire(&lctx->refcount); } static inline uint32_t listen_hashfn(void *key, u_long mask) { return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask); } /* * Add a listen_ctx entry to the listen hash table. */ static void listen_hash_add(struct adapter *sc, struct listen_ctx *lctx) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(lctx->inp, td->listen_mask); mtx_lock(&td->lctx_hash_lock); LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link); td->lctx_count++; mtx_unlock(&td->lctx_hash_lock); } /* * Look for the listening socket's context entry in the hash and return it. */ static struct listen_ctx * listen_hash_find(struct adapter *sc, struct inpcb *inp) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(inp, td->listen_mask); struct listen_ctx *lctx; mtx_lock(&td->lctx_hash_lock); LIST_FOREACH(lctx, &td->listen_hash[bucket], link) { if (lctx->inp == inp) break; } mtx_unlock(&td->lctx_hash_lock); return (lctx); } /* * Removes the listen_ctx structure for inp from the hash and returns it. */ static struct listen_ctx * listen_hash_del(struct adapter *sc, struct inpcb *inp) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(inp, td->listen_mask); struct listen_ctx *lctx, *l; mtx_lock(&td->lctx_hash_lock); LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) { if (lctx->inp == inp) { LIST_REMOVE(lctx, link); td->lctx_count--; break; } } mtx_unlock(&td->lctx_hash_lock); return (lctx); } /* * Releases a hold on the lctx. Must be called with the listening socket's inp * locked. The inp may be freed by this function and it returns NULL to * indicate this. */ static struct inpcb * release_lctx(struct adapter *sc, struct listen_ctx *lctx) { struct inpcb *inp = lctx->inp; int inp_freed = 0; INP_WLOCK_ASSERT(inp); if (refcount_release(&lctx->refcount)) inp_freed = free_lctx(sc, lctx); return (inp_freed ? NULL : inp); } static void send_reset_synqe(struct toedev *tod, struct synq_entry *synqe) { struct adapter *sc = tod->tod_softc; struct mbuf *m = synqe->syn; struct ifnet *ifp = m->m_pkthdr.rcvif; struct port_info *pi = ifp->if_softc; struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; struct wrqe *wr; struct fw_flowc_wr *flowc; struct cpl_abort_req *req; int txqid, rxqid, flowclen; struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; const int nparams = 6; unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; INP_WLOCK_ASSERT(synqe->lctx->inp); CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s", __func__, synqe, synqe->flags, synqe->tid, synqe->flags & TPF_ABORT_SHUTDOWN ? " (abort already in progress)" : ""); if (synqe->flags & TPF_ABORT_SHUTDOWN) return; /* abort already in progress */ synqe->flags |= TPF_ABORT_SHUTDOWN; get_qids_from_mbuf(m, &txqid, &rxqid); ofld_txq = &sc->sge.ofld_txq[txqid]; ofld_rxq = &sc->sge.ofld_rxq[rxqid]; /* The wrqe will have two WRs - a flowc followed by an abort_req */ flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE)); /* First the flowc ... */ memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(synqe->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = htobe32(pfvf); flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; flowc->mnemval[1].val = htobe32(pi->tx_chan); flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; flowc->mnemval[2].val = htobe32(pi->tx_chan); flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id); flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; flowc->mnemval[4].val = htobe32(512); flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[5].val = htobe32(512); synqe->flags |= TPF_FLOWC_WR_SENT; /* ... then ABORT request */ INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid); req->rsvd0 = 0; /* don't have a snd_nxt */ req->rsvd1 = 1; /* no data sent yet */ req->cmd = CPL_ABORT_SEND_RST; t4_l2t_send(sc, wr, e); } static int create_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_pass_open_req *req; struct inpcb *inp = lctx->inp; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { log(LOG_ERR, "%s: allocation failure", __func__); return (ENOMEM); } req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid)); req->local_port = inp->inp_lport; req->peer_port = 0; req->local_ip = inp->inp_laddr.s_addr; req->peer_ip = 0; req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); t4_wrq_tx(sc, wr); return (0); } static int create_server6(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_pass_open_req6 *req; struct inpcb *inp = lctx->inp; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { log(LOG_ERR, "%s: allocation failure", __func__); return (ENOMEM); } req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid)); req->local_port = inp->inp_lport; req->peer_port = 0; req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; req->peer_ip_hi = 0; req->peer_ip_lo = 0; req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); t4_wrq_tx(sc, wr); return (0); } static int destroy_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_close_listsvr_req *req; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, lctx->stid)); req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id); req->rsvd = htobe16(0); t4_wrq_tx(sc, wr); return (0); } /* * Start a listening server by sending a passive open request to HW. * * Can't take adapter lock here and access to sc->flags, sc->open_device_map, * sc->offload_map, if_capenable are all race prone. */ int t4_listen_start(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; struct port_info *pi; struct inpcb *inp = tp->t_inpcb; struct listen_ctx *lctx; int i, rc; INP_WLOCK_ASSERT(inp); /* Don't start a hardware listener for any loopback address. */ if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr)) return (0); if (!(inp->inp_vflag & INP_IPV6) && IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr))) return (0); #if 0 ADAPTER_LOCK(sc); if (IS_BUSY(sc)) { log(LOG_ERR, "%s: listen request ignored, %s is busy", __func__, device_get_nameunit(sc->dev)); goto done; } KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM not initialized", __func__)); #endif if ((sc->open_device_map & sc->offload_map) == 0) goto done; /* no port that's UP with IFCAP_TOE enabled */ /* * Find a running port with IFCAP_TOE (4 or 6). We'll use the first * such port's queues to send the passive open and receive the reply to * it. * * XXX: need a way to mark a port in use by offload. if_cxgbe should * then reject any attempt to bring down such a port (and maybe reject * attempts to disable IFCAP_TOE on that port too?). */ for_each_port(sc, i) { if (isset(&sc->open_device_map, i) && sc->port[i]->ifp->if_capenable & IFCAP_TOE) break; } KASSERT(i < sc->params.nports, ("%s: no running port with TOE capability enabled.", __func__)); pi = sc->port[i]; if (listen_hash_find(sc, inp) != NULL) goto done; /* already setup */ lctx = alloc_lctx(sc, inp, pi); if (lctx == NULL) { log(LOG_ERR, "%s: listen request ignored, %s couldn't allocate lctx\n", __func__, device_get_nameunit(sc->dev)); goto done; } listen_hash_add(sc, lctx); CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x", __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp, inp->inp_vflag); if (inp->inp_vflag & INP_IPV6) rc = create_server6(sc, lctx); else rc = create_server(sc, lctx); if (rc != 0) { log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n", __func__, device_get_nameunit(sc->dev), rc); (void) listen_hash_del(sc, inp); inp = release_lctx(sc, lctx); /* can't be freed, host stack has a reference */ KASSERT(inp != NULL, ("%s: inp freed", __func__)); goto done; } lctx->flags |= LCTX_RPL_PENDING; done: #if 0 ADAPTER_UNLOCK(sc); #endif return (0); } int t4_listen_stop(struct toedev *tod, struct tcpcb *tp) { struct listen_ctx *lctx; struct adapter *sc = tod->tod_softc; struct inpcb *inp = tp->t_inpcb; struct synq_entry *synqe; INP_WLOCK_ASSERT(inp); lctx = listen_hash_del(sc, inp); if (lctx == NULL) return (ENOENT); /* no hardware listener for this inp */ CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid, lctx, lctx->flags); /* * If the reply to the PASS_OPEN is still pending we'll wait for it to * arrive and clean up when it does. */ if (lctx->flags & LCTX_RPL_PENDING) { KASSERT(TAILQ_EMPTY(&lctx->synq), ("%s: synq not empty.", __func__)); return (EINPROGRESS); } /* * The host stack will abort all the connections on the listening * socket's so_comp. It doesn't know about the connections on the synq * so we need to take care of those. */ TAILQ_FOREACH(synqe, &lctx->synq, link) { if (synqe->flags & TPF_SYNQE_HAS_L2TE) send_reset_synqe(tod, synqe); } destroy_server(sc, lctx); return (0); } static inline void hold_synqe(struct synq_entry *synqe) { refcount_acquire(&synqe->refcnt); } static inline void release_synqe(struct synq_entry *synqe) { if (refcount_release(&synqe->refcnt)) { int needfree = synqe->flags & TPF_SYNQE_NEEDFREE; m_freem(synqe->syn); if (needfree) free(synqe, M_CXGBE); } } void t4_syncache_added(struct toedev *tod __unused, void *arg) { struct synq_entry *synqe = arg; hold_synqe(synqe); } void t4_syncache_removed(struct toedev *tod __unused, void *arg) { struct synq_entry *synqe = arg; release_synqe(synqe); } /* XXX */ extern void tcp_dooptions(struct tcpopt *, u_char *, int, int); int t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) { struct adapter *sc = tod->tod_softc; struct synq_entry *synqe = arg; struct wrqe *wr; struct l2t_entry *e; struct tcpopt to; struct ip *ip = mtod(m, struct ip *); struct tcphdr *th; wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr); if (wr == NULL) { m_freem(m); return (EALREADY); } if (ip->ip_v == IPVERSION) th = (void *)(ip + 1); else th = (void *)((struct ip6_hdr *)ip + 1); bzero(&to, sizeof(to)); tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th), TO_SYN); /* save these for later */ synqe->iss = be32toh(th->th_seq); synqe->ts = to.to_tsval; if (is_t5(sc)) { struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr); rpl5->iss = th->th_seq; } e = &sc->l2t->l2tab[synqe->l2e_idx]; t4_l2t_send(sc, wr, e); m_freem(m); /* don't need this any more */ return (0); } static int do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1); int stid = GET_TID(cpl); unsigned int status = cpl->status; struct listen_ctx *lctx = lookup_stid(sc, stid); struct inpcb *inp = lctx->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PASS_OPEN_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); INP_WLOCK(inp); CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x", __func__, stid, status, lctx->flags); lctx->flags &= ~LCTX_RPL_PENDING; if (status != CPL_ERR_NONE) log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status); #ifdef INVARIANTS /* * If the inp has been dropped (listening socket closed) then * listen_stop must have run and taken the inp out of the hash. */ if (inp->inp_flags & INP_DROPPED) { KASSERT(listen_hash_del(sc, inp) == NULL, ("%s: inp %p still in listen hash", __func__, inp)); } #endif if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) { if (release_lctx(sc, lctx) != NULL) INP_WUNLOCK(inp); return (status); } /* * Listening socket stopped listening earlier and now the chip tells us * it has started the hardware listener. Stop it; the lctx will be * released in do_close_server_rpl. */ if (inp->inp_flags & INP_DROPPED) { destroy_server(sc, lctx); INP_WUNLOCK(inp); return (status); } /* * Failed to start hardware listener. Take inp out of the hash and * release our reference on it. An error message has been logged * already. */ if (status != CPL_ERR_NONE) { listen_hash_del(sc, inp); if (release_lctx(sc, lctx) != NULL) INP_WUNLOCK(inp); return (status); } /* hardware listener open for business */ INP_WUNLOCK(inp); return (status); } static int do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1); int stid = GET_TID(cpl); unsigned int status = cpl->status; struct listen_ctx *lctx = lookup_stid(sc, stid); struct inpcb *inp = lctx->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status); if (status != CPL_ERR_NONE) { log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n", __func__, status, stid); return (status); } INP_WLOCK(inp); inp = release_lctx(sc, lctx); if (inp != NULL) INP_WUNLOCK(inp); return (status); } static void done_with_synqe(struct adapter *sc, struct synq_entry *synqe) { struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp; struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc; struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; INP_WLOCK_ASSERT(inp); TAILQ_REMOVE(&lctx->synq, synqe, link); inp = release_lctx(sc, lctx); if (inp) INP_WUNLOCK(inp); remove_tid(sc, synqe->tid); release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]); t4_l2t_release(e); release_synqe(synqe); /* removed from synq list */ } int do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct synq_entry *synqe = lookup_tid(sc, tid); struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp; int txqid; struct sge_wrq *ofld_txq; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_REQ_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); if (negative_advice(cpl->status)) return (0); /* Ignore negative advice */ INP_WLOCK(inp); get_qids_from_mbuf(synqe->syn, &txqid, NULL); ofld_txq = &sc->sge.ofld_txq[txqid]; /* * If we'd initiated an abort earlier the reply to it is responsible for * cleaning up resources. Otherwise we tear everything down right here * right now. We owe the T4 a CPL_ABORT_RPL no matter what. */ if (synqe->flags & TPF_ABORT_SHUTDOWN) { INP_WUNLOCK(inp); goto done; } done_with_synqe(sc, synqe); /* inp lock released by done_with_synqe */ done: send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); } int do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct synq_entry *synqe = lookup_tid(sc, tid); struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_RPL_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); INP_WLOCK(inp); KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: wasn't expecting abort reply for synqe %p (0x%x)", __func__, synqe, synqe->flags)); done_with_synqe(sc, synqe); /* inp lock released by done_with_synqe */ return (0); } void t4_offload_socket(struct toedev *tod, void *arg, struct socket *so) { struct adapter *sc = tod->tod_softc; struct synq_entry *synqe = arg; #ifdef INVARIANTS struct inpcb *inp = sotoinpcb(so); #endif struct cpl_pass_establish *cpl = mtod(synqe->syn, void *); struct toepcb *toep = *(struct toepcb **)(cpl + 1); INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */ INP_WLOCK_ASSERT(inp); KASSERT(synqe->flags & TPF_SYNQE, ("%s: %p not a synq_entry?", __func__, arg)); offload_socket(so, toep); make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt); toep->flags |= TPF_CPL_PENDING; update_tid(sc, synqe->tid, toep); synqe->flags |= TPF_SYNQE_EXPANDED; } static inline void save_qids_in_mbuf(struct mbuf *m, struct port_info *pi) { uint32_t txqid, rxqid; txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq; rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq; m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff); } static inline void get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid) { if (txqid) *txqid = m->m_pkthdr.flowid >> 16; if (rxqid) *rxqid = m->m_pkthdr.flowid & 0xffff; } /* * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to * store some state temporarily. */ static struct synq_entry * mbuf_to_synqe(struct mbuf *m) { int len = roundup2(sizeof (struct synq_entry), 8); int tspace = M_TRAILINGSPACE(m); struct synq_entry *synqe = NULL; if (tspace < len) { synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT); if (synqe == NULL) return (NULL); synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE; } else { synqe = (void *)(m->m_data + m->m_len + tspace - len); synqe->flags = TPF_SYNQE; } return (synqe); } static void t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to) { bzero(to, sizeof(*to)); if (t4opt->mss) { to->to_flags |= TOF_MSS; to->to_mss = be16toh(t4opt->mss); } if (t4opt->wsf) { to->to_flags |= TOF_SCALE; to->to_wscale = t4opt->wsf; } if (t4opt->tstamp) to->to_flags |= TOF_TS; if (t4opt->sack) to->to_flags |= TOF_SACKPERM; } /* * Options2 for passive open. */ static uint32_t calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid, const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode) { struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid]; uint32_t opt2; opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) | F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id); if (V_tcp_do_rfc1323) { if (tcpopt->tstamp) opt2 |= F_TSTAMPS_EN; if (tcpopt->sack) opt2 |= F_SACK_EN; if (tcpopt->wsf <= 14) opt2 |= F_WND_SCALE_EN; } if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR)) opt2 |= F_CCTRL_ECN; /* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */ if (is_t4(sc)) opt2 |= F_RX_COALESCE_VALID; else { opt2 |= F_T5_OPT_2_VALID; opt2 |= F_CONG_CNTRL_VALID; /* OPT_2_ISS really, for T5 */ } if (sc->tt.rx_coalesce) opt2 |= V_RX_COALESCE(M_RX_COALESCE); #ifdef USE_DDP_RX_FLOW_CONTROL if (ulp_mode == ULP_MODE_TCPDDP) opt2 |= F_RX_FC_VALID | F_RX_FC_DDP; #endif return htobe32(opt2); } static void pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc, struct tcphdr *th) { const struct cpl_pass_accept_req *cpl = mtod(m, const void *); const struct ether_header *eh; unsigned int hlen = be32toh(cpl->hdr_len); uintptr_t l3hdr; const struct tcphdr *tcp; eh = (const void *)(cpl + 1); l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen)); tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen)); if (inc) { bzero(inc, sizeof(*inc)); inc->inc_fport = tcp->th_sport; inc->inc_lport = tcp->th_dport; if (((struct ip *)l3hdr)->ip_v == IPVERSION) { const struct ip *ip = (const void *)l3hdr; inc->inc_faddr = ip->ip_src; inc->inc_laddr = ip->ip_dst; } else { const struct ip6_hdr *ip6 = (const void *)l3hdr; inc->inc_flags |= INC_ISIPV6; inc->inc6_faddr = ip6->ip6_src; inc->inc6_laddr = ip6->ip6_dst; } } if (th) { bcopy(tcp, th, sizeof(*th)); tcp_fields_to_host(th); /* just like tcp_input */ } } -static int -ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6) -{ - struct ifaddr *ifa; - struct sockaddr_in6 *sin6; - int found = 0; - struct in6_addr in6 = *ip6; - - /* Just as in ip6_input */ - if (in6_clearscope(&in6) || in6_clearscope(&in6)) - return (0); - in6_setscope(&in6, ifp, NULL); - - if_addr_rlock(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - sin6 = (void *)ifa->ifa_addr; - if (sin6->sin6_family != AF_INET6) - continue; - - if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) { - found = 1; - break; - } - } - if_addr_runlock(ifp); - - return (found); -} - static struct l2t_entry * get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp, struct in_conninfo *inc) { struct rtentry *rt; struct l2t_entry *e; struct sockaddr_in6 sin6; struct sockaddr *dst = (void *)&sin6; if (inc->inc_flags & INC_ISIPV6) { dst->sa_len = sizeof(struct sockaddr_in6); dst->sa_family = AF_INET6; ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr; if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) { /* no need for route lookup */ e = t4_l2t_get(pi, ifp, dst); return (e); } } else { dst->sa_len = sizeof(struct sockaddr_in); dst->sa_family = AF_INET; ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr; } rt = rtalloc1(dst, 0, 0); if (rt == NULL) return (NULL); else { struct sockaddr *nexthop; RT_UNLOCK(rt); if (rt->rt_ifp != ifp) e = NULL; else { if (rt->rt_flags & RTF_GATEWAY) nexthop = rt->rt_gateway; else nexthop = dst; e = t4_l2t_get(pi, ifp, nexthop); } RTFREE(rt); } return (e); } -static int -ifnet_has_ip(struct ifnet *ifp, struct in_addr in) -{ - struct ifaddr *ifa; - struct sockaddr_in *sin; - int found = 0; - - if_addr_rlock(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - sin = (void *)ifa->ifa_addr; - if (sin->sin_family != AF_INET) - continue; - - if (sin->sin_addr.s_addr == in.s_addr) { - found = 1; - break; - } - } - if_addr_runlock(ifp); - - return (found); -} - #define REJECT_PASS_ACCEPT() do { \ reject_reason = __LINE__; \ goto reject; \ } while (0) /* * The context associated with a tid entry via insert_tid could be a synq_entry * or a toepcb. The only way CPL handlers can tell is via a bit in these flags. */ CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags)); /* * Incoming SYN on a listening socket. * * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe, * etc. */ static int do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; struct toedev *tod; const struct cpl_pass_accept_req *cpl = mtod(m, const void *); struct cpl_pass_accept_rpl *rpl; struct wrqe *wr; unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid)); unsigned int tid = GET_TID(cpl); struct listen_ctx *lctx = lookup_stid(sc, stid); struct inpcb *inp; struct socket *so; struct in_conninfo inc; struct tcphdr th; struct tcpopt to; struct port_info *pi; struct ifnet *hw_ifp, *ifp; struct l2t_entry *e = NULL; int rscale, mtu_idx, rx_credits, rxqid, ulp_mode; struct synq_entry *synqe = NULL; int reject_reason; uint16_t vid; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PASS_ACCEPT_REQ, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid, lctx); pass_accept_req_to_protohdrs(m, &inc, &th); t4opt_to_tcpopt(&cpl->tcpopt, &to); pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))]; hw_ifp = pi->ifp; /* the cxgbeX ifnet */ m->m_pkthdr.rcvif = hw_ifp; tod = TOEDEV(hw_ifp); /* * Figure out if there is a pseudo interface (vlan, lagg, etc.) * involved. Don't offload if the SYN had a VLAN tag and the vid * doesn't match anything on this interface. * * XXX: lagg support, lagg + vlan support. */ vid = EVL_VLANOFTAG(be16toh(cpl->vlan)); if (vid != 0xfff) { ifp = VLAN_DEVAT(hw_ifp, vid); if (ifp == NULL) REJECT_PASS_ACCEPT(); } else ifp = hw_ifp; /* * Don't offload if the peer requested a TCP option that's not known to * the silicon. */ if (cpl->tcpopt.unknown) REJECT_PASS_ACCEPT(); if (inc.inc_flags & INC_ISIPV6) { /* Don't offload if the ifcap isn't enabled */ if ((ifp->if_capenable & IFCAP_TOE6) == 0) REJECT_PASS_ACCEPT(); /* * SYN must be directed to an IP6 address on this ifnet. This * is more restrictive than in6_localip. */ - if (!ifnet_has_ip6(ifp, &inc.inc6_laddr)) + if (!in6_ifhasaddr(ifp, &inc.inc6_laddr)) REJECT_PASS_ACCEPT(); } else { /* Don't offload if the ifcap isn't enabled */ if ((ifp->if_capenable & IFCAP_TOE4) == 0) REJECT_PASS_ACCEPT(); /* * SYN must be directed to an IP address on this ifnet. This * is more restrictive than in_localip. */ - if (!ifnet_has_ip(ifp, inc.inc_laddr)) + if (!in_ifhasaddr(ifp, inc.inc_laddr)) REJECT_PASS_ACCEPT(); } e = get_l2te_for_nexthop(pi, ifp, &inc); if (e == NULL) REJECT_PASS_ACCEPT(); synqe = mbuf_to_synqe(m); if (synqe == NULL) REJECT_PASS_ACCEPT(); wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) : sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]); if (wr == NULL) REJECT_PASS_ACCEPT(); rpl = wrtod(wr); INP_INFO_WLOCK(&V_tcbinfo); /* for 4-tuple check */ /* Don't offload if the 4-tuple is already in use */ if (toe_4tuple_check(&inc, &th, ifp) != 0) { INP_INFO_WUNLOCK(&V_tcbinfo); free(wr, M_CXGBE); REJECT_PASS_ACCEPT(); } INP_INFO_WUNLOCK(&V_tcbinfo); inp = lctx->inp; /* listening socket, not owned by TOE */ INP_WLOCK(inp); /* Don't offload if the listening socket has closed */ if (__predict_false(inp->inp_flags & INP_DROPPED)) { /* * The listening socket has closed. The reply from the TOE to * our CPL_CLOSE_LISTSRV_REQ will ultimately release all * resources tied to this listen context. */ INP_WUNLOCK(inp); free(wr, M_CXGBE); REJECT_PASS_ACCEPT(); } so = inp->inp_socket; mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss)); rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0; SOCKBUF_LOCK(&so->so_rcv); /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); save_qids_in_mbuf(m, pi); get_qids_from_mbuf(m, NULL, &rxqid); if (is_t4(sc)) INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid); else { struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl; INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid); } if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) { ulp_mode = ULP_MODE_TCPDDP; synqe->flags |= TPF_SYNQE_TCPDDP; } else ulp_mode = ULP_MODE_NONE; rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode); rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode); synqe->tid = tid; synqe->lctx = lctx; synqe->syn = m; m = NULL; refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */ synqe->l2e_idx = e->idx; synqe->rcv_bufsize = rx_credits; atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr); insert_tid(sc, tid, synqe); TAILQ_INSERT_TAIL(&lctx->synq, synqe, link); hold_synqe(synqe); /* hold for the duration it's in the synq */ hold_lctx(lctx); /* A synqe on the list has a ref on its lctx */ /* * If all goes well t4_syncache_respond will get called during * syncache_add. Note that syncache_add releases the pcb lock. */ toe_syncache_add(&inc, &to, &th, inp, tod, synqe); INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */ /* * If we replied during syncache_add (synqe->wr has been consumed), * good. Otherwise, set it to 0 so that further syncache_respond * attempts by the kernel will be ignored. */ if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) { /* * syncache may or may not have a hold on the synqe, which may * or may not be stashed in the original SYN mbuf passed to us. * Just copy it over instead of dealing with all possibilities. */ m = m_dup(synqe->syn, M_NOWAIT); if (m) m->m_pkthdr.rcvif = hw_ifp; remove_tid(sc, synqe->tid); free(wr, M_CXGBE); /* Yank the synqe out of the lctx synq. */ INP_WLOCK(inp); TAILQ_REMOVE(&lctx->synq, synqe, link); release_synqe(synqe); /* removed from synq list */ inp = release_lctx(sc, lctx); if (inp) INP_WUNLOCK(inp); release_synqe(synqe); /* extra hold */ REJECT_PASS_ACCEPT(); } CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK", __func__, stid, tid, lctx, synqe); INP_WLOCK(inp); synqe->flags |= TPF_SYNQE_HAS_L2TE; if (__predict_false(inp->inp_flags & INP_DROPPED)) { /* * Listening socket closed but tod_listen_stop did not abort * this tid because there was no L2T entry for the tid at that * time. Abort it now. The reply to the abort will clean up. */ CTR6(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT", __func__, stid, tid, lctx, synqe, synqe->flags); if (!(synqe->flags & TPF_SYNQE_EXPANDED)) send_reset_synqe(tod, synqe); INP_WUNLOCK(inp); release_synqe(synqe); /* extra hold */ return (__LINE__); } INP_WUNLOCK(inp); release_synqe(synqe); /* extra hold */ return (0); reject: CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid, reject_reason); if (e) t4_l2t_release(e); release_tid(sc, tid, lctx->ctrlq); if (__predict_true(m != NULL)) { m_adj(m, sizeof(*cpl)); m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; hw_ifp->if_input(hw_ifp, m); } return (reject_reason); } static void synqe_to_protohdrs(struct synq_entry *synqe, const struct cpl_pass_establish *cpl, struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to) { uint16_t tcp_opt = be16toh(cpl->tcp_opt); /* start off with the original SYN */ pass_accept_req_to_protohdrs(synqe->syn, inc, th); /* modify parts to make it look like the ACK to our SYN|ACK */ th->th_flags = TH_ACK; th->th_ack = synqe->iss + 1; th->th_seq = be32toh(cpl->rcv_isn); bzero(to, sizeof(*to)); if (G_TCPOPT_TSTAMP(tcp_opt)) { to->to_flags |= TOF_TS; to->to_tsecr = synqe->ts; } } static int do_pass_establish(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; struct port_info *pi; struct ifnet *ifp; const struct cpl_pass_establish *cpl = (const void *)(rss + 1); #if defined(KTR) || defined(INVARIANTS) unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid)); #endif unsigned int tid = GET_TID(cpl); struct synq_entry *synqe = lookup_tid(sc, tid); struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp; struct socket *so; struct tcphdr th; struct tcpopt to; struct in_conninfo inc; struct toepcb *toep; u_int txqid, rxqid; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PASS_ESTABLISH, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); KASSERT(synqe->flags & TPF_SYNQE, ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe)); INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */ INP_WLOCK(inp); CTR6(KTR_CXGBE, "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x", __func__, stid, tid, synqe, synqe->flags, inp->inp_flags); if (__predict_false(inp->inp_flags & INP_DROPPED)) { if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } ifp = synqe->syn->m_pkthdr.rcvif; pi = ifp->if_softc; KASSERT(pi->adapter == sc, ("%s: pi %p, sc %p mismatch", __func__, pi, sc)); get_qids_from_mbuf(synqe->syn, &txqid, &rxqid); KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0], ("%s: CPL arrived on unexpected rxq. %d %d", __func__, rxqid, (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0]))); toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT); if (toep == NULL) { reset: /* * The reply to this abort will perform final cleanup. There is * no need to check for HAS_L2TE here. We can be here only if * we responded to the PASS_ACCEPT_REQ, and our response had the * L2T idx. */ send_reset_synqe(TOEDEV(ifp), synqe); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } toep->tid = tid; toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx]; if (synqe->flags & TPF_SYNQE_TCPDDP) set_tcpddp_ulp_mode(toep); else toep->ulp_mode = ULP_MODE_NONE; /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ toep->rx_credits = synqe->rcv_bufsize; so = inp->inp_socket; KASSERT(so != NULL, ("%s: socket is NULL", __func__)); /* Come up with something that syncache_expand should be ok with. */ synqe_to_protohdrs(synqe, cpl, &inc, &th, &to); /* * No more need for anything in the mbuf that carried the * CPL_PASS_ACCEPT_REQ. Drop the CPL_PASS_ESTABLISH and toep pointer * there. XXX: bad form but I don't want to increase the size of synqe. */ m = synqe->syn; KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len, ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len)); bcopy(cpl, mtod(m, void *), sizeof(*cpl)); *(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep; if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) { free_toepcb(toep); goto reset; } /* * This is for the unlikely case where the syncache entry that we added * has been evicted from the syncache, but the syncache_expand above * works because of syncookies. * * XXX: we've held the tcbinfo lock throughout so there's no risk of * anyone accept'ing a connection before we've installed our hooks, but * this somewhat defeats the purpose of having a tod_offload_socket :-( */ if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) { struct inpcb *new_inp = sotoinpcb(so); INP_WLOCK(new_inp); tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0); t4_offload_socket(TOEDEV(ifp), synqe, so); INP_WUNLOCK(new_inp); } /* Done with the synqe */ TAILQ_REMOVE(&lctx->synq, synqe, link); inp = release_lctx(sc, lctx); if (inp != NULL) INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); release_synqe(synqe); return (0); } void t4_init_listen_cpl_handlers(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl); t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl); t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req); t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish); } #endif Index: projects/ifnet/sys/dev/usb/video/udl.c =================================================================== --- projects/ifnet/sys/dev/usb/video/udl.c (revision 281649) +++ projects/ifnet/sys/dev/usb/video/udl.c (revision 281650) @@ -1,1097 +1,1154 @@ /* $OpenBSD: udl.c,v 1.81 2014/12/09 07:05:06 doug Exp $ */ /* $FreeBSD$ */ /*- * Copyright (c) 2015 Hans Petter Selasky * Copyright (c) 2009 Marcus Glocker * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * Driver for the "DisplayLink DL-120 / DL-160" graphic chips based on * the reversed engineered specifications of Florian Echtler * : * * http://floe.butterbrot.org/displaylink/doku.php */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "usbdevs.h" #include #include "fb_if.h" #undef DPRINTF #undef DPRINTFN #define USB_DEBUG_VAR udl_debug #include static SYSCTL_NODE(_hw_usb, OID_AUTO, udl, CTLFLAG_RW, 0, "USB UDL"); #ifdef USB_DEBUG static int udl_debug = 0; SYSCTL_INT(_hw_usb_udl, OID_AUTO, debug, CTLFLAG_RWTUN, &udl_debug, 0, "Debug level"); #endif #define UDL_FPS_MAX 60 #define UDL_FPS_MIN 1 static int udl_fps = 25; SYSCTL_INT(_hw_usb_udl, OID_AUTO, fps, CTLFLAG_RWTUN, &udl_fps, 0, "Frames Per Second, 1-60"); +static struct mtx udl_buffer_mtx; +static struct udl_buffer_head udl_buffer_head; + +MALLOC_DEFINE(M_USB_DL, "USB", "USB DisplayLink"); + /* * Prototypes. */ static usb_callback_t udl_bulk_write_callback; static device_probe_t udl_probe; static device_attach_t udl_attach; static device_detach_t udl_detach; static fb_getinfo_t udl_fb_getinfo; static fb_setblankmode_t udl_fb_setblankmode; static void udl_select_chip(struct udl_softc *, struct usb_attach_arg *); static int udl_init_chip(struct udl_softc *); static void udl_select_mode(struct udl_softc *); static int udl_init_resolution(struct udl_softc *); static void udl_fbmem_alloc(struct udl_softc *); static int udl_cmd_write_buf_le16(struct udl_softc *, const uint8_t *, uint32_t, uint8_t, int); static int udl_cmd_buf_copy_le16(struct udl_softc *, uint32_t, uint32_t, uint8_t, int); static void udl_cmd_insert_int_1(struct udl_cmd_buf *, uint8_t); static void udl_cmd_insert_int_3(struct udl_cmd_buf *, uint32_t); static void udl_cmd_insert_buf_le16(struct udl_cmd_buf *, const uint8_t *, uint32_t); static void udl_cmd_write_reg_1(struct udl_cmd_buf *, uint8_t, uint8_t); static void udl_cmd_write_reg_3(struct udl_cmd_buf *, uint8_t, uint32_t); static int udl_power_save(struct udl_softc *, int, int); static const struct usb_config udl_config[UDL_N_TRANSFER] = { [UDL_BULK_WRITE_0] = { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_TX, .flags = {.pipe_bof = 1,.force_short_xfer = 1,.ext_buffer = 1,}, .bufsize = UDL_CMD_MAX_DATA_SIZE * UDL_CMD_MAX_FRAMES, .callback = &udl_bulk_write_callback, .frames = UDL_CMD_MAX_FRAMES, .timeout = 5000, /* 5 seconds */ }, [UDL_BULK_WRITE_1] = { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_TX, .flags = {.pipe_bof = 1,.force_short_xfer = 1,.ext_buffer = 1,}, .bufsize = UDL_CMD_MAX_DATA_SIZE * UDL_CMD_MAX_FRAMES, .callback = &udl_bulk_write_callback, .frames = UDL_CMD_MAX_FRAMES, .timeout = 5000, /* 5 seconds */ }, }; /* * Driver glue. */ static devclass_t udl_devclass; static device_method_t udl_methods[] = { DEVMETHOD(device_probe, udl_probe), DEVMETHOD(device_attach, udl_attach), DEVMETHOD(device_detach, udl_detach), DEVMETHOD(fb_getinfo, udl_fb_getinfo), DEVMETHOD_END }; static driver_t udl_driver = { .name = "udl", .methods = udl_methods, .size = sizeof(struct udl_softc), }; DRIVER_MODULE(udl, uhub, udl_driver, udl_devclass, NULL, NULL); MODULE_DEPEND(udl, usb, 1, 1, 1); MODULE_DEPEND(udl, fbd, 1, 1, 1); MODULE_DEPEND(udl, videomode, 1, 1, 1); MODULE_VERSION(udl, 1); /* * Matching devices. */ static const STRUCT_USB_HOST_ID udl_devs[] = { {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LCD4300U, DL120)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LCD8000U, DL120)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_GUC2020, DL160)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LD220, DL165)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_VCUD60, DL160)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_DLDVI, DL160)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_VGA10, DL120)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_WSDVI, DLUNK)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_EC008, DL160)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_HPDOCK, DL160)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_NL571, DL160)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_M01061, DL195)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_NBDOCK, DL165)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_SWDVI, DLUNK)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_UM7X0, DL120)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_CONV, DL160)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_PLUGABLE, DL160)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LUM70, DL125)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_POLARIS2, DLUNK)}, {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LT1421, DLUNK)} }; +static void +udl_buffer_init(void *arg) +{ + mtx_init(&udl_buffer_mtx, "USB", "UDL", MTX_DEF); + TAILQ_INIT(&udl_buffer_head); +} +SYSINIT(udl_buffer_init, SI_SUB_LOCK, SI_ORDER_FIRST, udl_buffer_init, NULL); + +CTASSERT(sizeof(struct udl_buffer) < PAGE_SIZE); + +static void * +udl_buffer_alloc(uint32_t size) +{ + struct udl_buffer *buf; + mtx_lock(&udl_buffer_mtx); + TAILQ_FOREACH(buf, &udl_buffer_head, entry) { + if (buf->size == size) { + TAILQ_REMOVE(&udl_buffer_head, buf, entry); + break; + } + } + mtx_unlock(&udl_buffer_mtx); + if (buf != NULL) { + /* wipe and recycle buffer */ + memset(buf, 0, size); + return (buf); + } + /* allocate new buffer */ + return (malloc(size, M_USB_DL, M_WAITOK | M_ZERO)); +} + +static void +udl_buffer_free(void *_buf, uint32_t size) +{ + struct udl_buffer *buf; + + buf = (struct udl_buffer *)_buf; + if (buf == NULL) + return; + + /* + * Memory mapped buffers should never be freed. + * Put display buffer into a recycle list. + */ + mtx_lock(&udl_buffer_mtx); + buf->size = size; + TAILQ_INSERT_TAIL(&udl_buffer_head, buf, entry); + mtx_unlock(&udl_buffer_mtx); +} + static uint32_t udl_get_fb_size(struct udl_softc *sc) { unsigned i = sc->sc_cur_mode; return ((uint32_t)udl_modes[i].hdisplay * (uint32_t)udl_modes[i].vdisplay * 2); } static uint32_t udl_get_fb_width(struct udl_softc *sc) { unsigned i = sc->sc_cur_mode; return (udl_modes[i].hdisplay); } static uint32_t udl_get_fb_height(struct udl_softc *sc) { unsigned i = sc->sc_cur_mode; return (udl_modes[i].vdisplay); } static uint32_t udl_get_fb_hz(struct udl_softc *sc) { unsigned i = sc->sc_cur_mode; return (udl_modes[i].hz); } static void udl_callout(void *arg) { struct udl_softc *sc = arg; const uint32_t max = udl_get_fb_size(sc); int fps; if (sc->sc_power_save == 0) { fps = udl_fps; /* figure out number of frames per second */ if (fps < UDL_FPS_MIN) fps = UDL_FPS_MIN; else if (fps > UDL_FPS_MAX) fps = UDL_FPS_MAX; if (sc->sc_sync_off >= max) sc->sc_sync_off = 0; usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_0]); usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_1]); } else { fps = 1; } callout_reset(&sc->sc_callout, hz / fps, &udl_callout, sc); } static int udl_probe(device_t dev) { struct usb_attach_arg *uaa = device_get_ivars(dev); if (uaa->usb_mode != USB_MODE_HOST) return (ENXIO); if (uaa->info.bConfigIndex != 0) return (ENXIO); if (uaa->info.bIfaceIndex != 0) return (ENXIO); return (usbd_lookup_id_by_uaa(udl_devs, sizeof(udl_devs), uaa)); } static int udl_attach(device_t dev) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct udl_softc *sc = device_get_softc(dev); struct usb_attach_arg *uaa = device_get_ivars(dev); int error; int i; device_set_usb_desc(dev); mtx_init(&sc->sc_mtx, "UDL lock", NULL, MTX_DEF); cv_init(&sc->sc_cv, "UDLCV"); callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0); sc->sc_udev = uaa->device; error = usbd_transfer_setup(uaa->device, &uaa->info.bIfaceIndex, sc->sc_xfer, udl_config, UDL_N_TRANSFER, sc, &sc->sc_mtx); if (error) { DPRINTF("usbd_transfer_setup error=%s\n", usbd_errstr(error)); goto detach; } usbd_xfer_set_priv(sc->sc_xfer[UDL_BULK_WRITE_0], &sc->sc_xfer_head[0]); usbd_xfer_set_priv(sc->sc_xfer[UDL_BULK_WRITE_1], &sc->sc_xfer_head[1]); TAILQ_INIT(&sc->sc_xfer_head[0]); TAILQ_INIT(&sc->sc_xfer_head[1]); TAILQ_INIT(&sc->sc_cmd_buf_free); TAILQ_INIT(&sc->sc_cmd_buf_pending); sc->sc_def_chip = -1; sc->sc_chip = USB_GET_DRIVER_INFO(uaa); sc->sc_def_mode = -1; sc->sc_cur_mode = UDL_MAX_MODES; /* Allow chip ID to be overwritten */ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "chipid_force", CTLFLAG_RWTUN, &sc->sc_def_chip, 0, "chip ID"); /* Export current chip ID */ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "chipid", CTLFLAG_RD, &sc->sc_chip, 0, "chip ID"); if (sc->sc_def_chip > -1 && sc->sc_def_chip <= DLMAX) { device_printf(dev, "Forcing chip ID to 0x%04x\n", sc->sc_def_chip); sc->sc_chip = sc->sc_def_chip; } /* * The product might have more than one chip */ if (sc->sc_chip == DLUNK) udl_select_chip(sc, uaa); for (i = 0; i != UDL_CMD_MAX_BUFFERS; i++) { struct udl_cmd_buf *cb = &sc->sc_cmd_buf_temp[i]; TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_free, cb, entry); } /* * Initialize chip. */ error = udl_init_chip(sc); if (error != USB_ERR_NORMAL_COMPLETION) goto detach; /* * Select edid mode. */ udl_select_mode(sc); /* Allow default mode to be overwritten */ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "mode_force", CTLFLAG_RWTUN, &sc->sc_def_mode, 0, "mode"); /* Export current mode */ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "mode", CTLFLAG_RD, &sc->sc_cur_mode, 0, "mode"); i = sc->sc_def_mode; if (i > -1 && i < UDL_MAX_MODES) { if (udl_modes[i].chip <= sc->sc_chip) { device_printf(dev, "Forcing mode to %d\n", i); sc->sc_cur_mode = i; } } /* Printout current mode */ device_printf(dev, "Mode selected %dx%d @ %dHz\n", (int)udl_get_fb_width(sc), (int)udl_get_fb_height(sc), (int)udl_get_fb_hz(sc)); udl_init_resolution(sc); /* Allocate frame buffer */ udl_fbmem_alloc(sc); UDL_LOCK(sc); udl_callout(sc); UDL_UNLOCK(sc); sc->sc_fb_info.fb_name = device_get_nameunit(dev); sc->sc_fb_info.fb_size = sc->sc_fb_size; sc->sc_fb_info.fb_bpp = 16; sc->sc_fb_info.fb_depth = 16; sc->sc_fb_info.fb_width = udl_get_fb_width(sc); sc->sc_fb_info.fb_height = udl_get_fb_height(sc); sc->sc_fb_info.fb_stride = sc->sc_fb_info.fb_width * 2; sc->sc_fb_info.fb_pbase = 0; sc->sc_fb_info.fb_vbase = (uintptr_t)sc->sc_fb_addr; sc->sc_fb_info.fb_priv = sc; sc->sc_fb_info.setblankmode = &udl_fb_setblankmode; sc->sc_fbdev = device_add_child(dev, "fbd", -1); if (sc->sc_fbdev == NULL) goto detach; if (device_probe_and_attach(sc->sc_fbdev) != 0) goto detach; return (0); detach: udl_detach(dev); return (ENXIO); } static int udl_detach(device_t dev) { struct udl_softc *sc = device_get_softc(dev); if (sc->sc_fbdev != NULL) { device_t bdev; bdev = sc->sc_fbdev; sc->sc_fbdev = NULL; device_detach(bdev); device_delete_child(dev, bdev); } UDL_LOCK(sc); sc->sc_gone = 1; callout_stop(&sc->sc_callout); UDL_UNLOCK(sc); usbd_transfer_unsetup(sc->sc_xfer, UDL_N_TRANSFER); callout_drain(&sc->sc_callout); mtx_destroy(&sc->sc_mtx); cv_destroy(&sc->sc_cv); - /* - * Free framebuffer memory, if any. - */ - free(sc->sc_fb_addr, M_DEVBUF); - free(sc->sc_fb_copy, M_DEVBUF); + /* put main framebuffer into a recycle list, if any */ + udl_buffer_free(sc->sc_fb_addr, sc->sc_fb_size); + /* free shadow framebuffer memory, if any */ + free(sc->sc_fb_copy, M_USB_DL); + return (0); } static struct fb_info * udl_fb_getinfo(device_t dev) { struct udl_softc *sc = device_get_softc(dev); return (&sc->sc_fb_info); } static int udl_fb_setblankmode(void *arg, int mode) { struct udl_softc *sc = arg; switch (mode) { case V_DISPLAY_ON: udl_power_save(sc, 1, M_WAITOK); break; case V_DISPLAY_BLANK: udl_power_save(sc, 1, M_WAITOK); if (sc->sc_fb_addr != 0) { const uint32_t max = udl_get_fb_size(sc); memset((void *)sc->sc_fb_addr, 0, max); } break; case V_DISPLAY_STAND_BY: case V_DISPLAY_SUSPEND: udl_power_save(sc, 0, M_WAITOK); break; } return (0); } static struct udl_cmd_buf * udl_cmd_buf_alloc_locked(struct udl_softc *sc, int flags) { struct udl_cmd_buf *cb; while ((cb = TAILQ_FIRST(&sc->sc_cmd_buf_free)) == NULL) { if (flags != M_WAITOK) break; cv_wait(&sc->sc_cv, &sc->sc_mtx); } if (cb != NULL) { TAILQ_REMOVE(&sc->sc_cmd_buf_free, cb, entry); cb->off = 0; } return (cb); } static struct udl_cmd_buf * udl_cmd_buf_alloc(struct udl_softc *sc, int flags) { struct udl_cmd_buf *cb; UDL_LOCK(sc); cb = udl_cmd_buf_alloc_locked(sc, flags); UDL_UNLOCK(sc); return (cb); } static void udl_cmd_buf_send(struct udl_softc *sc, struct udl_cmd_buf *cb) { UDL_LOCK(sc); if (sc->sc_gone) { TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_free, cb, entry); } else { /* mark end of command stack */ udl_cmd_insert_int_1(cb, UDL_BULK_SOC); udl_cmd_insert_int_1(cb, UDL_BULK_CMD_EOC); TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_pending, cb, entry); usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_0]); usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_1]); } UDL_UNLOCK(sc); } static struct udl_cmd_buf * udl_fb_synchronize_locked(struct udl_softc *sc) { const uint32_t max = udl_get_fb_size(sc); /* check if framebuffer is not ready */ if (sc->sc_fb_addr == NULL || sc->sc_fb_copy == NULL) return (NULL); while (sc->sc_sync_off < max) { uint32_t delta = max - sc->sc_sync_off; if (delta > UDL_CMD_MAX_PIXEL_COUNT * 2) delta = UDL_CMD_MAX_PIXEL_COUNT * 2; if (bcmp(sc->sc_fb_addr + sc->sc_sync_off, sc->sc_fb_copy + sc->sc_sync_off, delta) != 0) { struct udl_cmd_buf *cb; cb = udl_cmd_buf_alloc_locked(sc, M_NOWAIT); if (cb == NULL) goto done; memcpy(sc->sc_fb_copy + sc->sc_sync_off, sc->sc_fb_addr + sc->sc_sync_off, delta); udl_cmd_insert_int_1(cb, UDL_BULK_SOC); udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_WRITE | UDL_BULK_CMD_FB_WORD); udl_cmd_insert_int_3(cb, sc->sc_sync_off); udl_cmd_insert_int_1(cb, delta / 2); udl_cmd_insert_buf_le16(cb, sc->sc_fb_copy + sc->sc_sync_off, delta); sc->sc_sync_off += delta; return (cb); } else { sc->sc_sync_off += delta; } } done: return (NULL); } static void udl_bulk_write_callback(struct usb_xfer *xfer, usb_error_t error) { struct udl_softc *sc = usbd_xfer_softc(xfer); struct udl_cmd_head *phead = usbd_xfer_get_priv(xfer); struct udl_cmd_buf *cb; unsigned i; switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: TAILQ_CONCAT(&sc->sc_cmd_buf_free, phead, entry); case USB_ST_SETUP: tr_setup: for (i = 0; i != UDL_CMD_MAX_FRAMES; i++) { cb = TAILQ_FIRST(&sc->sc_cmd_buf_pending); if (cb == NULL) { cb = udl_fb_synchronize_locked(sc); if (cb == NULL) break; } else { TAILQ_REMOVE(&sc->sc_cmd_buf_pending, cb, entry); } TAILQ_INSERT_TAIL(phead, cb, entry); usbd_xfer_set_frame_data(xfer, i, cb->buf, cb->off); } if (i != 0) { usbd_xfer_set_frames(xfer, i); usbd_transfer_submit(xfer); } break; default: TAILQ_CONCAT(&sc->sc_cmd_buf_free, phead, entry); if (error != USB_ERR_CANCELLED) { /* try clear stall first */ usbd_xfer_set_stall(xfer); goto tr_setup; } break; } /* wakeup any waiters */ cv_signal(&sc->sc_cv); } static int udl_power_save(struct udl_softc *sc, int on, int flags) { struct udl_cmd_buf *cb; /* get new buffer */ cb = udl_cmd_buf_alloc(sc, flags); if (cb == NULL) return (EAGAIN); DPRINTF("screen %s\n", on ? "ON" : "OFF"); sc->sc_power_save = on ? 0 : 1; if (on) udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_ON); else udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_OFF); udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff); udl_cmd_buf_send(sc, cb); return (0); } static int udl_ctrl_msg(struct udl_softc *sc, uint8_t rt, uint8_t r, uint16_t index, uint16_t value, uint8_t *buf, size_t len) { usb_device_request_t req; int error; req.bmRequestType = rt; req.bRequest = r; USETW(req.wIndex, index); USETW(req.wValue, value); USETW(req.wLength, len); error = usbd_do_request_flags(sc->sc_udev, NULL, &req, buf, 0, NULL, USB_DEFAULT_TIMEOUT); DPRINTF("%s\n", usbd_errstr(error)); return (error); } static int udl_poll(struct udl_softc *sc, uint32_t *buf) { uint32_t lbuf; int error; error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, UDL_CTRL_CMD_POLL, 0x0000, 0x0000, (uint8_t *)&lbuf, sizeof(lbuf)); if (error == USB_ERR_NORMAL_COMPLETION) *buf = le32toh(lbuf); return (error); } static int udl_read_1(struct udl_softc *sc, uint16_t addr, uint8_t *buf) { uint8_t lbuf[1]; int error; error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, UDL_CTRL_CMD_READ_1, addr, 0x0000, lbuf, 1); if (error == USB_ERR_NORMAL_COMPLETION) *buf = *(uint8_t *)lbuf; return (error); } static int udl_write_1(struct udl_softc *sc, uint16_t addr, uint8_t buf) { int error; error = udl_ctrl_msg(sc, UT_WRITE_VENDOR_DEVICE, UDL_CTRL_CMD_WRITE_1, addr, 0x0000, &buf, 1); return (error); } static int udl_read_edid(struct udl_softc *sc, uint8_t *buf) { uint8_t lbuf[64]; uint16_t offset; int error; offset = 0; error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 64); if (error != USB_ERR_NORMAL_COMPLETION) goto fail; bcopy(lbuf + 1, buf + offset, 63); offset += 63; error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 64); if (error != USB_ERR_NORMAL_COMPLETION) goto fail; bcopy(lbuf + 1, buf + offset, 63); offset += 63; error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE, UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 3); if (error != USB_ERR_NORMAL_COMPLETION) goto fail; bcopy(lbuf + 1, buf + offset, 2); fail: return (error); } static uint8_t udl_lookup_mode(uint16_t hdisplay, uint16_t vdisplay, uint8_t hz, uint16_t chip, uint32_t clock) { uint8_t idx; /* * Check first if we have a matching mode with pixelclock */ for (idx = 0; idx != UDL_MAX_MODES; idx++) { if ((udl_modes[idx].hdisplay == hdisplay) && (udl_modes[idx].vdisplay == vdisplay) && (udl_modes[idx].clock == clock) && (udl_modes[idx].chip <= chip)) { return (idx); } } /* * If not, check for matching mode with update frequency */ for (idx = 0; idx != UDL_MAX_MODES; idx++) { if ((udl_modes[idx].hdisplay == hdisplay) && (udl_modes[idx].vdisplay == vdisplay) && (udl_modes[idx].hz == hz) && (udl_modes[idx].chip <= chip)) { return (idx); } } return (idx); } static void udl_select_chip(struct udl_softc *sc, struct usb_attach_arg *uaa) { const char *pserial; pserial = usb_get_serial(uaa->device); sc->sc_chip = DL120; if ((uaa->info.idVendor == USB_VENDOR_DISPLAYLINK) && (uaa->info.idProduct == USB_PRODUCT_DISPLAYLINK_WSDVI)) { /* * WS Tech DVI is DL120 or DL160. All deviced uses the * same revision (0.04) so iSerialNumber must be used * to determin which chip it is. */ if (strlen(pserial) > 7) { if (strncmp(pserial, "0198-13", 7) == 0) sc->sc_chip = DL160; } DPRINTF("iSerialNumber (%s) used to select chip (%d)\n", pserial, sc->sc_chip); } if ((uaa->info.idVendor == USB_VENDOR_DISPLAYLINK) && (uaa->info.idProduct == USB_PRODUCT_DISPLAYLINK_SWDVI)) { /* * SUNWEIT DVI is DL160, DL125, DL165 or DL195. Major revision * can be used to differ between DL1x0 and DL1x5. Minor to * differ between DL1x5. iSerialNumber seems not to be uniqe. */ sc->sc_chip = DL160; if (uaa->info.bcdDevice >= 0x100) { sc->sc_chip = DL165; if (uaa->info.bcdDevice == 0x104) sc->sc_chip = DL195; if (uaa->info.bcdDevice == 0x108) sc->sc_chip = DL125; } DPRINTF("bcdDevice (%02x) used to select chip (%d)\n", uaa->info.bcdDevice, sc->sc_chip); } } static int udl_set_enc_key(struct udl_softc *sc, uint8_t *buf, uint8_t len) { int error; error = udl_ctrl_msg(sc, UT_WRITE_VENDOR_DEVICE, UDL_CTRL_CMD_SET_KEY, 0x0000, 0x0000, buf, len); return (error); } static void udl_fbmem_alloc(struct udl_softc *sc) { uint32_t size; size = udl_get_fb_size(sc); size = round_page(size); - + /* check for zero size */ + if (size == 0) + size = PAGE_SIZE; /* * It is assumed that allocations above PAGE_SIZE bytes will * be PAGE_SIZE aligned for use with mmap() */ - sc->sc_fb_addr = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); - sc->sc_fb_copy = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); + sc->sc_fb_addr = udl_buffer_alloc(size); + sc->sc_fb_copy = malloc(size, M_USB_DL, M_WAITOK | M_ZERO); sc->sc_fb_size = size; } static void udl_cmd_insert_int_1(struct udl_cmd_buf *cb, uint8_t value) { cb->buf[cb->off] = value; cb->off += 1; } #if 0 static void udl_cmd_insert_int_2(struct udl_cmd_buf *cb, uint16_t value) { uint16_t lvalue; lvalue = htobe16(value); bcopy(&lvalue, cb->buf + cb->off, 2); cb->off += 2; } #endif static void udl_cmd_insert_int_3(struct udl_cmd_buf *cb, uint32_t value) { uint32_t lvalue; #if BYTE_ORDER == BIG_ENDIAN lvalue = htobe32(value) << 8; #else lvalue = htobe32(value) >> 8; #endif bcopy(&lvalue, cb->buf + cb->off, 3); cb->off += 3; } #if 0 static void udl_cmd_insert_int_4(struct udl_cmd_buf *cb, uint32_t value) { uint32_t lvalue; lvalue = htobe32(value); bcopy(&lvalue, cb->buf + cb->off, 4); cb->off += 4; } #endif static void udl_cmd_insert_buf_le16(struct udl_cmd_buf *cb, const uint8_t *buf, uint32_t len) { uint32_t x; for (x = 0; x != len; x += 2) { /* byte swap from little endian to big endian */ cb->buf[cb->off + x + 0] = buf[x + 1]; cb->buf[cb->off + x + 1] = buf[x + 0]; } cb->off += len; } static void udl_cmd_write_reg_1(struct udl_cmd_buf *cb, uint8_t reg, uint8_t val) { udl_cmd_insert_int_1(cb, UDL_BULK_SOC); udl_cmd_insert_int_1(cb, UDL_BULK_CMD_REG_WRITE_1); udl_cmd_insert_int_1(cb, reg); udl_cmd_insert_int_1(cb, val); } static void udl_cmd_write_reg_3(struct udl_cmd_buf *cb, uint8_t reg, uint32_t val) { udl_cmd_write_reg_1(cb, reg + 0, (val >> 16) & 0xff); udl_cmd_write_reg_1(cb, reg + 1, (val >> 8) & 0xff); udl_cmd_write_reg_1(cb, reg + 2, (val >> 0) & 0xff); } static int udl_init_chip(struct udl_softc *sc) { uint32_t ui32; uint8_t ui8; int error; error = udl_poll(sc, &ui32); if (error != USB_ERR_NORMAL_COMPLETION) return (error); DPRINTF("poll=0x%08x\n", ui32); /* Some products may use later chip too */ switch (ui32 & 0xff) { case 0xf1: /* DL1x5 */ switch (sc->sc_chip) { case DL120: sc->sc_chip = DL125; break; case DL160: sc->sc_chip = DL165; break; } break; } DPRINTF("chip 0x%04x\n", sc->sc_chip); error = udl_read_1(sc, 0xc484, &ui8); if (error != USB_ERR_NORMAL_COMPLETION) return (error); DPRINTF("read 0x%02x from 0xc484\n", ui8); error = udl_write_1(sc, 0xc41f, 0x01); if (error != USB_ERR_NORMAL_COMPLETION) return (error); DPRINTF("write 0x01 to 0xc41f\n"); error = udl_read_edid(sc, sc->sc_edid); if (error != USB_ERR_NORMAL_COMPLETION) return (error); DPRINTF("read EDID\n"); error = udl_set_enc_key(sc, __DECONST(void *, udl_null_key_1), sizeof(udl_null_key_1)); if (error != USB_ERR_NORMAL_COMPLETION) return (error); DPRINTF("set encryption key\n"); error = udl_write_1(sc, 0xc40b, 0x00); if (error != USB_ERR_NORMAL_COMPLETION) return (error); DPRINTF("write 0x00 to 0xc40b\n"); return (USB_ERR_NORMAL_COMPLETION); } static void udl_init_fb_offsets(struct udl_cmd_buf *cb, uint32_t start16, uint32_t stride16, uint32_t start8, uint32_t stride8) { udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0x00); udl_cmd_write_reg_3(cb, UDL_REG_ADDR_START16, start16); udl_cmd_write_reg_3(cb, UDL_REG_ADDR_STRIDE16, stride16); udl_cmd_write_reg_3(cb, UDL_REG_ADDR_START8, start8); udl_cmd_write_reg_3(cb, UDL_REG_ADDR_STRIDE8, stride8); udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff); } static int udl_init_resolution(struct udl_softc *sc) { const uint32_t max = udl_get_fb_size(sc); const uint8_t *buf = udl_modes[sc->sc_cur_mode].mode; struct udl_cmd_buf *cb; uint32_t delta; uint32_t i; int error; /* get new buffer */ cb = udl_cmd_buf_alloc(sc, M_WAITOK); if (cb == NULL) return (EAGAIN); /* write resolution values and set video memory offsets */ udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0x00); for (i = 0; i < UDL_MODE_SIZE; i++) udl_cmd_write_reg_1(cb, i, buf[i]); udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff); udl_init_fb_offsets(cb, 0x000000, 0x000a00, 0x555555, 0x000500); udl_cmd_buf_send(sc, cb); /* fill screen with black color */ for (i = 0; i < max; i += delta) { static const uint8_t udl_black[UDL_CMD_MAX_PIXEL_COUNT * 2] __aligned(4); delta = max - i; if (delta > UDL_CMD_MAX_PIXEL_COUNT * 2) delta = UDL_CMD_MAX_PIXEL_COUNT * 2; if (i == 0) error = udl_cmd_write_buf_le16(sc, udl_black, i, delta / 2, M_WAITOK); else error = udl_cmd_buf_copy_le16(sc, 0, i, delta / 2, M_WAITOK); if (error) return (error); } /* get new buffer */ cb = udl_cmd_buf_alloc(sc, M_WAITOK); if (cb == NULL) return (EAGAIN); /* show framebuffer content */ udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_ON); udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff); udl_cmd_buf_send(sc, cb); return (0); } static void udl_select_mode(struct udl_softc *sc) { struct udl_mode mode; int index = UDL_MAX_MODES; int i; /* try to get the preferred mode from EDID */ edid_parse(sc->sc_edid, &sc->sc_edid_info); #ifdef USB_DEBUG edid_print(&sc->sc_edid_info); #endif if (sc->sc_edid_info.edid_preferred_mode != NULL) { mode.hz = (sc->sc_edid_info.edid_preferred_mode->dot_clock * 1000) / (sc->sc_edid_info.edid_preferred_mode->htotal * sc->sc_edid_info.edid_preferred_mode->vtotal); mode.clock = sc->sc_edid_info.edid_preferred_mode->dot_clock / 10; mode.hdisplay = sc->sc_edid_info.edid_preferred_mode->hdisplay; mode.vdisplay = sc->sc_edid_info.edid_preferred_mode->vdisplay; index = udl_lookup_mode(mode.hdisplay, mode.vdisplay, mode.hz, sc->sc_chip, mode.clock); sc->sc_cur_mode = index; } else { DPRINTF("no preferred mode found!\n"); } if (index == UDL_MAX_MODES) { DPRINTF("no mode line found for %dx%d @ %dHz!\n", mode.hdisplay, mode.vdisplay, mode.hz); i = 0; while (i < sc->sc_edid_info.edid_nmodes) { mode.hz = (sc->sc_edid_info.edid_modes[i].dot_clock * 1000) / (sc->sc_edid_info.edid_modes[i].htotal * sc->sc_edid_info.edid_modes[i].vtotal); mode.clock = sc->sc_edid_info.edid_modes[i].dot_clock / 10; mode.hdisplay = sc->sc_edid_info.edid_modes[i].hdisplay; mode.vdisplay = sc->sc_edid_info.edid_modes[i].vdisplay; index = udl_lookup_mode(mode.hdisplay, mode.vdisplay, mode.hz, sc->sc_chip, mode.clock); if (index < UDL_MAX_MODES) if ((sc->sc_cur_mode == UDL_MAX_MODES) || (index > sc->sc_cur_mode)) sc->sc_cur_mode = index; i++; } } /* * If no mode found use default. */ if (sc->sc_cur_mode == UDL_MAX_MODES) sc->sc_cur_mode = udl_lookup_mode(800, 600, 60, sc->sc_chip, 0); } static int udl_cmd_write_buf_le16(struct udl_softc *sc, const uint8_t *buf, uint32_t off, uint8_t pixels, int flags) { struct udl_cmd_buf *cb; cb = udl_cmd_buf_alloc(sc, flags); if (cb == NULL) return (EAGAIN); udl_cmd_insert_int_1(cb, UDL_BULK_SOC); udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_WRITE | UDL_BULK_CMD_FB_WORD); udl_cmd_insert_int_3(cb, off); udl_cmd_insert_int_1(cb, pixels); udl_cmd_insert_buf_le16(cb, buf, 2 * pixels); udl_cmd_buf_send(sc, cb); return (0); } static int udl_cmd_buf_copy_le16(struct udl_softc *sc, uint32_t src, uint32_t dst, uint8_t pixels, int flags) { struct udl_cmd_buf *cb; cb = udl_cmd_buf_alloc(sc, flags); if (cb == NULL) return (EAGAIN); udl_cmd_insert_int_1(cb, UDL_BULK_SOC); udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_COPY | UDL_BULK_CMD_FB_WORD); udl_cmd_insert_int_3(cb, dst); udl_cmd_insert_int_1(cb, pixels); udl_cmd_insert_int_3(cb, src); udl_cmd_buf_send(sc, cb); return (0); } Index: projects/ifnet/sys/dev/usb/video/udl.h =================================================================== --- projects/ifnet/sys/dev/usb/video/udl.h (revision 281649) +++ projects/ifnet/sys/dev/usb/video/udl.h (revision 281650) @@ -1,311 +1,320 @@ /* $OpenBSD: udl.h,v 1.21 2013/04/15 09:23:02 mglocker Exp $ */ /* $FreeBSD$ */ /* * Copyright (c) 2009 Marcus Glocker * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef _UDL_H_ #define _UDL_H_ #include #include /* * BULK command transfer structure. */ #define UDL_CMD_MAX_FRAMES 64 /* units */ #define UDL_CMD_MAX_DATA_SIZE 512 /* bytes */ #define UDL_CMD_MAX_HEAD_SIZE 16 /* bytes */ #define UDL_CMD_MAX_PIXEL_COUNT ((UDL_CMD_MAX_DATA_SIZE - UDL_CMD_MAX_HEAD_SIZE) / 2) #define UDL_CMD_MAX_BUFFERS (3 * UDL_CMD_MAX_FRAMES) #define UDL_FONT_HEIGHT 16 /* pixels */ #define UDL_MAX_MODES 25 /* units */ +MALLOC_DECLARE(M_USB_DL); + +struct udl_buffer { + TAILQ_ENTRY(udl_buffer) entry; + uint32_t size; +}; + +TAILQ_HEAD(udl_buffer_head, udl_buffer); + struct udl_cmd_buf { TAILQ_ENTRY(udl_cmd_buf) entry; uint32_t off; uint8_t buf[UDL_CMD_MAX_DATA_SIZE] __aligned(4); }; TAILQ_HEAD(udl_cmd_head, udl_cmd_buf); enum { UDL_BULK_WRITE_0, UDL_BULK_WRITE_1, UDL_N_TRANSFER, }; /* * Our per device structure. */ struct udl_softc { struct mtx sc_mtx; struct cv sc_cv; struct callout sc_callout; struct usb_xfer *sc_xfer[UDL_N_TRANSFER]; struct usb_device *sc_udev; device_t sc_fbdev; struct fb_info sc_fb_info; uint8_t sc_edid[128]; struct edid_info sc_edid_info; struct udl_cmd_head sc_xfer_head[2]; struct udl_cmd_head sc_cmd_buf_free; struct udl_cmd_head sc_cmd_buf_pending; struct udl_cmd_buf sc_cmd_buf_temp[UDL_CMD_MAX_BUFFERS]; uint32_t sc_sync_off; uint32_t sc_fb_size; uint8_t *sc_fb_addr; uint8_t *sc_fb_copy; int sc_def_chip; /* default chip version */ int sc_chip; #define DLALL 0x0000 #define DL125 0x0000 /* max 1280x1024, 1440x900 */ #define DL120 0x0001 /* max 1280x1024, 1440x1050 */ #define DL160 0x0002 /* max 1600x1200, 1680x1050 */ #define DL165 0x0003 /* max 1600x1200, 1920x1080 */ #define DL195 0x0004 /* max 1920x1200, 2048x1152 */ #define DLMAX 0x0004 #define DLUNK 0x00ff /* unknown */ int sc_def_mode; /* default mode */ int sc_cur_mode; uint8_t sc_power_save; /* set if power save is enabled */ uint8_t sc_gone; }; #define UDL_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define UDL_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) /* * Chip commands. */ #define UDL_CTRL_CMD_READ_EDID 0x02 #define UDL_CTRL_CMD_WRITE_1 0x03 #define UDL_CTRL_CMD_READ_1 0x04 #define UDL_CTRL_CMD_POLL 0x06 #define UDL_CTRL_CMD_SET_KEY 0x12 #define UDL_BULK_SOC 0xaf /* start of command token */ #define UDL_BULK_CMD_REG_WRITE_1 0x20 /* write 1 byte to register */ #define UDL_BULK_CMD_EOC 0xa0 /* end of command stack */ #define UDL_BULK_CMD_DECOMP 0xe0 /* send decompression table */ #define UDL_BULK_CMD_FB_BASE 0x60 #define UDL_BULK_CMD_FB_WORD 0x08 #define UDL_BULK_CMD_FB_COMP 0x10 #define UDL_BULK_CMD_FB_WRITE (UDL_BULK_CMD_FB_BASE | 0x00) #define UDL_BULK_CMD_FB_COPY (UDL_BULK_CMD_FB_BASE | 0x02) /* * Chip registers. */ #define UDL_REG_ADDR_START16 0x20 #define UDL_REG_ADDR_STRIDE16 0x23 #define UDL_REG_ADDR_START8 0x26 #define UDL_REG_ADDR_STRIDE8 0x29 #define UDL_REG_SCREEN 0x1f #define UDL_REG_SCREEN_ON 0x00 #define UDL_REG_SCREEN_OFF 0x01 #define UDL_REG_SYNC 0xff #define UDL_MODE_SIZE 29 /* * Register values for screen resolution initialization. */ static const uint8_t udl_reg_vals_640x480_60[UDL_MODE_SIZE] = { /* 25.17 Mhz 59.9 Hz * VESA std */ 0x00, 0x99, 0x30, 0x26, 0x94, 0x60, 0xa9, 0xce, 0x60, 0x07, 0xb3, 0x0f, 0x79, 0xff, 0xff, 0x02, 0x80, 0x83, 0xbc, 0xff, 0xfc, 0xff, 0xff, 0x01, 0xe0, 0x01, 0x02, 0xab, 0x13 }; static const uint8_t udl_reg_vals_640x480_67[UDL_MODE_SIZE] = { /* 30.25 MHz 66.6 Hz MAC * std */ 0x00, 0x1d, 0x33, 0x07, 0xb3, 0x60, 0xa9, 0xce, 0x60, 0xb6, 0xa8, 0xff, 0xff, 0xbf, 0x70, 0x02, 0x80, 0x83, 0xbc, 0xff, 0xff, 0xff, 0xf9, 0x01, 0xe0, 0x01, 0x02, 0xa2, 0x17 }; static const uint8_t udl_reg_vals_640x480_72[UDL_MODE_SIZE] = { /* 31.50 Mhz 72.8 Hz * VESA std */ 0x00, 0x2b, 0xeb, 0x35, 0xd3, 0x0a, 0x95, 0xe6, 0x0e, 0x0f, 0xb5, 0x15, 0x2a, 0xff, 0xff, 0x02, 0x80, 0xcc, 0x1d, 0xff, 0xf9, 0xff, 0xff, 0x01, 0xe0, 0x01, 0x02, 0x9c, 0x18 }; static const uint8_t udl_reg_vals_640x480_75[UDL_MODE_SIZE] = { /* 31.50 Mhz 75.7 Hz * VESA std */ 0x00, 0xeb, 0xf7, 0xd3, 0x0f, 0x4f, 0x93, 0xfa, 0x47, 0xb5, 0x58, 0xff, 0xff, 0xbf, 0x70, 0x02, 0x80, 0xf4, 0x8f, 0xff, 0xff, 0xff, 0xf9, 0x01, 0xe0, 0x01, 0x02, 0x9c, 0x18 }; static const uint8_t udl_reg_vals_800x480_61[UDL_MODE_SIZE] = { /* 33.00 MHz 61.9 Hz */ 0x00, 0x20, 0x3c, 0x7a, 0xc9, 0xf2, 0x6c, 0x48, 0xf9, 0x70, 0x53, 0xff, 0xff, 0x21, 0x27, 0x03, 0x20, 0x91, 0xf3, 0xff, 0xff, 0xff, 0xf9, 0x01, 0xe0, 0x01, 0x02, 0xc8, 0x19 }; static const uint8_t udl_reg_vals_800x600_56[UDL_MODE_SIZE] = { /* 36.00 MHz 56.2 Hz * VESA std */ 0x00, 0x65, 0x35, 0x48, 0xf4, 0xf2, 0x6c, 0x19, 0x18, 0xc9, 0x4b, 0xff, 0xff, 0x70, 0x35, 0x03, 0x20, 0x32, 0x31, 0xff, 0xff, 0xff, 0xfc, 0x02, 0x58, 0x01, 0x02, 0x20, 0x1c }; static const uint8_t udl_reg_vals_800x600_60[UDL_MODE_SIZE] = { /* 40.00 MHz 60.3 Hz * VESA std */ 0x00, 0x20, 0x3c, 0x7a, 0xc9, 0x93, 0x60, 0xc8, 0xc7, 0x70, 0x53, 0xff, 0xff, 0x21, 0x27, 0x03, 0x20, 0x91, 0x8f, 0xff, 0xff, 0xff, 0xf2, 0x02, 0x58, 0x01, 0x02, 0x40, 0x1f }; static const uint8_t udl_reg_vals_800x600_72[UDL_MODE_SIZE] = { /* 50.00 MHz 72.1 Hz * VESA std */ 0x00, 0xeb, 0xf7, 0xd1, 0x90, 0x4d, 0x82, 0x23, 0x1f, 0x39, 0xcf, 0xff, 0xff, 0x43, 0x21, 0x03, 0x20, 0x62, 0xc5, 0xff, 0xff, 0xff, 0xca, 0x02, 0x58, 0x01, 0x02, 0x10, 0x27 }; static const uint8_t udl_reg_vals_800x600_74[UDL_MODE_SIZE] = { /* 50.00 MHz 74.4 Hz */ 0x00, 0xb3, 0x76, 0x39, 0xcf, 0x60, 0xa9, 0xc7, 0xf4, 0x70, 0x53, 0xff, 0xff, 0x35, 0x33, 0x03, 0x20, 0x8f, 0xe9, 0xff, 0xff, 0xff, 0xf9, 0x02, 0x58, 0x01, 0x02, 0x10, 0x27 }; static const uint8_t udl_reg_vals_800x600_75[UDL_MODE_SIZE] = { /* 49.50 MHz 75.0 Hz * VESA std */ 0x00, 0xb3, 0x76, 0x39, 0xcf, 0xf2, 0x6c, 0x19, 0x18, 0x70, 0x53, 0xff, 0xff, 0x35, 0x33, 0x03, 0x20, 0x32, 0x31, 0xff, 0xff, 0xff, 0xf9, 0x02, 0x58, 0x01, 0x02, 0xac, 0x26 }; static const uint8_t udl_reg_vals_1024x768_60[UDL_MODE_SIZE] = { /* 65.00 MHz 60.0 Hz * VESA std */ 0x00, 0x36, 0x18, 0xd5, 0x10, 0x60, 0xa9, 0x7b, 0x33, 0xa1, 0x2b, 0x27, 0x32, 0xff, 0xff, 0x04, 0x00, 0xd9, 0x9a, 0xff, 0xca, 0xff, 0xff, 0x03, 0x00, 0x04, 0x03, 0xc8, 0x32 }; static const uint8_t udl_reg_vals_1024x768_70[UDL_MODE_SIZE] = { /* 75.00 MHz 70.0 Hz * VESA std */ 0x00, 0xb4, 0xed, 0x4c, 0x5e, 0x60, 0xa9, 0x7b, 0x33, 0x10, 0x4d, 0xff, 0xff, 0x27, 0x32, 0x04, 0x00, 0xd9, 0x9a, 0xff, 0xff, 0xff, 0xca, 0x03, 0x00, 0x04, 0x02, 0x98, 0x3a }; static const uint8_t udl_reg_vals_1024x768_75[UDL_MODE_SIZE] = { /* 78.75 MHz 75.0 Hz * VESA std */ 0x00, 0xec, 0xb4, 0xa0, 0x4c, 0x36, 0x0a, 0x07, 0xb3, 0x5e, 0xd5, 0xff, 0xff, 0x0f, 0x79, 0x04, 0x00, 0x0f, 0x66, 0xff, 0xff, 0xff, 0xf9, 0x03, 0x00, 0x04, 0x02, 0x86, 0x3d }; static const uint8_t udl_reg_vals_1280x800_60[UDL_MODE_SIZE] = { /* 83.46 MHz 59.9 MHz */ 0x00, 0xb2, 0x19, 0x34, 0xdf, 0x93, 0x60, 0x30, 0xfb, 0x9f, 0xca, 0xff, 0xff, 0x27, 0x32, 0x05, 0x00, 0x61, 0xf6, 0xff, 0xff, 0xff, 0xf9, 0x03, 0x20, 0x04, 0x02, 0x34, 0x41 }; static const uint8_t udl_reg_vals_1280x960_60[UDL_MODE_SIZE] = { /* 108.00 MHz 60.0 Hz * VESA std */ 0x00, 0xa6, 0x03, 0x5c, 0x7e, 0x0a, 0x95, 0x48, 0xf4, 0x61, 0xbd, 0xff, 0xff, 0x94, 0x43, 0x05, 0x00, 0x91, 0xe8, 0xff, 0xff, 0xff, 0xf9, 0x03, 0xc0, 0x04, 0x02, 0x60, 0x54 }; static const uint8_t udl_reg_vals_1280x1024_60[UDL_MODE_SIZE] = { /* 108.00 MHz 60.0 Hz * VESA std */ 0x00, 0x98, 0xf8, 0x0d, 0x57, 0x2a, 0x55, 0x4d, 0x54, 0xca, 0x0d, 0xff, 0xff, 0x94, 0x43, 0x05, 0x00, 0x9a, 0xa8, 0xff, 0xff, 0xff, 0xf9, 0x04, 0x00, 0x04, 0x02, 0x60, 0x54 }; static const uint8_t udl_reg_vals_1280x1024_75[UDL_MODE_SIZE] = { /* 135.00 MHz 75.0 Hz * VESA std */ 0x00, 0xce, 0x12, 0x3f, 0x9f, 0x2a, 0x55, 0x4d, 0x54, 0xca, 0x0d, 0xff, 0xff, 0x32, 0x60, 0x05, 0x00, 0x9a, 0xa8, 0xff, 0xff, 0xff, 0xf9, 0x04, 0x00, 0x04, 0x02, 0x78, 0x69 }; static const uint8_t udl_reg_vals_1366x768_60[UDL_MODE_SIZE] = { /* 90 MHz 60.0 Hz */ 0x01, 0x19, 0x1e, 0x1f, 0xb0, 0x93, 0x60, 0x40, 0x7b, 0x36, 0xe8, 0x27, 0x32, 0xff, 0xff, 0x05, 0x56, 0x03, 0xd9, 0xff, 0xff, 0xfc, 0xa7, 0x03, 0x00, 0x04, 0x02, 0x9a, 0x42 }; static const uint8_t udl_reg_vals_1440x900_60[UDL_MODE_SIZE] = { /* 106.47 MHz 59.9 Hz */ 0x00, 0x24, 0xce, 0xe7, 0x72, 0x36, 0x0a, 0x86, 0xca, 0x1c, 0x10, 0xff, 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x0d, 0x94, 0xff, 0xff, 0xff, 0xf9, 0x03, 0x84, 0x04, 0x02, 0x2e, 0x53 }; static const uint8_t udl_reg_vals_1440x900_59[UDL_MODE_SIZE] = { /* 106.50 MHz 59.8 Hz */ 0x00, 0x24, 0xce, 0xe7, 0x72, 0xd8, 0x2a, 0x1b, 0x28, 0x1c, 0x10, 0xff, 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x36, 0x50, 0xff, 0xff, 0xff, 0xf9, 0x03, 0x84, 0x04, 0x02, 0x34, 0x53 }; static const uint8_t udl_reg_vals_1440x900_75[UDL_MODE_SIZE] = { /* 136.49 MHz 75.0 Hz */ 0x00, 0x73, 0xa6, 0x14, 0xea, 0x0a, 0x95, 0xca, 0x10, 0x7f, 0x46, 0xff, 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x94, 0x20, 0xff, 0xff, 0xff, 0xf9, 0x03, 0x84, 0x04, 0x02, 0xa2, 0x6a }; static const uint8_t udl_reg_vals_1680x1050_60[UDL_MODE_SIZE] = { /* 147.14 MHz 60.0 Hz */ 0x00, 0x53, 0x43, 0xa6, 0x71, 0xc1, 0x52, 0xd9, 0x29, 0x69, 0x9f, 0xff, 0xff, 0xd7, 0xee, 0x06, 0x90, 0xb2, 0x53, 0xff, 0xff, 0xff, 0xf9, 0x04, 0x1a, 0x04, 0x02, 0xf4, 0x72 }; static const uint8_t udl_reg_vals_1600x1200_60[UDL_MODE_SIZE] = { /* 162.00 MHz 60.0 Hz * VESA std */ 0x00, 0xcf, 0xa4, 0x3c, 0x4e, 0x55, 0x73, 0x71, 0x2b, 0x71, 0x52, 0xff, 0xff, 0xee, 0xca, 0x06, 0x40, 0xe2, 0x57, 0xff, 0xff, 0xff, 0xf9, 0x04, 0xb0, 0x04, 0x02, 0x90, 0x7e }; static const uint8_t udl_reg_vals_1920x1080_60[UDL_MODE_SIZE] = { /* 138.50 MHz 59.9 Hz */ 0x00, 0x73, 0xa6, 0x28, 0xb3, 0x54, 0xaa, 0x41, 0x5d, 0x0d, 0x9f, 0x32, 0x60, 0xff, 0xff, 0x07, 0x80, 0x0a, 0xea, 0xff, 0xf9, 0xff, 0xff, 0x04, 0x38, 0x04, 0x02, 0xe0, 0x7c }; struct udl_mode { uint16_t hdisplay; uint16_t vdisplay; uint8_t hz; uint16_t chip; uint32_t clock; const uint8_t *mode; }; static const struct udl_mode udl_modes[UDL_MAX_MODES] = { {640, 480, 60, DLALL, 2520, udl_reg_vals_640x480_60}, {640, 480, 67, DLALL, 3025, udl_reg_vals_640x480_67}, {640, 480, 72, DLALL, 3150, udl_reg_vals_640x480_72}, {640, 480, 75, DLALL, 3150, udl_reg_vals_640x480_75}, {800, 480, 59, DLALL, 5000, udl_reg_vals_800x480_61}, {800, 480, 61, DLALL, 3300, udl_reg_vals_800x480_61}, {800, 600, 56, DLALL, 3600, udl_reg_vals_800x600_56}, {800, 600, 60, DLALL, 4000, udl_reg_vals_800x600_60}, {800, 600, 72, DLALL, 5000, udl_reg_vals_800x600_72}, {800, 600, 74, DLALL, 5000, udl_reg_vals_800x600_74}, {800, 600, 75, DLALL, 4950, udl_reg_vals_800x600_75}, {1024, 768, 60, DLALL, 6500, udl_reg_vals_1024x768_60}, {1024, 768, 70, DLALL, 7500, udl_reg_vals_1024x768_70}, {1024, 768, 75, DLALL, 7850, udl_reg_vals_1024x768_75}, {1280, 800, 60, DLALL, 8346, udl_reg_vals_1280x800_60}, {1280, 960, 60, DLALL, 10800, udl_reg_vals_1280x960_60}, {1280, 1024, 60, DLALL, 10800, udl_reg_vals_1280x1024_60}, {1280, 1024, 75, DLALL, 13500, udl_reg_vals_1280x1024_75}, {1366, 768, 60, DLALL, 9000, udl_reg_vals_1366x768_60}, {1440, 900, 59, DL125, 10650, udl_reg_vals_1440x900_59}, {1440, 900, 60, DL125, 10647, udl_reg_vals_1440x900_60}, {1440, 900, 75, DL125, 13649, udl_reg_vals_1440x900_75}, {1680, 1050, 60, DL160, 14714, udl_reg_vals_1680x1050_60}, {1600, 1200, 60, DL160, 16200, udl_reg_vals_1600x1200_60}, {1920, 1080, 60, DL165, 13850, udl_reg_vals_1920x1080_60} }; /* * Encryption. */ static const uint8_t udl_null_key_1[] = { 0x57, 0xcd, 0xdc, 0xa7, 0x1c, 0x88, 0x5e, 0x15, 0x60, 0xfe, 0xc6, 0x97, 0x16, 0x3d, 0x47, 0xf2 }; #endif /* _UDL_H_ */ Index: projects/ifnet/sys/modules/usb/Makefile =================================================================== --- projects/ifnet/sys/modules/usb/Makefile (revision 281649) +++ projects/ifnet/sys/modules/usb/Makefile (revision 281650) @@ -1,109 +1,110 @@ # # $FreeBSD$ # # Copyright (c) 2008 Hans Petter Selasky. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # SYSDIR?=${.CURDIR}/../.. .include "${SYSDIR}/conf/kern.opts.mk" # # Allow USB modules to be built in parallel # SUBDIR_PARALLEL= # # Check for common USB debug flags to pass when building the USB # modules in this directory: # .if defined(USB_DEBUG) MAKE+=" DEBUG_FLAGS+=-DUSB_DEBUG" .endif .if defined(USB_DEBUG) && defined(USB_REQ_DEBUG) MAKE+=" DEBUG_FLAGS+=-DUSB_REQ_DEBUG" .endif # Modules that include binary-only blobs of microcode should be selectable by # MK_SOURCELESS_UCODE option (see below). SUBDIR = usb SUBDIR += ${_dwc_otg} ehci ${_musb} ohci uhci xhci ${_uss820dci} ${_at91dci} \ ${_atmegadci} ${_avr32dci} ${_rsu} ${_rsufw} ${_saf1761otg} SUBDIR += ${_rum} ${_run} ${_runfw} ${_uath} upgt usie ural ${_zyd} ${_urtw} SUBDIR += ${_urtwn} ${_urtwnfw} SUBDIR += atp uhid ukbd ums udbp ufm uep wsp uled SUBDIR += ucom u3g uark ubsa ubser uchcom ucycom ufoma uftdi ugensa uipaq ulpt \ umct umcs umodem umoscom uplcom uslcom uvisor uvscom +SUBDIR += udl SUBDIR += uether aue axe axge cdce cue ${_kue} mos rue smsc udav uhso ipheth SUBDIR += urndis SUBDIR += usfs umass urio SUBDIR += quirk template SUBDIR += ${_g_audio} ${_g_keyboard} ${_g_modem} ${_g_mouse} .if ${MK_USB_GADGET_EXAMPLES} == "yes" _g_audio= g_audio _g_keyboard= g_keyboard _g_modem= g_modem _g_mouse= g_mouse .endif .if ${MK_SOURCELESS_UCODE} != "no" _rum= rum _uath= uath _zyd= zyd _kue= kue _urtwn= urtwn _urtwnfw= urtwnfw _run= run _runfw= runfw _rsu= rsu _rsufw= rsufw .endif .if ${MACHINE_CPUARCH} == "amd64" _urtw= urtw .endif .if ${MACHINE_CPUARCH} == "arm" _at91dci= at91dci _atmegadci= atmegadci _dwc_otg= dwc_otg _musb= musb _uss820dci= uss820dci .endif .if ${MACHINE_CPUARCH} == "i386" _urtw= urtw .endif .if ${MACHINE_CPUARCH} == "avr32" _avr32dci= avr32dci .endif .if ${MACHINE_CPUARCH} == "mips" _saf1761otg= saf1761otg .endif .include Index: projects/ifnet/sys/net/altq/altqconf.h =================================================================== --- projects/ifnet/sys/net/altq/altqconf.h (revision 281649) +++ projects/ifnet/sys/net/altq/altqconf.h (nonexistent) @@ -1,29 +0,0 @@ -/* $OpenBSD: altqconf.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */ -/* $NetBSD: altqconf.h,v 1.2 2001/05/30 11:57:16 mrg Exp $ */ - -#if defined(_KERNEL_OPT) || defined(__OpenBSD__) - -#if defined(_KERNEL_OPT) -#include "opt_altq_enabled.h" -#endif - -#include - -#ifdef ALTQ -#define NALTQ 1 -#else -#define NALTQ 0 -#endif - -cdev_decl(altq); - -#ifdef __OpenBSD__ -#define cdev_altq_init(c,n) { \ - dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \ - (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \ - (dev_type_stop((*))) enodev, 0, (dev_type_select((*))) enodev, \ - (dev_type_mmap((*))) enodev } -#else -#define cdev_altq_init(x,y) cdev__oci_init(x,y) -#endif -#endif /* defined(_KERNEL_OPT) || defined(__OpenBSD__) */ Property changes on: projects/ifnet/sys/net/altq/altqconf.h ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/ifnet/sys/net/altq/altq.h =================================================================== --- projects/ifnet/sys/net/altq/altq.h (revision 281649) +++ projects/ifnet/sys/net/altq/altq.h (revision 281650) @@ -1,204 +1,204 @@ -/* $FreeBSD$ */ -/* $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ */ - -/* +/*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ + * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_H_ #define _ALTQ_ALTQ_H_ #if 0 /* * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq. * altq3 is mainly for research experiments. pf-based altq is for daily use. */ #define ALTQ3_COMPAT /* for compatibility with altq-3 */ #define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */ #endif #ifdef ALTQ3_COMPAT #include #include #include #include #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif #endif /* ALTQ3_COMPAT */ /* altq discipline type */ #define ALTQT_NONE 0 /* reserved */ #define ALTQT_CBQ 1 /* cbq */ #define ALTQT_WFQ 2 /* wfq */ #define ALTQT_AFMAP 3 /* afmap */ #define ALTQT_FIFOQ 4 /* fifoq */ #define ALTQT_RED 5 /* red */ #define ALTQT_RIO 6 /* rio */ #define ALTQT_LOCALQ 7 /* local use */ #define ALTQT_HFSC 8 /* hfsc */ #define ALTQT_CDNR 9 /* traffic conditioner */ #define ALTQT_BLUE 10 /* blue */ #define ALTQT_PRIQ 11 /* priority queue */ #define ALTQT_JOBS 12 /* JoBS */ #define ALTQT_MAX 13 /* should be max discipline type + 1 */ #ifdef ALTQ3_COMPAT struct altqreq { char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ u_long arg; /* request-specific argument */ }; #endif /* simple token backet meter profile */ struct tb_profile { u_int rate; /* rate in bit-per-sec */ u_int depth; /* depth in bytes */ }; #ifdef ALTQ3_COMPAT struct tbrreq { char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ struct tb_profile tb_prof; /* token bucket profile */ }; #ifdef ALTQ3_CLFIER_COMPAT /* * common network flow info structure */ struct flowinfo { u_char fi_len; /* total length */ u_char fi_family; /* address family */ u_int8_t fi_data[46]; /* actually longer; address family specific flow info. */ }; /* * flow info structure for internet protocol family. * (currently this is the only protocol family supported) */ struct flowinfo_in { u_char fi_len; /* sizeof(struct flowinfo_in) */ u_char fi_family; /* AF_INET */ u_int8_t fi_proto; /* IPPROTO_XXX */ u_int8_t fi_tos; /* type-of-service */ struct in_addr fi_dst; /* dest address */ struct in_addr fi_src; /* src address */ u_int16_t fi_dport; /* dest port */ u_int16_t fi_sport; /* src port */ u_int32_t fi_gpi; /* generalized port id for ipsec */ u_int8_t _pad[28]; /* make the size equal to flowinfo_in6 */ }; #ifdef SIN6_LEN struct flowinfo_in6 { u_char fi6_len; /* sizeof(struct flowinfo_in6) */ u_char fi6_family; /* AF_INET6 */ u_int8_t fi6_proto; /* IPPROTO_XXX */ u_int8_t fi6_tclass; /* traffic class */ u_int32_t fi6_flowlabel; /* ipv6 flowlabel */ u_int16_t fi6_dport; /* dest port */ u_int16_t fi6_sport; /* src port */ u_int32_t fi6_gpi; /* generalized port id */ struct in6_addr fi6_dst; /* dest address */ struct in6_addr fi6_src; /* src address */ }; #endif /* INET6 */ /* * flow filters for AF_INET and AF_INET6 */ struct flow_filter { int ff_ruleno; struct flowinfo_in ff_flow; struct { struct in_addr mask_dst; struct in_addr mask_src; u_int8_t mask_tos; u_int8_t _pad[3]; } ff_mask; u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */ }; #ifdef SIN6_LEN struct flow_filter6 { int ff_ruleno; struct flowinfo_in6 ff_flow6; struct { struct in6_addr mask6_dst; struct in6_addr mask6_src; u_int8_t mask6_tclass; u_int8_t _pad[3]; } ff_mask6; }; #endif /* INET6 */ #endif /* ALTQ3_CLFIER_COMPAT */ #endif /* ALTQ3_COMPAT */ /* * generic packet counter */ struct pktcntr { u_int64_t packets; u_int64_t bytes; }; #define PKTCNTR_ADD(cntr, len) \ do { (cntr)->packets++; (cntr)->bytes += len; } while (/*CONSTCOND*/ 0) #ifdef ALTQ3_COMPAT /* * altq related ioctls */ #define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */ #if 0 /* * these ioctls are currently discipline-specific but could be shared * in the future. */ #define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */ #define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */ #define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */ #define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/ #define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */ #define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */ #define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */ #define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */ #define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */ #define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */ #define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */ #define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */ #define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */ #endif /* 0 */ #define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */ #define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */ #endif /* ALTQ3_COMPAT */ #ifdef _KERNEL #include #endif #endif /* _ALTQ_ALTQ_H_ */ Index: projects/ifnet/sys/net/altq/altq_cbq.c =================================================================== --- projects/ifnet/sys/net/altq/altq_cbq.c (revision 281649) +++ projects/ifnet/sys/net/altq/altq_cbq.c (revision 281650) @@ -1,1173 +1,1165 @@ -/* $FreeBSD$ */ -/* $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ */ - -/* +/*- * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. + * + * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #include #endif #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif #ifdef ALTQ3_COMPAT /* * Local Data structures. */ static cbq_state_t *cbq_list = NULL; #endif /* * Forward Declarations. */ static int cbq_class_destroy(cbq_state_t *, struct rm_class *); static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t); static int cbq_clear_interface(cbq_state_t *); static int cbq_request(struct ifaltq *, int, void *); static int cbq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *cbq_dequeue(struct ifaltq *, int); static void cbqrestart(struct ifaltq *); static void get_class_stats(class_stats_t *, struct rm_class *); static void cbq_purge(cbq_state_t *); #ifdef ALTQ3_COMPAT static int cbq_add_class(struct cbq_add_class *); static int cbq_delete_class(struct cbq_delete_class *); static int cbq_modify_class(struct cbq_modify_class *); static int cbq_class_create(cbq_state_t *, struct cbq_add_class *, struct rm_class *, struct rm_class *); static int cbq_clear_hierarchy(struct cbq_interface *); static int cbq_set_enable(struct cbq_interface *, int); static int cbq_ifattach(struct cbq_interface *); static int cbq_ifdetach(struct cbq_interface *); static int cbq_getstats(struct cbq_getstats *); static int cbq_add_filter(struct cbq_add_filter *); static int cbq_delete_filter(struct cbq_delete_filter *); #endif /* ALTQ3_COMPAT */ /* * int * cbq_class_destroy(cbq_mod_state_t *, struct rm_class *) - This * function destroys a given traffic class. Before destroying * the class, all traffic for that class is released. */ static int cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl) { int i; /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; #ifdef ALTQ3_COMPAT if (cl == cbqp->ifnp.ctl_) cbqp->ifnp.ctl_ = NULL; #endif return (0); } /* convert class handle to class pointer */ static struct rm_class * clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle) { int i; struct rm_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. */ i = chandle % CBQ_MAX_CLASSES; if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); return (NULL); } static int cbq_clear_interface(cbq_state_t *cbqp) { int again, i; struct rm_class *cl; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1); #endif /* clear out the classes now */ do { again = 0; for (i = 0; i < CBQ_MAX_CLASSES; i++) { if ((cl = cbqp->cbq_class_tbl[i]) != NULL) { if (is_a_parent_class(cl)) again++; else { cbq_class_destroy(cbqp, cl); cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; #ifdef ALTQ3_COMPAT if (cl == cbqp->ifnp.ctl_) cbqp->ifnp.ctl_ = NULL; #endif } } } } while (again); return (0); } static int cbq_request(struct ifaltq *ifq, int req, void *arg) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: cbq_purge(cbqp); break; } return (0); } /* copy the stats info in rm_class to class_states_t */ static void get_class_stats(class_stats_t *statsp, struct rm_class *cl) { statsp->xmit_cnt = cl->stats_.xmit_cnt; statsp->drop_cnt = cl->stats_.drop_cnt; statsp->over = cl->stats_.over; statsp->borrows = cl->stats_.borrows; statsp->overactions = cl->stats_.overactions; statsp->delays = cl->stats_.delays; statsp->depth = cl->depth_; statsp->priority = cl->pri_; statsp->maxidle = cl->maxidle_; statsp->minidle = cl->minidle_; statsp->offtime = cl->offtime_; statsp->qmax = qlimit(cl->q_); statsp->ns_per_byte = cl->ns_per_byte_; statsp->wrr_allot = cl->w_allotment_; statsp->qcnt = qlen(cl->q_); statsp->avgidle = cl->avgidle_; statsp->qtype = qtype(cl->q_); #ifdef ALTQ_RED if (q_is_red(cl->q_)) red_getstats(cl->red_, &statsp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) rio_getstats((rio_t *)cl->red_, &statsp->red[0]); #endif } int cbq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc, cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL); splx(s); return (error); } int cbq_add_altq(struct pf_altq *a) { cbq_state_t *cbqp; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); /* allocate and initialize cbq_state_t */ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cbqp == NULL) return (ENOMEM); CALLOUT_INIT(&cbqp->cbq_callout); cbqp->cbq_qlen = 0; cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ /* keep the state in pf_altq */ a->altq_disc = cbqp; return (0); } int cbq_remove_altq(struct pf_altq *a) { cbq_state_t *cbqp; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; cbq_clear_interface(cbqp); if (cbqp->ifnp.default_) cbq_class_destroy(cbqp, cbqp->ifnp.default_); if (cbqp->ifnp.root_) cbq_class_destroy(cbqp, cbqp->ifnp.root_); /* deallocate cbq_state_t */ free(cbqp, M_DEVBUF); return (0); } int cbq_add_queue(struct pf_altq *a) { struct rm_class *borrow, *parent; cbq_state_t *cbqp; struct rm_class *cl; struct cbq_opts *opts; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); /* * find a free slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = a->qid % CBQ_MAX_CLASSES; if (cbqp->cbq_class_tbl[i] != NULL) { for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); } opts = &a->pq_u.cbq_opts; /* check parameters */ if (a->priority >= CBQ_MAXPRI) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, a->parent_qid); if (opts->flags & CBQCLF_BORROW) borrow = parent; else borrow = NULL; /* * A class must borrow from it's parent or it can not * borrow at all. Hence, borrow can be null. */ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) { printf("cbq_add_queue: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { printf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } /* * check parameters */ switch (opts->flags & CBQCLF_CLASSMASK) { case CBQCLF_ROOTCLASS: if (parent != NULL) return (EINVAL); if (cbqp->ifnp.root_) return (EINVAL); break; case CBQCLF_DEFCLASS: if (cbqp->ifnp.default_) return (EINVAL); break; case 0: if (a->qid == 0) return (EINVAL); break; default: /* more than two flags bits set */ return (EINVAL); } /* * create a class. if this is a root class, initialize the * interface. */ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte, cbqrestart, a->qlimit, RM_MAXQUEUED, opts->maxidle, opts->minidle, opts->offtime, opts->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(a->priority, &cbqp->ifnp, opts->ns_per_byte, rmc_delay_action, a->qlimit, parent, borrow, opts->maxidle, opts->minidle, opts->offtime, opts->pktsize, opts->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ cl->stats_.handle = a->qid; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; return (0); } int cbq_remove_queue(struct pf_altq *a) { struct rm_class *cl; cbq_state_t *cbqp; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); /* if we are a parent class, then return an error. */ if (is_a_parent_class(cl)) return (EINVAL); /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) { cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; break; } return (0); } int cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) { cbq_state_t *cbqp; struct rm_class *cl; class_stats_t stats; int error = 0; if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * int * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr) * - Queue data packets. * * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper * layer (e.g. ether_output). cbq_enqueue queues the given packet * to the cbq, then invokes the driver's start routine. * * Assumptions: called in splimp * Returns: 0 if the queueing is successful. * ENOBUFS if a packet dropping occurred as a result of * the queueing. */ static int cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct rm_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(cbqp, t->qid); #ifdef ALTQ3_COMPAT else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) cl = pktattr->pattr_class; #endif if (cl == NULL) { cl = cbqp->ifnp.default_; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } #ifdef ALTQ3_COMPAT if (pktattr != NULL) cl->pktattr_ = pktattr; /* save proto hdr used by ECN */ else #endif cl->pktattr_ = NULL; len = m_pktlen(m); if (rmc_queue_packet(cl, m) != 0) { /* drop occurred. some mbuf was freed in rmc_queue_packet. */ PKTCNTR_ADD(&cl->stats_.drop_cnt, len); return (ENOBUFS); } /* successfully queued. */ ++cbqp->cbq_qlen; IFQ_INC_LEN(ifq); return (0); } static struct mbuf * cbq_dequeue(struct ifaltq *ifq, int op) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); m = rmc_dequeue_next(&cbqp->ifnp, op); if (m && op == ALTDQ_REMOVE) { --cbqp->cbq_qlen; /* decrement # of packets in cbq */ IFQ_DEC_LEN(ifq); /* Update the class. */ rmc_update_class_util(&cbqp->ifnp); } return (m); } /* * void * cbqrestart(queue_t *) - Restart sending of data. * called from rmc_restart in splimp via timeout after waking up * a suspended class. * Returns: NONE */ static void cbqrestart(struct ifaltq *ifq) { cbq_state_t *cbqp; struct ifnet *ifp; IFQ_LOCK_ASSERT(ifq); if (!ALTQ_IS_ENABLED(ifq)) /* cbq must have been detached */ return; if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL) /* should not happen */ return; ifp = ifq->altq_ifp; if (ifp->if_start && cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_UNLOCK(ifq); (*ifp->if_start)(ifp); IFQ_LOCK(ifq); } } static void cbq_purge(cbq_state_t *cbqp) { struct rm_class *cl; int i; for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL) rmc_dropall(cl); if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_)) cbqp->ifnp.ifq_->ifq_len = 0; } #ifdef ALTQ3_COMPAT static int cbq_add_class(acp) struct cbq_add_class *acp; { char *ifacename; struct rm_class *borrow, *parent; cbq_state_t *cbqp; ifacename = acp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* check parameters */ if (acp->cbq_class.priority >= CBQ_MAXPRI || acp->cbq_class.maxq > CBQ_MAXQSIZE) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle); borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle); /* * A class must borrow from it's parent or it can not * borrow at all. Hence, borrow can be null. */ if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) { printf("cbq_add_class: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { printf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } return cbq_class_create(cbqp, acp, parent, borrow); } static int cbq_delete_class(dcp) struct cbq_delete_class *dcp; { char *ifacename; struct rm_class *cl; cbq_state_t *cbqp; ifacename = dcp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL) return (EINVAL); /* if we are a parent class, then return an error. */ if (is_a_parent_class(cl)) return (EINVAL); /* if a filter has a reference to this class delete the filter */ acc_discard_filters(&cbqp->cbq_classifier, cl, 0); return cbq_class_destroy(cbqp, cl); } static int cbq_modify_class(acp) struct cbq_modify_class *acp; { char *ifacename; struct rm_class *cl; cbq_state_t *cbqp; ifacename = acp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* Get pointer to this class */ if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL) return (EINVAL); if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte, acp->cbq_class.maxq, acp->cbq_class.maxidle, acp->cbq_class.minidle, acp->cbq_class.offtime, acp->cbq_class.pktsize) < 0) return (EINVAL); return (0); } /* * struct rm_class * * cbq_class_create(cbq_mod_state_t *cbqp, struct cbq_add_class *acp, * struct rm_class *parent, struct rm_class *borrow) * * This function create a new traffic class in the CBQ class hierarchy of * given paramters. The class that created is either the root, default, * or a new dynamic class. If CBQ is not initilaized, the the root class * will be created. */ static int cbq_class_create(cbqp, acp, parent, borrow) cbq_state_t *cbqp; struct cbq_add_class *acp; struct rm_class *parent, *borrow; { struct rm_class *cl; cbq_class_spec_t *spec = &acp->cbq_class; u_int32_t chandle; int i; /* * allocate class handle */ for (i = 1; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); chandle = i; /* use the slot number as class handle */ /* * create a class. if this is a root class, initialize the * interface. */ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte, cbqrestart, spec->maxq, RM_MAXQUEUED, spec->maxidle, spec->minidle, spec->offtime, spec->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(spec->priority, &cbqp->ifnp, spec->nano_sec_per_byte, rmc_delay_action, spec->maxq, parent, borrow, spec->maxidle, spec->minidle, spec->offtime, spec->pktsize, spec->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ acp->cbq_class_handle = chandle; cl->stats_.handle = chandle; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS) cbqp->ifnp.ctl_ = cl; return (0); } static int cbq_add_filter(afp) struct cbq_add_filter *afp; { char *ifacename; cbq_state_t *cbqp; struct rm_class *cl; ifacename = afp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* Get the pointer to class. */ if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL) return (EINVAL); return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter, cl, &afp->cbq_filter_handle); } static int cbq_delete_filter(dfp) struct cbq_delete_filter *dfp; { char *ifacename; cbq_state_t *cbqp; ifacename = dfp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); return acc_delete_filter(&cbqp->cbq_classifier, dfp->cbq_filter_handle); } /* * cbq_clear_hierarchy deletes all classes and their filters on the * given interface. */ static int cbq_clear_hierarchy(ifacep) struct cbq_interface *ifacep; { char *ifacename; cbq_state_t *cbqp; ifacename = ifacep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); return cbq_clear_interface(cbqp); } /* * static int * cbq_set_enable(struct cbq_enable *ep) - this function processed the * ioctl request to enable class based queueing. It searches the list * of interfaces for the specified interface and then enables CBQ on * that interface. * * Returns: 0, for no error. * EBADF, for specified inteface not found. */ static int cbq_set_enable(ep, enable) struct cbq_interface *ep; int enable; { int error = 0; cbq_state_t *cbqp; char *ifacename; ifacename = ep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); switch (enable) { case ENABLE: if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL || cbqp->ifnp.ctl_ == NULL) { if (cbqp->ifnp.root_ == NULL) printf("No Root Class for %s\n", ifacename); if (cbqp->ifnp.default_ == NULL) printf("No Default Class for %s\n", ifacename); if (cbqp->ifnp.ctl_ == NULL) printf("No Control Class for %s\n", ifacename); error = EINVAL; } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) { cbqp->cbq_qlen = 0; } break; case DISABLE: error = altq_disable(cbqp->ifnp.ifq_); break; } return (error); } static int cbq_getstats(gsp) struct cbq_getstats *gsp; { char *ifacename; int i, n, nclasses; cbq_state_t *cbqp; struct rm_class *cl; class_stats_t stats, *usp; int error = 0; ifacename = gsp->iface.cbq_ifacename; nclasses = gsp->nclasses; usp = gsp->stats; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); if (nclasses <= 0) return (EINVAL); for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) { while ((cl = cbqp->cbq_class_tbl[i]) == NULL) if (++i >= CBQ_MAX_CLASSES) goto out; get_class_stats(&stats, cl); stats.handle = cl->stats_.handle; if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } out: gsp->nclasses = n; return (error); } static int cbq_ifattach(ifacep) struct cbq_interface *ifacep; { int error = 0; char *ifacename; cbq_state_t *new_cbqp; struct ifnet *ifp; ifacename = ifacep->cbq_ifacename; if ((ifp = ifunit(ifacename)) == NULL) return (ENXIO); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENXIO); /* allocate and initialize cbq_state_t */ new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK); if (new_cbqp == NULL) return (ENOMEM); bzero(new_cbqp, sizeof(cbq_state_t)); CALLOUT_INIT(&new_cbqp->cbq_callout); new_cbqp->cbq_qlen = 0; new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ /* * set CBQ to this ifnet structure. */ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp, cbq_enqueue, cbq_dequeue, cbq_request, &new_cbqp->cbq_classifier, acc_classify); if (error) { free(new_cbqp, M_DEVBUF); return (error); } /* prepend to the list of cbq_state_t's. */ new_cbqp->cbq_next = cbq_list; cbq_list = new_cbqp; return (0); } static int cbq_ifdetach(ifacep) struct cbq_interface *ifacep; { char *ifacename; cbq_state_t *cbqp; ifacename = ifacep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); (void)cbq_set_enable(ifacep, DISABLE); cbq_clear_interface(cbqp); /* remove CBQ from the ifnet structure. */ (void)altq_detach(cbqp->ifnp.ifq_); /* remove from the list of cbq_state_t's. */ if (cbq_list == cbqp) cbq_list = cbqp->cbq_next; else { cbq_state_t *cp; for (cp = cbq_list; cp != NULL; cp = cp->cbq_next) if (cp->cbq_next == cbqp) { cp->cbq_next = cbqp->cbq_next; break; } ASSERT(cp != NULL); } /* deallocate cbq_state_t */ free(cbqp, M_DEVBUF); return (0); } /* * cbq device interface */ altqdev_decl(cbq); int cbqopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { return (0); } int cbqclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct ifnet *ifp; struct cbq_interface iface; int err, error = 0; while (cbq_list) { ifp = cbq_list->ifnp.ifq_->altq_ifp; sprintf(iface.cbq_ifacename, "%s", ifp->if_xname); err = cbq_ifdetach(&iface); if (err != 0 && error == 0) error = err; } return (error); } int cbqioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { int error = 0; /* check cmd for superuser only */ switch (cmd) { case CBQ_GETSTATS: /* currently only command that an ordinary user can call */ break; default: #if (__FreeBSD_version > 700000) error = priv_check(p, PRIV_ALTQ_MANAGE); #elsif (__FreeBSD_version > 400000) error = suser(p); #else error = suser(p->p_ucred, &p->p_acflag); #endif if (error) return (error); break; } switch (cmd) { case CBQ_ENABLE: error = cbq_set_enable((struct cbq_interface *)addr, ENABLE); break; case CBQ_DISABLE: error = cbq_set_enable((struct cbq_interface *)addr, DISABLE); break; case CBQ_ADD_FILTER: error = cbq_add_filter((struct cbq_add_filter *)addr); break; case CBQ_DEL_FILTER: error = cbq_delete_filter((struct cbq_delete_filter *)addr); break; case CBQ_ADD_CLASS: error = cbq_add_class((struct cbq_add_class *)addr); break; case CBQ_DEL_CLASS: error = cbq_delete_class((struct cbq_delete_class *)addr); break; case CBQ_MODIFY_CLASS: error = cbq_modify_class((struct cbq_modify_class *)addr); break; case CBQ_CLEAR_HIERARCHY: error = cbq_clear_hierarchy((struct cbq_interface *)addr); break; case CBQ_IF_ATTACH: error = cbq_ifattach((struct cbq_interface *)addr); break; case CBQ_IF_DETACH: error = cbq_ifdetach((struct cbq_interface *)addr); break; case CBQ_GETSTATS: error = cbq_getstats((struct cbq_getstats *)addr); break; default: error = EINVAL; break; } return error; } #if 0 /* for debug */ static void cbq_class_dump(int); static void cbq_class_dump(i) int i; { struct rm_class *cl; rm_class_stats_t *s; struct _class_queue_ *q; if (cbq_list == NULL) { printf("cbq_class_dump: no cbq_state found\n"); return; } cl = cbq_list->cbq_class_tbl[i]; printf("class %d cl=%p\n", i, cl); if (cl != NULL) { s = &cl->stats_; q = cl->q_; printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n", cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_); printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n", cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_, cl->maxidle_); printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n", cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_); printf("handle=%d, depth=%d, packets=%d, bytes=%d\n", s->handle, s->depth, (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes); printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n", s->over, s->borrows, (int)s->drop_cnt.packets, s->overactions, s->delays); printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n", q->tail_, q->head_, q->qlen_, q->qlim_, q->qthresh_, q->qtype_); } } #endif /* 0 */ #ifdef KLD_MODULE static struct altqsw cbq_sw = {"cbq", cbqopen, cbqclose, cbqioctl}; ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw); MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1); MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_CBQ */ Index: projects/ifnet/sys/net/altq/altq_cdnr.c =================================================================== --- projects/ifnet/sys/net/altq/altq_cdnr.c (revision 281649) +++ projects/ifnet/sys/net/altq/altq_cdnr.c (revision 281650) @@ -1,1390 +1,1382 @@ -/* $FreeBSD$ */ -/* $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */ - -/* +/*- * Copyright (C) 1999-2002 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #ifdef ALTQ3_COMPAT #include #endif #include #ifdef ALTQ3_COMPAT /* * diffserv traffic conditioning module */ int altq_cdnr_enabled = 0; /* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */ #ifdef ALTQ_CDNR /* cdnr_list keeps all cdnr's allocated. */ static LIST_HEAD(, top_cdnr) tcb_list; static int altq_cdnr_input(struct mbuf *, int); static struct top_cdnr *tcb_lookup(char *ifname); static struct cdnr_block *cdnr_handle2cb(u_long); static u_long cdnr_cb2handle(struct cdnr_block *); static void *cdnr_cballoc(struct top_cdnr *, int, struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *)); static void cdnr_cbdestroy(void *); static int tca_verify_action(struct tc_action *); static void tca_import_action(struct tc_action *, struct tc_action *); static void tca_invalidate_action(struct tc_action *); static int generic_element_destroy(struct cdnr_block *); static struct top_cdnr *top_create(struct ifaltq *); static int top_destroy(struct top_cdnr *); static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *); static int element_destroy(struct cdnr_block *); static void tb_import_profile(struct tbe *, struct tb_profile *); static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *, struct tc_action *, struct tc_action *); static int tbm_destroy(struct tbmeter *); static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *); static struct trtcm *trtcm_create(struct top_cdnr *, struct tb_profile *, struct tb_profile *, struct tc_action *, struct tc_action *, struct tc_action *, int); static int trtcm_destroy(struct trtcm *); static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); static struct tswtcm *tswtcm_create(struct top_cdnr *, u_int32_t, u_int32_t, u_int32_t, struct tc_action *, struct tc_action *, struct tc_action *); static int tswtcm_destroy(struct tswtcm *); static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); static int cdnrcmd_if_attach(char *); static int cdnrcmd_if_detach(char *); static int cdnrcmd_add_element(struct cdnr_add_element *); static int cdnrcmd_delete_element(struct cdnr_delete_element *); static int cdnrcmd_add_filter(struct cdnr_add_filter *); static int cdnrcmd_delete_filter(struct cdnr_delete_filter *); static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *); static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *); static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *); static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *); static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *); static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *); static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *); static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *); static int cdnrcmd_get_stats(struct cdnr_get_stats *); altqdev_decl(cdnr); /* * top level input function called from ip_input. * should be called before converting header fields to host-byte-order. */ int altq_cdnr_input(m, af) struct mbuf *m; int af; /* address family */ { struct ifnet *ifp; struct ip *ip; struct top_cdnr *top; struct tc_action *tca; struct cdnr_block *cb; struct cdnr_pktinfo pktinfo; ifp = m->m_pkthdr.rcvif; if (!ALTQ_IS_CNDTNING(&ifp->if_snd)) /* traffic conditioner is not enabled on this interface */ return (1); top = ifp->if_snd.altq_cdnr; ip = mtod(m, struct ip *); #ifdef INET6 if (af == AF_INET6) { u_int32_t flowlabel; flowlabel = ((struct ip6_hdr *)ip)->ip6_flow; pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK; } else #endif pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK; pktinfo.pkt_len = m_pktlen(m); tca = NULL; cb = acc_classify(&top->tc_classifier, m, af); if (cb != NULL) tca = &cb->cb_action; if (tca == NULL) tca = &top->tc_block.cb_action; while (1) { PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len); switch (tca->tca_code) { case TCACODE_PASS: return (1); case TCACODE_DROP: m_freem(m); return (0); case TCACODE_RETURN: return (0); case TCACODE_MARK: #ifdef INET6 if (af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); flowlabel = (tca->tca_dscp << 20) | (flowlabel & ~(DSCP_MASK << 20)); ip6->ip6_flow = htonl(flowlabel); } else #endif ip->ip_tos = tca->tca_dscp | (ip->ip_tos & DSCP_CUMASK); return (1); case TCACODE_NEXT: cb = tca->tca_next; tca = (*cb->cb_input)(cb, &pktinfo); break; case TCACODE_NONE: default: return (1); } } } static struct top_cdnr * tcb_lookup(ifname) char *ifname; { struct top_cdnr *top; struct ifnet *ifp; if ((ifp = ifunit(ifname)) != NULL) LIST_FOREACH(top, &tcb_list, tc_next) if (top->tc_ifq->altq_ifp == ifp) return (top); return (NULL); } static struct cdnr_block * cdnr_handle2cb(handle) u_long handle; { struct cdnr_block *cb; cb = (struct cdnr_block *)handle; if (handle != ALIGN(cb)) return (NULL); if (cb == NULL || cb->cb_handle != handle) return (NULL); return (cb); } static u_long cdnr_cb2handle(cb) struct cdnr_block *cb; { return (cb->cb_handle); } static void * cdnr_cballoc(top, type, input_func) struct top_cdnr *top; int type; struct tc_action *(*input_func)(struct cdnr_block *, struct cdnr_pktinfo *); { struct cdnr_block *cb; int size; switch (type) { case TCETYPE_TOP: size = sizeof(struct top_cdnr); break; case TCETYPE_ELEMENT: size = sizeof(struct cdnr_block); break; case TCETYPE_TBMETER: size = sizeof(struct tbmeter); break; case TCETYPE_TRTCM: size = sizeof(struct trtcm); break; case TCETYPE_TSWTCM: size = sizeof(struct tswtcm); break; default: return (NULL); } cb = malloc(size, M_DEVBUF, M_WAITOK); if (cb == NULL) return (NULL); bzero(cb, size); cb->cb_len = size; cb->cb_type = type; cb->cb_ref = 0; cb->cb_handle = (u_long)cb; if (top == NULL) cb->cb_top = (struct top_cdnr *)cb; else cb->cb_top = top; if (input_func != NULL) { /* * if this cdnr has an action function, * make tc_action to call itself. */ cb->cb_action.tca_code = TCACODE_NEXT; cb->cb_action.tca_next = cb; cb->cb_input = input_func; } else cb->cb_action.tca_code = TCACODE_NONE; /* if this isn't top, register the element to the top level cdnr */ if (top != NULL) LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next); return ((void *)cb); } static void cdnr_cbdestroy(cblock) void *cblock; { struct cdnr_block *cb = cblock; /* delete filters belonging to this cdnr */ acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0); /* remove from the top level cdnr */ if (cb->cb_top != cblock) LIST_REMOVE(cb, cb_next); free(cb, M_DEVBUF); } /* * conditioner common destroy routine */ static int generic_element_destroy(cb) struct cdnr_block *cb; { int error = 0; switch (cb->cb_type) { case TCETYPE_TOP: error = top_destroy((struct top_cdnr *)cb); break; case TCETYPE_ELEMENT: error = element_destroy(cb); break; case TCETYPE_TBMETER: error = tbm_destroy((struct tbmeter *)cb); break; case TCETYPE_TRTCM: error = trtcm_destroy((struct trtcm *)cb); break; case TCETYPE_TSWTCM: error = tswtcm_destroy((struct tswtcm *)cb); break; default: error = EINVAL; } return (error); } static int tca_verify_action(utca) struct tc_action *utca; { switch (utca->tca_code) { case TCACODE_PASS: case TCACODE_DROP: case TCACODE_MARK: /* these are ok */ break; case TCACODE_HANDLE: /* verify handle value */ if (cdnr_handle2cb(utca->tca_handle) == NULL) return (-1); break; case TCACODE_NONE: case TCACODE_RETURN: case TCACODE_NEXT: default: /* should not be passed from a user */ return (-1); } return (0); } static void tca_import_action(ktca, utca) struct tc_action *ktca, *utca; { struct cdnr_block *cb; *ktca = *utca; if (ktca->tca_code == TCACODE_HANDLE) { cb = cdnr_handle2cb(ktca->tca_handle); if (cb == NULL) { ktca->tca_code = TCACODE_NONE; return; } ktca->tca_code = TCACODE_NEXT; ktca->tca_next = cb; cb->cb_ref++; } else if (ktca->tca_code == TCACODE_MARK) { ktca->tca_dscp &= DSCP_MASK; } return; } static void tca_invalidate_action(tca) struct tc_action *tca; { struct cdnr_block *cb; if (tca->tca_code == TCACODE_NEXT) { cb = tca->tca_next; if (cb == NULL) return; cb->cb_ref--; } tca->tca_code = TCACODE_NONE; } /* * top level traffic conditioner */ static struct top_cdnr * top_create(ifq) struct ifaltq *ifq; { struct top_cdnr *top; if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL) return (NULL); top->tc_ifq = ifq; /* set default action for the top level conditioner */ top->tc_block.cb_action.tca_code = TCACODE_PASS; LIST_INSERT_HEAD(&tcb_list, top, tc_next); ifq->altq_cdnr = top; return (top); } static int top_destroy(top) struct top_cdnr *top; { struct cdnr_block *cb; if (ALTQ_IS_CNDTNING(top->tc_ifq)) ALTQ_CLEAR_CNDTNING(top->tc_ifq); top->tc_ifq->altq_cdnr = NULL; /* * destroy all the conditioner elements belonging to this interface */ while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) { while (cb != NULL && cb->cb_ref > 0) cb = LIST_NEXT(cb, cb_next); if (cb != NULL) generic_element_destroy(cb); } LIST_REMOVE(top, tc_next); cdnr_cbdestroy(top); /* if there is no active conditioner, remove the input hook */ if (altq_input != NULL) { LIST_FOREACH(top, &tcb_list, tc_next) if (ALTQ_IS_CNDTNING(top->tc_ifq)) break; if (top == NULL) altq_input = NULL; } return (0); } /* * simple tc elements without input function (e.g., dropper and makers). */ static struct cdnr_block * element_create(top, action) struct top_cdnr *top; struct tc_action *action; { struct cdnr_block *cb; if (tca_verify_action(action) < 0) return (NULL); if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL) return (NULL); tca_import_action(&cb->cb_action, action); return (cb); } static int element_destroy(cb) struct cdnr_block *cb; { if (cb->cb_ref > 0) return (EBUSY); tca_invalidate_action(&cb->cb_action); cdnr_cbdestroy(cb); return (0); } /* * internal representation of token bucket parameters * rate: byte_per_unittime << 32 * (((bits_per_sec) / 8) << 32) / machclk_freq * depth: byte << 32 * */ #define TB_SHIFT 32 #define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT) #define TB_UNSCALE(x) ((x) >> TB_SHIFT) static void tb_import_profile(tb, profile) struct tbe *tb; struct tb_profile *profile; { tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq; tb->depth = TB_SCALE(profile->depth); if (tb->rate > 0) tb->filluptime = tb->depth / tb->rate; else tb->filluptime = 0xffffffffffffffffLL; tb->token = tb->depth; tb->last = read_machclk(); } /* * simple token bucket meter */ static struct tbmeter * tbm_create(top, profile, in_action, out_action) struct top_cdnr *top; struct tb_profile *profile; struct tc_action *in_action, *out_action; { struct tbmeter *tbm = NULL; if (tca_verify_action(in_action) < 0 || tca_verify_action(out_action) < 0) return (NULL); if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER, tbm_input)) == NULL) return (NULL); tb_import_profile(&tbm->tb, profile); tca_import_action(&tbm->in_action, in_action); tca_import_action(&tbm->out_action, out_action); return (tbm); } static int tbm_destroy(tbm) struct tbmeter *tbm; { if (tbm->cdnrblk.cb_ref > 0) return (EBUSY); tca_invalidate_action(&tbm->in_action); tca_invalidate_action(&tbm->out_action); cdnr_cbdestroy(tbm); return (0); } static struct tc_action * tbm_input(cb, pktinfo) struct cdnr_block *cb; struct cdnr_pktinfo *pktinfo; { struct tbmeter *tbm = (struct tbmeter *)cb; u_int64_t len; u_int64_t interval, now; len = TB_SCALE(pktinfo->pkt_len); if (tbm->tb.token < len) { now = read_machclk(); interval = now - tbm->tb.last; if (interval >= tbm->tb.filluptime) tbm->tb.token = tbm->tb.depth; else { tbm->tb.token += interval * tbm->tb.rate; if (tbm->tb.token > tbm->tb.depth) tbm->tb.token = tbm->tb.depth; } tbm->tb.last = now; } if (tbm->tb.token < len) { PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len); return (&tbm->out_action); } tbm->tb.token -= len; PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len); return (&tbm->in_action); } /* * two rate three color marker * as described in draft-heinanen-diffserv-trtcm-01.txt */ static struct trtcm * trtcm_create(top, cmtd_profile, peak_profile, green_action, yellow_action, red_action, coloraware) struct top_cdnr *top; struct tb_profile *cmtd_profile, *peak_profile; struct tc_action *green_action, *yellow_action, *red_action; int coloraware; { struct trtcm *tcm = NULL; if (tca_verify_action(green_action) < 0 || tca_verify_action(yellow_action) < 0 || tca_verify_action(red_action) < 0) return (NULL); if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM, trtcm_input)) == NULL) return (NULL); tb_import_profile(&tcm->cmtd_tb, cmtd_profile); tb_import_profile(&tcm->peak_tb, peak_profile); tca_import_action(&tcm->green_action, green_action); tca_import_action(&tcm->yellow_action, yellow_action); tca_import_action(&tcm->red_action, red_action); /* set dscps to use */ if (tcm->green_action.tca_code == TCACODE_MARK) tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK; else tcm->green_dscp = DSCP_AF11; if (tcm->yellow_action.tca_code == TCACODE_MARK) tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK; else tcm->yellow_dscp = DSCP_AF12; if (tcm->red_action.tca_code == TCACODE_MARK) tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK; else tcm->red_dscp = DSCP_AF13; tcm->coloraware = coloraware; return (tcm); } static int trtcm_destroy(tcm) struct trtcm *tcm; { if (tcm->cdnrblk.cb_ref > 0) return (EBUSY); tca_invalidate_action(&tcm->green_action); tca_invalidate_action(&tcm->yellow_action); tca_invalidate_action(&tcm->red_action); cdnr_cbdestroy(tcm); return (0); } static struct tc_action * trtcm_input(cb, pktinfo) struct cdnr_block *cb; struct cdnr_pktinfo *pktinfo; { struct trtcm *tcm = (struct trtcm *)cb; u_int64_t len; u_int64_t interval, now; u_int8_t color; len = TB_SCALE(pktinfo->pkt_len); if (tcm->coloraware) { color = pktinfo->pkt_dscp; if (color != tcm->yellow_dscp && color != tcm->red_dscp) color = tcm->green_dscp; } else { /* if color-blind, precolor it as green */ color = tcm->green_dscp; } now = read_machclk(); if (tcm->cmtd_tb.token < len) { interval = now - tcm->cmtd_tb.last; if (interval >= tcm->cmtd_tb.filluptime) tcm->cmtd_tb.token = tcm->cmtd_tb.depth; else { tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate; if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth) tcm->cmtd_tb.token = tcm->cmtd_tb.depth; } tcm->cmtd_tb.last = now; } if (tcm->peak_tb.token < len) { interval = now - tcm->peak_tb.last; if (interval >= tcm->peak_tb.filluptime) tcm->peak_tb.token = tcm->peak_tb.depth; else { tcm->peak_tb.token += interval * tcm->peak_tb.rate; if (tcm->peak_tb.token > tcm->peak_tb.depth) tcm->peak_tb.token = tcm->peak_tb.depth; } tcm->peak_tb.last = now; } if (color == tcm->red_dscp || tcm->peak_tb.token < len) { pktinfo->pkt_dscp = tcm->red_dscp; PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len); return (&tcm->red_action); } if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) { pktinfo->pkt_dscp = tcm->yellow_dscp; tcm->peak_tb.token -= len; PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len); return (&tcm->yellow_action); } pktinfo->pkt_dscp = tcm->green_dscp; tcm->cmtd_tb.token -= len; tcm->peak_tb.token -= len; PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len); return (&tcm->green_action); } /* * time sliding window three color marker * as described in draft-fang-diffserv-tc-tswtcm-00.txt */ static struct tswtcm * tswtcm_create(top, cmtd_rate, peak_rate, avg_interval, green_action, yellow_action, red_action) struct top_cdnr *top; u_int32_t cmtd_rate, peak_rate, avg_interval; struct tc_action *green_action, *yellow_action, *red_action; { struct tswtcm *tsw; if (tca_verify_action(green_action) < 0 || tca_verify_action(yellow_action) < 0 || tca_verify_action(red_action) < 0) return (NULL); if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM, tswtcm_input)) == NULL) return (NULL); tca_import_action(&tsw->green_action, green_action); tca_import_action(&tsw->yellow_action, yellow_action); tca_import_action(&tsw->red_action, red_action); /* set dscps to use */ if (tsw->green_action.tca_code == TCACODE_MARK) tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK; else tsw->green_dscp = DSCP_AF11; if (tsw->yellow_action.tca_code == TCACODE_MARK) tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK; else tsw->yellow_dscp = DSCP_AF12; if (tsw->red_action.tca_code == TCACODE_MARK) tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK; else tsw->red_dscp = DSCP_AF13; /* convert rates from bits/sec to bytes/sec */ tsw->cmtd_rate = cmtd_rate / 8; tsw->peak_rate = peak_rate / 8; tsw->avg_rate = 0; /* timewin is converted from msec to machine clock unit */ tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000; return (tsw); } static int tswtcm_destroy(tsw) struct tswtcm *tsw; { if (tsw->cdnrblk.cb_ref > 0) return (EBUSY); tca_invalidate_action(&tsw->green_action); tca_invalidate_action(&tsw->yellow_action); tca_invalidate_action(&tsw->red_action); cdnr_cbdestroy(tsw); return (0); } static struct tc_action * tswtcm_input(cb, pktinfo) struct cdnr_block *cb; struct cdnr_pktinfo *pktinfo; { struct tswtcm *tsw = (struct tswtcm *)cb; int len; u_int32_t avg_rate; u_int64_t interval, now, tmp; /* * rate estimator */ len = pktinfo->pkt_len; now = read_machclk(); interval = now - tsw->t_front; /* * calculate average rate: * avg = (avg * timewin + pkt_len)/(timewin + interval) * pkt_len needs to be multiplied by machclk_freq in order to * get (bytes/sec). * note: when avg_rate (bytes/sec) and timewin (machclk unit) are * less than 32 bits, the following 64-bit operation has enough * precision. */ tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval); tsw->avg_rate = avg_rate = (u_int32_t)tmp; tsw->t_front = now; /* * marker */ if (avg_rate > tsw->cmtd_rate) { u_int32_t randval = arc4random() % avg_rate; if (avg_rate > tsw->peak_rate) { if (randval < avg_rate - tsw->peak_rate) { /* mark red */ pktinfo->pkt_dscp = tsw->red_dscp; PKTCNTR_ADD(&tsw->red_cnt, len); return (&tsw->red_action); } else if (randval < avg_rate - tsw->cmtd_rate) goto mark_yellow; } else { /* peak_rate >= avg_rate > cmtd_rate */ if (randval < avg_rate - tsw->cmtd_rate) { mark_yellow: pktinfo->pkt_dscp = tsw->yellow_dscp; PKTCNTR_ADD(&tsw->yellow_cnt, len); return (&tsw->yellow_action); } } } /* mark green */ pktinfo->pkt_dscp = tsw->green_dscp; PKTCNTR_ADD(&tsw->green_cnt, len); return (&tsw->green_action); } /* * ioctl requests */ static int cdnrcmd_if_attach(ifname) char *ifname; { struct ifnet *ifp; struct top_cdnr *top; if ((ifp = ifunit(ifname)) == NULL) return (EBADF); if (ifp->if_snd.altq_cdnr != NULL) return (EBUSY); if ((top = top_create(&ifp->if_snd)) == NULL) return (ENOMEM); return (0); } static int cdnrcmd_if_detach(ifname) char *ifname; { struct top_cdnr *top; if ((top = tcb_lookup(ifname)) == NULL) return (EBADF); return top_destroy(top); } static int cdnrcmd_add_element(ap) struct cdnr_add_element *ap; { struct top_cdnr *top; struct cdnr_block *cb; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); cb = element_create(top, &ap->action); if (cb == NULL) return (EINVAL); /* return a class handle to the user */ ap->cdnr_handle = cdnr_cb2handle(cb); return (0); } static int cdnrcmd_delete_element(ap) struct cdnr_delete_element *ap; { struct top_cdnr *top; struct cdnr_block *cb; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); if (cb->cb_type != TCETYPE_ELEMENT) return generic_element_destroy(cb); return element_destroy(cb); } static int cdnrcmd_add_filter(ap) struct cdnr_add_filter *ap; { struct top_cdnr *top; struct cdnr_block *cb; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); return acc_add_filter(&top->tc_classifier, &ap->filter, cb, &ap->filter_handle); } static int cdnrcmd_delete_filter(ap) struct cdnr_delete_filter *ap; { struct top_cdnr *top; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); return acc_delete_filter(&top->tc_classifier, ap->filter_handle); } static int cdnrcmd_add_tbm(ap) struct cdnr_add_tbmeter *ap; { struct top_cdnr *top; struct tbmeter *tbm; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action); if (tbm == NULL) return (EINVAL); /* return a class handle to the user */ ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk); return (0); } static int cdnrcmd_modify_tbm(ap) struct cdnr_modify_tbmeter *ap; { struct tbmeter *tbm; if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); tb_import_profile(&tbm->tb, &ap->profile); return (0); } static int cdnrcmd_tbm_stats(ap) struct cdnr_tbmeter_stats *ap; { struct tbmeter *tbm; if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); ap->in_cnt = tbm->in_cnt; ap->out_cnt = tbm->out_cnt; return (0); } static int cdnrcmd_add_trtcm(ap) struct cdnr_add_trtcm *ap; { struct top_cdnr *top; struct trtcm *tcm; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile, &ap->green_action, &ap->yellow_action, &ap->red_action, ap->coloraware); if (tcm == NULL) return (EINVAL); /* return a class handle to the user */ ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk); return (0); } static int cdnrcmd_modify_trtcm(ap) struct cdnr_modify_trtcm *ap; { struct trtcm *tcm; if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile); tb_import_profile(&tcm->peak_tb, &ap->peak_profile); return (0); } static int cdnrcmd_tcm_stats(ap) struct cdnr_tcm_stats *ap; { struct cdnr_block *cb; if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); if (cb->cb_type == TCETYPE_TRTCM) { struct trtcm *tcm = (struct trtcm *)cb; ap->green_cnt = tcm->green_cnt; ap->yellow_cnt = tcm->yellow_cnt; ap->red_cnt = tcm->red_cnt; } else if (cb->cb_type == TCETYPE_TSWTCM) { struct tswtcm *tsw = (struct tswtcm *)cb; ap->green_cnt = tsw->green_cnt; ap->yellow_cnt = tsw->yellow_cnt; ap->red_cnt = tsw->red_cnt; } else return (EINVAL); return (0); } static int cdnrcmd_add_tswtcm(ap) struct cdnr_add_tswtcm *ap; { struct top_cdnr *top; struct tswtcm *tsw; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); if (ap->cmtd_rate > ap->peak_rate) return (EINVAL); tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate, ap->avg_interval, &ap->green_action, &ap->yellow_action, &ap->red_action); if (tsw == NULL) return (EINVAL); /* return a class handle to the user */ ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk); return (0); } static int cdnrcmd_modify_tswtcm(ap) struct cdnr_modify_tswtcm *ap; { struct tswtcm *tsw; if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); if (ap->cmtd_rate > ap->peak_rate) return (EINVAL); /* convert rates from bits/sec to bytes/sec */ tsw->cmtd_rate = ap->cmtd_rate / 8; tsw->peak_rate = ap->peak_rate / 8; tsw->avg_rate = 0; /* timewin is converted from msec to machine clock unit */ tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000; return (0); } static int cdnrcmd_get_stats(ap) struct cdnr_get_stats *ap; { struct top_cdnr *top; struct cdnr_block *cb; struct tbmeter *tbm; struct trtcm *tcm; struct tswtcm *tsw; struct tce_stats tce, *usp; int error, n, nskip, nelements; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); /* copy action stats */ bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts)); /* stats for each element */ nelements = ap->nelements; usp = ap->tce_stats; if (nelements <= 0 || usp == NULL) return (0); nskip = ap->nskip; n = 0; LIST_FOREACH(cb, &top->tc_elements, cb_next) { if (nskip > 0) { nskip--; continue; } bzero(&tce, sizeof(tce)); tce.tce_handle = cb->cb_handle; tce.tce_type = cb->cb_type; switch (cb->cb_type) { case TCETYPE_TBMETER: tbm = (struct tbmeter *)cb; tce.tce_cnts[0] = tbm->in_cnt; tce.tce_cnts[1] = tbm->out_cnt; break; case TCETYPE_TRTCM: tcm = (struct trtcm *)cb; tce.tce_cnts[0] = tcm->green_cnt; tce.tce_cnts[1] = tcm->yellow_cnt; tce.tce_cnts[2] = tcm->red_cnt; break; case TCETYPE_TSWTCM: tsw = (struct tswtcm *)cb; tce.tce_cnts[0] = tsw->green_cnt; tce.tce_cnts[1] = tsw->yellow_cnt; tce.tce_cnts[2] = tsw->red_cnt; break; default: continue; } if ((error = copyout((caddr_t)&tce, (caddr_t)usp++, sizeof(tce))) != 0) return (error); if (++n == nelements) break; } ap->nelements = n; return (0); } /* * conditioner device interface */ int cdnropen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("cdnr: no cpu clock available!\n"); return (ENXIO); } /* everything will be done when the queueing scheme is attached. */ return 0; } int cdnrclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct top_cdnr *top; int err, error = 0; while ((top = LIST_FIRST(&tcb_list)) != NULL) { /* destroy all */ err = top_destroy(top); if (err != 0 && error == 0) error = err; } altq_input = NULL; return (error); } int cdnrioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct top_cdnr *top; struct cdnr_interface *ifacep; int s, error = 0; /* check super-user privilege */ switch (cmd) { case CDNR_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) #endif return (error); break; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif switch (cmd) { case CDNR_IF_ATTACH: ifacep = (struct cdnr_interface *)addr; error = cdnrcmd_if_attach(ifacep->cdnr_ifname); break; case CDNR_IF_DETACH: ifacep = (struct cdnr_interface *)addr; error = cdnrcmd_if_detach(ifacep->cdnr_ifname); break; case CDNR_ENABLE: case CDNR_DISABLE: ifacep = (struct cdnr_interface *)addr; if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) { error = EBADF; break; } switch (cmd) { case CDNR_ENABLE: ALTQ_SET_CNDTNING(top->tc_ifq); if (altq_input == NULL) altq_input = altq_cdnr_input; break; case CDNR_DISABLE: ALTQ_CLEAR_CNDTNING(top->tc_ifq); LIST_FOREACH(top, &tcb_list, tc_next) if (ALTQ_IS_CNDTNING(top->tc_ifq)) break; if (top == NULL) altq_input = NULL; break; } break; case CDNR_ADD_ELEM: error = cdnrcmd_add_element((struct cdnr_add_element *)addr); break; case CDNR_DEL_ELEM: error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr); break; case CDNR_ADD_TBM: error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr); break; case CDNR_MOD_TBM: error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr); break; case CDNR_TBM_STATS: error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr); break; case CDNR_ADD_TCM: error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr); break; case CDNR_MOD_TCM: error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr); break; case CDNR_TCM_STATS: error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr); break; case CDNR_ADD_FILTER: error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr); break; case CDNR_DEL_FILTER: error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr); break; case CDNR_GETSTATS: error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr); break; case CDNR_ADD_TSW: error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr); break; case CDNR_MOD_TSW: error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr); break; default: error = EINVAL; break; } splx(s); return error; } #ifdef KLD_MODULE static struct altqsw cdnr_sw = {"cdnr", cdnropen, cdnrclose, cdnrioctl}; ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_CDNR */ Index: projects/ifnet/sys/net/altq/altq_classq.h =================================================================== --- projects/ifnet/sys/net/altq/altq_classq.h (revision 281649) +++ projects/ifnet/sys/net/altq/altq_classq.h (revision 281650) @@ -1,206 +1,207 @@ -/* $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ */ - -/* +/*- * Copyright (c) 1991-1997 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Network Research * Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ + * $FreeBSD$ */ /* * class queue definitions extracted from rm_class.h. */ #ifndef _ALTQ_ALTQ_CLASSQ_H_ #define _ALTQ_ALTQ_CLASSQ_H_ #ifdef __cplusplus extern "C" { #endif /* * Packet Queue types: RED or DROPHEAD. */ #define Q_DROPHEAD 0x00 #define Q_RED 0x01 #define Q_RIO 0x02 #define Q_DROPTAIL 0x03 #ifdef _KERNEL /* * Packet Queue structures and macros to manipulate them. */ struct _class_queue_ { struct mbuf *tail_; /* Tail of packet queue */ int qlen_; /* Queue length (in number of packets) */ int qlim_; /* Queue limit (in number of packets*) */ int qtype_; /* Queue type */ }; typedef struct _class_queue_ class_queue_t; #define qtype(q) (q)->qtype_ /* Get queue type */ #define qlimit(q) (q)->qlim_ /* Max packets to be queued */ #define qlen(q) (q)->qlen_ /* Current queue length. */ #define qtail(q) (q)->tail_ /* Tail of the queue */ #define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL) #define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty?? */ #define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */ #define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */ #define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO) #if !defined(__GNUC__) || defined(ALTQ_DEBUG) extern void _addq(class_queue_t *, struct mbuf *); extern struct mbuf *_getq(class_queue_t *); extern struct mbuf *_getq_tail(class_queue_t *); extern struct mbuf *_getq_random(class_queue_t *); extern void _removeq(class_queue_t *, struct mbuf *); extern void _flushq(class_queue_t *); #else /* __GNUC__ && !ALTQ_DEBUG */ /* * inlined versions */ static __inline void _addq(class_queue_t *q, struct mbuf *m) { struct mbuf *m0; if ((m0 = qtail(q)) != NULL) m->m_nextpkt = m0->m_nextpkt; else m0 = m; m0->m_nextpkt = m; qtail(q) = m; qlen(q)++; } static __inline struct mbuf * _getq(class_queue_t *q) { struct mbuf *m, *m0; if ((m = qtail(q)) == NULL) return (NULL); if ((m0 = m->m_nextpkt) != m) m->m_nextpkt = m0->m_nextpkt; else qtail(q) = NULL; qlen(q)--; m0->m_nextpkt = NULL; return (m0); } /* drop a packet at the tail of the queue */ static __inline struct mbuf * _getq_tail(class_queue_t *q) { struct mbuf *m, *m0, *prev; if ((m = m0 = qtail(q)) == NULL) return NULL; do { prev = m0; m0 = m0->m_nextpkt; } while (m0 != m); prev->m_nextpkt = m->m_nextpkt; if (prev == m) qtail(q) = NULL; else qtail(q) = prev; qlen(q)--; m->m_nextpkt = NULL; return (m); } /* randomly select a packet in the queue */ static __inline struct mbuf * _getq_random(class_queue_t *q) { struct mbuf *m; int i, n; if ((m = qtail(q)) == NULL) return NULL; if (m->m_nextpkt == m) qtail(q) = NULL; else { struct mbuf *prev = NULL; n = random() % qlen(q) + 1; for (i = 0; i < n; i++) { prev = m; m = m->m_nextpkt; } prev->m_nextpkt = m->m_nextpkt; if (m == qtail(q)) qtail(q) = prev; } qlen(q)--; m->m_nextpkt = NULL; return (m); } static __inline void _removeq(class_queue_t *q, struct mbuf *m) { struct mbuf *m0, *prev; m0 = qtail(q); do { prev = m0; m0 = m0->m_nextpkt; } while (m0 != m); prev->m_nextpkt = m->m_nextpkt; if (prev == m) qtail(q) = NULL; else if (qtail(q) == m) qtail(q) = prev; qlen(q)--; } static __inline void _flushq(class_queue_t *q) { struct mbuf *m; while ((m = _getq(q)) != NULL) m_freem(m); } #endif /* __GNUC__ && !ALTQ_DEBUG */ #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_CLASSQ_H_ */ Index: projects/ifnet/sys/net/altq/altq_hfsc.c =================================================================== --- projects/ifnet/sys/net/altq/altq_hfsc.c (revision 281649) +++ projects/ifnet/sys/net/altq/altq_hfsc.c (revision 281650) @@ -1,2222 +1,2202 @@ -/* $FreeBSD$ */ -/* $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ */ - -/* +/*- * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. + * + * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ + * $FreeBSD$ */ /* * H-FSC is described in Proceedings of SIGCOMM'97, * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, * Real-Time and Priority Service" * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. * * Oleg Cherevko added the upperlimit for link-sharing. * when a class has an upperlimit, the fit-time is computed from the * upperlimit service curve. the link-sharing scheduler does not schedule * a class whose fit-time exceeds the current time. */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */ #include #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif /* ALTQ3_COMPAT */ #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* * function prototypes */ static int hfsc_clear_interface(struct hfsc_if *); static int hfsc_request(struct ifaltq *, int, void *); static void hfsc_purge(struct hfsc_if *); static struct hfsc_class *hfsc_class_create(struct hfsc_if *, struct service_curve *, struct service_curve *, struct service_curve *, struct hfsc_class *, int, int, int); static int hfsc_class_destroy(struct hfsc_class *); static struct hfsc_class *hfsc_nextclass(struct hfsc_class *); static int hfsc_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *hfsc_dequeue(struct ifaltq *, int); static int hfsc_addq(struct hfsc_class *, struct mbuf *); static struct mbuf *hfsc_getq(struct hfsc_class *); static struct mbuf *hfsc_pollq(struct hfsc_class *); static void hfsc_purgeq(struct hfsc_class *); static void update_cfmin(struct hfsc_class *); static void set_active(struct hfsc_class *, int); static void set_passive(struct hfsc_class *); static void init_ed(struct hfsc_class *, int); static void update_ed(struct hfsc_class *, int); static void update_d(struct hfsc_class *, int); static void init_vf(struct hfsc_class *, int); static void update_vf(struct hfsc_class *, int, u_int64_t); static void ellist_insert(struct hfsc_class *); static void ellist_remove(struct hfsc_class *); static void ellist_update(struct hfsc_class *); struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t); static void actlist_insert(struct hfsc_class *); static void actlist_remove(struct hfsc_class *); static void actlist_update(struct hfsc_class *); static struct hfsc_class *actlist_firstfit(struct hfsc_class *, u_int64_t); static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t); static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t); static __inline u_int64_t m2sm(u_int); static __inline u_int64_t m2ism(u_int); static __inline u_int64_t d2dx(u_int); static u_int sm2m(u_int64_t); static u_int dx2d(u_int64_t); static void sc2isc(struct service_curve *, struct internal_sc *); static void rtsc_init(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t); static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t); static void rtsc_min(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static void get_class_stats(struct hfsc_classstats *, struct hfsc_class *); static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t); #ifdef ALTQ3_COMPAT static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int); static int hfsc_detach(struct hfsc_if *); static int hfsc_class_modify(struct hfsc_class *, struct service_curve *, struct service_curve *, struct service_curve *); static int hfsccmd_if_attach(struct hfsc_attach *); static int hfsccmd_if_detach(struct hfsc_interface *); static int hfsccmd_add_class(struct hfsc_add_class *); static int hfsccmd_delete_class(struct hfsc_delete_class *); static int hfsccmd_modify_class(struct hfsc_modify_class *); static int hfsccmd_add_filter(struct hfsc_add_filter *); static int hfsccmd_delete_filter(struct hfsc_delete_filter *); static int hfsccmd_class_stats(struct hfsc_class_stats *); altqdev_decl(hfsc); #endif /* ALTQ3_COMPAT */ /* * macros */ #define is_a_parent_class(cl) ((cl)->cl_children != NULL) #define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */ #ifdef ALTQ3_COMPAT /* hif_list keeps all hfsc_if's allocated. */ static struct hfsc_if *hif_list = NULL; #endif /* ALTQ3_COMPAT */ int hfsc_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc, hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL); splx(s); return (error); } int hfsc_add_altq(struct pf_altq *a) { struct hfsc_if *hif; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (hif == NULL) return (ENOMEM); TAILQ_INIT(&hif->hif_eligible); hif->hif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = hif; return (0); } int hfsc_remove_altq(struct pf_altq *a) { struct hfsc_if *hif; if ((hif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)hfsc_clear_interface(hif); (void)hfsc_class_destroy(hif->hif_rootclass); free(hif, M_DEVBUF); return (0); } int hfsc_add_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl, *parent; struct hfsc_opts *opts; struct service_curve rtsc, lssc, ulsc; if ((hif = a->altq_disc) == NULL) return (EINVAL); opts = &a->pq_u.hfsc_opts; if (a->parent_qid == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL) parent = NULL; else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); if (clh_to_clp(hif, a->qid) != NULL) return (EBUSY); rtsc.m1 = opts->rtsc_m1; rtsc.d = opts->rtsc_d; rtsc.m2 = opts->rtsc_m2; lssc.m1 = opts->lssc_m1; lssc.d = opts->lssc_d; lssc.m2 = opts->lssc_m2; ulsc.m1 = opts->ulsc_m1; ulsc.d = opts->ulsc_d; ulsc.m2 = opts->ulsc_m2; cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc, parent, a->qlimit, opts->flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int hfsc_remove_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl; if ((hif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); return (hfsc_class_destroy(cl)); } int hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) { struct hfsc_if *hif; struct hfsc_class *cl; struct hfsc_classstats stats; int error = 0; if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes except the root class. */ static int hfsc_clear_interface(struct hfsc_if *hif) { struct hfsc_class *cl; #ifdef ALTQ3_COMPAT /* free the filters for this interface */ acc_discard_filters(&hif->hif_classifier, NULL, 1); #endif /* clear out the classes */ while (hif->hif_rootclass != NULL && (cl = hif->hif_rootclass->cl_children) != NULL) { /* * remove the first leaf class found in the hierarchy * then start over */ for (; cl != NULL; cl = hfsc_nextclass(cl)) { if (!is_a_parent_class(cl)) { (void)hfsc_class_destroy(cl); break; } } } return (0); } static int hfsc_request(struct ifaltq *ifq, int req, void *arg) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: hfsc_purge(hif); break; } return (0); } /* discard all the queued packets on the interface */ static void hfsc_purge(struct hfsc_if *hif) { struct hfsc_class *cl; for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (ALTQ_IS_ENABLED(hif->hif_ifq)) hif->hif_ifq->ifq_len = 0; } struct hfsc_class * hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, struct service_curve *fsc, struct service_curve *usc, struct hfsc_class *parent, int qlimit, int flags, int qid) { struct hfsc_class *cl, *p; int i, s; if (hif->hif_classes >= HFSC_MAX_CLASSES) return (NULL); #ifndef ALTQ_RED if (flags & HFCF_RED) { #ifdef ALTQ_DEBUG printf("hfsc_class_create: RED not configured for HFSC!\n"); #endif return (NULL); } #endif cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; TAILQ_INIT(&cl->cl_actc); if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; cl->cl_flags = flags; #ifdef ALTQ_RED if (flags & (HFCF_RED|HFCF_RIO)) { int red_flags, red_pkttime; u_int m2; m2 = 0; if (rsc != NULL && rsc->m2 > m2) m2 = rsc->m2; if (fsc != NULL && fsc->m2 > m2) m2 = fsc->m2; if (usc != NULL && usc->m2 > m2) m2 = usc->m2; red_flags = 0; if (flags & HFCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & HFCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (m2 < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (m2 / 8); if (flags & HFCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RED; } #ifdef ALTQ_RIO else { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RIO; } #endif } #endif /* ALTQ_RED */ if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) { cl->cl_rsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_rsc == NULL) goto err_ret; sc2isc(rsc, cl->cl_rsc); rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0); rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0); } if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) { cl->cl_fsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_fsc == NULL) goto err_ret; sc2isc(fsc, cl->cl_fsc); rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0); } if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) { cl->cl_usc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_usc == NULL) goto err_ret; sc2isc(usc, cl->cl_usc); rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0); } cl->cl_id = hif->hif_classid++; cl->cl_handle = qid; cl->cl_hif = hif; cl->cl_parent = parent; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(hif->hif_ifq); hif->hif_classes++; /* * find a free slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = qid % HFSC_MAX_CLASSES; if (hif->hif_class_tbl[i] == NULL) hif->hif_class_tbl[i] = cl; else { for (i = 0; i < HFSC_MAX_CLASSES; i++) if (hif->hif_class_tbl[i] == NULL) { hif->hif_class_tbl[i] = cl; break; } if (i == HFSC_MAX_CLASSES) { IFQ_UNLOCK(hif->hif_ifq); splx(s); goto err_ret; } } if (flags & HFCF_DEFAULTCLASS) hif->hif_defaultclass = cl; if (parent == NULL) { /* this is root class */ hif->hif_rootclass = cl; } else { /* add this class to the children list of the parent */ if ((p = parent->cl_children) == NULL) parent->cl_children = cl; else { while (p->cl_siblings != NULL) p = p->cl_siblings; p->cl_siblings = cl; } } IFQ_UNLOCK(hif->hif_ifq); splx(s); return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int hfsc_class_destroy(struct hfsc_class *cl) { int i, s; if (cl == NULL) return (0); if (is_a_parent_class(cl)) return (EBUSY); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(cl->cl_hif->hif_ifq); #ifdef ALTQ3_COMPAT /* delete filters referencing to this class */ acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0); #endif /* ALTQ3_COMPAT */ if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (cl->cl_parent == NULL) { /* this is root class */ } else { struct hfsc_class *p = cl->cl_parent->cl_children; if (p == cl) cl->cl_parent->cl_children = cl->cl_siblings; else do { if (p->cl_siblings == cl) { p->cl_siblings = cl->cl_siblings; break; } } while ((p = p->cl_siblings) != NULL); ASSERT(p != NULL); } for (i = 0; i < HFSC_MAX_CLASSES; i++) if (cl->cl_hif->hif_class_tbl[i] == cl) { cl->cl_hif->hif_class_tbl[i] = NULL; break; } cl->cl_hif->hif_classes--; IFQ_UNLOCK(cl->cl_hif->hif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } IFQ_LOCK(cl->cl_hif->hif_ifq); if (cl == cl->cl_hif->hif_rootclass) cl->cl_hif->hif_rootclass = NULL; if (cl == cl->cl_hif->hif_defaultclass) cl->cl_hif->hif_defaultclass = NULL; IFQ_UNLOCK(cl->cl_hif->hif_ifq); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * hfsc_nextclass returns the next class in the tree. * usage: * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) * do_something; */ static struct hfsc_class * hfsc_nextclass(struct hfsc_class *cl) { if (cl->cl_children != NULL) cl = cl->cl_children; else if (cl->cl_siblings != NULL) cl = cl->cl_siblings; else { while ((cl = cl->cl_parent) != NULL) if (cl->cl_siblings) { cl = cl->cl_siblings; break; } } return (cl); } /* * hfsc_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(hif, t->qid); #ifdef ALTQ3_COMPAT else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) cl = pktattr->pattr_class; #endif if (cl == NULL || is_a_parent_class(cl)) { cl = hif->hif_defaultclass; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } #ifdef ALTQ3_COMPAT if (pktattr != NULL) cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */ else #endif cl->cl_pktattr = NULL; len = m_pktlen(m); if (hfsc_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in hfsc_addq. */ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); cl->cl_hif->hif_packets++; /* successfully queued. */ if (qlen(cl->cl_q) == 1) set_active(cl, m_pktlen(m)); return (0); } /* * hfsc_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. */ static struct mbuf * hfsc_dequeue(struct ifaltq *ifq, int op) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct mbuf *m; int len, next_len; int realtime = 0; u_int64_t cur_time; IFQ_LOCK_ASSERT(ifq); if (hif->hif_packets == 0) /* no packet in the tree */ return (NULL); cur_time = read_machclk(); if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) { cl = hif->hif_pollcache; hif->hif_pollcache = NULL; /* check if the class was scheduled by real-time criteria */ if (cl->cl_rsc != NULL) realtime = (cl->cl_e <= cur_time); } else { /* * if there are eligible classes, use real-time criteria. * find the class with the minimum deadline among * the eligible classes. */ if ((cl = hfsc_get_mindl(hif, cur_time)) != NULL) { realtime = 1; } else { #ifdef ALTQ_DEBUG int fits = 0; #endif /* * use link-sharing criteria * get the class with the minimum vt in the hierarchy */ cl = hif->hif_rootclass; while (is_a_parent_class(cl)) { cl = actlist_firstfit(cl, cur_time); if (cl == NULL) { #ifdef ALTQ_DEBUG if (fits > 0) printf("%d fit but none found\n",fits); #endif return (NULL); } /* * update parent's cl_cvtmin. * don't update if the new vt is smaller. */ if (cl->cl_parent->cl_cvtmin < cl->cl_vt) cl->cl_parent->cl_cvtmin = cl->cl_vt; #ifdef ALTQ_DEBUG fits++; #endif } } if (op == ALTDQ_POLL) { hif->hif_pollcache = cl; m = hfsc_pollq(cl); return (m); } } m = hfsc_getq(cl); if (m == NULL) panic("hfsc_dequeue:"); len = m_pktlen(m); cl->cl_hif->hif_packets--; IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len); update_vf(cl, len, cur_time); if (realtime) cl->cl_cumul += len; if (!qempty(cl->cl_q)) { if (cl->cl_rsc != NULL) { /* update ed */ next_len = m_pktlen(qhead(cl->cl_q)); if (realtime) update_ed(cl, next_len); else update_d(cl, next_len); } } else { /* the class becomes passive */ set_passive(cl); } return (m); } static int hfsc_addq(struct hfsc_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & HFCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * hfsc_getq(struct hfsc_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * hfsc_pollq(struct hfsc_class *cl) { return qhead(cl->cl_q); } static void hfsc_purgeq(struct hfsc_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m)); m_freem(m); cl->cl_hif->hif_packets--; IFQ_DEC_LEN(cl->cl_hif->hif_ifq); } ASSERT(qlen(cl->cl_q) == 0); update_vf(cl, 0, 0); /* remove cl from the actlist */ set_passive(cl); } static void set_active(struct hfsc_class *cl, int len) { if (cl->cl_rsc != NULL) init_ed(cl, len); if (cl->cl_fsc != NULL) init_vf(cl, len); cl->cl_stats.period++; } static void set_passive(struct hfsc_class *cl) { if (cl->cl_rsc != NULL) ellist_remove(cl); /* * actlist is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from actlist */ } static void init_ed(struct hfsc_class *cl, int next_len) { u_int64_t cur_time; cur_time = read_machclk(); /* update the deadline curve */ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul); /* * update the eligible curve. * for concave, it is equal to the deadline curve. * for convex, it is a linear curve with slope m2. */ cl->cl_eligible = cl->cl_deadline; if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) { cl->cl_eligible.dx = 0; cl->cl_eligible.dy = 0; } /* compute e and d */ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); ellist_insert(cl); } static void update_ed(struct hfsc_class *cl, int next_len) { cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); ellist_update(cl); } static void update_d(struct hfsc_class *cl, int next_len) { cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); } static void init_vf(struct hfsc_class *cl, int len) { struct hfsc_class *max_cl, *p; u_int64_t vt, f, cur_time; int go_active; cur_time = 0; go_active = 1; for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) { if (go_active && cl->cl_nactive++ == 0) go_active = 1; else go_active = 0; if (go_active) { max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); if (max_cl != NULL) { /* * set vt to the average of the min and max * classes. if the parent's period didn't * change, don't decrease vt of the class. */ vt = max_cl->cl_vt; if (cl->cl_parent->cl_cvtmin != 0) vt = (cl->cl_parent->cl_cvtmin + vt)/2; if (cl->cl_parent->cl_vtperiod != cl->cl_parentperiod || vt > cl->cl_vt) cl->cl_vt = vt; } else { /* * first child for a new parent backlog period. * add parent's cvtmax to vtoff of children * to make a new vt (vtoff + vt) larger than * the vt in the last period for all children. */ vt = cl->cl_parent->cl_cvtmax; for (p = cl->cl_parent->cl_children; p != NULL; p = p->cl_siblings) p->cl_vtoff += vt; cl->cl_vt = 0; cl->cl_parent->cl_cvtmax = 0; cl->cl_parent->cl_cvtmin = 0; } cl->cl_initvt = cl->cl_vt; /* update the virtual curve */ vt = cl->cl_vt + cl->cl_vtoff; rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total); if (cl->cl_virtual.x == vt) { cl->cl_virtual.x -= cl->cl_vtoff; cl->cl_vtoff = 0; } cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod; if (cl->cl_parent->cl_nactive == 0) cl->cl_parentperiod++; cl->cl_f = 0; actlist_insert(cl); if (cl->cl_usc != NULL) { /* class has upper limit curve */ if (cur_time == 0) cur_time = read_machclk(); /* update the ulimit curve */ rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time, cl->cl_total); /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); cl->cl_myfadj = 0; } } if (cl->cl_myf > cl->cl_cfmin) f = cl->cl_myf; else f = cl->cl_cfmin; if (f != cl->cl_f) { cl->cl_f = f; update_cfmin(cl->cl_parent); } } } static void update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time) { u_int64_t f, myf_bound, delta; int go_passive; go_passive = qempty(cl->cl_q); for (; cl->cl_parent != NULL; cl = cl->cl_parent) { cl->cl_total += len; if (cl->cl_fsc == NULL || cl->cl_nactive == 0) continue; if (go_passive && --cl->cl_nactive == 0) go_passive = 1; else go_passive = 0; if (go_passive) { /* no more active child, going passive */ /* update cvtmax of the parent class */ if (cl->cl_vt > cl->cl_parent->cl_cvtmax) cl->cl_parent->cl_cvtmax = cl->cl_vt; /* remove this class from the vt list */ actlist_remove(cl); update_cfmin(cl->cl_parent); continue; } /* * update vt and f */ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - cl->cl_vtoff + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, * the class was skipped in the past due to non-fit. * if so, we need to adjust vtadj. */ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; cl->cl_vt = cl->cl_parent->cl_cvtmin; } /* update the vt list */ actlist_update(cl); if (cl->cl_usc != NULL) { cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); /* * if myf lags behind by more than one clock tick * from the current time, adjust myfadj to prevent * a rate-limited class from going greedy. * in a steady state under rate-limiting, myf * fluctuates within one clock tick. */ myf_bound = cur_time - machclk_per_tick; if (cl->cl_myf < myf_bound) { delta = cur_time - cl->cl_myf; cl->cl_myfadj += delta; cl->cl_myf += delta; } } /* cl_f is max(cl_myf, cl_cfmin) */ if (cl->cl_myf > cl->cl_cfmin) f = cl->cl_myf; else f = cl->cl_cfmin; if (f != cl->cl_f) { cl->cl_f = f; update_cfmin(cl->cl_parent); } } } static void update_cfmin(struct hfsc_class *cl) { struct hfsc_class *p; u_int64_t cfmin; if (TAILQ_EMPTY(&cl->cl_actc)) { cl->cl_cfmin = 0; return; } cfmin = HT_INFINITY; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f == 0) { cl->cl_cfmin = 0; return; } if (p->cl_f < cfmin) cfmin = p->cl_f; } cl->cl_cfmin = cfmin; } /* * TAILQ based ellist and actlist implementation * (ion wanted to make a calendar queue based implementation) */ /* * eligible list holds backlogged classes being sorted by their eligible times. * there is one eligible list per interface. */ static void ellist_insert(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL || p->cl_e <= cl->cl_e) { TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (cl->cl_e < p->cl_e) { TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } static void ellist_remove(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); } static void ellist_update(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p, *last; /* * the eligible time of a class increases monotonically. * if the next entry has a larger eligible time, nothing to do. */ p = TAILQ_NEXT(cl, cl_ellist); if (p == NULL || cl->cl_e <= p->cl_e) return; /* check the last entry */ last = TAILQ_LAST(&hif->hif_eligible, elighead); ASSERT(last != NULL); if (last->cl_e <= cl->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) { if (cl->cl_e < p->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } /* find the class with the minimum deadline among the eligible classes */ struct hfsc_class * hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time) { struct hfsc_class *p, *cl = NULL; TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (p->cl_e > cur_time) break; if (cl == NULL || p->cl_d < cl->cl_d) cl = p; } return (cl); } /* * active children list holds backlogged child classes being sorted * by their virtual time. * each intermediate class has one active children list. */ static void actlist_insert(struct hfsc_class *cl) { struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL || p->cl_vt <= cl->cl_vt) { TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) { if (cl->cl_vt < p->cl_vt) { TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static void actlist_remove(struct hfsc_class *cl) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); } static void actlist_update(struct hfsc_class *cl) { struct hfsc_class *p, *last; /* * the virtual time of a class increases monotonically during its * backlogged period. * if the next entry has a larger virtual time, nothing to do. */ p = TAILQ_NEXT(cl, cl_actlist); if (p == NULL || cl->cl_vt < p->cl_vt) return; /* check the last entry */ last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); ASSERT(last != NULL); if (last->cl_vt <= cl->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) { if (cl->cl_vt < p->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static struct hfsc_class * actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time) { struct hfsc_class *p; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f <= cur_time) return (p); } return (NULL); } /* * service curve support functions * * external service curve parameters * m: bits/sec * d: msec * internal service curve parameters * sm: (bytes/tsc_interval) << SM_SHIFT * ism: (tsc_count/byte) << ISM_SHIFT * dx: tsc_count * * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective * digits in decimal using the following table. * * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps * ----------+------------------------------------------------------- * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6 * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6 * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6 * * nsec/byte 80000 8000 800 80 8 * ism(500MHz) 40000 4000 400 40 4 * ism(200MHz) 16000 1600 160 16 1.6 */ #define SM_SHIFT 24 #define ISM_SHIFT 10 #define SM_MASK ((1LL << SM_SHIFT) - 1) #define ISM_MASK ((1LL << ISM_SHIFT) - 1) static __inline u_int64_t seg_x2y(u_int64_t x, u_int64_t sm) { u_int64_t y; /* * compute * y = x * sm >> SM_SHIFT * but divide it for the upper and lower bits to avoid overflow */ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); return (y); } static __inline u_int64_t seg_y2x(u_int64_t y, u_int64_t ism) { u_int64_t x; if (y == 0) x = 0; else if (ism == HT_INFINITY) x = HT_INFINITY; else { x = (y >> ISM_SHIFT) * ism + (((y & ISM_MASK) * ism) >> ISM_SHIFT); } return (x); } static __inline u_int64_t m2sm(u_int m) { u_int64_t sm; sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq; return (sm); } static __inline u_int64_t m2ism(u_int m) { u_int64_t ism; if (m == 0) ism = HT_INFINITY; else ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m; return (ism); } static __inline u_int64_t d2dx(u_int d) { u_int64_t dx; dx = ((u_int64_t)d * machclk_freq) / 1000; return (dx); } static u_int sm2m(u_int64_t sm) { u_int64_t m; m = (sm * 8 * machclk_freq) >> SM_SHIFT; return ((u_int)m); } static u_int dx2d(u_int64_t dx) { u_int64_t d; d = dx * 1000 / machclk_freq; return ((u_int)d); } static void sc2isc(struct service_curve *sc, struct internal_sc *isc) { isc->sm1 = m2sm(sc->m1); isc->ism1 = m2ism(sc->m1); isc->dx = d2dx(sc->d); isc->dy = seg_x2y(isc->dx, isc->sm1); isc->sm2 = m2sm(sc->m2); isc->ism2 = m2ism(sc->m2); } /* * initialize the runtime service curve with the given internal * service curve starting at (x, y). */ static void rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x, u_int64_t y) { rtsc->x = x; rtsc->y = y; rtsc->sm1 = isc->sm1; rtsc->ism1 = isc->ism1; rtsc->dx = isc->dx; rtsc->dy = isc->dy; rtsc->sm2 = isc->sm2; rtsc->ism2 = isc->ism2; } /* * calculate the y-projection of the runtime service curve by the * given x-projection value */ static u_int64_t rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y) { u_int64_t x; if (y < rtsc->y) x = rtsc->x; else if (y <= rtsc->y + rtsc->dy) { /* x belongs to the 1st segment */ if (rtsc->dy == 0) x = rtsc->x + rtsc->dx; else x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); } else { /* x belongs to the 2nd segment */ x = rtsc->x + rtsc->dx + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); } return (x); } static u_int64_t rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x) { u_int64_t y; if (x <= rtsc->x) y = rtsc->y; else if (x <= rtsc->x + rtsc->dx) /* y belongs to the 1st segment */ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); else /* y belongs to the 2nd segment */ y = rtsc->y + rtsc->dy + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); return (y); } /* * update the runtime service curve by taking the minimum of the current * runtime service curve and the service curve starting at (x, y). */ static void rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x, u_int64_t y) { u_int64_t y1, y2, dx, dy; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ y1 = rtsc_x2y(rtsc, x); if (y1 < y) /* the current rtsc is smaller */ return; rtsc->x = x; rtsc->y = y; return; } /* * service curve is concave * compute the two y values of the current rtsc * y1: at x * y2: at (x + dx) */ y1 = rtsc_x2y(rtsc, x); if (y1 <= y) { /* rtsc is below isc, no change to rtsc */ return; } y2 = rtsc_x2y(rtsc, x + isc->dx); if (y2 >= y + isc->dy) { /* rtsc is above isc, replace rtsc by isc */ rtsc->x = x; rtsc->y = y; rtsc->dx = isc->dx; rtsc->dy = isc->dy; return; } /* * the two curves intersect * compute the offsets (dx, dy) using the reverse * function of seg_x2y() * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) */ dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2); /* * check if (x, y1) belongs to the 1st segment of rtsc. * if so, add the offset. */ if (rtsc->x + rtsc->dx > x) dx += rtsc->x + rtsc->dx - x; dy = seg_x2y(dx, isc->sm1); rtsc->x = x; rtsc->y = y; rtsc->dx = dx; rtsc->dy = dy; return; } static void get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl) { sp->class_id = cl->cl_id; sp->class_handle = cl->cl_handle; if (cl->cl_rsc != NULL) { sp->rsc.m1 = sm2m(cl->cl_rsc->sm1); sp->rsc.d = dx2d(cl->cl_rsc->dx); sp->rsc.m2 = sm2m(cl->cl_rsc->sm2); } else { sp->rsc.m1 = 0; sp->rsc.d = 0; sp->rsc.m2 = 0; } if (cl->cl_fsc != NULL) { sp->fsc.m1 = sm2m(cl->cl_fsc->sm1); sp->fsc.d = dx2d(cl->cl_fsc->dx); sp->fsc.m2 = sm2m(cl->cl_fsc->sm2); } else { sp->fsc.m1 = 0; sp->fsc.d = 0; sp->fsc.m2 = 0; } if (cl->cl_usc != NULL) { sp->usc.m1 = sm2m(cl->cl_usc->sm1); sp->usc.d = dx2d(cl->cl_usc->dx); sp->usc.m2 = sm2m(cl->cl_usc->sm2); } else { sp->usc.m1 = 0; sp->usc.d = 0; sp->usc.m2 = 0; } sp->total = cl->cl_total; sp->cumul = cl->cl_cumul; sp->d = cl->cl_d; sp->e = cl->cl_e; sp->vt = cl->cl_vt; sp->f = cl->cl_f; sp->initvt = cl->cl_initvt; sp->vtperiod = cl->cl_vtperiod; sp->parentperiod = cl->cl_parentperiod; sp->nactive = cl->cl_nactive; sp->vtoff = cl->cl_vtoff; sp->cvtmax = cl->cl_cvtmax; sp->myf = cl->cl_myf; sp->cfmin = cl->cl_cfmin; sp->cvtmin = cl->cl_cvtmin; sp->myfadj = cl->cl_myfadj; sp->vtadj = cl->cl_vtadj; sp->cur_time = read_machclk(); sp->machclk_freq = machclk_freq; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->xmit_cnt = cl->cl_stats.xmit_cnt; sp->drop_cnt = cl->cl_stats.drop_cnt; sp->period = cl->cl_stats.period; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif } /* convert a class handle to the corresponding class pointer */ static struct hfsc_class * clh_to_clp(struct hfsc_if *hif, u_int32_t chandle) { int i; struct hfsc_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. */ i = chandle % HFSC_MAX_CLASSES; if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); for (i = 0; i < HFSC_MAX_CLASSES; i++) if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #ifdef ALTQ3_COMPAT static struct hfsc_if * hfsc_attach(ifq, bandwidth) struct ifaltq *ifq; u_int bandwidth; { struct hfsc_if *hif; hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK); if (hif == NULL) return (NULL); bzero(hif, sizeof(struct hfsc_if)); hif->hif_eligible = ellist_alloc(); if (hif->hif_eligible == NULL) { free(hif, M_DEVBUF); return NULL; } hif->hif_ifq = ifq; /* add this state to the hfsc list */ hif->hif_next = hif_list; hif_list = hif; return (hif); } static int hfsc_detach(hif) struct hfsc_if *hif; { (void)hfsc_clear_interface(hif); (void)hfsc_class_destroy(hif->hif_rootclass); /* remove this interface from the hif list */ if (hif_list == hif) hif_list = hif->hif_next; else { struct hfsc_if *h; for (h = hif_list; h != NULL; h = h->hif_next) if (h->hif_next == hif) { h->hif_next = hif->hif_next; break; } ASSERT(h != NULL); } ellist_destroy(hif->hif_eligible); free(hif, M_DEVBUF); return (0); } static int hfsc_class_modify(cl, rsc, fsc, usc) struct hfsc_class *cl; struct service_curve *rsc, *fsc, *usc; { struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp; u_int64_t cur_time; int s; rsc_tmp = fsc_tmp = usc_tmp = NULL; if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) && cl->cl_rsc == NULL) { rsc_tmp = malloc(sizeof(struct internal_sc), M_DEVBUF, M_WAITOK); if (rsc_tmp == NULL) return (ENOMEM); } if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) && cl->cl_fsc == NULL) { fsc_tmp = malloc(sizeof(struct internal_sc), M_DEVBUF, M_WAITOK); if (fsc_tmp == NULL) { free(rsc_tmp); return (ENOMEM); } } if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) && cl->cl_usc == NULL) { usc_tmp = malloc(sizeof(struct internal_sc), M_DEVBUF, M_WAITOK); if (usc_tmp == NULL) { free(rsc_tmp); free(fsc_tmp); return (ENOMEM); } } cur_time = read_machclk(); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(cl->cl_hif->hif_ifq); if (rsc != NULL) { if (rsc->m1 == 0 && rsc->m2 == 0) { if (cl->cl_rsc != NULL) { if (!qempty(cl->cl_q)) hfsc_purgeq(cl); free(cl->cl_rsc, M_DEVBUF); cl->cl_rsc = NULL; } } else { if (cl->cl_rsc == NULL) cl->cl_rsc = rsc_tmp; sc2isc(rsc, cl->cl_rsc); rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul); cl->cl_eligible = cl->cl_deadline; if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) { cl->cl_eligible.dx = 0; cl->cl_eligible.dy = 0; } } } if (fsc != NULL) { if (fsc->m1 == 0 && fsc->m2 == 0) { if (cl->cl_fsc != NULL) { if (!qempty(cl->cl_q)) hfsc_purgeq(cl); free(cl->cl_fsc, M_DEVBUF); cl->cl_fsc = NULL; } } else { if (cl->cl_fsc == NULL) cl->cl_fsc = fsc_tmp; sc2isc(fsc, cl->cl_fsc); rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt, cl->cl_total); } } if (usc != NULL) { if (usc->m1 == 0 && usc->m2 == 0) { if (cl->cl_usc != NULL) { free(cl->cl_usc, M_DEVBUF); cl->cl_usc = NULL; cl->cl_myf = 0; } } else { if (cl->cl_usc == NULL) cl->cl_usc = usc_tmp; sc2isc(usc, cl->cl_usc); rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time, cl->cl_total); } } if (!qempty(cl->cl_q)) { if (cl->cl_rsc != NULL) update_ed(cl, m_pktlen(qhead(cl->cl_q))); if (cl->cl_fsc != NULL) update_vf(cl, 0, cur_time); /* is this enough? */ } IFQ_UNLOCK(cl->cl_hif->hif_ifq); splx(s); return (0); } /* * hfsc device interface */ int hfscopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("hfsc: no cpu clock available!\n"); return (ENXIO); } /* everything will be done when the queueing scheme is attached. */ return 0; } int hfscclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct hfsc_if *hif; int err, error = 0; while ((hif = hif_list) != NULL) { /* destroy all */ if (ALTQ_IS_ENABLED(hif->hif_ifq)) altq_disable(hif->hif_ifq); err = altq_detach(hif->hif_ifq); if (err == 0) err = hfsc_detach(hif); if (err != 0 && error == 0) error = err; } return error; } int hfscioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct hfsc_if *hif; struct hfsc_interface *ifacep; int error = 0; /* check super-user privilege */ switch (cmd) { case HFSC_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case HFSC_IF_ATTACH: error = hfsccmd_if_attach((struct hfsc_attach *)addr); break; case HFSC_IF_DETACH: error = hfsccmd_if_detach((struct hfsc_interface *)addr); break; case HFSC_ENABLE: case HFSC_DISABLE: case HFSC_CLEAR_HIERARCHY: ifacep = (struct hfsc_interface *)addr; if ((hif = altq_lookup(ifacep->hfsc_ifname, ALTQT_HFSC)) == NULL) { error = EBADF; break; } switch (cmd) { case HFSC_ENABLE: if (hif->hif_defaultclass == NULL) { #ifdef ALTQ_DEBUG printf("hfsc: no default class\n"); #endif error = EINVAL; break; } error = altq_enable(hif->hif_ifq); break; case HFSC_DISABLE: error = altq_disable(hif->hif_ifq); break; case HFSC_CLEAR_HIERARCHY: hfsc_clear_interface(hif); break; } break; case HFSC_ADD_CLASS: error = hfsccmd_add_class((struct hfsc_add_class *)addr); break; case HFSC_DEL_CLASS: error = hfsccmd_delete_class((struct hfsc_delete_class *)addr); break; case HFSC_MOD_CLASS: error = hfsccmd_modify_class((struct hfsc_modify_class *)addr); break; case HFSC_ADD_FILTER: error = hfsccmd_add_filter((struct hfsc_add_filter *)addr); break; case HFSC_DEL_FILTER: error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr); break; case HFSC_GETSTATS: error = hfsccmd_class_stats((struct hfsc_class_stats *)addr); break; default: error = EINVAL; break; } return error; } static int hfsccmd_if_attach(ap) struct hfsc_attach *ap; { struct hfsc_if *hif; struct ifnet *ifp; int error; if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL) return (ENXIO); if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL) return (ENOMEM); /* * set HFSC to this ifnet structure. */ if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif, hfsc_enqueue, hfsc_dequeue, hfsc_request, &hif->hif_classifier, acc_classify)) != 0) (void)hfsc_detach(hif); return (error); } static int hfsccmd_if_detach(ap) struct hfsc_interface *ap; { struct hfsc_if *hif; int error; if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if (ALTQ_IS_ENABLED(hif->hif_ifq)) altq_disable(hif->hif_ifq); if ((error = altq_detach(hif->hif_ifq))) return (error); return hfsc_detach(hif); } static int hfsccmd_add_class(ap) struct hfsc_add_class *ap; { struct hfsc_if *hif; struct hfsc_class *cl, *parent; int i; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if (ap->parent_handle == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL) parent = NULL; else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL) return (EINVAL); /* assign a class handle (use a free slot number for now) */ for (i = 1; i < HFSC_MAX_CLASSES; i++) if (hif->hif_class_tbl[i] == NULL) break; if (i == HFSC_MAX_CLASSES) return (EBUSY); if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL, parent, ap->qlimit, ap->flags, i)) == NULL) return (ENOMEM); /* return a class handle to the user */ ap->class_handle = i; return (0); } static int hfsccmd_delete_class(ap) struct hfsc_delete_class *ap; { struct hfsc_if *hif; struct hfsc_class *cl; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) return (EINVAL); return hfsc_class_destroy(cl); } static int hfsccmd_modify_class(ap) struct hfsc_modify_class *ap; { struct hfsc_if *hif; struct hfsc_class *cl; struct service_curve *rsc = NULL; struct service_curve *fsc = NULL; struct service_curve *usc = NULL; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) return (EINVAL); if (ap->sctype & HFSC_REALTIMESC) rsc = &ap->service_curve; if (ap->sctype & HFSC_LINKSHARINGSC) fsc = &ap->service_curve; if (ap->sctype & HFSC_UPPERLIMITSC) usc = &ap->service_curve; return hfsc_class_modify(cl, rsc, fsc, usc); } static int hfsccmd_add_filter(ap) struct hfsc_add_filter *ap; { struct hfsc_if *hif; struct hfsc_class *cl; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) return (EINVAL); if (is_a_parent_class(cl)) { #ifdef ALTQ_DEBUG printf("hfsccmd_add_filter: not a leaf class!\n"); #endif return (EINVAL); } return acc_add_filter(&hif->hif_classifier, &ap->filter, cl, &ap->filter_handle); } static int hfsccmd_delete_filter(ap) struct hfsc_delete_filter *ap; { struct hfsc_if *hif; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); return acc_delete_filter(&hif->hif_classifier, ap->filter_handle); } static int hfsccmd_class_stats(ap) struct hfsc_class_stats *ap; { struct hfsc_if *hif; struct hfsc_class *cl; struct hfsc_classstats stats, *usp; int n, nclasses, error; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); ap->cur_time = read_machclk(); ap->machclk_freq = machclk_freq; ap->hif_classes = hif->hif_classes; ap->hif_packets = hif->hif_packets; /* skip the first N classes in the tree */ nclasses = ap->nskip; for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) ; if (n != nclasses) return (EINVAL); /* then, read the next N classes in the tree */ nclasses = ap->nclasses; usp = ap->stats; for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) { get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } ap->nclasses = n; return (0); } #ifdef KLD_MODULE static struct altqsw hfsc_sw = {"hfsc", hfscopen, hfscclose, hfscioctl}; ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw); MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1); MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_HFSC */ Index: projects/ifnet/sys/net/altq/altq_priq.c =================================================================== --- projects/ifnet/sys/net/altq/altq_priq.c (revision 281649) +++ projects/ifnet/sys/net/altq/altq_priq.c (revision 281650) @@ -1,1046 +1,1031 @@ -/* $FreeBSD$ */ -/* $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ */ -/* +/*- * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ + * $FreeBSD$ */ /* * priority queue */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif #include /* * function prototypes */ #ifdef ALTQ3_COMPAT static struct priq_if *priq_attach(struct ifaltq *, u_int); static int priq_detach(struct priq_if *); #endif static int priq_clear_interface(struct priq_if *); static int priq_request(struct ifaltq *, int, void *); static void priq_purge(struct priq_if *); static struct priq_class *priq_class_create(struct priq_if *, int, int, int, int); static int priq_class_destroy(struct priq_class *); static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *priq_dequeue(struct ifaltq *, int); static int priq_addq(struct priq_class *, struct mbuf *); static struct mbuf *priq_getq(struct priq_class *); static struct mbuf *priq_pollq(struct priq_class *); static void priq_purgeq(struct priq_class *); #ifdef ALTQ3_COMPAT static int priqcmd_if_attach(struct priq_interface *); static int priqcmd_if_detach(struct priq_interface *); static int priqcmd_add_class(struct priq_add_class *); static int priqcmd_delete_class(struct priq_delete_class *); static int priqcmd_modify_class(struct priq_modify_class *); static int priqcmd_add_filter(struct priq_add_filter *); static int priqcmd_delete_filter(struct priq_delete_filter *); static int priqcmd_class_stats(struct priq_class_stats *); #endif /* ALTQ3_COMPAT */ static void get_class_stats(struct priq_classstats *, struct priq_class *); static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t); #ifdef ALTQ3_COMPAT altqdev_decl(priq); /* pif_list keeps all priq_if's allocated. */ static struct priq_if *pif_list = NULL; #endif /* ALTQ3_COMPAT */ int priq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc, priq_enqueue, priq_dequeue, priq_request, NULL, NULL); splx(s); return (error); } int priq_add_altq(struct pf_altq *a) { struct priq_if *pif; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (pif == NULL) return (ENOMEM); pif->pif_bandwidth = a->ifbandwidth; pif->pif_maxpri = -1; pif->pif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = pif; return (0); } int priq_remove_altq(struct pf_altq *a) { struct priq_if *pif; if ((pif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)priq_clear_interface(pif); free(pif, M_DEVBUF); return (0); } int priq_add_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); /* check parameters */ if (a->priority >= PRIQ_MAXPRI) return (EINVAL); if (a->qid == 0) return (EINVAL); if (pif->pif_classes[a->priority] != NULL) return (EBUSY); if (clh_to_clp(pif, a->qid) != NULL) return (EBUSY); cl = priq_class_create(pif, a->priority, a->qlimit, a->pq_u.priq_opts.flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int priq_remove_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); return (priq_class_destroy(cl)); } int priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) { struct priq_if *pif; struct priq_class *cl; struct priq_classstats stats; int error = 0; if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes. */ static int priq_clear_interface(struct priq_if *pif) { struct priq_class *cl; int pri; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&pif->pif_classifier, NULL, 1); #endif /* clear out the classes */ for (pri = 0; pri <= pif->pif_maxpri; pri++) if ((cl = pif->pif_classes[pri]) != NULL) priq_class_destroy(cl); return (0); } static int priq_request(struct ifaltq *ifq, int req, void *arg) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: priq_purge(pif); break; } return (0); } /* discard all the queued packets on the interface */ static void priq_purge(struct priq_if *pif) { struct priq_class *cl; int pri; for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) priq_purgeq(cl); } if (ALTQ_IS_ENABLED(pif->pif_ifq)) pif->pif_ifq->ifq_len = 0; } static struct priq_class * priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) { struct priq_class *cl; int s; #ifndef ALTQ_RED if (flags & PRCF_RED) { #ifdef ALTQ_DEBUG printf("priq_class_create: RED not configured for PRIQ!\n"); #endif return (NULL); } #endif if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(cl->cl_pif->pif_ifq); if (!qempty(cl->cl_q)) priq_purgeq(cl); IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } else { cl = malloc(sizeof(struct priq_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; } pif->pif_classes[pri] = cl; if (flags & PRCF_DEFAULTCLASS) pif->pif_default = cl; if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; cl->cl_flags = flags; cl->cl_pri = pri; if (pri > pif->pif_maxpri) pif->pif_maxpri = pri; cl->cl_pif = pif; cl->cl_handle = qid; #ifdef ALTQ_RED if (flags & (PRCF_RED|PRCF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & PRCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & PRCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (pif->pif_bandwidth < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); #ifdef ALTQ_RIO if (flags & PRCF_RIO) { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RIO; } else #endif if (flags & PRCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RED; } } #endif /* ALTQ_RED */ return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int priq_class_destroy(struct priq_class *cl) { struct priq_if *pif; int s, pri; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(cl->cl_pif->pif_ifq); #ifdef ALTQ3_CLFIER_COMPAT /* delete filters referencing to this class */ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0); #endif if (!qempty(cl->cl_q)) priq_purgeq(cl); pif = cl->cl_pif; pif->pif_classes[cl->cl_pri] = NULL; if (pif->pif_maxpri == cl->cl_pri) { for (pri = cl->cl_pri; pri >= 0; pri--) if (pif->pif_classes[pri] != NULL) { pif->pif_maxpri = pri; break; } if (pri < 0) pif->pif_maxpri = -1; } IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * priq_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(pif, t->qid); #ifdef ALTQ3_COMPAT else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) cl = pktattr->pattr_class; #endif if (cl == NULL) { cl = pif->pif_default; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } #ifdef ALTQ3_COMPAT if (pktattr != NULL) cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */ else #endif cl->cl_pktattr = NULL; len = m_pktlen(m); if (priq_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in priq_addq. */ PKTCNTR_ADD(&cl->cl_dropcnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); /* successfully queued. */ return (0); } /* * priq_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. */ static struct mbuf * priq_dequeue(struct ifaltq *ifq, int op) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct mbuf *m; int pri; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) /* no packet in the queue */ return (NULL); for (pri = pif->pif_maxpri; pri >= 0; pri--) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) { if (op == ALTDQ_POLL) return (priq_pollq(cl)); m = priq_getq(cl); if (m != NULL) { IFQ_DEC_LEN(ifq); if (qempty(cl->cl_q)) cl->cl_period++; PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m)); } return (m); } } return (NULL); } static int priq_addq(struct priq_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & PRCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * priq_getq(struct priq_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * priq_pollq(cl) struct priq_class *cl; { return qhead(cl->cl_q); } static void priq_purgeq(struct priq_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); m_freem(m); } ASSERT(qlen(cl->cl_q) == 0); } static void get_class_stats(struct priq_classstats *sp, struct priq_class *cl) { sp->class_handle = cl->cl_handle; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->period = cl->cl_period; sp->xmitcnt = cl->cl_xmitcnt; sp->dropcnt = cl->cl_dropcnt; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif } /* convert a class handle to the corresponding class pointer */ static struct priq_class * clh_to_clp(struct priq_if *pif, u_int32_t chandle) { struct priq_class *cl; int idx; if (chandle == 0) return (NULL); for (idx = pif->pif_maxpri; idx >= 0; idx--) if ((cl = pif->pif_classes[idx]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #ifdef ALTQ3_COMPAT static struct priq_if * priq_attach(ifq, bandwidth) struct ifaltq *ifq; u_int bandwidth; { struct priq_if *pif; pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_WAITOK); if (pif == NULL) return (NULL); bzero(pif, sizeof(struct priq_if)); pif->pif_bandwidth = bandwidth; pif->pif_maxpri = -1; pif->pif_ifq = ifq; /* add this state to the priq list */ pif->pif_next = pif_list; pif_list = pif; return (pif); } static int priq_detach(pif) struct priq_if *pif; { (void)priq_clear_interface(pif); /* remove this interface from the pif list */ if (pif_list == pif) pif_list = pif->pif_next; else { struct priq_if *p; for (p = pif_list; p != NULL; p = p->pif_next) if (p->pif_next == pif) { p->pif_next = pif->pif_next; break; } ASSERT(p != NULL); } free(pif, M_DEVBUF); return (0); } /* * priq device interface */ int priqopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { /* everything will be done when the queueing scheme is attached. */ return 0; } int priqclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct priq_if *pif; int err, error = 0; while ((pif = pif_list) != NULL) { /* destroy all */ if (ALTQ_IS_ENABLED(pif->pif_ifq)) altq_disable(pif->pif_ifq); err = altq_detach(pif->pif_ifq); if (err == 0) err = priq_detach(pif); if (err != 0 && error == 0) error = err; } return error; } int priqioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct priq_if *pif; struct priq_interface *ifacep; int error = 0; /* check super-user privilege */ switch (cmd) { case PRIQ_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case PRIQ_IF_ATTACH: error = priqcmd_if_attach((struct priq_interface *)addr); break; case PRIQ_IF_DETACH: error = priqcmd_if_detach((struct priq_interface *)addr); break; case PRIQ_ENABLE: case PRIQ_DISABLE: case PRIQ_CLEAR: ifacep = (struct priq_interface *)addr; if ((pif = altq_lookup(ifacep->ifname, ALTQT_PRIQ)) == NULL) { error = EBADF; break; } switch (cmd) { case PRIQ_ENABLE: if (pif->pif_default == NULL) { #ifdef ALTQ_DEBUG printf("priq: no default class\n"); #endif error = EINVAL; break; } error = altq_enable(pif->pif_ifq); break; case PRIQ_DISABLE: error = altq_disable(pif->pif_ifq); break; case PRIQ_CLEAR: priq_clear_interface(pif); break; } break; case PRIQ_ADD_CLASS: error = priqcmd_add_class((struct priq_add_class *)addr); break; case PRIQ_DEL_CLASS: error = priqcmd_delete_class((struct priq_delete_class *)addr); break; case PRIQ_MOD_CLASS: error = priqcmd_modify_class((struct priq_modify_class *)addr); break; case PRIQ_ADD_FILTER: error = priqcmd_add_filter((struct priq_add_filter *)addr); break; case PRIQ_DEL_FILTER: error = priqcmd_delete_filter((struct priq_delete_filter *)addr); break; case PRIQ_GETSTATS: error = priqcmd_class_stats((struct priq_class_stats *)addr); break; default: error = EINVAL; break; } return error; } static int priqcmd_if_attach(ap) struct priq_interface *ap; { struct priq_if *pif; struct ifnet *ifp; int error; if ((ifp = ifunit(ap->ifname)) == NULL) return (ENXIO); if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL) return (ENOMEM); /* * set PRIQ to this ifnet structure. */ if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif, priq_enqueue, priq_dequeue, priq_request, &pif->pif_classifier, acc_classify)) != 0) (void)priq_detach(pif); return (error); } static int priqcmd_if_detach(ap) struct priq_interface *ap; { struct priq_if *pif; int error; if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ALTQ_IS_ENABLED(pif->pif_ifq)) altq_disable(pif->pif_ifq); if ((error = altq_detach(pif->pif_ifq))) return (error); return priq_detach(pif); } static int priqcmd_add_class(ap) struct priq_add_class *ap; { struct priq_if *pif; struct priq_class *cl; int qid; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) return (EINVAL); if (pif->pif_classes[ap->pri] != NULL) return (EBUSY); qid = ap->pri + 1; if ((cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags, qid)) == NULL) return (ENOMEM); /* return a class handle to the user */ ap->class_handle = cl->cl_handle; return (0); } static int priqcmd_delete_class(ap) struct priq_delete_class *ap; { struct priq_if *pif; struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); return priq_class_destroy(cl); } static int priqcmd_modify_class(ap) struct priq_modify_class *ap; { struct priq_if *pif; struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) return (EINVAL); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); /* * if priority is changed, move the class to the new priority */ if (pif->pif_classes[ap->pri] != cl) { if (pif->pif_classes[ap->pri] != NULL) return (EEXIST); pif->pif_classes[cl->cl_pri] = NULL; pif->pif_classes[ap->pri] = cl; cl->cl_pri = ap->pri; } /* call priq_class_create to change class parameters */ if ((cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags, ap->class_handle)) == NULL) return (ENOMEM); return 0; } static int priqcmd_add_filter(ap) struct priq_add_filter *ap; { struct priq_if *pif; struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); return acc_add_filter(&pif->pif_classifier, &ap->filter, cl, &ap->filter_handle); } static int priqcmd_delete_filter(ap) struct priq_delete_filter *ap; { struct priq_if *pif; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); return acc_delete_filter(&pif->pif_classifier, ap->filter_handle); } static int priqcmd_class_stats(ap) struct priq_class_stats *ap; { struct priq_if *pif; struct priq_class *cl; struct priq_classstats stats, *usp; int pri, error; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); ap->maxpri = pif->pif_maxpri; /* then, read the next N classes in the tree */ usp = ap->stats; for (pri = 0; pri <= pif->pif_maxpri; pri++) { cl = pif->pif_classes[pri]; if (cl != NULL) get_class_stats(&stats, cl); else bzero(&stats, sizeof(stats)); if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } return (0); } #ifdef KLD_MODULE static struct altqsw priq_sw = {"priq", priqopen, priqclose, priqioctl}; ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw); MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1); MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_PRIQ */ Index: projects/ifnet/sys/net/altq/altq_red.c =================================================================== --- projects/ifnet/sys/net/altq/altq_red.c (revision 281649) +++ projects/ifnet/sys/net/altq/altq_red.c (revision 281650) @@ -1,1500 +1,1492 @@ -/* $FreeBSD$ */ -/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */ - -/* +/*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ -/* +/*- * Copyright (c) 1990-1994 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Computer Systems * Engineering Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #ifdef ALTQ_FLOWVALVE #include #include #endif #endif /* ALTQ3_COMPAT */ #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #ifdef ALTQ_FLOWVALVE #include #endif #endif /* * ALTQ/RED (Random Early Detection) implementation using 32-bit * fixed-point calculation. * * written by kjc using the ns code as a reference. * you can learn more about red and ns from Sally's home page at * http://www-nrg.ee.lbl.gov/floyd/ * * most of the red parameter values are fixed in this implementation * to prevent fixed-point overflow/underflow. * if you change the parameters, watch out for overflow/underflow! * * the parameters used are recommended values by Sally. * the corresponding ns config looks: * q_weight=0.00195 * minthresh=5 maxthresh=15 queue-size=60 * linterm=30 * dropmech=drop-tail * bytes=false (can't be handled by 32-bit fixed-point) * doubleq=false dqthresh=false * wait=true */ /* * alternative red parameters for a slow link. * * assume the queue length becomes from zero to L and keeps L, it takes * N packets for q_avg to reach 63% of L. * when q_weight is 0.002, N is about 500 packets. * for a slow link like dial-up, 500 packets takes more than 1 minute! * when q_weight is 0.008, N is about 127 packets. * when q_weight is 0.016, N is about 63 packets. * bursts of 50 packets are allowed for 0.002, bursts of 25 packets * are allowed for 0.016. * see Sally's paper for more details. */ /* normal red parameters */ #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ /* q_weight = 0.00195 */ /* red parameters for a slow link */ #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ /* q_weight = 0.0078125 */ /* red parameters for a very slow link (e.g., dialup) */ #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ /* q_weight = 0.015625 */ /* fixed-point uses 12-bit decimal places */ #define FP_SHIFT 12 /* fixed-point shift */ /* red parameters for drop probability */ #define INV_P_MAX 10 /* inverse of max drop probability */ #define TH_MIN 5 /* min threshold */ #define TH_MAX 15 /* max threshold */ #define RED_LIMIT 60 /* default max queue lenght */ #define RED_STATS /* collect statistics */ /* * our default policy for forced-drop is drop-tail. * (in altq-1.1.2 or earlier, the default was random-drop. * but it makes more sense to punish the cause of the surge.) * to switch to the random-drop policy, define "RED_RANDOM_DROP". */ #ifdef ALTQ3_COMPAT #ifdef ALTQ_FLOWVALVE /* * flow-valve is an extention to protect red from unresponsive flows * and to promote end-to-end congestion control. * flow-valve observes the average drop rates of the flows that have * experienced packet drops in the recent past. * when the average drop rate exceeds the threshold, the flow is * blocked by the flow-valve. the trapped flow should back off * exponentially to escape from the flow-valve. */ #ifdef RED_RANDOM_DROP #error "random-drop can't be used with flow-valve!" #endif #endif /* ALTQ_FLOWVALVE */ /* red_list keeps all red_queue_t's allocated. */ static red_queue_t *red_list = NULL; #endif /* ALTQ3_COMPAT */ /* default red parameter values */ static int default_th_min = TH_MIN; static int default_th_max = TH_MAX; static int default_inv_pmax = INV_P_MAX; #ifdef ALTQ3_COMPAT /* internal function prototypes */ static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *red_dequeue(struct ifaltq *, int); static int red_request(struct ifaltq *, int, void *); static void red_purgeq(red_queue_t *); static int red_detach(red_queue_t *); #ifdef ALTQ_FLOWVALVE static __inline struct fve *flowlist_lookup(struct flowvalve *, struct altq_pktattr *, struct timeval *); static __inline struct fve *flowlist_reclaim(struct flowvalve *, struct altq_pktattr *); static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *); static __inline int fv_p2f(struct flowvalve *, int); #if 0 /* XXX: make the compiler happy (fv_alloc unused) */ static struct flowvalve *fv_alloc(struct red *); #endif static void fv_destroy(struct flowvalve *); static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, struct fve **); static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, struct fve *); #endif #endif /* ALTQ3_COMPAT */ /* * red support routines */ red_t * red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, int pkttime) { red_t *rp; int w, i; int npkts_per_sec; rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (rp == NULL) return (NULL); if (weight == 0) rp->red_weight = W_WEIGHT; else rp->red_weight = weight; /* allocate weight table */ rp->red_wtab = wtab_alloc(rp->red_weight); if (rp->red_wtab == NULL) { free(rp, M_DEVBUF); return (NULL); } rp->red_avg = 0; rp->red_idle = 1; if (inv_pmax == 0) rp->red_inv_pmax = default_inv_pmax; else rp->red_inv_pmax = inv_pmax; if (th_min == 0) rp->red_thmin = default_th_min; else rp->red_thmin = th_min; if (th_max == 0) rp->red_thmax = default_th_max; else rp->red_thmax = th_max; rp->red_flags = flags; if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ rp->red_pkttime = 800; else rp->red_pkttime = pkttime; if (weight == 0) { /* when the link is very slow, adjust red parameters */ npkts_per_sec = 1000000 / rp->red_pkttime; if (npkts_per_sec < 50) { /* up to about 400Kbps */ rp->red_weight = W_WEIGHT_2; } else if (npkts_per_sec < 300) { /* up to about 2.4Mbps */ rp->red_weight = W_WEIGHT_1; } } /* calculate wshift. weight must be power of 2 */ w = rp->red_weight; for (i = 0; w > 1; i++) w = w >> 1; rp->red_wshift = i; w = 1 << rp->red_wshift; if (w != rp->red_weight) { printf("invalid weight value %d for red! use %d\n", rp->red_weight, w); rp->red_weight = w; } /* * thmin_s and thmax_s are scaled versions of th_min and th_max * to be compared with avg. */ rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); /* * precompute probability denominator * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) * rp->red_inv_pmax) << FP_SHIFT; microtime(&rp->red_last); return (rp); } void red_destroy(red_t *rp) { #ifdef ALTQ3_COMPAT #ifdef ALTQ_FLOWVALVE if (rp->red_flowvalve != NULL) fv_destroy(rp->red_flowvalve); #endif #endif /* ALTQ3_COMPAT */ wtab_destroy(rp->red_wtab); free(rp, M_DEVBUF); } void red_getstats(red_t *rp, struct redstats *sp) { sp->q_avg = rp->red_avg >> rp->red_wshift; sp->xmit_cnt = rp->red_stats.xmit_cnt; sp->drop_cnt = rp->red_stats.drop_cnt; sp->drop_forced = rp->red_stats.drop_forced; sp->drop_unforced = rp->red_stats.drop_unforced; sp->marked_packets = rp->red_stats.marked_packets; } int red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) { int avg, droptype; int n; #ifdef ALTQ3_COMPAT #ifdef ALTQ_FLOWVALVE struct fve *fve = NULL; if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { m_freem(m); return (-1); } #endif #endif /* ALTQ3_COMPAT */ avg = rp->red_avg; /* * if we were idle, we pretend that n packets arrived during * the idle period. */ if (rp->red_idle) { struct timeval now; int t; rp->red_idle = 0; microtime(&now); t = (now.tv_sec - rp->red_last.tv_sec); if (t > 60) { /* * being idle for more than 1 minute, set avg to zero. * this prevents t from overflow. */ avg = 0; } else { t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); n = t / rp->red_pkttime - 1; /* the following line does (avg = (1 - Wq)^n * avg) */ if (n > 0) avg = (avg >> FP_SHIFT) * pow_w(rp->red_wtab, n); } } /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); rp->red_avg = avg; /* save the new value */ /* * red_count keeps a tally of arriving traffic that has not * been dropped. */ rp->red_count++; /* see if we drop early */ droptype = DTYPE_NODROP; if (avg >= rp->red_thmin_s && qlen(q) > 1) { if (avg >= rp->red_thmax_s) { /* avg >= th_max: forced drop */ droptype = DTYPE_FORCED; } else if (rp->red_old == 0) { /* first exceeds th_min */ rp->red_count = 1; rp->red_old = 1; } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, rp->red_probd, rp->red_count)) { /* mark or drop by red */ if ((rp->red_flags & REDF_ECN) && mark_ecn(m, pktattr, rp->red_flags)) { /* successfully marked. do not drop. */ rp->red_count = 0; #ifdef RED_STATS rp->red_stats.marked_packets++; #endif } else { /* unforced drop by red */ droptype = DTYPE_EARLY; } } } else { /* avg < th_min */ rp->red_old = 0; } /* * if the queue length hits the hard limit, it's a forced drop. */ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) droptype = DTYPE_FORCED; #ifdef RED_RANDOM_DROP /* if successful or forced drop, enqueue this packet. */ if (droptype != DTYPE_EARLY) _addq(q, m); #else /* if successful, enqueue this packet. */ if (droptype == DTYPE_NODROP) _addq(q, m); #endif if (droptype != DTYPE_NODROP) { if (droptype == DTYPE_EARLY) { /* drop the incoming packet */ #ifdef RED_STATS rp->red_stats.drop_unforced++; #endif } else { /* forced drop, select a victim packet in the queue. */ #ifdef RED_RANDOM_DROP m = _getq_random(q); #endif #ifdef RED_STATS rp->red_stats.drop_forced++; #endif } #ifdef RED_STATS PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); #endif rp->red_count = 0; #ifdef ALTQ3_COMPAT #ifdef ALTQ_FLOWVALVE if (rp->red_flowvalve != NULL) fv_dropbyred(rp->red_flowvalve, pktattr, fve); #endif #endif /* ALTQ3_COMPAT */ m_freem(m); return (-1); } /* successfully queued */ #ifdef RED_STATS PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); #endif return (0); } /* * early-drop probability is calculated as follows: * prob = p_max * (avg - th_min) / (th_max - th_min) * prob_a = prob / (2 - count*prob) * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) * here prob_a increases as successive undrop count increases. * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), * becomes 1 when (count >= (2 / prob))). */ int drop_early(int fp_len, int fp_probd, int count) { int d; /* denominator of drop-probability */ d = fp_probd - count * fp_len; if (d <= 0) /* count exceeds the hard limit: drop or mark */ return (1); /* * now the range of d is [1..600] in fixed-point. (when * th_max-th_min=10 and p_max=1/30) * drop probability = (avg - TH_MIN) / d */ if ((arc4random() % d) < fp_len) { /* drop or mark */ return (1); } /* no drop/mark */ return (0); } /* * try to mark CE bit to the packet. * returns 1 if successfully marked, 0 otherwise. */ int mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) { struct mbuf *m0; struct pf_mtag *at; void *hdr; at = pf_find_mtag(m); if (at != NULL) { hdr = at->hdr; #ifdef ALTQ3_COMPAT } else if (pktattr != NULL) { af = pktattr->pattr_af; hdr = pktattr->pattr_hdr; #endif /* ALTQ3_COMPAT */ } else return (0); /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)hdr >= m0->m_data) && ((caddr_t)hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, tag info is stale */ return (0); } switch (((struct ip *)hdr)->ip_v) { case IPVERSION: if (flags & REDF_ECN4) { struct ip *ip = hdr; u_int8_t otos; int sum; if (ip->ip_v != 4) return (0); /* version mismatch! */ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) return (0); /* not-ECT */ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) return (1); /* already marked */ /* * ecn-capable but not marked, * mark CE and update checksum */ otos = ip->ip_tos; ip->ip_tos |= IPTOS_ECN_CE; /* * update checksum (from RFC1624) * HC' = ~(~HC + ~m + m') */ sum = ~ntohs(ip->ip_sum) & 0xffff; sum += (~otos & 0xffff) + ip->ip_tos; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); /* add carry */ ip->ip_sum = htons(~sum & 0xffff); return (1); } break; #ifdef INET6 case (IPV6_VERSION >> 4): if (flags & REDF_ECN6) { struct ip6_hdr *ip6 = hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return (0); /* version mismatch! */ if ((flowlabel & (IPTOS_ECN_MASK << 20)) == (IPTOS_ECN_NOTECT << 20)) return (0); /* not-ECT */ if ((flowlabel & (IPTOS_ECN_MASK << 20)) == (IPTOS_ECN_CE << 20)) return (1); /* already marked */ /* * ecn-capable but not marked, mark CE */ flowlabel |= (IPTOS_ECN_CE << 20); ip6->ip6_flow = htonl(flowlabel); return (1); } break; #endif /* INET6 */ } /* not marked */ return (0); } struct mbuf * red_getq(rp, q) red_t *rp; class_queue_t *q; { struct mbuf *m; if ((m = _getq(q)) == NULL) { if (rp->red_idle == 0) { rp->red_idle = 1; microtime(&rp->red_last); } return NULL; } rp->red_idle = 0; return (m); } /* * helper routine to calibrate avg during idle. * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point * here Wq = 1/weight and the code assumes Wq is close to zero. * * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. */ static struct wtab *wtab_list = NULL; /* pointer to wtab list */ struct wtab * wtab_alloc(int weight) { struct wtab *w; int i; for (w = wtab_list; w != NULL; w = w->w_next) if (w->w_weight == weight) { w->w_refcount++; return (w); } w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO); if (w == NULL) return (NULL); w->w_weight = weight; w->w_refcount = 1; w->w_next = wtab_list; wtab_list = w; /* initialize the weight table */ w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; for (i = 1; i < 32; i++) { w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; if (w->w_tab[i] == 0 && w->w_param_max == 0) w->w_param_max = 1 << i; } return (w); } int wtab_destroy(struct wtab *w) { struct wtab *prev; if (--w->w_refcount > 0) return (0); if (wtab_list == w) wtab_list = w->w_next; else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) if (prev->w_next == w) { prev->w_next = w->w_next; break; } free(w, M_DEVBUF); return (0); } int32_t pow_w(struct wtab *w, int n) { int i, bit; int32_t val; if (n >= w->w_param_max) return (0); val = 1 << FP_SHIFT; if (n <= 0) return (val); bit = 1; i = 0; while (n) { if (n & bit) { val = (val * w->w_tab[i]) >> FP_SHIFT; n &= ~bit; } i++; bit <<= 1; } return (val); } #ifdef ALTQ3_COMPAT /* * red device interface */ altqdev_decl(red); int redopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { /* everything will be done when the queueing scheme is attached. */ return 0; } int redclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { red_queue_t *rqp; int err, error = 0; while ((rqp = red_list) != NULL) { /* destroy all */ err = red_detach(rqp); if (err != 0 && error == 0) error = err; } return error; } int redioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { red_queue_t *rqp; struct red_interface *ifacep; struct ifnet *ifp; int error = 0; /* check super-user privilege */ switch (cmd) { case RED_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) #endif return (error); break; } switch (cmd) { case RED_ENABLE: ifacep = (struct red_interface *)addr; if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } error = altq_enable(rqp->rq_ifq); break; case RED_DISABLE: ifacep = (struct red_interface *)addr; if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } error = altq_disable(rqp->rq_ifq); break; case RED_IF_ATTACH: ifp = ifunit(((struct red_interface *)addr)->red_ifname); if (ifp == NULL) { error = ENXIO; break; } /* allocate and initialize red_queue_t */ rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); if (rqp == NULL) { error = ENOMEM; break; } bzero(rqp, sizeof(red_queue_t)); rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK); if (rqp->rq_q == NULL) { free(rqp, M_DEVBUF); error = ENOMEM; break; } bzero(rqp->rq_q, sizeof(class_queue_t)); rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); if (rqp->rq_red == NULL) { free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); error = ENOMEM; break; } rqp->rq_ifq = &ifp->if_snd; qtail(rqp->rq_q) = NULL; qlen(rqp->rq_q) = 0; qlimit(rqp->rq_q) = RED_LIMIT; qtype(rqp->rq_q) = Q_RED; /* * set RED to this ifnet structure. */ error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, red_enqueue, red_dequeue, red_request, NULL, NULL); if (error) { red_destroy(rqp->rq_red); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); break; } /* add this state to the red list */ rqp->rq_next = red_list; red_list = rqp; break; case RED_IF_DETACH: ifacep = (struct red_interface *)addr; if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } error = red_detach(rqp); break; case RED_GETSTATS: do { struct red_stats *q_stats; red_t *rp; q_stats = (struct red_stats *)addr; if ((rqp = altq_lookup(q_stats->iface.red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } q_stats->q_len = qlen(rqp->rq_q); q_stats->q_limit = qlimit(rqp->rq_q); rp = rqp->rq_red; q_stats->q_avg = rp->red_avg >> rp->red_wshift; q_stats->xmit_cnt = rp->red_stats.xmit_cnt; q_stats->drop_cnt = rp->red_stats.drop_cnt; q_stats->drop_forced = rp->red_stats.drop_forced; q_stats->drop_unforced = rp->red_stats.drop_unforced; q_stats->marked_packets = rp->red_stats.marked_packets; q_stats->weight = rp->red_weight; q_stats->inv_pmax = rp->red_inv_pmax; q_stats->th_min = rp->red_thmin; q_stats->th_max = rp->red_thmax; #ifdef ALTQ_FLOWVALVE if (rp->red_flowvalve != NULL) { struct flowvalve *fv = rp->red_flowvalve; q_stats->fv_flows = fv->fv_flows; q_stats->fv_pass = fv->fv_stats.pass; q_stats->fv_predrop = fv->fv_stats.predrop; q_stats->fv_alloc = fv->fv_stats.alloc; q_stats->fv_escape = fv->fv_stats.escape; } else { #endif /* ALTQ_FLOWVALVE */ q_stats->fv_flows = 0; q_stats->fv_pass = 0; q_stats->fv_predrop = 0; q_stats->fv_alloc = 0; q_stats->fv_escape = 0; #ifdef ALTQ_FLOWVALVE } #endif /* ALTQ_FLOWVALVE */ } while (/*CONSTCOND*/ 0); break; case RED_CONFIG: do { struct red_conf *fc; red_t *new; int s, limit; fc = (struct red_conf *)addr; if ((rqp = altq_lookup(fc->iface.red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } new = red_alloc(fc->red_weight, fc->red_inv_pmax, fc->red_thmin, fc->red_thmax, fc->red_flags, fc->red_pkttime); if (new == NULL) { error = ENOMEM; break; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif red_purgeq(rqp); limit = fc->red_limit; if (limit < fc->red_thmax) limit = fc->red_thmax; qlimit(rqp->rq_q) = limit; fc->red_limit = limit; /* write back the new value */ red_destroy(rqp->rq_red); rqp->rq_red = new; splx(s); /* write back new values */ fc->red_limit = limit; fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; fc->red_thmin = rqp->rq_red->red_thmin; fc->red_thmax = rqp->rq_red->red_thmax; } while (/*CONSTCOND*/ 0); break; case RED_SETDEFAULTS: do { struct redparams *rp; rp = (struct redparams *)addr; default_th_min = rp->th_min; default_th_max = rp->th_max; default_inv_pmax = rp->inv_pmax; } while (/*CONSTCOND*/ 0); break; default: error = EINVAL; break; } return error; } static int red_detach(rqp) red_queue_t *rqp; { red_queue_t *tmp; int error = 0; if (ALTQ_IS_ENABLED(rqp->rq_ifq)) altq_disable(rqp->rq_ifq); if ((error = altq_detach(rqp->rq_ifq))) return (error); if (red_list == rqp) red_list = rqp->rq_next; else { for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) if (tmp->rq_next == rqp) { tmp->rq_next = rqp->rq_next; break; } if (tmp == NULL) printf("red_detach: no state found in red_list!\n"); } red_destroy(rqp->rq_red); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); return (error); } /* * enqueue routine: * * returns: 0 when successfully queued. * ENOBUFS when drop occurs. */ static int red_enqueue(ifq, m, pktattr) struct ifaltq *ifq; struct mbuf *m; struct altq_pktattr *pktattr; { red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) return ENOBUFS; ifq->ifq_len++; return 0; } /* * dequeue routine: * must be called in splimp. * * returns: mbuf dequeued. * NULL when no packet is available in the queue. */ static struct mbuf * red_dequeue(ifq, op) struct ifaltq *ifq; int op; { red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); if (op == ALTDQ_POLL) return qhead(rqp->rq_q); /* op == ALTDQ_REMOVE */ m = red_getq(rqp->rq_red, rqp->rq_q); if (m != NULL) ifq->ifq_len--; return (m); } static int red_request(ifq, req, arg) struct ifaltq *ifq; int req; void *arg; { red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: red_purgeq(rqp); break; } return (0); } static void red_purgeq(rqp) red_queue_t *rqp; { _flushq(rqp->rq_q); if (ALTQ_IS_ENABLED(rqp->rq_ifq)) rqp->rq_ifq->ifq_len = 0; } #ifdef ALTQ_FLOWVALVE #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ #define FV_PSCALE(x) ((x) << FV_PSHIFT) #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ #define FV_FSCALE(x) ((x) << FV_FSHIFT) #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) #define FV_TIMER (3 * hz) /* timer value for garbage collector */ #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ #define FV_N 10 /* update fve_f every FV_N packets */ #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ #define FV_TTHRESH 3 /* time threshold to delete fve */ #define FV_ALPHA 5 /* extra packet count */ #define FV_STATS #if (__FreeBSD_version > 300000) #define FV_TIMESTAMP(tp) getmicrotime(tp) #else #define FV_TIMESTAMP(tp) { (*(tp)) = time; } #endif /* * Brtt table: 127 entry table to convert drop rate (p) to * the corresponding bandwidth fraction (f) * the following equation is implemented to use scaled values, * fve_p and fve_f, in the fixed point format. * * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) * f = Brtt(p) / (max_th + alpha) */ #define BRTT_SIZE 128 #define BRTT_SHIFT 12 #define BRTT_MASK 0x0007f000 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) const int brtt_tab[BRTT_SIZE] = { 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 }; static __inline struct fve * flowlist_lookup(fv, pktattr, now) struct flowvalve *fv; struct altq_pktattr *pktattr; struct timeval *now; { struct fve *fve; int flows; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif struct timeval tthresh; if (pktattr == NULL) return (NULL); tthresh.tv_sec = now->tv_sec - FV_TTHRESH; flows = 0; /* * search the flow list */ switch (pktattr->pattr_af) { case AF_INET: ip = (struct ip *)pktattr->pattr_hdr; TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ if (fve->fve_lastdrop.tv_sec == 0) break; if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { fve->fve_lastdrop.tv_sec = 0; break; } if (fve->fve_flow.flow_af == AF_INET && fve->fve_flow.flow_ip.ip_src.s_addr == ip->ip_src.s_addr && fve->fve_flow.flow_ip.ip_dst.s_addr == ip->ip_dst.s_addr) return (fve); flows++; } break; #ifdef INET6 case AF_INET6: ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ if (fve->fve_lastdrop.tv_sec == 0) break; if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { fve->fve_lastdrop.tv_sec = 0; break; } if (fve->fve_flow.flow_af == AF_INET6 && IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, &ip6->ip6_src) && IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, &ip6->ip6_dst)) return (fve); flows++; } break; #endif /* INET6 */ default: /* unknown protocol. no drop. */ return (NULL); } fv->fv_flows = flows; /* save the number of active fve's */ return (NULL); } static __inline struct fve * flowlist_reclaim(fv, pktattr) struct flowvalve *fv; struct altq_pktattr *pktattr; { struct fve *fve; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif /* * get an entry from the tail of the LRU list. */ fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); switch (pktattr->pattr_af) { case AF_INET: ip = (struct ip *)pktattr->pattr_hdr; fve->fve_flow.flow_af = AF_INET; fve->fve_flow.flow_ip.ip_src = ip->ip_src; fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; break; #ifdef INET6 case AF_INET6: ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; fve->fve_flow.flow_af = AF_INET6; fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; break; #endif } fve->fve_state = Green; fve->fve_p = 0.0; fve->fve_f = 0.0; fve->fve_ifseq = fv->fv_ifseq - 1; fve->fve_count = 0; fv->fv_flows++; #ifdef FV_STATS fv->fv_stats.alloc++; #endif return (fve); } static __inline void flowlist_move_to_head(fv, fve) struct flowvalve *fv; struct fve *fve; { if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); } } #if 0 /* XXX: make the compiler happy (fv_alloc unused) */ /* * allocate flowvalve structure */ static struct flowvalve * fv_alloc(rp) struct red *rp; { struct flowvalve *fv; struct fve *fve; int i, num; num = FV_FLOWLISTSIZE; fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK); if (fv == NULL) return (NULL); bzero(fv, sizeof(struct flowvalve)); fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF, M_WAITOK); if (fv->fv_fves == NULL) { free(fv, M_DEVBUF); return (NULL); } bzero(fv->fv_fves, sizeof(struct fve) * num); fv->fv_flows = 0; TAILQ_INIT(&fv->fv_flowlist); for (i = 0; i < num; i++) { fve = &fv->fv_fves[i]; fve->fve_lastdrop.tv_sec = 0; TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); } /* initialize drop rate threshold in scaled fixed-point */ fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; /* initialize drop rate to fraction table */ fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK); if (fv->fv_p2ftab == NULL) { free(fv->fv_fves, M_DEVBUF); free(fv, M_DEVBUF); return (NULL); } /* * create the p2f table. * (shift is used to keep the precision) */ for (i = 1; i < BRTT_SIZE; i++) { int f; f = brtt_tab[i] << 8; fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; } return (fv); } #endif static void fv_destroy(fv) struct flowvalve *fv; { free(fv->fv_p2ftab, M_DEVBUF); free(fv->fv_fves, M_DEVBUF); free(fv, M_DEVBUF); } static __inline int fv_p2f(fv, p) struct flowvalve *fv; int p; { int val, f; if (p >= BRTT_PMAX) f = fv->fv_p2ftab[BRTT_SIZE-1]; else if ((val = (p & BRTT_MASK))) f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; else f = fv->fv_p2ftab[1]; return (f); } /* * check if an arriving packet should be pre-dropped. * called from red_addq() when a packet arrives. * returns 1 when the packet should be pre-dropped. * should be called in splimp. */ static int fv_checkflow(fv, pktattr, fcache) struct flowvalve *fv; struct altq_pktattr *pktattr; struct fve **fcache; { struct fve *fve; struct timeval now; fv->fv_ifseq++; FV_TIMESTAMP(&now); if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) /* no matching entry in the flowlist */ return (0); *fcache = fve; /* update fraction f for every FV_N packets */ if (++fve->fve_count == FV_N) { /* * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f */ fve->fve_f = (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) + fve->fve_f - FV_FUNSCALE(fve->fve_f); fve->fve_ifseq = fv->fv_ifseq; fve->fve_count = 0; } /* * overpumping test */ if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { int fthresh; /* calculate a threshold */ fthresh = fv_p2f(fv, fve->fve_p); if (fve->fve_f > fthresh) fve->fve_state = Red; } if (fve->fve_state == Red) { /* * backoff test */ if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) { /* no drop for at least FV_BACKOFFTHRESH sec */ fve->fve_p = 0; fve->fve_state = Green; #ifdef FV_STATS fv->fv_stats.escape++; #endif } else { /* block this flow */ flowlist_move_to_head(fv, fve); fve->fve_lastdrop = now; #ifdef FV_STATS fv->fv_stats.predrop++; #endif return (1); } } /* * p = (1 - Wp) * p */ fve->fve_p -= FV_PUNSCALE(fve->fve_p); if (fve->fve_p < 0) fve->fve_p = 0; #ifdef FV_STATS fv->fv_stats.pass++; #endif return (0); } /* * called from red_addq when a packet is dropped by red. * should be called in splimp. */ static void fv_dropbyred(fv, pktattr, fcache) struct flowvalve *fv; struct altq_pktattr *pktattr; struct fve *fcache; { struct fve *fve; struct timeval now; if (pktattr == NULL) return; FV_TIMESTAMP(&now); if (fcache != NULL) /* the fve of this packet is already cached */ fve = fcache; else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) fve = flowlist_reclaim(fv, pktattr); flowlist_move_to_head(fv, fve); /* * update p: the following line cancels the update * in fv_checkflow() and calculate * p = Wp + (1 - Wp) * p */ fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; fve->fve_lastdrop = now; } #endif /* ALTQ_FLOWVALVE */ #ifdef KLD_MODULE static struct altqsw red_sw = {"red", redopen, redclose, redioctl}; ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); MODULE_VERSION(altq_red, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_RED */ Index: projects/ifnet/sys/net/altq/altq_rio.c =================================================================== --- projects/ifnet/sys/net/altq/altq_rio.c (revision 281649) +++ projects/ifnet/sys/net/altq/altq_rio.c (revision 281650) @@ -1,852 +1,844 @@ -/* $FreeBSD$ */ -/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */ - -/* +/*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -/* +/*- * Copyright (c) 1990-1994 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Computer Systems * Engineering Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* * RIO: RED with IN/OUT bit * described in * "Explicit Allocation of Best Effort Packet Delivery Service" * David D. Clark and Wenjia Fang, MIT Lab for Computer Science * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} * * this implementation is extended to support more than 2 drop precedence * values as described in RFC2597 (Assured Forwarding PHB Group). * */ /* * AF DS (differentiated service) codepoints. * (classes can be mapped to CBQ or H-FSC classes.) * * 0 1 2 3 4 5 6 7 * +---+---+---+---+---+---+---+---+ * | CLASS |DropPre| 0 | CU | * +---+---+---+---+---+---+---+---+ * * class 1: 001 * class 2: 010 * class 3: 011 * class 4: 100 * * low drop prec: 01 * medium drop prec: 10 * high drop prec: 01 */ /* normal red parameters */ #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ /* q_weight = 0.00195 */ /* red parameters for a slow link */ #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ /* q_weight = 0.0078125 */ /* red parameters for a very slow link (e.g., dialup) */ #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ /* q_weight = 0.015625 */ /* fixed-point uses 12-bit decimal places */ #define FP_SHIFT 12 /* fixed-point shift */ /* red parameters for drop probability */ #define INV_P_MAX 10 /* inverse of max drop probability */ #define TH_MIN 5 /* min threshold */ #define TH_MAX 15 /* max threshold */ #define RIO_LIMIT 60 /* default max queue lenght */ #define RIO_STATS /* collect statistics */ #define TV_DELTA(a, b, delta) { \ register int xxs; \ \ delta = (a)->tv_usec - (b)->tv_usec; \ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \ if (xxs < 0) { \ delta = 60000000; \ } else if (xxs > 4) { \ if (xxs > 60) \ delta = 60000000; \ else \ delta += xxs * 1000000; \ } else while (xxs > 0) { \ delta += 1000000; \ xxs--; \ } \ } \ } #ifdef ALTQ3_COMPAT /* rio_list keeps all rio_queue_t's allocated. */ static rio_queue_t *rio_list = NULL; #endif /* default rio parameter values */ static struct redparams default_rio_params[RIO_NDROPPREC] = { /* th_min, th_max, inv_pmax */ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */ }; /* internal function prototypes */ static int dscp2index(u_int8_t); #ifdef ALTQ3_COMPAT static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *rio_dequeue(struct ifaltq *, int); static int rio_request(struct ifaltq *, int, void *); static int rio_detach(rio_queue_t *); /* * rio device interface */ altqdev_decl(rio); #endif /* ALTQ3_COMPAT */ rio_t * rio_alloc(int weight, struct redparams *params, int flags, int pkttime) { rio_t *rp; int w, i; int npkts_per_sec; rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (rp == NULL) return (NULL); rp->rio_flags = flags; if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ rp->rio_pkttime = 800; else rp->rio_pkttime = pkttime; if (weight != 0) rp->rio_weight = weight; else { /* use default */ rp->rio_weight = W_WEIGHT; /* when the link is very slow, adjust red parameters */ npkts_per_sec = 1000000 / rp->rio_pkttime; if (npkts_per_sec < 50) { /* up to about 400Kbps */ rp->rio_weight = W_WEIGHT_2; } else if (npkts_per_sec < 300) { /* up to about 2.4Mbps */ rp->rio_weight = W_WEIGHT_1; } } /* calculate wshift. weight must be power of 2 */ w = rp->rio_weight; for (i = 0; w > 1; i++) w = w >> 1; rp->rio_wshift = i; w = 1 << rp->rio_wshift; if (w != rp->rio_weight) { printf("invalid weight value %d for red! use %d\n", rp->rio_weight, w); rp->rio_weight = w; } /* allocate weight table */ rp->rio_wtab = wtab_alloc(rp->rio_weight); for (i = 0; i < RIO_NDROPPREC; i++) { struct dropprec_state *prec = &rp->rio_precstate[i]; prec->avg = 0; prec->idle = 1; if (params == NULL || params[i].inv_pmax == 0) prec->inv_pmax = default_rio_params[i].inv_pmax; else prec->inv_pmax = params[i].inv_pmax; if (params == NULL || params[i].th_min == 0) prec->th_min = default_rio_params[i].th_min; else prec->th_min = params[i].th_min; if (params == NULL || params[i].th_max == 0) prec->th_max = default_rio_params[i].th_max; else prec->th_max = params[i].th_max; /* * th_min_s and th_max_s are scaled versions of th_min * and th_max to be compared with avg. */ prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT); prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT); /* * precompute probability denominator * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ prec->probd = (2 * (prec->th_max - prec->th_min) * prec->inv_pmax) << FP_SHIFT; microtime(&prec->last); } return (rp); } void rio_destroy(rio_t *rp) { wtab_destroy(rp->rio_wtab); free(rp, M_DEVBUF); } void rio_getstats(rio_t *rp, struct redstats *sp) { int i; for (i = 0; i < RIO_NDROPPREC; i++) { bcopy(&rp->q_stats[i], sp, sizeof(struct redstats)); sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; sp++; } } #if (RIO_NDROPPREC == 3) /* * internally, a drop precedence value is converted to an index * starting from 0. */ static int dscp2index(u_int8_t dscp) { int dpindex = dscp & AF_DROPPRECMASK; if (dpindex == 0) return (0); return ((dpindex >> 3) - 1); } #endif #if 1 /* * kludge: when a packet is dequeued, we need to know its drop precedence * in order to keep the queue length of each drop precedence. * use m_pkthdr.rcvif to pass this info. */ #define RIOM_SET_PRECINDEX(m, idx) \ do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0) #define RIOM_GET_PRECINDEX(m) \ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \ (m)->m_pkthdr.rcvif = NULL; idx; }) #endif int rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) { int avg, droptype; u_int8_t dsfield, odsfield; int dpindex, i, n, t; struct timeval now; struct dropprec_state *prec; dsfield = odsfield = read_dsfield(m, pktattr); dpindex = dscp2index(dsfield); /* * update avg of the precedence states whose drop precedence * is larger than or equal to the drop precedence of the packet */ now.tv_sec = 0; for (i = dpindex; i < RIO_NDROPPREC; i++) { prec = &rp->rio_precstate[i]; avg = prec->avg; if (prec->idle) { prec->idle = 0; if (now.tv_sec == 0) microtime(&now); t = (now.tv_sec - prec->last.tv_sec); if (t > 60) avg = 0; else { t = t * 1000000 + (now.tv_usec - prec->last.tv_usec); n = t / rp->rio_pkttime; /* calculate (avg = (1 - Wq)^n * avg) */ if (n > 0) avg = (avg >> FP_SHIFT) * pow_w(rp->rio_wtab, n); } } /* run estimator. (avg is scaled by WEIGHT in fixed-point) */ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift); prec->avg = avg; /* save the new value */ /* * count keeps a tally of arriving traffic that has not * been dropped. */ prec->count++; } prec = &rp->rio_precstate[dpindex]; avg = prec->avg; /* see if we drop early */ droptype = DTYPE_NODROP; if (avg >= prec->th_min_s && prec->qlen > 1) { if (avg >= prec->th_max_s) { /* avg >= th_max: forced drop */ droptype = DTYPE_FORCED; } else if (prec->old == 0) { /* first exceeds th_min */ prec->count = 1; prec->old = 1; } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift, prec->probd, prec->count)) { /* unforced drop by red */ droptype = DTYPE_EARLY; } } else { /* avg < th_min */ prec->old = 0; } /* * if the queue length hits the hard limit, it's a forced drop. */ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) droptype = DTYPE_FORCED; if (droptype != DTYPE_NODROP) { /* always drop incoming packet (as opposed to randomdrop) */ for (i = dpindex; i < RIO_NDROPPREC; i++) rp->rio_precstate[i].count = 0; #ifdef RIO_STATS if (droptype == DTYPE_EARLY) rp->q_stats[dpindex].drop_unforced++; else rp->q_stats[dpindex].drop_forced++; PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m)); #endif m_freem(m); return (-1); } for (i = dpindex; i < RIO_NDROPPREC; i++) rp->rio_precstate[i].qlen++; /* save drop precedence index in mbuf hdr */ RIOM_SET_PRECINDEX(m, dpindex); if (rp->rio_flags & RIOF_CLEARDSCP) dsfield &= ~DSCP_MASK; if (dsfield != odsfield) write_dsfield(m, pktattr, dsfield); _addq(q, m); #ifdef RIO_STATS PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m)); #endif return (0); } struct mbuf * rio_getq(rio_t *rp, class_queue_t *q) { struct mbuf *m; int dpindex, i; if ((m = _getq(q)) == NULL) return NULL; dpindex = RIOM_GET_PRECINDEX(m); for (i = dpindex; i < RIO_NDROPPREC; i++) { if (--rp->rio_precstate[i].qlen == 0) { if (rp->rio_precstate[i].idle == 0) { rp->rio_precstate[i].idle = 1; microtime(&rp->rio_precstate[i].last); } } } return (m); } #ifdef ALTQ3_COMPAT int rioopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { /* everything will be done when the queueing scheme is attached. */ return 0; } int rioclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { rio_queue_t *rqp; int err, error = 0; while ((rqp = rio_list) != NULL) { /* destroy all */ err = rio_detach(rqp); if (err != 0 && error == 0) error = err; } return error; } int rioioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { rio_queue_t *rqp; struct rio_interface *ifacep; struct ifnet *ifp; int error = 0; /* check super-user privilege */ switch (cmd) { case RIO_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case RIO_ENABLE: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = altq_enable(rqp->rq_ifq); break; case RIO_DISABLE: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = altq_disable(rqp->rq_ifq); break; case RIO_IF_ATTACH: ifp = ifunit(((struct rio_interface *)addr)->rio_ifname); if (ifp == NULL) { error = ENXIO; break; } /* allocate and initialize rio_queue_t */ rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK); if (rqp == NULL) { error = ENOMEM; break; } bzero(rqp, sizeof(rio_queue_t)); rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK); if (rqp->rq_q == NULL) { free(rqp, M_DEVBUF); error = ENOMEM; break; } bzero(rqp->rq_q, sizeof(class_queue_t)); rqp->rq_rio = rio_alloc(0, NULL, 0, 0); if (rqp->rq_rio == NULL) { free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); error = ENOMEM; break; } rqp->rq_ifq = &ifp->if_snd; qtail(rqp->rq_q) = NULL; qlen(rqp->rq_q) = 0; qlimit(rqp->rq_q) = RIO_LIMIT; qtype(rqp->rq_q) = Q_RIO; /* * set RIO to this ifnet structure. */ error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp, rio_enqueue, rio_dequeue, rio_request, NULL, NULL); if (error) { rio_destroy(rqp->rq_rio); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); break; } /* add this state to the rio list */ rqp->rq_next = rio_list; rio_list = rqp; break; case RIO_IF_DETACH: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = rio_detach(rqp); break; case RIO_GETSTATS: do { struct rio_stats *q_stats; rio_t *rp; int i; q_stats = (struct rio_stats *)addr; if ((rqp = altq_lookup(q_stats->iface.rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } rp = rqp->rq_rio; q_stats->q_limit = qlimit(rqp->rq_q); q_stats->weight = rp->rio_weight; q_stats->flags = rp->rio_flags; for (i = 0; i < RIO_NDROPPREC; i++) { q_stats->q_len[i] = rp->rio_precstate[i].qlen; bcopy(&rp->q_stats[i], &q_stats->q_stats[i], sizeof(struct redstats)); q_stats->q_stats[i].q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; q_stats->q_params[i].inv_pmax = rp->rio_precstate[i].inv_pmax; q_stats->q_params[i].th_min = rp->rio_precstate[i].th_min; q_stats->q_params[i].th_max = rp->rio_precstate[i].th_max; } } while (/*CONSTCOND*/ 0); break; case RIO_CONFIG: do { struct rio_conf *fc; rio_t *new; int s, limit, i; fc = (struct rio_conf *)addr; if ((rqp = altq_lookup(fc->iface.rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } new = rio_alloc(fc->rio_weight, &fc->q_params[0], fc->rio_flags, fc->rio_pkttime); if (new == NULL) { error = ENOMEM; break; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif _flushq(rqp->rq_q); limit = fc->rio_limit; if (limit < fc->q_params[RIO_NDROPPREC-1].th_max) limit = fc->q_params[RIO_NDROPPREC-1].th_max; qlimit(rqp->rq_q) = limit; rio_destroy(rqp->rq_rio); rqp->rq_rio = new; splx(s); /* write back new values */ fc->rio_limit = limit; for (i = 0; i < RIO_NDROPPREC; i++) { fc->q_params[i].inv_pmax = rqp->rq_rio->rio_precstate[i].inv_pmax; fc->q_params[i].th_min = rqp->rq_rio->rio_precstate[i].th_min; fc->q_params[i].th_max = rqp->rq_rio->rio_precstate[i].th_max; } } while (/*CONSTCOND*/ 0); break; case RIO_SETDEFAULTS: do { struct redparams *rp; int i; rp = (struct redparams *)addr; for (i = 0; i < RIO_NDROPPREC; i++) default_rio_params[i] = rp[i]; } while (/*CONSTCOND*/ 0); break; default: error = EINVAL; break; } return error; } static int rio_detach(rqp) rio_queue_t *rqp; { rio_queue_t *tmp; int error = 0; if (ALTQ_IS_ENABLED(rqp->rq_ifq)) altq_disable(rqp->rq_ifq); if ((error = altq_detach(rqp->rq_ifq))) return (error); if (rio_list == rqp) rio_list = rqp->rq_next; else { for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next) if (tmp->rq_next == rqp) { tmp->rq_next = rqp->rq_next; break; } if (tmp == NULL) printf("rio_detach: no state found in rio_list!\n"); } rio_destroy(rqp->rq_rio); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); return (error); } /* * rio support routines */ static int rio_request(ifq, req, arg) struct ifaltq *ifq; int req; void *arg; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: _flushq(rqp->rq_q); if (ALTQ_IS_ENABLED(ifq)) ifq->ifq_len = 0; break; } return (0); } /* * enqueue routine: * * returns: 0 when successfully queued. * ENOBUFS when drop occurs. */ static int rio_enqueue(ifq, m, pktattr) struct ifaltq *ifq; struct mbuf *m; struct altq_pktattr *pktattr; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; int error = 0; IFQ_LOCK_ASSERT(ifq); if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0) ifq->ifq_len++; else error = ENOBUFS; return error; } /* * dequeue routine: * must be called in splimp. * * returns: mbuf dequeued. * NULL when no packet is available in the queue. */ static struct mbuf * rio_dequeue(ifq, op) struct ifaltq *ifq; int op; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; struct mbuf *m = NULL; IFQ_LOCK_ASSERT(ifq); if (op == ALTDQ_POLL) return qhead(rqp->rq_q); m = rio_getq(rqp->rq_rio, rqp->rq_q); if (m != NULL) ifq->ifq_len--; return m; } #ifdef KLD_MODULE static struct altqsw rio_sw = {"rio", rioopen, rioclose, rioioctl}; ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw); MODULE_VERSION(altq_rio, 1); MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_RIO */ Index: projects/ifnet/sys/net/altq/altq_rmclass.c =================================================================== --- projects/ifnet/sys/net/altq/altq_rmclass.c (revision 281649) +++ projects/ifnet/sys/net/altq/altq_rmclass.c (revision 281650) @@ -1,1836 +1,1810 @@ -/* $FreeBSD$ */ -/* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */ - -/* +/*- * Copyright (c) 1991-1997 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Network Research * Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * LBL code modified by speer@eng.sun.com, May 1977. * For questions and/or comments, please send mail to cbq@ee.lbl.gov * * @(#)rm_class.c 1.48 97/12/05 SMI + * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif #include #include #ifdef ALTQ3_COMPAT #include #include #include #endif #include #include #include #include #include #include /* * Local Macros */ #define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } /* * Local routines. */ static int rmc_satisfied(struct rm_class *, struct timeval *); static void rmc_wrr_set_weights(struct rm_ifdat *); static void rmc_depth_compute(struct rm_class *); static void rmc_depth_recompute(rm_class_t *); static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int); static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int); static int _rmc_addq(rm_class_t *, mbuf_t *); static void _rmc_dropq(rm_class_t *); static mbuf_t *_rmc_getq(rm_class_t *); static mbuf_t *_rmc_pollq(rm_class_t *); static int rmc_under_limit(struct rm_class *, struct timeval *); static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); static void rmc_drop_action(struct rm_class *); static void rmc_restart(struct rm_class *); static void rmc_root_overlimit(struct rm_class *, struct rm_class *); #define BORROW_OFFTIME /* * BORROW_OFFTIME (experimental): * borrow the offtime of the class borrowing from. * the reason is that when its own offtime is set, the class is unable * to borrow much, especially when cutoff is taking effect. * but when the borrowed class is overloaded (advidle is close to minidle), * use the borrowing class's offtime to avoid overload. */ #define ADJUST_CUTOFF /* * ADJUST_CUTOFF (experimental): * if no underlimit class is found due to cutoff, increase cutoff and * retry the scheduling loop. * also, don't invoke delay_actions while cutoff is taking effect, * since a sleeping class won't have a chance to be scheduled in the * next loop. * * now heuristics for setting the top-level variable (cutoff_) becomes: * 1. if a packet arrives for a not-overlimit class, set cutoff * to the depth of the class. * 2. if cutoff is i, and a packet arrives for an overlimit class * with an underlimit ancestor at a lower level than i (say j), * then set cutoff to j. * 3. at scheduling a packet, if there is no underlimit class * due to the current cutoff level, increase cutoff by 1 and * then try to schedule again. */ /* * rm_class_t * * rmc_newclass(...) - Create a new resource management class at priority * 'pri' on the interface given by 'ifd'. * * nsecPerByte is the data rate of the interface in nanoseconds/byte. * E.g., 800 for a 10Mb/s ethernet. If the class gets less * than 100% of the bandwidth, this number should be the * 'effective' rate for the class. Let f be the * bandwidth fraction allocated to this class, and let * nsPerByte be the data rate of the output link in * nanoseconds/byte. Then nsecPerByte is set to * nsPerByte / f. E.g., 1600 (= 800 / .5) * for a class that gets 50% of an ethernet's bandwidth. * * action the routine to call when the class is over limit. * * maxq max allowable queue size for class (in packets). * * parent parent class pointer. * * borrow class to borrow from (should be either 'parent' or null). * * maxidle max value allowed for class 'idle' time estimate (this * parameter determines how large an initial burst of packets * can be before overlimit action is invoked. * * offtime how long 'delay' action will delay when class goes over * limit (this parameter determines the steady-state burst * size when a class is running over its limit). * * Maxidle and offtime have to be computed from the following: If the * average packet size is s, the bandwidth fraction allocated to this * class is f, we want to allow b packet bursts, and the gain of the * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: * * ptime = s * nsPerByte * (1 - f) / f * maxidle = ptime * (1 - g^b) / g^b * minidle = -ptime * (1 / (f - 1)) * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1) * * Operationally, it's convenient to specify maxidle & offtime in units * independent of the link bandwidth so the maxidle & offtime passed to * this routine are the above values multiplied by 8*f/(1000*nsPerByte). * (The constant factor is a scale factor needed to make the parameters * integers. This scaling also means that the 'unscaled' values of * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, * not nanoseconds.) Also note that the 'idle' filter computation keeps * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of * maxidle also must be scaled upward by this value. Thus, the passed * values for maxidle and offtime can be computed as follows: * * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) * offtime = offtime * 8 / (1000 * nsecPerByte) * * When USE_HRTIME is employed, then maxidle and offtime become: * maxidle = maxilde * (8.0 / nsecPerByte); * offtime = offtime * (8.0 / nsecPerByte); */ struct rm_class * rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, void (*action)(rm_class_t *, rm_class_t *), int maxq, struct rm_class *parent, struct rm_class *borrow, u_int maxidle, int minidle, u_int offtime, int pktsize, int flags) { struct rm_class *cl; struct rm_class *peer; int s; if (pri >= RM_MAXPRIO) return (NULL); #ifndef ALTQ_RED if (flags & RMCF_RED) { #ifdef ALTQ_DEBUG printf("rmc_newclass: RED not configured for CBQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_RIO if (flags & RMCF_RIO) { #ifdef ALTQ_DEBUG printf("rmc_newclass: RIO not configured for CBQ!\n"); #endif return (NULL); } #endif cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); CALLOUT_INIT(&cl->callout_); cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->q_ == NULL) { free(cl, M_DEVBUF); return (NULL); } /* * Class initialization. */ cl->children_ = NULL; cl->parent_ = parent; cl->borrow_ = borrow; cl->leaf_ = 1; cl->ifdat_ = ifd; cl->pri_ = pri; cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ cl->depth_ = 0; cl->qthresh_ = 0; cl->ns_per_byte_ = nsecPerByte; qlimit(cl->q_) = maxq; qtype(cl->q_) = Q_DROPHEAD; qlen(cl->q_) = 0; cl->flags_ = flags; #if 1 /* minidle is also scaled in ALTQ */ cl->minidle_ = (minidle * (int)nsecPerByte) / 8; if (cl->minidle_ > 0) cl->minidle_ = 0; #else cl->minidle_ = minidle; #endif cl->maxidle_ = (maxidle * nsecPerByte) / 8; if (cl->maxidle_ == 0) cl->maxidle_ = 1; #if 1 /* offtime is also scaled in ALTQ */ cl->avgidle_ = cl->maxidle_; cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; if (cl->offtime_ == 0) cl->offtime_ = 1; #else cl->avgidle_ = 0; cl->offtime_ = (offtime * nsecPerByte) / 8; #endif cl->overlimit = action; #ifdef ALTQ_RED if (flags & (RMCF_RED|RMCF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & RMCF_ECN) red_flags |= REDF_ECN; if (flags & RMCF_FLOWVALVE) red_flags |= REDF_FLOWVALVE; #ifdef ALTQ_RIO if (flags & RMCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif red_pkttime = nsecPerByte * pktsize / 1000; if (flags & RMCF_RED) { cl->red_ = red_alloc(0, 0, qlimit(cl->q_) * 10/100, qlimit(cl->q_) * 30/100, red_flags, red_pkttime); if (cl->red_ != NULL) qtype(cl->q_) = Q_RED; } #ifdef ALTQ_RIO else { cl->red_ = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->red_ != NULL) qtype(cl->q_) = Q_RIO; } #endif } #endif /* ALTQ_RED */ /* * put the class into the class tree */ -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(ifd->ifq_); if ((peer = ifd->active_[pri]) != NULL) { /* find the last class at this pri */ cl->peer_ = peer; while (peer->peer_ != ifd->active_[pri]) peer = peer->peer_; peer->peer_ = cl; } else { ifd->active_[pri] = cl; cl->peer_ = cl; } if (cl->parent_) { cl->next_ = parent->children_; parent->children_ = cl; parent->leaf_ = 0; } /* * Compute the depth of this class and its ancestors in the class * hierarchy. */ rmc_depth_compute(cl); /* * If CBQ's WRR is enabled, then initialize the class WRR state. */ if (ifd->wrr_) { ifd->num_[pri]++; ifd->alloc_[pri] += cl->allotment_; rmc_wrr_set_weights(ifd); } IFQ_UNLOCK(ifd->ifq_); splx(s); return (cl); } int rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle, int minidle, u_int offtime, int pktsize) { struct rm_ifdat *ifd; u_int old_allotment; int s; ifd = cl->ifdat_; old_allotment = cl->allotment_; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(ifd->ifq_); cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ cl->qthresh_ = 0; cl->ns_per_byte_ = nsecPerByte; qlimit(cl->q_) = maxq; #if 1 /* minidle is also scaled in ALTQ */ cl->minidle_ = (minidle * nsecPerByte) / 8; if (cl->minidle_ > 0) cl->minidle_ = 0; #else cl->minidle_ = minidle; #endif cl->maxidle_ = (maxidle * nsecPerByte) / 8; if (cl->maxidle_ == 0) cl->maxidle_ = 1; #if 1 /* offtime is also scaled in ALTQ */ cl->avgidle_ = cl->maxidle_; cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; if (cl->offtime_ == 0) cl->offtime_ = 1; #else cl->avgidle_ = 0; cl->offtime_ = (offtime * nsecPerByte) / 8; #endif /* * If CBQ's WRR is enabled, then initialize the class WRR state. */ if (ifd->wrr_) { ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; rmc_wrr_set_weights(ifd); } IFQ_UNLOCK(ifd->ifq_); splx(s); return (0); } /* * static void * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes * the appropriate run robin weights for the CBQ weighted round robin * algorithm. * * Returns: NONE */ static void rmc_wrr_set_weights(struct rm_ifdat *ifd) { int i; struct rm_class *cl, *clh; for (i = 0; i < RM_MAXPRIO; i++) { /* * This is inverted from that of the simulator to * maintain precision. */ if (ifd->num_[i] == 0) ifd->M_[i] = 0; else ifd->M_[i] = ifd->alloc_[i] / (ifd->num_[i] * ifd->maxpkt_); /* * Compute the weighted allotment for each class. * This takes the expensive div instruction out * of the main loop for the wrr scheduling path. * These only get recomputed when a class comes or * goes. */ if (ifd->active_[i] != NULL) { clh = cl = ifd->active_[i]; do { /* safe-guard for slow link or alloc_ == 0 */ if (ifd->M_[i] == 0) cl->w_allotment_ = 0; else cl->w_allotment_ = cl->allotment_ / ifd->M_[i]; cl = cl->peer_; } while ((cl != NULL) && (cl != clh)); } } } int rmc_get_weight(struct rm_ifdat *ifd, int pri) { if ((pri >= 0) && (pri < RM_MAXPRIO)) return (ifd->M_[pri]); else return (0); } /* * static void * rmc_depth_compute(struct rm_class *cl) - This function computes the * appropriate depth of class 'cl' and its ancestors. * * Returns: NONE */ static void rmc_depth_compute(struct rm_class *cl) { rm_class_t *t = cl, *p; /* * Recompute the depth for the branch of the tree. */ while (t != NULL) { p = t->parent_; if (p && (t->depth_ >= p->depth_)) { p->depth_ = t->depth_ + 1; t = p; } else t = NULL; } } /* * static void * rmc_depth_recompute(struct rm_class *cl) - This function re-computes * the depth of the tree after a class has been deleted. * * Returns: NONE */ static void rmc_depth_recompute(rm_class_t *cl) { #if 1 /* ALTQ */ rm_class_t *p, *t; p = cl; while (p != NULL) { if ((t = p->children_) == NULL) { p->depth_ = 0; } else { int cdepth = 0; while (t != NULL) { if (t->depth_ > cdepth) cdepth = t->depth_; t = t->next_; } if (p->depth_ == cdepth + 1) /* no change to this parent */ return; p->depth_ = cdepth + 1; } p = p->parent_; } #else rm_class_t *t; if (cl->depth_ >= 1) { if (cl->children_ == NULL) { cl->depth_ = 0; } else if ((t = cl->children_) != NULL) { while (t != NULL) { if (t->children_ != NULL) rmc_depth_recompute(t); t = t->next_; } } else rmc_depth_compute(cl); } #endif } /* * void * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This * function deletes a class from the link-sharing structure and frees * all resources associated with the class. * * Returns: NONE */ void rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) { struct rm_class *p, *head, *previous; int s; ASSERT(cl->children_ == NULL); if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(ifd->ifq_); /* * Free packets in the packet queue. * XXX - this may not be a desired behavior. Packets should be * re-queued. */ rmc_dropall(cl); /* * If the class has a parent, then remove the class from the * class from the parent's children chain. */ if (cl->parent_ != NULL) { head = cl->parent_->children_; p = previous = head; if (head->next_ == NULL) { ASSERT(head == cl); cl->parent_->children_ = NULL; cl->parent_->leaf_ = 1; } else while (p != NULL) { if (p == cl) { if (cl == head) cl->parent_->children_ = cl->next_; else previous->next_ = cl->next_; cl->next_ = NULL; p = NULL; } else { previous = p; p = p->next_; } } } /* * Delete class from class priority peer list. */ if ((p = ifd->active_[cl->pri_]) != NULL) { /* * If there is more than one member of this priority * level, then look for class(cl) in the priority level. */ if (p != p->peer_) { while (p->peer_ != cl) p = p->peer_; p->peer_ = cl->peer_; if (ifd->active_[cl->pri_] == cl) ifd->active_[cl->pri_] = cl->peer_; } else { ASSERT(p == cl); ifd->active_[cl->pri_] = NULL; } } /* * Recompute the WRR weights. */ if (ifd->wrr_) { ifd->alloc_[cl->pri_] -= cl->allotment_; ifd->num_[cl->pri_]--; rmc_wrr_set_weights(ifd); } /* * Re-compute the depth of the tree. */ #if 1 /* ALTQ */ rmc_depth_recompute(cl->parent_); #else rmc_depth_recompute(ifd->root_); #endif IFQ_UNLOCK(ifd->ifq_); splx(s); /* * Free the class structure. */ if (cl->red_ != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) rio_destroy((rio_t *)cl->red_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) red_destroy(cl->red_); #endif } free(cl->q_, M_DEVBUF); free(cl, M_DEVBUF); } /* * void * rmc_init(...) - Initialize the resource management data structures * associated with the output portion of interface 'ifp'. 'ifd' is * where the structures will be built (for backwards compatibility, the * structures aren't kept in the ifnet struct). 'nsecPerByte' * gives the link speed (inverse of bandwidth) in nanoseconds/byte. * 'restart' is the driver-specific routine that the generic 'delay * until under limit' action will call to restart output. `maxq' * is the queue size of the 'link' & 'default' classes. 'maxqueued' * is the maximum number of packets that the resource management * code will allow to be queued 'downstream' (this is typically 1). * * Returns: NONE */ void rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte, void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle, int minidle, u_int offtime, int flags) { int i, mtu; /* * Initialize the CBQ tracing/debug facility. */ CBQTRACEINIT(); bzero((char *)ifd, sizeof (*ifd)); mtu = ifq->altq_ifp->if_mtu; ifd->ifq_ = ifq; ifd->restart = restart; ifd->maxqueued_ = maxqueued; ifd->ns_per_byte_ = nsecPerByte; ifd->maxpkt_ = mtu; ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0; ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; #if 1 ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; if (mtu * nsecPerByte > 10 * 1000000) ifd->maxiftime_ /= 4; #endif reset_cutoff(ifd); CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); /* * Initialize the CBQ's WRR state. */ for (i = 0; i < RM_MAXPRIO; i++) { ifd->alloc_[i] = 0; ifd->M_[i] = 0; ifd->num_[i] = 0; ifd->na_[i] = 0; ifd->active_[i] = NULL; } /* * Initialize current packet state. */ ifd->qi_ = 0; ifd->qo_ = 0; for (i = 0; i < RM_MAXQUEUED; i++) { ifd->class_[i] = NULL; ifd->curlen_[i] = 0; ifd->borrowed_[i] = NULL; } /* * Create the root class of the link-sharing structure. */ if ((ifd->root_ = rmc_newclass(0, ifd, nsecPerByte, rmc_root_overlimit, maxq, 0, 0, maxidle, minidle, offtime, 0, 0)) == NULL) { printf("rmc_init: root class not allocated\n"); return ; } ifd->root_->depth_ = 0; } /* * void * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by * mbuf 'm' to queue for resource class 'cl'. This routine is called * by a driver's if_output routine. This routine must be called with * output packet completion interrupts locked out (to avoid racing with * rmc_dequeue_next). * * Returns: 0 on successful queueing * -1 when packet drop occurs */ int rmc_queue_packet(struct rm_class *cl, mbuf_t *m) { struct timeval now; struct rm_ifdat *ifd = cl->ifdat_; int cpri = cl->pri_; int is_empty = qempty(cl->q_); RM_GETTIME(now); if (ifd->cutoff_ > 0) { if (TV_LT(&cl->undertime_, &now)) { if (ifd->cutoff_ > cl->depth_) ifd->cutoff_ = cl->depth_; CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_); } #if 1 /* ALTQ */ else { /* * the class is overlimit. if the class has * underlimit ancestors, set cutoff to the lowest * depth among them. */ struct rm_class *borrow = cl->borrow_; while (borrow != NULL && borrow->depth_ < ifd->cutoff_) { if (TV_LT(&borrow->undertime_, &now)) { ifd->cutoff_ = borrow->depth_; CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_); break; } borrow = borrow->borrow_; } } #else /* !ALTQ */ else if ((ifd->cutoff_ > 1) && cl->borrow_) { if (TV_LT(&cl->borrow_->undertime_, &now)) { ifd->cutoff_ = cl->borrow_->depth_; CBQTRACE(rmc_queue_packet, 'ffob', cl->borrow_->depth_); } } #endif /* !ALTQ */ } if (_rmc_addq(cl, m) < 0) /* failed */ return (-1); if (is_empty) { CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle); ifd->na_[cpri]++; } if (qlen(cl->q_) > qlimit(cl->q_)) { /* note: qlimit can be set to 0 or 1 */ rmc_drop_action(cl); return (-1); } return (0); } /* * void * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all * classes to see if there are satified. */ static void rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) { int i; rm_class_t *p, *bp; for (i = RM_MAXPRIO - 1; i >= 0; i--) { if ((bp = ifd->active_[i]) != NULL) { p = bp; do { if (!rmc_satisfied(p, now)) { ifd->cutoff_ = p->depth_; return; } p = p->peer_; } while (p != bp); } } reset_cutoff(ifd); } /* * rmc_satisfied - Return 1 of the class is satisfied. O, otherwise. */ static int rmc_satisfied(struct rm_class *cl, struct timeval *now) { rm_class_t *p; if (cl == NULL) return (1); if (TV_LT(now, &cl->undertime_)) return (1); if (cl->depth_ == 0) { if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_)) return (0); else return (1); } if (cl->children_ != NULL) { p = cl->children_; while (p != NULL) { if (!rmc_satisfied(p, now)) return (0); p = p->next_; } } return (1); } /* * Return 1 if class 'cl' is under limit or can borrow from a parent, * 0 if overlimit. As a side-effect, this routine will invoke the * class overlimit action if the class if overlimit. */ static int rmc_under_limit(struct rm_class *cl, struct timeval *now) { rm_class_t *p = cl; rm_class_t *top; struct rm_ifdat *ifd = cl->ifdat_; ifd->borrowed_[ifd->qi_] = NULL; /* * If cl is the root class, then always return that it is * underlimit. Otherwise, check to see if the class is underlimit. */ if (cl->parent_ == NULL) return (1); if (cl->sleeping_) { if (TV_LT(now, &cl->undertime_)) return (0); CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; return (1); } top = NULL; while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) { if (((cl = cl->borrow_) == NULL) || (cl->depth_ > ifd->cutoff_)) { #ifdef ADJUST_CUTOFF if (cl != NULL) /* cutoff is taking effect, just return false without calling the delay action. */ return (0); #endif #ifdef BORROW_OFFTIME /* * check if the class can borrow offtime too. * borrow offtime from the top of the borrow * chain if the top class is not overloaded. */ if (cl != NULL) { /* cutoff is taking effect, use this class as top. */ top = cl; CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_); } if (top != NULL && top->avgidle_ == top->minidle_) top = NULL; p->overtime_ = *now; (p->overlimit)(p, top); #else p->overtime_ = *now; (p->overlimit)(p, NULL); #endif return (0); } top = cl; } if (cl != p) ifd->borrowed_[ifd->qi_] = cl; return (1); } /* * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to * Packet-by-packet round robin. * * The heart of the weighted round-robin scheduler, which decides which * class next gets to send a packet. Highest priority first, then * weighted round-robin within priorites. * * Each able-to-send class gets to send until its byte allocation is * exhausted. Thus, the active pointer is only changed after a class has * exhausted its allocation. * * If the scheduler finds no class that is underlimit or able to borrow, * then the first class found that had a nonzero queue and is allowed to * borrow gets to send. */ static mbuf_t * _rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op) { struct rm_class *cl = NULL, *first = NULL; u_int deficit; int cpri; mbuf_t *m; struct timeval now; RM_GETTIME(now); /* * if the driver polls the top of the queue and then removes * the polled packet, we must return the same packet. */ if (op == ALTDQ_REMOVE && ifd->pollcache_) { cl = ifd->pollcache_; cpri = cl->pri_; if (ifd->efficient_) { /* check if this class is overlimit */ if (cl->undertime_.tv_sec != 0 && rmc_under_limit(cl, &now) == 0) first = cl; } ifd->pollcache_ = NULL; goto _wrr_out; } else { /* mode == ALTDQ_POLL || pollcache == NULL */ ifd->pollcache_ = NULL; ifd->borrowed_[ifd->qi_] = NULL; } #ifdef ADJUST_CUTOFF _again: #endif for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { if (ifd->na_[cpri] == 0) continue; deficit = 0; /* * Loop through twice for a priority level, if some class * was unable to send a packet the first round because * of the weighted round-robin mechanism. * During the second loop at this level, deficit==2. * (This second loop is not needed if for every class, * "M[cl->pri_])" times "cl->allotment" is greater than * the byte size for the largest packet in the class.) */ _wrr_loop: cl = ifd->active_[cpri]; ASSERT(cl != NULL); do { if ((deficit < 2) && (cl->bytes_alloc_ <= 0)) cl->bytes_alloc_ += cl->w_allotment_; if (!qempty(cl->q_)) { if ((cl->undertime_.tv_sec == 0) || rmc_under_limit(cl, &now)) { if (cl->bytes_alloc_ > 0 || deficit > 1) goto _wrr_out; /* underlimit but no alloc */ deficit = 1; #if 1 ifd->borrowed_[ifd->qi_] = NULL; #endif } else if (first == NULL && cl->borrow_ != NULL) first = cl; /* borrowing candidate */ } cl->bytes_alloc_ = 0; cl = cl->peer_; } while (cl != ifd->active_[cpri]); if (deficit == 1) { /* first loop found an underlimit class with deficit */ /* Loop on same priority level, with new deficit. */ deficit = 2; goto _wrr_loop; } } #ifdef ADJUST_CUTOFF /* * no underlimit class found. if cutoff is taking effect, * increase cutoff and try again. */ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { ifd->cutoff_++; CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_); goto _again; } #endif /* ADJUST_CUTOFF */ /* * If LINK_EFFICIENCY is turned on, then the first overlimit * class we encounter will send a packet if all the classes * of the link-sharing structure are overlimit. */ reset_cutoff(ifd); CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_); if (!ifd->efficient_ || first == NULL) return (NULL); cl = first; cpri = cl->pri_; #if 0 /* too time-consuming for nothing */ if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; #endif ifd->borrowed_[ifd->qi_] = cl->borrow_; ifd->cutoff_ = cl->borrow_->depth_; /* * Deque the packet and do the book keeping... */ _wrr_out: if (op == ALTDQ_REMOVE) { m = _rmc_getq(cl); if (m == NULL) panic("_rmc_wrr_dequeue_next"); if (qempty(cl->q_)) ifd->na_[cpri]--; /* * Update class statistics and link data. */ if (cl->bytes_alloc_ > 0) cl->bytes_alloc_ -= m_pktlen(m); if ((cl->bytes_alloc_ <= 0) || first == cl) ifd->active_[cl->pri_] = cl->peer_; else ifd->active_[cl->pri_] = cl; ifd->class_[ifd->qi_] = cl; ifd->curlen_[ifd->qi_] = m_pktlen(m); ifd->now_[ifd->qi_] = now; ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; ifd->queued_++; } else { /* mode == ALTDQ_PPOLL */ m = _rmc_pollq(cl); ifd->pollcache_ = cl; } return (m); } /* * Dequeue & return next packet from the highest priority class that * has a packet to send & has enough allocation to send it. This * routine is called by a driver whenever it needs a new packet to * output. */ static mbuf_t * _rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op) { mbuf_t *m; int cpri; struct rm_class *cl, *first = NULL; struct timeval now; RM_GETTIME(now); /* * if the driver polls the top of the queue and then removes * the polled packet, we must return the same packet. */ if (op == ALTDQ_REMOVE && ifd->pollcache_) { cl = ifd->pollcache_; cpri = cl->pri_; ifd->pollcache_ = NULL; goto _prr_out; } else { /* mode == ALTDQ_POLL || pollcache == NULL */ ifd->pollcache_ = NULL; ifd->borrowed_[ifd->qi_] = NULL; } #ifdef ADJUST_CUTOFF _again: #endif for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { if (ifd->na_[cpri] == 0) continue; cl = ifd->active_[cpri]; ASSERT(cl != NULL); do { if (!qempty(cl->q_)) { if ((cl->undertime_.tv_sec == 0) || rmc_under_limit(cl, &now)) goto _prr_out; if (first == NULL && cl->borrow_ != NULL) first = cl; } cl = cl->peer_; } while (cl != ifd->active_[cpri]); } #ifdef ADJUST_CUTOFF /* * no underlimit class found. if cutoff is taking effect, increase * cutoff and try again. */ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { ifd->cutoff_++; goto _again; } #endif /* ADJUST_CUTOFF */ /* * If LINK_EFFICIENCY is turned on, then the first overlimit * class we encounter will send a packet if all the classes * of the link-sharing structure are overlimit. */ reset_cutoff(ifd); if (!ifd->efficient_ || first == NULL) return (NULL); cl = first; cpri = cl->pri_; #if 0 /* too time-consuming for nothing */ if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; #endif ifd->borrowed_[ifd->qi_] = cl->borrow_; ifd->cutoff_ = cl->borrow_->depth_; /* * Deque the packet and do the book keeping... */ _prr_out: if (op == ALTDQ_REMOVE) { m = _rmc_getq(cl); if (m == NULL) panic("_rmc_prr_dequeue_next"); if (qempty(cl->q_)) ifd->na_[cpri]--; ifd->active_[cpri] = cl->peer_; ifd->class_[ifd->qi_] = cl; ifd->curlen_[ifd->qi_] = m_pktlen(m); ifd->now_[ifd->qi_] = now; ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; ifd->queued_++; } else { /* mode == ALTDQ_POLL */ m = _rmc_pollq(cl); ifd->pollcache_ = cl; } return (m); } /* * mbuf_t * * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function * is invoked by the packet driver to get the next packet to be * dequeued and output on the link. If WRR is enabled, then the * WRR dequeue next routine will determine the next packet to sent. * Otherwise, packet-by-packet round robin is invoked. * * Returns: NULL, if a packet is not available or if all * classes are overlimit. * * Otherwise, Pointer to the next packet. */ mbuf_t * rmc_dequeue_next(struct rm_ifdat *ifd, int mode) { if (ifd->queued_ >= ifd->maxqueued_) return (NULL); else if (ifd->wrr_) return (_rmc_wrr_dequeue_next(ifd, mode)); else return (_rmc_prr_dequeue_next(ifd, mode)); } /* * Update the utilization estimate for the packet that just completed. * The packet's class & the parent(s) of that class all get their * estimators updated. This routine is called by the driver's output- * packet-completion interrupt service routine. */ /* * a macro to approximate "divide by 1000" that gives 0.000999, * if a value has enough effective digits. * (on pentium, mul takes 9 cycles but div takes 46!) */ #define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17)) void rmc_update_class_util(struct rm_ifdat *ifd) { int idle, avgidle, pktlen; int pkt_time, tidle; rm_class_t *cl, *borrowed; rm_class_t *borrows; struct timeval *nowp; /* * Get the most recent completed class. */ if ((cl = ifd->class_[ifd->qo_]) == NULL) return; pktlen = ifd->curlen_[ifd->qo_]; borrowed = ifd->borrowed_[ifd->qo_]; borrows = borrowed; PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); /* * Run estimator on class and its ancestors. */ /* * rm_update_class_util is designed to be called when the * transfer is completed from a xmit complete interrupt, * but most drivers don't implement an upcall for that. * so, just use estimated completion time. * as a result, ifd->qi_ and ifd->qo_ are always synced. */ nowp = &ifd->now_[ifd->qo_]; /* get pkt_time (for link) in usec */ #if 1 /* use approximation */ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_; pkt_time = NSEC_TO_USEC(pkt_time); #else pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000; #endif #if 1 /* ALTQ4PPP */ if (TV_LT(nowp, &ifd->ifnow_)) { int iftime; /* * make sure the estimated completion time does not go * too far. it can happen when the link layer supports * data compression or the interface speed is set to * a much lower value. */ TV_DELTA(&ifd->ifnow_, nowp, iftime); if (iftime+pkt_time < ifd->maxiftime_) { TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); } else { TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_); } } else { TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); } #else if (TV_LT(nowp, &ifd->ifnow_)) { TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); } else { TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); } #endif while (cl != NULL) { TV_DELTA(&ifd->ifnow_, &cl->last_, idle); if (idle >= 2000000) /* * this class is idle enough, reset avgidle. * (TV_DELTA returns 2000000 us when delta is large.) */ cl->avgidle_ = cl->maxidle_; /* get pkt_time (for class) in usec */ #if 1 /* use approximation */ pkt_time = pktlen * cl->ns_per_byte_; pkt_time = NSEC_TO_USEC(pkt_time); #else pkt_time = pktlen * cl->ns_per_byte_ / 1000; #endif idle -= pkt_time; avgidle = cl->avgidle_; avgidle += idle - (avgidle >> RM_FILTER_GAIN); cl->avgidle_ = avgidle; /* Are we overlimit ? */ if (avgidle <= 0) { CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle); #if 1 /* ALTQ */ /* * need some lower bound for avgidle, otherwise * a borrowing class gets unbounded penalty. */ if (avgidle < cl->minidle_) avgidle = cl->avgidle_ = cl->minidle_; #endif /* set next idle to make avgidle 0 */ tidle = pkt_time + (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN); TV_ADD_DELTA(nowp, tidle, &cl->undertime_); ++cl->stats_.over; } else { cl->avgidle_ = (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle; cl->undertime_.tv_sec = 0; if (cl->sleeping_) { CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; } } if (borrows != NULL) { if (borrows != cl) ++cl->stats_.borrows; else borrows = NULL; } cl->last_ = ifd->ifnow_; cl->last_pkttime_ = pkt_time; #if 1 if (cl->parent_ == NULL) { /* take stats of root class */ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); } #endif cl = cl->parent_; } /* * Check to see if cutoff needs to set to a new level. */ cl = ifd->class_[ifd->qo_]; if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) { #if 1 /* ALTQ */ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) { rmc_tl_satisfied(ifd, nowp); CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); } else { ifd->cutoff_ = borrowed->depth_; CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); } #else /* !ALTQ */ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) { reset_cutoff(ifd); #ifdef notdef rmc_tl_satisfied(ifd, &now); #endif CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); } else { ifd->cutoff_ = borrowed->depth_; CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); } #endif /* !ALTQ */ } /* * Release class slot */ ifd->borrowed_[ifd->qo_] = NULL; ifd->class_[ifd->qo_] = NULL; ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_; ifd->queued_--; } /* * void * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific) * over-limit action routines. These get invoked by rmc_under_limit() * if a class with packets to send if over its bandwidth limit & can't * borrow from a parent class. * * Returns: NONE */ static void rmc_drop_action(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; ASSERT(qlen(cl->q_) > 0); _rmc_dropq(cl); if (qempty(cl->q_)) ifd->na_[cl->pri_]--; } void rmc_dropall(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; if (!qempty(cl->q_)) { _flushq(cl->q_); ifd->na_[cl->pri_]--; } } #if (__FreeBSD_version > 300000) /* hzto() is removed from FreeBSD-3.0 */ static int hzto(struct timeval *); static int hzto(tv) struct timeval *tv; { struct timeval t2; getmicrotime(&t2); t2.tv_sec = tv->tv_sec - t2.tv_sec; t2.tv_usec = tv->tv_usec - t2.tv_usec; return (tvtohz(&t2)); } #endif /* __FreeBSD_version > 300000 */ /* * void * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ * delay action routine. It is invoked via rmc_under_limit when the * packet is discoverd to be overlimit. * * If the delay action is result of borrow class being overlimit, then * delay for the offtime of the borrowing class that is overlimit. * * Returns: NONE */ void rmc_delay_action(struct rm_class *cl, struct rm_class *borrow) { int delay, t, extradelay; cl->stats_.overactions++; TV_DELTA(&cl->undertime_, &cl->overtime_, delay); #ifndef BORROW_OFFTIME delay += cl->offtime_; #endif if (!cl->sleeping_) { CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle); #ifdef BORROW_OFFTIME if (borrow != NULL) extradelay = borrow->offtime_; else #endif extradelay = cl->offtime_; #ifdef ALTQ /* * XXX recalculate suspend time: * current undertime is (tidle + pkt_time) calculated * from the last transmission. * tidle: time required to bring avgidle back to 0 * pkt_time: target waiting time for this class * we need to replace pkt_time by offtime */ extradelay -= cl->last_pkttime_; #endif if (extradelay > 0) { TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_); delay += extradelay; } cl->sleeping_ = 1; cl->stats_.delays++; /* * Since packets are phased randomly with respect to the * clock, 1 tick (the next clock tick) can be an arbitrarily * short time so we have to wait for at least two ticks. * NOTE: If there's no other traffic, we need the timer as * a 'backstop' to restart this class. */ if (delay > tick * 2) { -#ifdef __FreeBSD__ /* FreeBSD rounds up the tick */ t = hzto(&cl->undertime_); -#else - /* other BSDs round down the tick */ - t = hzto(&cl->undertime_) + 1; -#endif } else t = 2; CALLOUT_RESET(&cl->callout_, t, (timeout_t *)rmc_restart, (caddr_t)cl); } } /* * void * rmc_restart() - is just a helper routine for rmc_delay_action -- it is * called by the system timer code & is responsible checking if the * class is still sleeping (it might have been restarted as a side * effect of the queue scan on a packet arrival) and, if so, restarting * output for the class. Inspecting the class state & restarting output * require locking the class structure. In general the driver is * responsible for locking but this is the only routine that is not * called directly or indirectly from the interface driver so it has * know about system locking conventions. Under bsd, locking is done * by raising IPL to splimp so that's what's implemented here. On a * different system this would probably need to be changed. * * Returns: NONE */ static void rmc_restart(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; int s; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(ifd->ifq_); if (cl->sleeping_) { cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) { CBQTRACE(rmc_restart, 'trts', cl->stats_.handle); (ifd->restart)(ifd->ifq_); } } IFQ_UNLOCK(ifd->ifq_); splx(s); } /* * void * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit * handling routine for the root class of the link sharing structure. * * Returns: NONE */ static void rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow) { panic("rmc_root_overlimit"); } /* * Packet Queue handling routines. Eventually, this is to localize the * effects on the code whether queues are red queues or droptail * queues. */ static int _rmc_addq(rm_class_t *cl, mbuf_t *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) return red_addq(cl->red_, cl->q_, m, cl->pktattr_); #endif /* ALTQ_RED */ if (cl->flags_ & RMCF_CLEARDSCP) write_dsfield(m, cl->pktattr_, 0); _addq(cl->q_, m); return (0); } /* note: _rmc_dropq is not called for red */ static void _rmc_dropq(rm_class_t *cl) { mbuf_t *m; if ((m = _getq(cl->q_)) != NULL) m_freem(m); } static mbuf_t * _rmc_getq(rm_class_t *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) return rio_getq((rio_t *)cl->red_, cl->q_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) return red_getq(cl->red_, cl->q_); #endif return _getq(cl->q_); } static mbuf_t * _rmc_pollq(rm_class_t *cl) { return qhead(cl->q_); } #ifdef CBQ_TRACE struct cbqtrace cbqtrace_buffer[NCBQTRACE+1]; struct cbqtrace *cbqtrace_ptr = NULL; int cbqtrace_count; /* * DDB hook to trace cbq events: * the last 1024 events are held in a circular buffer. * use "call cbqtrace_dump(N)" to display 20 events from Nth event. */ void cbqtrace_dump(int); static char *rmc_funcname(void *); static struct rmc_funcs { void *func; char *name; } rmc_funcs[] = { rmc_init, "rmc_init", rmc_queue_packet, "rmc_queue_packet", rmc_under_limit, "rmc_under_limit", rmc_update_class_util, "rmc_update_class_util", rmc_delay_action, "rmc_delay_action", rmc_restart, "rmc_restart", _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next", NULL, NULL }; static char *rmc_funcname(void *func) { struct rmc_funcs *fp; for (fp = rmc_funcs; fp->func != NULL; fp++) if (fp->func == func) return (fp->name); return ("unknown"); } void cbqtrace_dump(int counter) { int i, *p; char *cp; counter = counter % NCBQTRACE; p = (int *)&cbqtrace_buffer[counter]; for (i=0; i<20; i++) { printf("[0x%x] ", *p++); printf("%s: ", rmc_funcname((void *)*p++)); cp = (char *)p++; printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]); printf("%d\n",*p++); if (p >= (int *)&cbqtrace_buffer[NCBQTRACE]) p = (int *)cbqtrace_buffer; } } #endif /* CBQ_TRACE */ #endif /* ALTQ_CBQ */ #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) #if !defined(__GNUC__) || defined(ALTQ_DEBUG) void _addq(class_queue_t *q, mbuf_t *m) { mbuf_t *m0; if ((m0 = qtail(q)) != NULL) m->m_nextpkt = m0->m_nextpkt; else m0 = m; m0->m_nextpkt = m; qtail(q) = m; qlen(q)++; } mbuf_t * _getq(class_queue_t *q) { mbuf_t *m, *m0; if ((m = qtail(q)) == NULL) return (NULL); if ((m0 = m->m_nextpkt) != m) m->m_nextpkt = m0->m_nextpkt; else { ASSERT(qlen(q) == 1); qtail(q) = NULL; } qlen(q)--; m0->m_nextpkt = NULL; return (m0); } /* drop a packet at the tail of the queue */ mbuf_t * _getq_tail(class_queue_t *q) { mbuf_t *m, *m0, *prev; if ((m = m0 = qtail(q)) == NULL) return NULL; do { prev = m0; m0 = m0->m_nextpkt; } while (m0 != m); prev->m_nextpkt = m->m_nextpkt; if (prev == m) { ASSERT(qlen(q) == 1); qtail(q) = NULL; } else qtail(q) = prev; qlen(q)--; m->m_nextpkt = NULL; return (m); } /* randomly select a packet in the queue */ mbuf_t * _getq_random(class_queue_t *q) { struct mbuf *m; int i, n; if ((m = qtail(q)) == NULL) return NULL; if (m->m_nextpkt == m) { ASSERT(qlen(q) == 1); qtail(q) = NULL; } else { struct mbuf *prev = NULL; n = arc4random() % qlen(q) + 1; for (i = 0; i < n; i++) { prev = m; m = m->m_nextpkt; } prev->m_nextpkt = m->m_nextpkt; if (m == qtail(q)) qtail(q) = prev; } qlen(q)--; m->m_nextpkt = NULL; return (m); } void _removeq(class_queue_t *q, mbuf_t *m) { mbuf_t *m0, *prev; m0 = qtail(q); do { prev = m0; m0 = m0->m_nextpkt; } while (m0 != m); prev->m_nextpkt = m->m_nextpkt; if (prev == m) qtail(q) = NULL; else if (qtail(q) == m) qtail(q) = prev; qlen(q)--; } void _flushq(class_queue_t *q) { mbuf_t *m; while ((m = _getq(q)) != NULL) m_freem(m); ASSERT(qlen(q) == 0); } #endif /* !__GNUC__ || ALTQ_DEBUG */ #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */ Index: projects/ifnet/sys/net/altq/altq_rmclass_debug.h =================================================================== --- projects/ifnet/sys/net/altq/altq_rmclass_debug.h (revision 281649) +++ projects/ifnet/sys/net/altq/altq_rmclass_debug.h (revision 281650) @@ -1,112 +1,113 @@ -/* $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ */ - -/* +/*- * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. + * + * $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ + * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_ #define _ALTQ_ALTQ_RMCLASS_DEBUG_H_ /* #pragma ident "@(#)rm_class_debug.h 1.7 98/05/04 SMI" */ /* * Cbq debugging macros */ #ifdef __cplusplus extern "C" { #endif #ifdef CBQ_TRACE #ifndef NCBQTRACE #define NCBQTRACE (16 * 1024) #endif /* * To view the trace output, using adb, type: * adb -k /dev/ksyms /dev/mem , then type * cbqtrace_count/D to get the count, then type * cbqtrace_buffer,0tcount/Dp4C" "Xn * This will dump the trace buffer from 0 to count. */ /* * in ALTQ, "call cbqtrace_dump(N)" from DDB to display 20 events * from Nth event in the circular buffer. */ struct cbqtrace { int count; int function; /* address of function */ int trace_action; /* descriptive 4 characters */ int object; /* object operated on */ }; extern struct cbqtrace cbqtrace_buffer[]; extern struct cbqtrace *cbqtrace_ptr; extern int cbqtrace_count; #define CBQTRACEINIT() { \ if (cbqtrace_ptr == NULL) \ cbqtrace_ptr = cbqtrace_buffer; \ else { \ cbqtrace_ptr = cbqtrace_buffer; \ bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \ cbqtrace_count = 0; \ } \ } #define LOCK_TRACE() splimp() #define UNLOCK_TRACE(x) splx(x) #define CBQTRACE(func, act, obj) { \ int __s = LOCK_TRACE(); \ int *_p = &cbqtrace_ptr->count; \ *_p++ = ++cbqtrace_count; \ *_p++ = (int)(func); \ *_p++ = (int)(act); \ *_p++ = (int)(obj); \ if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\ cbqtrace_ptr = cbqtrace_buffer; \ else \ cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \ UNLOCK_TRACE(__s); \ } #else /* If no tracing, define no-ops */ #define CBQTRACEINIT() #define CBQTRACE(a, b, c) #endif /* !CBQ_TRACE */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_RMCLASS_DEBUG_H_ */ Index: projects/ifnet/sys/net/altq/altq_subr.c =================================================================== --- projects/ifnet/sys/net/altq/altq_subr.c (revision 281649) +++ projects/ifnet/sys/net/altq/altq_subr.c (revision 281650) @@ -1,1981 +1,1925 @@ -/* $FreeBSD$ */ -/* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */ - -/* +/*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef __FreeBSD__ #include -#endif #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* machine dependent clock related includes */ -#ifdef __FreeBSD__ #include #include #include #include -#endif #if defined(__amd64__) || defined(__i386__) #include /* for pentium tsc */ #include /* for CPUID_TSC */ -#ifdef __FreeBSD__ #include /* for cpu_feature */ -#elif defined(__NetBSD__) || defined(__OpenBSD__) -#include /* for cpu_feature */ -#endif #endif /* __amd64 || __i386__ */ /* * internal function prototypes */ static void tbr_timeout(void *); int (*altq_input)(struct mbuf *, int) = NULL; static struct mbuf *tbr_dequeue(struct ifaltq *, int); static int tbr_timer = 0; /* token bucket regulator timer */ #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) static struct callout tbr_callout = CALLOUT_INITIALIZER; #else static struct callout tbr_callout; #endif #ifdef ALTQ3_CLFIER_COMPAT static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); #ifdef INET6 static int extract_ports6(struct mbuf *, struct ip6_hdr *, struct flowinfo_in6 *); #endif static int apply_filter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static int apply_ppfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); #ifdef INET6 static int apply_filter6(u_int32_t, struct flow_filter6 *, struct flowinfo_in6 *); #endif static int apply_tosfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static u_long get_filt_handle(struct acc_classifier *, int); static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); static u_int32_t filt2fibmask(struct flow_filter *); static void ip4f_cache(struct ip *, struct flowinfo_in *); static int ip4f_lookup(struct ip *, struct flowinfo_in *); static int ip4f_init(void); static struct ip4_frag *ip4f_alloc(void); static void ip4f_free(struct ip4_frag *); #endif /* ALTQ3_CLFIER_COMPAT */ /* * alternate queueing support routines */ /* look up the queue state by the interface name and the queueing type. */ void * altq_lookup(name, type) char *name; int type; { struct ifnet *ifp; if ((ifp = ifunit(name)) != NULL) { /* read if_snd unlocked */ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) return (ifp->if_snd.altq_disc); } return NULL; } int altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) struct ifaltq *ifq; int type; void *discipline; int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); struct mbuf *(*dequeue)(struct ifaltq *, int); int (*request)(struct ifaltq *, int, void *); void *clfier; void *(*classify)(void *, struct mbuf *, int); { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } #ifdef ALTQ3_COMPAT /* * pfaltq can override the existing discipline, but altq3 cannot. * check these if clfier is not NULL (which implies altq3). */ if (clfier != NULL) { if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return EBUSY; } if (ALTQ_IS_ATTACHED(ifq)) { IFQ_UNLOCK(ifq); return EEXIST; } } #endif ifq->altq_type = type; ifq->altq_disc = discipline; ifq->altq_enqueue = enqueue; ifq->altq_dequeue = dequeue; ifq->altq_request = request; ifq->altq_clfier = clfier; ifq->altq_classify = classify; ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); #ifdef ALTQ3_COMPAT #ifdef ALTQ_KLD altq_module_incref(type); #endif #endif IFQ_UNLOCK(ifq); return 0; } int altq_detach(ifq) struct ifaltq *ifq; { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return EBUSY; } if (!ALTQ_IS_ATTACHED(ifq)) { IFQ_UNLOCK(ifq); return (0); } #ifdef ALTQ3_COMPAT #ifdef ALTQ_KLD altq_module_declref(ifq->altq_type); #endif #endif ifq->altq_type = ALTQT_NONE; ifq->altq_disc = NULL; ifq->altq_enqueue = NULL; ifq->altq_dequeue = NULL; ifq->altq_request = NULL; ifq->altq_clfier = NULL; ifq->altq_classify = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; IFQ_UNLOCK(ifq); return 0; } int altq_enable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ ifq->altq_flags |= ALTQF_ENABLED; if (ifq->altq_clfier != NULL) ifq->altq_flags |= ALTQF_CLASSIFY; splx(s); IFQ_UNLOCK(ifq); return 0; } int altq_disable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); splx(s); IFQ_UNLOCK(ifq); return 0; } #ifdef ALTQ_DEBUG void altq_assert(file, line, failedexpr) const char *file, *failedexpr; int line; { (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", failedexpr, file, line); panic("altq assertion"); /* NOTREACHED */ } #endif /* * internal representation of token bucket parameters * rate: byte_per_unittime << 32 * (((bits_per_sec) / 8) << 32) / machclk_freq * depth: byte << 32 * */ #define TBR_SHIFT 32 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) static struct mbuf * tbr_dequeue(ifq, op) struct ifaltq *ifq; int op; { struct tb_regulator *tbr; struct mbuf *m; int64_t interval; u_int64_t now; IFQ_LOCK_ASSERT(ifq); tbr = ifq->altq_tbr; if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { /* if this is a remove after poll, bypass tbr check */ } else { /* update token only when it is negative */ if (tbr->tbr_token <= 0) { now = read_machclk(); interval = now - tbr->tbr_last; if (interval >= tbr->tbr_filluptime) tbr->tbr_token = tbr->tbr_depth; else { tbr->tbr_token += interval * tbr->tbr_rate; if (tbr->tbr_token > tbr->tbr_depth) tbr->tbr_token = tbr->tbr_depth; } tbr->tbr_last = now; } /* if token is still negative, don't allow dequeue */ if (tbr->tbr_token <= 0) return (NULL); } if (ALTQ_IS_ENABLED(ifq)) m = (*ifq->altq_dequeue)(ifq, op); else { if (op == ALTDQ_POLL) _IF_POLL(ifq, m); else _IF_DEQUEUE(ifq, m); } if (m != NULL && op == ALTDQ_REMOVE) tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); tbr->tbr_lastop = op; return (m); } /* * set a token bucket regulator. * if the specified rate is zero, the token bucket regulator is deleted. */ int tbr_set(ifq, profile) struct ifaltq *ifq; struct tb_profile *profile; { struct tb_regulator *tbr, *otbr; if (tbr_dequeue_ptr == NULL) tbr_dequeue_ptr = tbr_dequeue; if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("tbr_set: no cpu clock available!\n"); return (ENXIO); } IFQ_LOCK(ifq); if (profile->rate == 0) { /* delete this tbr */ if ((tbr = ifq->altq_tbr) == NULL) { IFQ_UNLOCK(ifq); return (ENOENT); } ifq->altq_tbr = NULL; free(tbr, M_DEVBUF); IFQ_UNLOCK(ifq); return (0); } tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); if (tbr == NULL) { IFQ_UNLOCK(ifq); return (ENOMEM); } tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; tbr->tbr_depth = TBR_SCALE(profile->depth); if (tbr->tbr_rate > 0) tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; else tbr->tbr_filluptime = 0xffffffffffffffffLL; tbr->tbr_token = tbr->tbr_depth; tbr->tbr_last = read_machclk(); tbr->tbr_lastop = ALTDQ_REMOVE; otbr = ifq->altq_tbr; ifq->altq_tbr = tbr; /* set the new tbr */ if (otbr != NULL) free(otbr, M_DEVBUF); else { if (tbr_timer == 0) { CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); tbr_timer = 1; } } IFQ_UNLOCK(ifq); return (0); } /* * tbr_timeout goes through the interface list, and kicks the drivers * if necessary. * * MPSAFE */ static void tbr_timeout(arg) void *arg; { -#ifdef __FreeBSD__ VNET_ITERATOR_DECL(vnet_iter); -#endif struct ifnet *ifp; int active, s; active = 0; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif -#ifdef __FreeBSD__ IFNET_RLOCK_NOSLEEP(); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); -#endif for (ifp = TAILQ_FIRST(&V_ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { /* read from if_snd unlocked */ if (!TBR_IS_ENABLED(&ifp->if_snd)) continue; active++; if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL) (*ifp->if_start)(ifp); } -#ifdef __FreeBSD__ CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); IFNET_RUNLOCK_NOSLEEP(); -#endif splx(s); if (active > 0) CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); else tbr_timer = 0; /* don't need tbr_timer anymore */ } /* * get token bucket regulator profile */ int tbr_get(ifq, profile) struct ifaltq *ifq; struct tb_profile *profile; { struct tb_regulator *tbr; IFQ_LOCK(ifq); if ((tbr = ifq->altq_tbr) == NULL) { profile->rate = 0; profile->depth = 0; } else { profile->rate = (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); } IFQ_UNLOCK(ifq); return (0); } /* * attach a discipline to the interface. if one already exists, it is * overridden. * Locking is done in the discipline specific attach functions. Basically * they call back to altq_attach which takes care of the attach and locking. */ int altq_pfattach(struct pf_altq *a) { int error = 0; switch (a->scheduler) { case ALTQT_NONE: break; #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_pfattach(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_pfattach(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_pfattach(a); break; #endif default: error = ENXIO; } return (error); } /* * detach a discipline from the interface. * it is possible that the discipline was already overridden by another * discipline. */ int altq_pfdetach(struct pf_altq *a) { struct ifnet *ifp; int s, error = 0; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); /* if this discipline is no longer referenced, just return */ /* read unlocked from if_snd */ if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) return (0); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif /* read unlocked from if_snd, _disable and _detach take care */ if (ALTQ_IS_ENABLED(&ifp->if_snd)) error = altq_disable(&ifp->if_snd); if (error == 0) error = altq_detach(&ifp->if_snd); splx(s); return (error); } /* * add a discipline or a queue * Locking is done in the discipline specific functions with regards to * malloc with WAITOK, also it is not yet clear which lock to use. */ int altq_add(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_add_queue(a)); if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) panic("altq_add: no cpu clock"); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a discipline or a queue * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_remove_queue(a)); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * add a queue to the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_add_queue(struct pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a queue from the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove_queue(struct pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * get queue statistics * Locking is done in the discipline specific functions with regards to * copyout operations, also it is not yet clear which lock to use. */ int altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_getqstats(a, ubuf, nbytes); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_getqstats(a, ubuf, nbytes); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_getqstats(a, ubuf, nbytes); break; #endif default: error = ENXIO; } return (error); } /* * read and write diffserv field in IPv4 or IPv6 header */ u_int8_t read_dsfield(m, pktattr) struct mbuf *m; struct altq_pktattr *pktattr; { struct mbuf *m0; u_int8_t ds_field = 0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return ((u_int8_t)0); /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("read_dsfield: can't locate header!\n"); #endif return ((u_int8_t)0); } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; if (ip->ip_v != 4) return ((u_int8_t)0); /* version mismatch! */ ds_field = ip->ip_tos; } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return ((u_int8_t)0); /* version mismatch! */ ds_field = (flowlabel >> 20) & 0xff; } #endif return (ds_field); } void write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) { struct mbuf *m0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return; /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("write_dsfield: can't locate header!\n"); #endif return; } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; u_int8_t old; int32_t sum; if (ip->ip_v != 4) return; /* version mismatch! */ old = ip->ip_tos; dsfield |= old & 3; /* leave CU bits */ if (old == dsfield) return; ip->ip_tos = dsfield; /* * update checksum (from RFC1624) * HC' = ~(~HC + ~m + m') */ sum = ~ntohs(ip->ip_sum) & 0xffff; sum += 0xff00 + (~old & 0xff) + dsfield; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); /* add carry */ ip->ip_sum = htons(~sum & 0xffff); } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return; /* version mismatch! */ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); ip6->ip6_flow = htonl(flowlabel); } #endif return; } /* * high resolution clock support taking advantage of a machine dependent * high resolution time counter (e.g., timestamp counter of intel pentium). * we assume * - 64-bit-long monotonically-increasing counter * - frequency range is 100M-4GHz (CPU speed) */ /* if pcc is not available or disabled, emulate 256MHz using microtime() */ #define MACHCLK_SHIFT 8 int machclk_usepcc; u_int32_t machclk_freq; u_int32_t machclk_per_tick; #if defined(__i386__) && defined(__NetBSD__) extern u_int64_t cpu_tsc_freq; #endif #if (__FreeBSD_version >= 700035) /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) /* If TSC is P-state invariant, don't do anything. */ if (tsc_is_invariant) return; #endif /* Total setting for this level gives the new frequency in MHz. */ init_machclk(); } EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_LAST); #endif /* __FreeBSD_version >= 700035 */ static void init_machclk_setup(void) { #if (__FreeBSD_version >= 600000) callout_init(&tbr_callout, 0); #endif machclk_usepcc = 1; #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) machclk_usepcc = 0; #endif #if defined(__FreeBSD__) && defined(SMP) machclk_usepcc = 0; #endif #if defined(__NetBSD__) && defined(MULTIPROCESSOR) machclk_usepcc = 0; #endif #if defined(__amd64__) || defined(__i386__) /* check if TSC is available */ -#ifdef __FreeBSD__ if ((cpu_feature & CPUID_TSC) == 0 || atomic_load_acq_64(&tsc_freq) == 0) -#else - if ((cpu_feature & CPUID_TSC) == 0) -#endif machclk_usepcc = 0; #endif } void init_machclk(void) { static int called; /* Call one-time initialization function. */ if (!called) { init_machclk_setup(); called = 1; } if (machclk_usepcc == 0) { /* emulate 256MHz using microtime() */ machclk_freq = 1000000 << MACHCLK_SHIFT; machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: emulate %uHz cpu clock\n", machclk_freq); #endif return; } /* * if the clock frequency (of Pentium TSC or Alpha PCC) is * accessible, just use it. */ #if defined(__amd64__) || defined(__i386__) -#ifdef __FreeBSD__ machclk_freq = atomic_load_acq_64(&tsc_freq); -#elif defined(__NetBSD__) - machclk_freq = (u_int32_t)cpu_tsc_freq; -#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU)) - machclk_freq = pentium_mhz * 1000000; #endif -#endif /* * if we don't know the clock frequency, measure it. */ if (machclk_freq == 0) { static int wait; struct timeval tv_start, tv_end; u_int64_t start, end, diff; int timo; microtime(&tv_start); start = read_machclk(); timo = hz; /* 1 sec */ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); microtime(&tv_end); end = read_machclk(); diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 + tv_end.tv_usec - tv_start.tv_usec; if (diff != 0) machclk_freq = (u_int)((end - start) * 1000000 / diff); } machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: CPU clock: %uHz\n", machclk_freq); #endif } #if defined(__OpenBSD__) && defined(__i386__) static __inline u_int64_t rdtsc(void) { u_int64_t rv; __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); return (rv); } #endif /* __OpenBSD__ && __i386__ */ u_int64_t read_machclk(void) { u_int64_t val; if (machclk_usepcc) { #if defined(__amd64__) || defined(__i386__) val = rdtsc(); #else panic("read_machclk"); #endif } else { struct timeval tv; microtime(&tv); val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 + tv.tv_usec) << MACHCLK_SHIFT); } return (val); } #ifdef ALTQ3_CLFIER_COMPAT #ifndef IPPROTO_ESP #define IPPROTO_ESP 50 /* encapsulating security payload */ #endif #ifndef IPPROTO_AH #define IPPROTO_AH 51 /* authentication header */ #endif /* * extract flow information from a given packet. * filt_mask shows flowinfo fields required. * we assume the ip header is in one mbuf, and addresses and ports are * in network byte order. */ int altq_extractflow(m, af, flow, filt_bmask) struct mbuf *m; int af; struct flowinfo *flow; u_int32_t filt_bmask; { switch (af) { case PF_INET: { struct flowinfo_in *fin; struct ip *ip; ip = mtod(m, struct ip *); if (ip->ip_v != 4) break; fin = (struct flowinfo_in *)flow; fin->fi_len = sizeof(struct flowinfo_in); fin->fi_family = AF_INET; fin->fi_proto = ip->ip_p; fin->fi_tos = ip->ip_tos; fin->fi_src.s_addr = ip->ip_src.s_addr; fin->fi_dst.s_addr = ip->ip_dst.s_addr; if (filt_bmask & FIMB4_PORTS) /* if port info is required, extract port numbers */ extract_ports4(m, ip, fin); else { fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; } return (1); } #ifdef INET6 case PF_INET6: { struct flowinfo_in6 *fin6; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); /* should we check the ip version? */ fin6 = (struct flowinfo_in6 *)flow; fin6->fi6_len = sizeof(struct flowinfo_in6); fin6->fi6_family = AF_INET6; fin6->fi6_proto = ip6->ip6_nxt; fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); fin6->fi6_src = ip6->ip6_src; fin6->fi6_dst = ip6->ip6_dst; if ((filt_bmask & FIMB6_PORTS) || ((filt_bmask & FIMB6_PROTO) && ip6->ip6_nxt > IPPROTO_IPV6)) /* * if port info is required, or proto is required * but there are option headers, extract port * and protocol numbers. */ extract_ports6(m, ip6, fin6); else { fin6->fi6_sport = 0; fin6->fi6_dport = 0; fin6->fi6_gpi = 0; } return (1); } #endif /* INET6 */ default: break; } /* failed */ flow->fi_len = sizeof(struct flowinfo); flow->fi_family = AF_UNSPEC; return (0); } /* * helper routine to extract port numbers */ /* structure for ipsec and ipv6 option header template */ struct _opt6 { u_int8_t opt6_nxt; /* next header */ u_int8_t opt6_hlen; /* header extension length */ u_int16_t _pad; u_int32_t ah_spi; /* security parameter index for authentication header */ }; /* * extract port numbers from a ipv4 packet. */ static int extract_ports4(m, ip, fin) struct mbuf *m; struct ip *ip; struct flowinfo_in *fin; { struct mbuf *m0; u_short ip_off; u_int8_t proto; int off; fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; ip_off = ntohs(ip->ip_off); /* if it is a fragment, try cached fragment info */ if (ip_off & IP_OFFMASK) { ip4f_lookup(ip, fin); return (1); } /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip >= m0->m_data) && ((caddr_t)ip < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports4: can't locate header! ip=%p\n", ip); #endif return (0); } off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); proto = ip->ip_p; #ifdef ALTQ_IPSEC again: #endif while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); /* bogus ip_hl! */ } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin->fi_sport = udp->uh_sport; fin->fi_dport = udp->uh_dport; fin->fi_proto = proto; } break; #ifdef ALTQ_IPSEC case IPPROTO_ESP: if (fin->fi_gpi == 0){ u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin->fi_gpi = *gpi; } fin->fi_proto = proto; break; case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); if (fin->fi_gpi == 0 && m0->m_len >= off + 8) fin->fi_gpi = opt6->ah_spi; } /* goto the next header */ goto again; #endif /* ALTQ_IPSEC */ default: fin->fi_proto = proto; return (0); } /* if this is a first fragment, cache it. */ if (ip_off & IP_MF) ip4f_cache(ip, fin); return (1); } #ifdef INET6 static int extract_ports6(m, ip6, fin6) struct mbuf *m; struct ip6_hdr *ip6; struct flowinfo_in6 *fin6; { struct mbuf *m0; int off; u_int8_t proto; fin6->fi6_gpi = 0; fin6->fi6_sport = 0; fin6->fi6_dport = 0; /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip6 >= m0->m_data) && ((caddr_t)ip6 < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports6: can't locate header! ip6=%p\n", ip6); #endif return (0); } off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; do { while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin6->fi6_sport = udp->uh_sport; fin6->fi6_dport = udp->uh_dport; fin6->fi6_proto = proto; } return (1); case IPPROTO_ESP: if (fin6->fi6_gpi == 0) { u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin6->fi6_gpi = *gpi; } fin6->fi6_proto = proto; return (1); case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) fin6->fi6_gpi = opt6->ah_spi; proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); /* goto the next header */ break; } case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += (opt6->opt6_hlen + 1) * 8; /* goto the next header */ break; } case IPPROTO_FRAGMENT: /* ipv6 fragmentations are not supported yet */ default: fin6->fi6_proto = proto; return (0); } } while (1); /*NOTREACHED*/ } #endif /* INET6 */ /* * altq common classifier */ int acc_add_filter(classifier, filter, class, phandle) struct acc_classifier *classifier; struct flow_filter *filter; void *class; u_long *phandle; { struct acc_filter *afp, *prev, *tmp; int i, s; #ifdef INET6 if (filter->ff_flow.fi_family != AF_INET && filter->ff_flow.fi_family != AF_INET6) return (EINVAL); #else if (filter->ff_flow.fi_family != AF_INET) return (EINVAL); #endif afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK); if (afp == NULL) return (ENOMEM); bzero(afp, sizeof(struct acc_filter)); afp->f_filter = *filter; afp->f_class = class; i = ACC_WILDCARD_INDEX; if (filter->ff_flow.fi_family == AF_INET) { struct flow_filter *filter4 = &afp->f_filter; /* * if address is 0, it's a wildcard. if address mask * isn't set, use full mask. */ if (filter4->ff_flow.fi_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0; else if (filter4->ff_mask.mask_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0xffffffff; if (filter4->ff_flow.fi_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0; else if (filter4->ff_mask.mask_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0xffffffff; /* clear extra bits in addresses */ filter4->ff_flow.fi_dst.s_addr &= filter4->ff_mask.mask_dst.s_addr; filter4->ff_flow.fi_src.s_addr &= filter4->ff_mask.mask_src.s_addr; /* * if dst address is a wildcard, use hash-entry * ACC_WILDCARD_INDEX. */ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); } #ifdef INET6 else if (filter->ff_flow.fi_family == AF_INET6) { struct flow_filter6 *filter6 = (struct flow_filter6 *)&afp->f_filter; #ifndef IN6MASK0 /* taken from kame ipv6 */ #define IN6MASK0 {{{ 0, 0, 0, 0 }}} #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask128 = IN6MASK128; #endif if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) filter6->ff_mask6.mask6_dst = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) filter6->ff_mask6.mask6_dst = in6mask128; if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) filter6->ff_mask6.mask6_src = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) filter6->ff_mask6.mask6_src = in6mask128; /* clear extra bits in addresses */ for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_dst.s6_addr[i] &= filter6->ff_mask6.mask6_dst.s6_addr[i]; for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_src.s6_addr[i] &= filter6->ff_mask6.mask6_src.s6_addr[i]; if (filter6->ff_flow6.fi6_flowlabel == 0) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); } #endif /* INET6 */ afp->f_handle = get_filt_handle(classifier, i); /* update filter bitmask */ afp->f_fbmask = filt2fibmask(filter); classifier->acc_fbmask |= afp->f_fbmask; /* * add this filter to the filter list. * filters are ordered from the highest rule number. */ -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif prev = NULL; LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) prev = tmp; else break; } if (prev == NULL) LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); else LIST_INSERT_AFTER(prev, afp, f_chain); splx(s); *phandle = afp->f_handle; return (0); } int acc_delete_filter(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int s; if ((afp = filth_to_filtp(classifier, handle)) == NULL) return (EINVAL); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif LIST_REMOVE(afp, f_chain); splx(s); free(afp, M_DEVBUF); /* todo: update filt_bmask */ return (0); } /* * delete filters referencing to the specified class. * if the all flag is not 0, delete all the filters. */ int acc_discard_filters(classifier, class, all) struct acc_classifier *classifier; void *class; int all; { struct acc_filter *afp; int i, s; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (all || afp->f_class == class) { LIST_REMOVE(afp, f_chain); free(afp, M_DEVBUF); /* start again from the head */ break; } } while (afp != NULL); } splx(s); if (all) classifier->acc_fbmask = 0; return (0); } void * acc_classify(clfier, m, af) void *clfier; struct mbuf *m; int af; { struct acc_classifier *classifier; struct flowinfo flow; struct acc_filter *afp; int i; classifier = (struct acc_classifier *)clfier; altq_extractflow(m, af, &flow, classifier->acc_fbmask); if (flow.fi_family == AF_INET) { struct flowinfo_in *fp = (struct flowinfo_in *)&flow; if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { /* only tos is used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_tosfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else if ((classifier->acc_fbmask & (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) == 0) { /* only proto and ports are used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_ppfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else { /* get the filter hash entry from its dest address */ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); do { /* * go through this loop twice. first for dst * hash, second for wildcards. */ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); /* * check again for filters with a dst addr * wildcard. * (daddr == 0 || dmask != 0xffffffff). */ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } } #ifdef INET6 else if (flow.fi_family == AF_INET6) { struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; /* get the filter hash entry from its flow ID */ if (fp6->fi6_flowlabel != 0) i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); else /* flowlable can be zero */ i = ACC_WILDCARD_INDEX; /* go through this loop twice. first for flow hash, second for wildcards. */ do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter6(afp->f_fbmask, (struct flow_filter6 *)&afp->f_filter, fp6)) /* filter matched */ return (afp->f_class); /* * check again for filters with a wildcard. */ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } #endif /* INET6 */ /* no filter matched */ return (NULL); } static int apply_filter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_DADDR) && filt->ff_flow.fi_dst.s_addr != (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) return (0); if ((fbmask & FIMB4_SADDR) && filt->ff_flow.fi_src.s_addr != (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) return (0); /* match */ return (1); } /* * filter matching function optimized for a common case that checks * only protocol and port numbers */ static int apply_ppfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); /* match */ return (1); } /* * filter matching function only for tos field. */ static int apply_tosfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); /* match */ return (1); } #ifdef INET6 static int apply_filter6(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter6 *filt; struct flowinfo_in6 *pkt; { int i; if (filt->ff_flow6.fi6_family != AF_INET6) return (0); if ((fbmask & FIMB6_FLABEL) && filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) return (0); if ((fbmask & FIMB6_PROTO) && filt->ff_flow6.fi6_proto != pkt->fi6_proto) return (0); if ((fbmask & FIMB6_SPORT) && filt->ff_flow6.fi6_sport != pkt->fi6_sport) return (0); if ((fbmask & FIMB6_DPORT) && filt->ff_flow6.fi6_dport != pkt->fi6_dport) return (0); if (fbmask & FIMB6_SADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_src.s6_addr32[i] != (pkt->fi6_src.s6_addr32[i] & filt->ff_mask6.mask6_src.s6_addr32[i])) return (0); } if (fbmask & FIMB6_DADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_dst.s6_addr32[i] != (pkt->fi6_dst.s6_addr32[i] & filt->ff_mask6.mask6_dst.s6_addr32[i])) return (0); } if ((fbmask & FIMB6_TCLASS) && filt->ff_flow6.fi6_tclass != (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) return (0); if ((fbmask & FIMB6_GPI) && filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) return (0); /* match */ return (1); } #endif /* INET6 */ /* * filter handle: * bit 20-28: index to the filter hash table * bit 0-19: unique id in the hash bucket. */ static u_long get_filt_handle(classifier, i) struct acc_classifier *classifier; int i; { static u_long handle_number = 1; u_long handle; struct acc_filter *afp; while (1) { handle = handle_number++ & 0x000fffff; if (LIST_EMPTY(&classifier->acc_filters[i])) break; LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if ((afp->f_handle & 0x000fffff) == handle) break; if (afp == NULL) break; /* this handle is already used, try again */ } return ((i << 20) | handle); } /* convert filter handle to filter pointer */ static struct acc_filter * filth_to_filtp(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int i; i = ACC_GET_HINDEX(handle); LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (afp->f_handle == handle) return (afp); return (NULL); } /* create flowinfo bitmask */ static u_int32_t filt2fibmask(filt) struct flow_filter *filt; { u_int32_t mask = 0; #ifdef INET6 struct flow_filter6 *filt6; #endif switch (filt->ff_flow.fi_family) { case AF_INET: if (filt->ff_flow.fi_proto != 0) mask |= FIMB4_PROTO; if (filt->ff_flow.fi_tos != 0) mask |= FIMB4_TOS; if (filt->ff_flow.fi_dst.s_addr != 0) mask |= FIMB4_DADDR; if (filt->ff_flow.fi_src.s_addr != 0) mask |= FIMB4_SADDR; if (filt->ff_flow.fi_sport != 0) mask |= FIMB4_SPORT; if (filt->ff_flow.fi_dport != 0) mask |= FIMB4_DPORT; if (filt->ff_flow.fi_gpi != 0) mask |= FIMB4_GPI; break; #ifdef INET6 case AF_INET6: filt6 = (struct flow_filter6 *)filt; if (filt6->ff_flow6.fi6_proto != 0) mask |= FIMB6_PROTO; if (filt6->ff_flow6.fi6_tclass != 0) mask |= FIMB6_TCLASS; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) mask |= FIMB6_DADDR; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) mask |= FIMB6_SADDR; if (filt6->ff_flow6.fi6_sport != 0) mask |= FIMB6_SPORT; if (filt6->ff_flow6.fi6_dport != 0) mask |= FIMB6_DPORT; if (filt6->ff_flow6.fi6_gpi != 0) mask |= FIMB6_GPI; if (filt6->ff_flow6.fi6_flowlabel != 0) mask |= FIMB6_FLABEL; break; #endif /* INET6 */ } return (mask); } /* * helper functions to handle IPv4 fragments. * currently only in-sequence fragments are handled. * - fragment info is cached in a LRU list. * - when a first fragment is found, cache its flow info. * - when a non-first fragment is found, lookup the cache. */ struct ip4_frag { TAILQ_ENTRY(ip4_frag) ip4f_chain; char ip4f_valid; u_short ip4f_id; struct flowinfo_in ip4f_info; }; static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ static void ip4f_cache(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; if (TAILQ_EMPTY(&ip4f_list)) { /* first time call, allocate fragment cache entries. */ if (ip4f_init() < 0) /* allocation failed! */ return; } fp = ip4f_alloc(); fp->ip4f_id = ip->ip_id; fp->ip4f_info.fi_proto = ip->ip_p; fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; /* save port numbers */ fp->ip4f_info.fi_sport = fin->fi_sport; fp->ip4f_info.fi_dport = fin->fi_dport; fp->ip4f_info.fi_gpi = fin->fi_gpi; } static int ip4f_lookup(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; fp = TAILQ_NEXT(fp, ip4f_chain)) if (ip->ip_id == fp->ip4f_id && ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && ip->ip_p == fp->ip4f_info.fi_proto) { /* found the matching entry */ fin->fi_sport = fp->ip4f_info.fi_sport; fin->fi_dport = fp->ip4f_info.fi_dport; fin->fi_gpi = fp->ip4f_info.fi_gpi; if ((ntohs(ip->ip_off) & IP_MF) == 0) /* this is the last fragment, release the entry. */ ip4f_free(fp); return (1); } /* no matching entry found */ return (0); } static int ip4f_init(void) { struct ip4_frag *fp; int i; TAILQ_INIT(&ip4f_list); for (i=0; iip4f_valid = 0; TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); } return (0); } static struct ip4_frag * ip4f_alloc(void) { struct ip4_frag *fp; /* reclaim an entry at the tail, put it at the head */ fp = TAILQ_LAST(&ip4f_list, ip4f_list); TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); fp->ip4f_valid = 1; TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); return (fp); } static void ip4f_free(fp) struct ip4_frag *fp; { TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); fp->ip4f_valid = 0; TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); } #endif /* ALTQ3_CLFIER_COMPAT */ Index: projects/ifnet/sys/net/altq/altq_var.h =================================================================== --- projects/ifnet/sys/net/altq/altq_var.h (revision 281649) +++ projects/ifnet/sys/net/altq/altq_var.h (revision 281650) @@ -1,261 +1,231 @@ -/* $FreeBSD$ */ -/* $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ */ - -/* +/*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ + * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_VAR_H_ #define _ALTQ_ALTQ_VAR_H_ #ifdef _KERNEL #include #include #include #ifdef ALTQ3_CLFIER_COMPAT /* * filter structure for altq common classifier */ struct acc_filter { LIST_ENTRY(acc_filter) f_chain; void *f_class; /* pointer to the class */ u_long f_handle; /* filter id */ u_int32_t f_fbmask; /* filter bitmask */ struct flow_filter f_filter; /* filter value */ }; /* * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix * the handle assignment. */ #define ACC_FILTER_TABLESIZE (256+1) #define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2) #define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1) #ifdef __GNUC__ #define ACC_GET_HASH_INDEX(addr) \ ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;}) #else #define ACC_GET_HASH_INDEX(addr) \ (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \ & ACC_FILTER_MASK) #endif #define ACC_GET_HINDEX(handle) ((handle) >> 20) #if (__FreeBSD_version > 500000) #define ACC_LOCK_INIT(ac) mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF) #define ACC_LOCK_DESTROY(ac) mtx_destroy(&(ac)->acc_mtx) #define ACC_LOCK(ac) mtx_lock(&(ac)->acc_mtx) #define ACC_UNLOCK(ac) mtx_unlock(&(ac)->acc_mtx) #else #define ACC_LOCK_INIT(ac) #define ACC_LOCK_DESTROY(ac) #define ACC_LOCK(ac) #define ACC_UNLOCK(ac) #endif struct acc_classifier { u_int32_t acc_fbmask; LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE]; #if (__FreeBSD_version > 500000) struct mtx acc_mtx; #endif }; /* * flowinfo mask bits used by classifier */ /* for ipv4 */ #define FIMB4_PROTO 0x0001 #define FIMB4_TOS 0x0002 #define FIMB4_DADDR 0x0004 #define FIMB4_SADDR 0x0008 #define FIMB4_DPORT 0x0010 #define FIMB4_SPORT 0x0020 #define FIMB4_GPI 0x0040 #define FIMB4_ALL 0x007f /* for ipv6 */ #define FIMB6_PROTO 0x0100 #define FIMB6_TCLASS 0x0200 #define FIMB6_DADDR 0x0400 #define FIMB6_SADDR 0x0800 #define FIMB6_DPORT 0x1000 #define FIMB6_SPORT 0x2000 #define FIMB6_GPI 0x4000 #define FIMB6_FLABEL 0x8000 #define FIMB6_ALL 0xff00 #define FIMB_ALL (FIMB4_ALL|FIMB6_ALL) #define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI) #define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI) #endif /* ALTQ3_CLFIER_COMPAT */ /* * machine dependent clock * a 64bit high resolution time counter. */ extern int machclk_usepcc; extern u_int32_t machclk_freq; extern u_int32_t machclk_per_tick; extern void init_machclk(void); extern u_int64_t read_machclk(void); /* * debug support */ #ifdef ALTQ_DEBUG #ifdef __STDC__ #define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e)) #else /* PCC */ #define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e")) #endif #else #define ASSERT(e) ((void)0) #endif /* * misc stuff for compatibility */ /* ioctl cmd type */ typedef u_long ioctlcmd_t; /* * queue macros: * the interface of TAILQ_LAST macro changed after the introduction * of softupdate. redefine it here to make it work with pre-2.2.7. */ #undef TAILQ_LAST #define TAILQ_LAST(head, headname) \ (*(((struct headname *)((head)->tqh_last))->tqh_last)) #ifndef TAILQ_EMPTY #define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) #endif #ifndef TAILQ_FOREACH #define TAILQ_FOREACH(var, head, field) \ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field)) #endif /* macro for timeout/untimeout */ -#if (__FreeBSD_version > 300000) || defined(__NetBSD__) /* use callout */ #include #if (__FreeBSD_version > 500000) #define CALLOUT_INIT(c) callout_init((c), 0) #else #define CALLOUT_INIT(c) callout_init((c)) #endif #define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a)) #define CALLOUT_STOP(c) callout_stop((c)) #if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000) #define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 } -#endif -#elif defined(__OpenBSD__) -#include -/* callout structure as a wrapper of struct timeout */ -struct callout { - struct timeout c_to; -}; -#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0) -#define CALLOUT_RESET(c,t,f,a) do { if (!timeout_initialized(&(c)->c_to)) \ - timeout_set(&(c)->c_to, (f), (a)); \ - timeout_add(&(c)->c_to, (t)); } while (/*CONSTCOND*/ 0) -#define CALLOUT_STOP(c) timeout_del(&(c)->c_to) -#define CALLOUT_INITIALIZER { { { NULL }, NULL, NULL, 0, 0 } } -#else -/* use old-style timeout/untimeout */ -/* dummy callout structure */ -struct callout { - void *c_arg; /* function argument */ - void (*c_func)(void *); /* functiuon to call */ -}; -#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0) -#define CALLOUT_RESET(c,t,f,a) do { (c)->c_arg = (a); \ - (c)->c_func = (f); \ - timeout((f),(a),(t)); } while (/*CONSTCOND*/ 0) -#define CALLOUT_STOP(c) untimeout((c)->c_func,(c)->c_arg) -#define CALLOUT_INITIALIZER { NULL, NULL } -#endif -#if !defined(__FreeBSD__) -typedef void (timeout_t)(void *); #endif #define m_pktlen(m) ((m)->m_pkthdr.len) struct ifnet; struct mbuf; struct pf_altq; #ifdef ALTQ3_CLFIER_COMPAT struct flowinfo; #endif void *altq_lookup(char *, int); #ifdef ALTQ3_CLFIER_COMPAT int altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t); int acc_add_filter(struct acc_classifier *, struct flow_filter *, void *, u_long *); int acc_delete_filter(struct acc_classifier *, u_long); int acc_discard_filters(struct acc_classifier *, void *, int); void *acc_classify(void *, struct mbuf *, int); #endif u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *); void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t); void altq_assert(const char *, int, const char *); int tbr_set(struct ifaltq *, struct tb_profile *); int tbr_get(struct ifaltq *, struct tb_profile *); int altq_pfattach(struct pf_altq *); int altq_pfdetach(struct pf_altq *); int altq_add(struct pf_altq *); int altq_remove(struct pf_altq *); int altq_add_queue(struct pf_altq *); int altq_remove_queue(struct pf_altq *); int altq_getqstats(struct pf_altq *, void *, int *); int cbq_pfattach(struct pf_altq *); int cbq_add_altq(struct pf_altq *); int cbq_remove_altq(struct pf_altq *); int cbq_add_queue(struct pf_altq *); int cbq_remove_queue(struct pf_altq *); int cbq_getqstats(struct pf_altq *, void *, int *); int priq_pfattach(struct pf_altq *); int priq_add_altq(struct pf_altq *); int priq_remove_altq(struct pf_altq *); int priq_add_queue(struct pf_altq *); int priq_remove_queue(struct pf_altq *); int priq_getqstats(struct pf_altq *, void *, int *); int hfsc_pfattach(struct pf_altq *); int hfsc_add_altq(struct pf_altq *); int hfsc_remove_altq(struct pf_altq *); int hfsc_add_queue(struct pf_altq *); int hfsc_remove_queue(struct pf_altq *); int hfsc_getqstats(struct pf_altq *, void *, int *); #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_VAR_H_ */ Index: projects/ifnet/sys/net/altq/if_altq.h =================================================================== --- projects/ifnet/sys/net/altq/if_altq.h (revision 281649) +++ projects/ifnet/sys/net/altq/if_altq.h (revision 281650) @@ -1,190 +1,182 @@ -/* $FreeBSD$ */ -/* $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ */ - -/* +/*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ + * $FreeBSD$ */ #ifndef _ALTQ_IF_ALTQ_H_ #define _ALTQ_IF_ALTQ_H_ -#ifdef __FreeBSD__ #include /* XXX */ #include /* XXX */ #include /* XXX */ -#endif -#ifdef _KERNEL_OPT -#include -#endif - struct altq_pktattr; struct tb_regulator; struct top_cdnr; /* * Structure defining a queue for a network interface. */ struct ifaltq { /* fields compatible with struct ifqueue */ struct mbuf *ifq_head; struct mbuf *ifq_tail; int ifq_len; int ifq_maxlen; -#ifdef __FreeBSD__ struct mtx ifq_mtx; -#endif /* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */ struct mbuf *ifq_drv_head; struct mbuf *ifq_drv_tail; int ifq_drv_len; int ifq_drv_maxlen; /* alternate queueing related fields */ int altq_type; /* discipline type */ int altq_flags; /* flags (e.g. ready, in-use) */ void *altq_disc; /* for discipline-specific use */ struct ifnet *altq_ifp; /* back pointer to interface */ int (*altq_enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); struct mbuf *(*altq_dequeue)(struct ifaltq *, int); int (*altq_request)(struct ifaltq *, int, void *); /* classifier fields */ void *altq_clfier; /* classifier-specific use */ void *(*altq_classify)(void *, struct mbuf *, int); /* token bucket regulator */ struct tb_regulator *altq_tbr; /* input traffic conditioner (doesn't belong to the output queue...) */ struct top_cdnr *altq_cdnr; }; #ifdef _KERNEL /* * packet attributes used by queueing disciplines. * pattr_class is a discipline-dependent scheduling class that is * set by a classifier. * pattr_hdr and pattr_af may be used by a discipline to access * the header within a mbuf. (e.g. ECN needs to update the CE bit) * note that pattr_hdr could be stale after m_pullup, though link * layer output routines usually don't use m_pullup. link-level * compression also invalidates these fields. thus, pattr_hdr needs * to be verified when a discipline touches the header. */ struct altq_pktattr { void *pattr_class; /* sched class set by classifier */ int pattr_af; /* address family */ caddr_t pattr_hdr; /* saved header position in mbuf */ }; /* * mbuf tag to carry a queue id (and hints for ECN). */ struct altq_tag { u_int32_t qid; /* queue id */ /* hints for ecn */ int af; /* address family */ void *hdr; /* saved header position in mbuf */ }; /* * a token-bucket regulator limits the rate that a network driver can * dequeue packets from the output queue. * modern cards are able to buffer a large amount of packets and dequeue * too many packets at a time. this bursty dequeue behavior makes it * impossible to schedule packets by queueing disciplines. * a token-bucket is used to control the burst size in a device * independent manner. */ struct tb_regulator { int64_t tbr_rate; /* (scaled) token bucket rate */ int64_t tbr_depth; /* (scaled) token bucket depth */ int64_t tbr_token; /* (scaled) current token */ int64_t tbr_filluptime; /* (scaled) time to fill up bucket */ u_int64_t tbr_last; /* last time token was updated */ int tbr_lastop; /* last dequeue operation type needed for poll-and-dequeue */ }; /* if_altqflags */ #define ALTQF_READY 0x01 /* driver supports alternate queueing */ #define ALTQF_ENABLED 0x02 /* altq is in use */ #define ALTQF_CLASSIFY 0x04 /* classify packets */ #define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */ #define ALTQF_DRIVER1 0x40 /* driver specific */ /* if_altqflags set internally only: */ #define ALTQF_CANTCHANGE (ALTQF_READY) /* altq_dequeue 2nd arg */ #define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */ #define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */ /* altq request types (currently only purge is defined) */ #define ALTRQ_PURGE 1 /* purge all packets */ #define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY) #define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED) #define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY) #define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING) #define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING) #define ALTQ_CLEAR_CNDTNING(ifq) ((ifq)->altq_flags &= ~ALTQF_CNDTNING) #define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL) #define ALTQ_ENQUEUE(ifq, m, pa, err) \ (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa)) #define ALTQ_DEQUEUE(ifq, m) \ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE) #define ALTQ_POLL(ifq, m) \ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL) #define ALTQ_PURGE(ifq) \ (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0) #define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0) #define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL) extern int altq_attach(struct ifaltq *, int, void *, int (*)(struct ifaltq *, struct mbuf *, struct altq_pktattr *), struct mbuf *(*)(struct ifaltq *, int), int (*)(struct ifaltq *, int, void *), void *, void *(*)(void *, struct mbuf *, int)); extern int altq_detach(struct ifaltq *); extern int altq_enable(struct ifaltq *); extern int altq_disable(struct ifaltq *); extern struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int); extern int (*altq_input)(struct mbuf *, int); #if 0 /* ALTQ3_CLFIER_COMPAT */ void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *); #endif #endif /* _KERNEL */ #endif /* _ALTQ_IF_ALTQ_H_ */ Index: projects/ifnet/sys/netinet/in.c =================================================================== --- projects/ifnet/sys/netinet/in.c (revision 281649) +++ projects/ifnet/sys/netinet/in.c (revision 281650) @@ -1,1276 +1,1300 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (C) 2001 WIDE Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.c 8.4 (Berkeley) 1/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *); static int in_difaddr_ioctl(caddr_t, struct ifnet *, struct thread *); static void in_socktrim(struct sockaddr_in *); static void in_purgemaddrs(struct ifnet *); static VNET_DEFINE(int, nosameprefix); #define V_nosameprefix VNET(nosameprefix) SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nosameprefix), 0, "Refuse to create same prefixes on different interfaces"); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcbinfo VNET(ripcbinfo) static struct sx in_control_sx; SX_SYSINIT(in_control_sx, &in_control_sx, "in_control"); /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). */ int in_localaddr(struct in_addr in) { register u_long i = ntohl(in.s_addr); register struct in_ifaddr *ia; IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if ((i & ia->ia_subnetmask) == ia->ia_subnet) { IN_IFADDR_RUNLOCK(); return (1); } } IN_IFADDR_RUNLOCK(); return (0); } /* * Return 1 if an internet address is for the local host and configured * on one of its interfaces. */ int in_localip(struct in_addr in) { struct in_ifaddr *ia; IN_IFADDR_RLOCK(); LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) { if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) { IN_IFADDR_RUNLOCK(); return (1); } } IN_IFADDR_RUNLOCK(); return (0); } /* + * Return 1 if an internet address is configured on an interface. + */ +int +in_ifhasaddr(struct ifnet *ifp, struct in_addr in) +{ + struct ifaddr *ifa; + struct in_ifaddr *ia; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + ia = (struct in_ifaddr *)ifa; + if (ia->ia_addr.sin_addr.s_addr == in.s_addr) { + IF_ADDR_RUNLOCK(ifp); + return (1); + } + } + IF_ADDR_RUNLOCK(ifp); + + return (0); +} + +/* * Return a reference to the interface address which is different to * the supplied one but with same IP address value. */ static struct in_ifaddr * in_localip_more(struct in_ifaddr *ia) { in_addr_t in = IA_SIN(ia)->sin_addr.s_addr; struct in_ifaddr *it; IN_IFADDR_RLOCK(); LIST_FOREACH(it, INADDR_HASH(in), ia_hash) { if (it != ia && IA_SIN(it)->sin_addr.s_addr == in) { ifa_ref(&it->ia_ifa); IN_IFADDR_RUNLOCK(); return (it); } } IN_IFADDR_RUNLOCK(); return (NULL); } /* * Determine whether an IP address is in a reserved set of addresses * that may not be forwarded, or whether datagrams to that destination * may be forwarded. */ int in_canforward(struct in_addr in) { register u_long i = ntohl(in.s_addr); register u_long net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i)) return (0); if (IN_CLASSA(i)) { net = i & IN_CLASSA_NET; if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT)) return (0); } return (1); } /* * Trim a mask in a sockaddr */ static void in_socktrim(struct sockaddr_in *ap) { register char *cplim = (char *) &ap->sin_addr; register char *cp = (char *) (&ap->sin_addr + 1); ap->sin_len = 0; while (--cp >= cplim) if (*cp) { (ap)->sin_len = cp - (char *) (ap) + 1; break; } } /* * Generic internet control operations (ioctl's). */ int in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr; struct ifaddr *ifa; struct in_ifaddr *ia; int error; if (ifp == NULL) return (EADDRNOTAVAIL); /* * Filter out 4 ioctls we implement directly. Forward the rest * to specific functions and ifp->if_ioctl(). */ switch (cmd) { case SIOCGIFADDR: case SIOCGIFBRDADDR: case SIOCGIFDSTADDR: case SIOCGIFNETMASK: break; case SIOCDIFADDR: sx_xlock(&in_control_sx); error = in_difaddr_ioctl(data, ifp, td); sx_xunlock(&in_control_sx); return (error); case OSIOCAIFADDR: /* 9.x compat */ case SIOCAIFADDR: sx_xlock(&in_control_sx); error = in_aifaddr_ioctl(cmd, data, ifp, td); sx_xunlock(&in_control_sx); return (error); case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFDSTADDR: case SIOCSIFNETMASK: /* We no longer support that old commands. */ return (EINVAL); default: return (if_ioctl(ifp, cmd, data, td)); } if (addr->sin_addr.s_addr != INADDR_ANY && prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0) return (EADDRNOTAVAIL); /* * Find address for this interface, if it exists. If an * address was specified, find that one instead of the * first one on the interface, if possible. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = (struct in_ifaddr *)ifa; if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr) break; } if (ifa == NULL) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET) { ia = (struct in_ifaddr *)ifa; if (prison_check_ip4(td->td_ucred, &ia->ia_addr.sin_addr) == 0) break; } if (ifa == NULL) { IF_ADDR_RUNLOCK(ifp); return (EADDRNOTAVAIL); } error = 0; switch (cmd) { case SIOCGIFADDR: *addr = ia->ia_addr; break; case SIOCGIFBRDADDR: if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EINVAL; break; } *addr = ia->ia_broadaddr; break; case SIOCGIFDSTADDR: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { error = EINVAL; break; } *addr = ia->ia_dstaddr; break; case SIOCGIFNETMASK: *addr = ia->ia_sockmask; break; } IF_ADDR_RUNLOCK(ifp); return (error); } static int in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { const struct in_aliasreq *ifra = (struct in_aliasreq *)data; const struct sockaddr_in *addr = &ifra->ifra_addr; const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr; const struct sockaddr_in *mask = &ifra->ifra_mask; const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr; const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0; struct ifaddr *ifa; struct in_ifaddr *ia; bool iaIsFirst; int error = 0; error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); /* * ifra_addr must be present and be of INET family. * ifra_broadaddr/ifra_dstaddr and ifra_mask are optional. */ if (addr->sin_len != sizeof(struct sockaddr_in) || addr->sin_family != AF_INET) return (EINVAL); if (broadaddr->sin_len != 0 && (broadaddr->sin_len != sizeof(struct sockaddr_in) || broadaddr->sin_family != AF_INET)) return (EINVAL); if (mask->sin_len != 0 && (mask->sin_len != sizeof(struct sockaddr_in) || mask->sin_family != AF_INET)) return (EINVAL); if ((ifp->if_flags & IFF_POINTOPOINT) && (dstaddr->sin_len != sizeof(struct sockaddr_in) || dstaddr->sin_addr.s_addr == INADDR_ANY)) return (EDESTADDRREQ); if (vhid > 0 && carp_attach_p == NULL) return (EPROTONOSUPPORT); /* * Historically assigning address to an interface is an * implicit IFF_UP. Even if address assignment fails, * the interface stays up. */ if ((ifp->if_flags & IFF_UP) == 0) { struct ifreq ifr; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; ifr.ifr_flags |= IFF_UP; error = if_drvioctl(ifp, SIOCSIFFLAGS, &ifr, td); if (error) return (error); } /* * See whether address already exist. */ iaIsFirst = true; ia = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in_ifaddr *it; if (ifa->ifa_addr->sa_family != AF_INET) continue; it = (struct in_ifaddr *)ifa; iaIsFirst = false; if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0) ia = it; } IF_ADDR_RUNLOCK(ifp); if (ia != NULL) (void )in_difaddr_ioctl(data, ifp, td); ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK); ia = (struct in_ifaddr *)ifa; ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; ia->ia_ifp = ifp; ia->ia_addr = *addr; if (mask->sin_len != 0) { ia->ia_sockmask = *mask; ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr); } else { in_addr_t i = ntohl(addr->sin_addr.s_addr); /* * Be compatible with network classes, if netmask isn't * supplied, guess it based on classes. */ if (IN_CLASSA(i)) ia->ia_subnetmask = IN_CLASSA_NET; else if (IN_CLASSB(i)) ia->ia_subnetmask = IN_CLASSB_NET; else ia->ia_subnetmask = IN_CLASSC_NET; ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask); } ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask; in_socktrim(&ia->ia_sockmask); if (ifp->if_flags & IFF_BROADCAST) { if (broadaddr->sin_len != 0) { ia->ia_broadaddr = *broadaddr; } else if (ia->ia_subnetmask == IN_RFC3021_MASK) { ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST; ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in); ia->ia_broadaddr.sin_family = AF_INET; } else { ia->ia_broadaddr.sin_addr.s_addr = htonl(ia->ia_subnet | ~ia->ia_subnetmask); ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in); ia->ia_broadaddr.sin_family = AF_INET; } } if (ifp->if_flags & IFF_POINTOPOINT) ia->ia_dstaddr = *dstaddr; /* XXXGL: rtinit() needs this strange assignment. */ if (ifp->if_flags & IFF_LOOPBACK) ia->ia_dstaddr = ia->ia_addr; if (vhid != 0) { error = (*carp_attach_p)(&ia->ia_ifa, vhid); if (error) return (error); } /* if_addrhead is already referenced by ifa_alloc() */ IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_ref(ifa); /* in_ifaddrhead */ IN_IFADDR_WLOCK(); TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link); LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); IN_IFADDR_WUNLOCK(); /* * Add route for the network. */ if (vhid == 0) { int flags = RTF_UP; if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) flags |= RTF_HOST; error = in_addprefix(ia, flags); if (error) goto fail1; } /* * Add a loopback route to self. */ if (vhid == 0 && (ifp->if_flags & IFF_LOOPBACK) == 0 && ia->ia_addr.sin_addr.s_addr != INADDR_ANY && !((ifp->if_flags & IFF_POINTOPOINT) && ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)) { struct in_ifaddr *eia; eia = in_localip_more(ia); if (eia == NULL) { error = ifa_add_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error) goto fail2; } else ifa_free(&eia->ia_ifa); } if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) { struct in_addr allhosts_addr; struct in_ifinfo *ii; ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); error = in_joingroup(ifp, &allhosts_addr, NULL, &ii->ii_allhosts); } if (vhid == 0 && (ifp->if_flags & IFF_BROADCAST)) arp_ifinit(ifp, ifa); EVENTHANDLER_INVOKE(ifaddr_event, ifp); return (error); fail2: if (vhid == 0) (void )in_scrubprefix(ia, LLE_STATIC); fail1: if (ia->ia_ifa.ifa_carp) (*carp_detach_p)(&ia->ia_ifa); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link); LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ return (error); } static int in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td) { const struct ifreq *ifr = (struct ifreq *)data; const struct sockaddr_in *addr = (const struct sockaddr_in *) &ifr->ifr_addr; struct ifaddr *ifa; struct in_ifaddr *ia; bool deleteAny, iaIsLast; int error; if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } if (addr->sin_len != sizeof(struct sockaddr_in) || addr->sin_family != AF_INET) deleteAny = true; else deleteAny = false; iaIsLast = true; ia = NULL; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in_ifaddr *it; if (ifa->ifa_addr->sa_family != AF_INET) continue; it = (struct in_ifaddr *)ifa; if (deleteAny && ia == NULL && (td == NULL || prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0)) ia = it; if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && (td == NULL || prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)) ia = it; if (it != ia) iaIsLast = false; } if (ia == NULL) { IF_ADDR_WUNLOCK(ifp); return (EADDRNOTAVAIL); } TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link); LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, LLE_STATIC); /* * in_ifadown gets rid of all the rest of * the routes. This is not quite the right * thing to do, but at least if we are running * a routing process they will come back. */ in_ifadown(&ia->ia_ifa, 1); if (ia->ia_ifa.ifa_carp) (*carp_detach_p)(&ia->ia_ifa); /* * If this is the last IPv4 address configured on this * interface, leave the all-hosts group. * No state-change report need be transmitted. */ if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) { struct in_ifinfo *ii; ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); IN_MULTI_LOCK(); if (ii->ii_allhosts) { (void)in_leavegroup_locked(ii->ii_allhosts, NULL); ii->ii_allhosts = NULL; } IN_MULTI_UNLOCK(); } EVENTHANDLER_INVOKE(ifaddr_event, ifp); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ return (0); } #define rtinitflags(x) \ ((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \ ? RTF_HOST : 0) /* * Check if we have a route for the given prefix already or add one accordingly. */ int in_addprefix(struct in_ifaddr *target, int flags) { struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error; if ((flags & RTF_HOST) != 0) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } IN_IFADDR_RLOCK(); /* Look for an existing address with the same prefix, mask, and fib */ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib) continue; /* * If we got a matching prefix route inserted by other * interface address, we are done here. */ if (ia->ia_flags & IFA_ROUTE) { #ifdef RADIX_MPATH if (ia->ia_addr.sin_addr.s_addr == target->ia_addr.sin_addr.s_addr) { IN_IFADDR_RUNLOCK(); return (EEXIST); } else break; #endif if (V_nosameprefix) { IN_IFADDR_RUNLOCK(); return (EEXIST); } else { int fibnum; fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib; rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum); IN_IFADDR_RUNLOCK(); return (0); } } } IN_IFADDR_RUNLOCK(); /* * No-one seem to have this prefix route, so we try to insert it. */ error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags); if (!error) target->ia_flags |= IFA_ROUTE; return (error); } /* * If there is no other address in the system that can serve a route to the * same prefix, remove the route. Hand over the route to the new address * otherwise. */ int in_scrubprefix(struct in_ifaddr *target, u_int flags) { struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error = 0; struct sockaddr_in prefix0, mask0; /* * Remove the loopback route to the interface address. */ if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) && !(target->ia_ifp->if_flags & IFF_LOOPBACK) && (flags & LLE_STATIC)) { struct in_ifaddr *eia; eia = in_localip_more(target); if (eia != NULL) { int fibnum = target->ia_ifp->if_fib; error = ifa_switch_loopback_route((struct ifaddr *)eia, (struct sockaddr *)&target->ia_addr, fibnum); ifa_free(&eia->ia_ifa); } else { error = ifa_del_loopback_route((struct ifaddr *)target, (struct sockaddr *)&target->ia_addr); } if (!(target->ia_ifp->if_flags & IFF_NOARP)) /* remove arp cache */ arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr); } if (rtinitflags(target)) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } if ((target->ia_flags & IFA_ROUTE) == 0) { int fibnum; fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib; rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum); return (0); } IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } if ((ia->ia_ifp->if_flags & IFF_UP) == 0) continue; /* * If we got a matching prefix address, move IFA_ROUTE and * the route itself to it. Make sure that routing daemons * get a heads-up. */ if ((ia->ia_flags & IFA_ROUTE) == 0) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n", error); error = rtinit(&ia->ia_ifa, (int)RTM_ADD, rtinitflags(ia) | RTF_UP); if (error == 0) ia->ia_flags |= IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n", error); ifa_free(&ia->ia_ifa); return (error); } } IN_IFADDR_RUNLOCK(); /* * remove all L2 entries on the given prefix */ bzero(&prefix0, sizeof(prefix0)); prefix0.sin_len = sizeof(prefix0); prefix0.sin_family = AF_INET; prefix0.sin_addr.s_addr = target->ia_subnet; bzero(&mask0, sizeof(mask0)); mask0.sin_len = sizeof(mask0); mask0.sin_family = AF_INET; mask0.sin_addr.s_addr = target->ia_subnetmask; lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0, (struct sockaddr *)&mask0, flags); /* * As no-one seem to have this prefix, we can remove the route. */ error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error); return (error); } #undef rtinitflags /* * Return 1 if the address might be a local broadcast address. */ int in_broadcast(struct in_addr in, struct ifnet *ifp) { register struct ifaddr *ifa; u_long t; if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY) return (1); if ((ifp->if_flags & IFF_BROADCAST) == 0) return (0); t = ntohl(in.s_addr); /* * Look through the list of addresses for a match * with a broadcast address. */ #define ia ((struct in_ifaddr *)ifa) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET && (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr || /* * Check for old-style (host 0) broadcast, but * taking into account that RFC 3021 obsoletes it. */ (ia->ia_subnetmask != IN_RFC3021_MASK && t == ia->ia_subnet)) && /* * Check for an all one subnetmask. These * only exist when an interface gets a secondary * address. */ ia->ia_subnetmask != (u_long)0xffffffff) return (1); return (0); #undef ia } /* * On interface removal, clean up IPv4 data structures hung off of the ifnet. */ void in_ifdetach(struct ifnet *ifp) { in_pcbpurgeif0(&V_ripcbinfo, ifp); in_pcbpurgeif0(&V_udbinfo, ifp); in_pcbpurgeif0(&V_ulitecbinfo, ifp); in_purgemaddrs(ifp); } /* * Delete all IPv4 multicast address records, and associated link-layer * multicast address records, associated with ifp. * XXX It looks like domifdetach runs AFTER the link layer cleanup. * XXX This should not race with ifma_protospec being set during * a new allocation, if it does, we have bigger problems. */ static void in_purgemaddrs(struct ifnet *ifp) { LIST_HEAD(,in_multi) purgeinms; struct in_multi *inm, *tinm; struct ifmultiaddr *ifma; LIST_INIT(&purgeinms); IN_MULTI_LOCK(); /* * Extract list of in_multi associated with the detaching ifp * which the PF_INET layer is about to release. * We need to do this as IF_ADDR_LOCK() may be re-acquired * by code further down. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; #if 0 KASSERT(ifma->ifma_protospec != NULL, ("%s: ifma_protospec is NULL", __func__)); #endif inm = (struct in_multi *)ifma->ifma_protospec; LIST_INSERT_HEAD(&purgeinms, inm, inm_link); } IF_ADDR_RUNLOCK(ifp); LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) { LIST_REMOVE(inm, inm_link); inm_release_locked(inm); } igmp_ifdetach(ifp); IN_MULTI_UNLOCK(); } struct in_llentry { struct llentry base; struct sockaddr_in l3_addr4; }; /* * Deletes an address from the address table. * This function is called by the timer functions * such as arptimer() and nd6_llinfo_timer(), and * the caller does the locking. */ static void in_lltable_free(struct lltable *llt, struct llentry *lle) { LLE_WUNLOCK(lle); LLE_LOCK_DESTROY(lle); free(lle, M_LLTABLE); } static struct llentry * in_lltable_new(const struct sockaddr *l3addr, u_int flags) { struct in_llentry *lle; lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); if (lle == NULL) /* NB: caller generates msg */ return NULL; /* * For IPv4 this will trigger "arpresolve" to generate * an ARP request. */ lle->base.la_expire = time_uptime; /* mark expired */ lle->l3_addr4 = *(const struct sockaddr_in *)l3addr; lle->base.lle_refcnt = 1; lle->base.lle_free = in_lltable_free; LLE_LOCK_INIT(&lle->base); callout_init(&lle->base.la_timer, 1); return (&lle->base); } #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ (((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 ) static void in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, const struct sockaddr *mask, u_int flags) { const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix; const struct sockaddr_in *msk = (const struct sockaddr_in *)mask; struct llentry *lle, *next; int i; size_t pkts_dropped; IF_AFDATA_WLOCK(llt->llt_ifp); for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { /* * (flags & LLE_STATIC) means deleting all entries * including static ARP entries. */ if (IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)), pfx, msk) && ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { LLE_WLOCK(lle); if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); pkts_dropped = llentry_free(lle); ARPSTAT_ADD(dropped, pkts_dropped); } } } IF_AFDATA_WUNLOCK(llt->llt_ifp); } static int in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) { struct rtentry *rt; KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); /* XXX rtalloc1_fib should take a const param */ rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0, ifp->if_fib); if (rt == NULL) return (EINVAL); /* * If the gateway for an existing host route matches the target L3 * address, which is a special route inserted by some implementation * such as MANET, and the interface is of the correct type, then * allow for ARP to proceed. */ if (rt->rt_flags & RTF_GATEWAY) { if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp || if_type(rt->rt_ifp) != IFT_ETHER || (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || memcmp(rt->rt_gateway->sa_data, l3addr->sa_data, sizeof(in_addr_t)) != 0) { RTFREE_LOCKED(rt); return (EINVAL); } } /* * Make sure that at least the destination address is covered * by the route. This is for handling the case where 2 or more * interfaces have the same prefix. An incoming packet arrives * on one interface and the corresponding outgoing packet leaves * another interface. */ if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) { const char *sa, *mask, *addr, *lim; int len; mask = (const char *)rt_mask(rt); /* * Just being extra cautious to avoid some custom * code getting into trouble. */ if (mask == NULL) { RTFREE_LOCKED(rt); return (EINVAL); } sa = (const char *)rt_key(rt); addr = (const char *)l3addr; len = ((const struct sockaddr_in *)l3addr)->sin_len; lim = addr + len; for ( ; addr < lim; sa++, mask++, addr++) { if ((*sa ^ *addr) & *mask) { #ifdef DIAGNOSTIC log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); #endif RTFREE_LOCKED(rt); return (EINVAL); } } } RTFREE_LOCKED(rt); return (0); } /* * Return NULL if not found or marked for deletion. * If found return lle read locked. */ static struct llentry * in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; struct llentries *lleh; u_int hashkey; IF_AFDATA_LOCK_ASSERT(ifp); KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); hashkey = sin->sin_addr.s_addr; lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; LIST_FOREACH(lle, lleh, lle_next) { struct sockaddr_in *sa2 = satosin(L3_ADDR(lle)); if (lle->la_flags & LLE_DELETED) continue; if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr) break; } if (lle == NULL) { #ifdef DIAGNOSTIC if (flags & LLE_DELETE) log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle); #endif if (!(flags & LLE_CREATE)) return (NULL); IF_AFDATA_WLOCK_ASSERT(ifp); /* * A route that covers the given address must have * been installed 1st because we are doing a resolution, * verify this. */ if (!(flags & LLE_IFADDR) && in_lltable_rtcheck(ifp, flags, l3addr) != 0) goto done; lle = in_lltable_new(l3addr, flags); if (lle == NULL) { log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); goto done; } lle->la_flags = flags & ~LLE_CREATE; if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { bcopy(if_lladdr(ifp), &lle->ll_addr, if_addrlen(ifp)); lle->la_flags |= (LLE_VALID | LLE_STATIC); } lle->lle_tbl = llt; lle->lle_head = lleh; lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); } else if (flags & LLE_DELETE) { if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { LLE_WLOCK(lle); lle->la_flags |= LLE_DELETED; EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif if ((lle->la_flags & (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC) llentry_free(lle); else LLE_WUNLOCK(lle); } lle = (void *)-1; } if (LLE_IS_VALID(lle)) { if (flags & LLE_EXCLUSIVE) LLE_WLOCK(lle); else LLE_RLOCK(lle); } done: return (lle); } static int in_lltable_dump(struct lltable *llt, struct sysctl_req *wr) { #define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle)) struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; /* XXX stack use */ struct { struct rt_msghdr rtm; struct sockaddr_in sin; struct sockaddr_dl sdl; } arpc; int error, i; LLTABLE_LOCK_ASSERT(); error = 0; for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { struct sockaddr_dl *sdl; /* skip deleted entries */ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) continue; /* Skip if jailed and not a valid IP of the prison. */ if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0) continue; /* * produce a msg made of: * struct rt_msghdr; * struct sockaddr_in; (IPv4) * struct sockaddr_dl; */ bzero(&arpc, sizeof(arpc)); arpc.rtm.rtm_msglen = sizeof(arpc); arpc.rtm.rtm_version = RTM_VERSION; arpc.rtm.rtm_type = RTM_GET; arpc.rtm.rtm_flags = RTF_UP; arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; arpc.sin.sin_family = AF_INET; arpc.sin.sin_len = sizeof(arpc.sin); arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr; /* publish */ if (lle->la_flags & LLE_PUB) arpc.rtm.rtm_flags |= RTF_ANNOUNCE; sdl = &arpc.sdl; sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(*sdl); sdl->sdl_index = ifp->if_index; sdl->sdl_type = if_type(ifp); if ((lle->la_flags & LLE_VALID) == LLE_VALID) { sdl->sdl_alen = if_addrlen(ifp); bcopy(&lle->ll_addr, LLADDR(sdl), if_addrlen(ifp)); } else { sdl->sdl_alen = 0; bzero(LLADDR(sdl), if_addrlen(ifp)); } arpc.rtm.rtm_rmx.rmx_expire = lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) arpc.rtm.rtm_flags |= RTF_STATIC; arpc.rtm.rtm_index = ifp->if_index; error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); if (error) break; } } return error; #undef SIN } void * in_domifattach(struct ifnet *ifp) { struct in_ifinfo *ii; struct lltable *llt; ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO); llt = lltable_init(ifp, AF_INET); if (llt != NULL) { llt->llt_prefix_free = in_lltable_prefix_free; llt->llt_lookup = in_lltable_lookup; llt->llt_dump = in_lltable_dump; } ii->ii_llt = llt; ii->ii_igmp = igmp_domifattach(ifp); return ii; } void in_domifdetach(struct ifnet *ifp, void *aux) { struct in_ifinfo *ii = (struct in_ifinfo *)aux; igmp_domifdetach(ifp); lltable_free(ii->ii_llt); free(ii, M_IFADDR); } Index: projects/ifnet/sys/netinet/in.h =================================================================== --- projects/ifnet/sys/netinet/in.h (revision 281649) +++ projects/ifnet/sys/netinet/in.h (revision 281650) @@ -1,668 +1,669 @@ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.h 8.3 (Berkeley) 1/3/94 * $FreeBSD$ */ #ifndef _NETINET_IN_H_ #define _NETINET_IN_H_ #include #include #include /* Protocols common to RFC 1700, POSIX, and X/Open. */ #define IPPROTO_IP 0 /* dummy for IP */ #define IPPROTO_ICMP 1 /* control message protocol */ #define IPPROTO_TCP 6 /* tcp */ #define IPPROTO_UDP 17 /* user datagram protocol */ #define INADDR_ANY ((in_addr_t)0x00000000) #define INADDR_BROADCAST ((in_addr_t)0xffffffff) /* must be masked */ #ifndef _UINT8_T_DECLARED typedef __uint8_t uint8_t; #define _UINT8_T_DECLARED #endif #ifndef _UINT16_T_DECLARED typedef __uint16_t uint16_t; #define _UINT16_T_DECLARED #endif #ifndef _UINT32_T_DECLARED typedef __uint32_t uint32_t; #define _UINT32_T_DECLARED #endif #ifndef _IN_ADDR_T_DECLARED typedef uint32_t in_addr_t; #define _IN_ADDR_T_DECLARED #endif #ifndef _IN_PORT_T_DECLARED typedef uint16_t in_port_t; #define _IN_PORT_T_DECLARED #endif #ifndef _SA_FAMILY_T_DECLARED typedef __sa_family_t sa_family_t; #define _SA_FAMILY_T_DECLARED #endif /* Internet address (a structure for historical reasons). */ #ifndef _STRUCT_IN_ADDR_DECLARED struct in_addr { in_addr_t s_addr; }; #define _STRUCT_IN_ADDR_DECLARED #endif #ifndef _SOCKLEN_T_DECLARED typedef __socklen_t socklen_t; #define _SOCKLEN_T_DECLARED #endif #include /* Socket address, internet style. */ struct sockaddr_in { uint8_t sin_len; sa_family_t sin_family; in_port_t sin_port; struct in_addr sin_addr; char sin_zero[8]; }; #if !defined(_KERNEL) && __POSIX_VISIBLE >= 200112 #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED __BEGIN_DECLS uint32_t htonl(uint32_t); uint16_t htons(uint16_t); uint32_t ntohl(uint32_t); uint16_t ntohs(uint16_t); __END_DECLS #endif #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif #endif /* !_KERNEL && __POSIX_VISIBLE >= 200112 */ #if __POSIX_VISIBLE >= 200112 #define IPPROTO_IPV6 41 /* IP6 header */ #define IPPROTO_RAW 255 /* raw IP packet */ #define INET_ADDRSTRLEN 16 #endif #if __BSD_VISIBLE /* * Constants and structures defined by the internet system, * Per RFC 790, September 1981, and numerous additions. */ /* * Protocols (RFC 1700) */ #define IPPROTO_HOPOPTS 0 /* IP6 hop-by-hop options */ #define IPPROTO_IGMP 2 /* group mgmt protocol */ #define IPPROTO_GGP 3 /* gateway^2 (deprecated) */ #define IPPROTO_IPV4 4 /* IPv4 encapsulation */ #define IPPROTO_IPIP IPPROTO_IPV4 /* for compatibility */ #define IPPROTO_ST 7 /* Stream protocol II */ #define IPPROTO_EGP 8 /* exterior gateway protocol */ #define IPPROTO_PIGP 9 /* private interior gateway */ #define IPPROTO_RCCMON 10 /* BBN RCC Monitoring */ #define IPPROTO_NVPII 11 /* network voice protocol*/ #define IPPROTO_PUP 12 /* pup */ #define IPPROTO_ARGUS 13 /* Argus */ #define IPPROTO_EMCON 14 /* EMCON */ #define IPPROTO_XNET 15 /* Cross Net Debugger */ #define IPPROTO_CHAOS 16 /* Chaos*/ #define IPPROTO_MUX 18 /* Multiplexing */ #define IPPROTO_MEAS 19 /* DCN Measurement Subsystems */ #define IPPROTO_HMP 20 /* Host Monitoring */ #define IPPROTO_PRM 21 /* Packet Radio Measurement */ #define IPPROTO_IDP 22 /* xns idp */ #define IPPROTO_TRUNK1 23 /* Trunk-1 */ #define IPPROTO_TRUNK2 24 /* Trunk-2 */ #define IPPROTO_LEAF1 25 /* Leaf-1 */ #define IPPROTO_LEAF2 26 /* Leaf-2 */ #define IPPROTO_RDP 27 /* Reliable Data */ #define IPPROTO_IRTP 28 /* Reliable Transaction */ #define IPPROTO_TP 29 /* tp-4 w/ class negotiation */ #define IPPROTO_BLT 30 /* Bulk Data Transfer */ #define IPPROTO_NSP 31 /* Network Services */ #define IPPROTO_INP 32 /* Merit Internodal */ #define IPPROTO_SEP 33 /* Sequential Exchange */ #define IPPROTO_3PC 34 /* Third Party Connect */ #define IPPROTO_IDPR 35 /* InterDomain Policy Routing */ #define IPPROTO_XTP 36 /* XTP */ #define IPPROTO_DDP 37 /* Datagram Delivery */ #define IPPROTO_CMTP 38 /* Control Message Transport */ #define IPPROTO_TPXX 39 /* TP++ Transport */ #define IPPROTO_IL 40 /* IL transport protocol */ #define IPPROTO_SDRP 42 /* Source Demand Routing */ #define IPPROTO_ROUTING 43 /* IP6 routing header */ #define IPPROTO_FRAGMENT 44 /* IP6 fragmentation header */ #define IPPROTO_IDRP 45 /* InterDomain Routing*/ #define IPPROTO_RSVP 46 /* resource reservation */ #define IPPROTO_GRE 47 /* General Routing Encap. */ #define IPPROTO_MHRP 48 /* Mobile Host Routing */ #define IPPROTO_BHA 49 /* BHA */ #define IPPROTO_ESP 50 /* IP6 Encap Sec. Payload */ #define IPPROTO_AH 51 /* IP6 Auth Header */ #define IPPROTO_INLSP 52 /* Integ. Net Layer Security */ #define IPPROTO_SWIPE 53 /* IP with encryption */ #define IPPROTO_NHRP 54 /* Next Hop Resolution */ #define IPPROTO_MOBILE 55 /* IP Mobility */ #define IPPROTO_TLSP 56 /* Transport Layer Security */ #define IPPROTO_SKIP 57 /* SKIP */ #define IPPROTO_ICMPV6 58 /* ICMP6 */ #define IPPROTO_NONE 59 /* IP6 no next header */ #define IPPROTO_DSTOPTS 60 /* IP6 destination option */ #define IPPROTO_AHIP 61 /* any host internal protocol */ #define IPPROTO_CFTP 62 /* CFTP */ #define IPPROTO_HELLO 63 /* "hello" routing protocol */ #define IPPROTO_SATEXPAK 64 /* SATNET/Backroom EXPAK */ #define IPPROTO_KRYPTOLAN 65 /* Kryptolan */ #define IPPROTO_RVD 66 /* Remote Virtual Disk */ #define IPPROTO_IPPC 67 /* Pluribus Packet Core */ #define IPPROTO_ADFS 68 /* Any distributed FS */ #define IPPROTO_SATMON 69 /* Satnet Monitoring */ #define IPPROTO_VISA 70 /* VISA Protocol */ #define IPPROTO_IPCV 71 /* Packet Core Utility */ #define IPPROTO_CPNX 72 /* Comp. Prot. Net. Executive */ #define IPPROTO_CPHB 73 /* Comp. Prot. HeartBeat */ #define IPPROTO_WSN 74 /* Wang Span Network */ #define IPPROTO_PVP 75 /* Packet Video Protocol */ #define IPPROTO_BRSATMON 76 /* BackRoom SATNET Monitoring */ #define IPPROTO_ND 77 /* Sun net disk proto (temp.) */ #define IPPROTO_WBMON 78 /* WIDEBAND Monitoring */ #define IPPROTO_WBEXPAK 79 /* WIDEBAND EXPAK */ #define IPPROTO_EON 80 /* ISO cnlp */ #define IPPROTO_VMTP 81 /* VMTP */ #define IPPROTO_SVMTP 82 /* Secure VMTP */ #define IPPROTO_VINES 83 /* Banyon VINES */ #define IPPROTO_TTP 84 /* TTP */ #define IPPROTO_IGP 85 /* NSFNET-IGP */ #define IPPROTO_DGP 86 /* dissimilar gateway prot. */ #define IPPROTO_TCF 87 /* TCF */ #define IPPROTO_IGRP 88 /* Cisco/GXS IGRP */ #define IPPROTO_OSPFIGP 89 /* OSPFIGP */ #define IPPROTO_SRPC 90 /* Strite RPC protocol */ #define IPPROTO_LARP 91 /* Locus Address Resoloution */ #define IPPROTO_MTP 92 /* Multicast Transport */ #define IPPROTO_AX25 93 /* AX.25 Frames */ #define IPPROTO_IPEIP 94 /* IP encapsulated in IP */ #define IPPROTO_MICP 95 /* Mobile Int.ing control */ #define IPPROTO_SCCSP 96 /* Semaphore Comm. security */ #define IPPROTO_ETHERIP 97 /* Ethernet IP encapsulation */ #define IPPROTO_ENCAP 98 /* encapsulation header */ #define IPPROTO_APES 99 /* any private encr. scheme */ #define IPPROTO_GMTP 100 /* GMTP*/ #define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */ #define IPPROTO_SCTP 132 /* SCTP */ #define IPPROTO_MH 135 /* IPv6 Mobility Header */ #define IPPROTO_UDPLITE 136 /* UDP-Lite */ #define IPPROTO_HIP 139 /* IP6 Host Identity Protocol */ #define IPPROTO_SHIM6 140 /* IP6 Shim6 Protocol */ /* 101-254: Partly Unassigned */ #define IPPROTO_PIM 103 /* Protocol Independent Mcast */ #define IPPROTO_CARP 112 /* CARP */ #define IPPROTO_PGM 113 /* PGM */ #define IPPROTO_MPLS 137 /* MPLS-in-IP */ #define IPPROTO_PFSYNC 240 /* PFSYNC */ #define IPPROTO_RESERVED_253 253 /* Reserved */ #define IPPROTO_RESERVED_254 254 /* Reserved */ /* 255: Reserved */ /* BSD Private, local use, namespace incursion, no longer used */ #define IPPROTO_OLD_DIVERT 254 /* OLD divert pseudo-proto */ #define IPPROTO_MAX 256 /* last return value of *_input(), meaning "all job for this pkt is done". */ #define IPPROTO_DONE 257 /* Only used internally, so can be outside the range of valid IP protocols. */ #define IPPROTO_DIVERT 258 /* divert pseudo-protocol */ #define IPPROTO_SEND 259 /* SeND pseudo-protocol */ /* * Defined to avoid confusion. The master value is defined by * PROTO_SPACER in sys/protosw.h. */ #define IPPROTO_SPACER 32767 /* spacer for loadable protos */ /* * Local port number conventions: * * When a user does a bind(2) or connect(2) with a port number of zero, * a non-conflicting local port address is chosen. * The default range is IPPORT_HIFIRSTAUTO through * IPPORT_HILASTAUTO, although that is settable by sysctl. * * A user may set the IPPROTO_IP option IP_PORTRANGE to change this * default assignment range. * * The value IP_PORTRANGE_DEFAULT causes the default behavior. * * The value IP_PORTRANGE_HIGH changes the range of candidate port numbers * into the "high" range. These are reserved for client outbound connections * which do not want to be filtered by any firewalls. * * The value IP_PORTRANGE_LOW changes the range to the "low" are * that is (by convention) restricted to privileged processes. This * convention is based on "vouchsafe" principles only. It is only secure * if you trust the remote host to restrict these ports. * * The default range of ports and the high range can be changed by * sysctl(3). (net.inet.ip.port{hi,low}{first,last}_auto) * * Changing those values has bad security implications if you are * using a stateless firewall that is allowing packets outside of that * range in order to allow transparent outgoing connections. * * Such a firewall configuration will generally depend on the use of these * default values. If you change them, you may find your Security * Administrator looking for you with a heavy object. * * For a slightly more orthodox text view on this: * * ftp://ftp.isi.edu/in-notes/iana/assignments/port-numbers * * port numbers are divided into three ranges: * * 0 - 1023 Well Known Ports * 1024 - 49151 Registered Ports * 49152 - 65535 Dynamic and/or Private Ports * */ /* * Ports < IPPORT_RESERVED are reserved for * privileged processes (e.g. root). (IP_PORTRANGE_LOW) */ #define IPPORT_RESERVED 1024 /* * Default local port range, used by IP_PORTRANGE_DEFAULT */ #define IPPORT_EPHEMERALFIRST 10000 #define IPPORT_EPHEMERALLAST 65535 /* * Dynamic port range, used by IP_PORTRANGE_HIGH. */ #define IPPORT_HIFIRSTAUTO 49152 #define IPPORT_HILASTAUTO 65535 /* * Scanning for a free reserved port return a value below IPPORT_RESERVED, * but higher than IPPORT_RESERVEDSTART. Traditionally the start value was * 512, but that conflicts with some well-known-services that firewalls may * have a fit if we use. */ #define IPPORT_RESERVEDSTART 600 #define IPPORT_MAX 65535 /* * Definitions of bits in internet address integers. * On subnets, the decomposition of addresses to host and net parts * is done according to subnet mask, not the masks here. */ #define IN_CLASSA(i) (((in_addr_t)(i) & 0x80000000) == 0) #define IN_CLASSA_NET 0xff000000 #define IN_CLASSA_NSHIFT 24 #define IN_CLASSA_HOST 0x00ffffff #define IN_CLASSA_MAX 128 #define IN_CLASSB(i) (((in_addr_t)(i) & 0xc0000000) == 0x80000000) #define IN_CLASSB_NET 0xffff0000 #define IN_CLASSB_NSHIFT 16 #define IN_CLASSB_HOST 0x0000ffff #define IN_CLASSB_MAX 65536 #define IN_CLASSC(i) (((in_addr_t)(i) & 0xe0000000) == 0xc0000000) #define IN_CLASSC_NET 0xffffff00 #define IN_CLASSC_NSHIFT 8 #define IN_CLASSC_HOST 0x000000ff #define IN_CLASSD(i) (((in_addr_t)(i) & 0xf0000000) == 0xe0000000) #define IN_CLASSD_NET 0xf0000000 /* These ones aren't really */ #define IN_CLASSD_NSHIFT 28 /* net and host fields, but */ #define IN_CLASSD_HOST 0x0fffffff /* routing needn't know. */ #define IN_MULTICAST(i) IN_CLASSD(i) #define IN_EXPERIMENTAL(i) (((in_addr_t)(i) & 0xf0000000) == 0xf0000000) #define IN_BADCLASS(i) (((in_addr_t)(i) & 0xf0000000) == 0xf0000000) #define IN_LINKLOCAL(i) (((in_addr_t)(i) & 0xffff0000) == 0xa9fe0000) #define IN_LOOPBACK(i) (((in_addr_t)(i) & 0xff000000) == 0x7f000000) #define IN_ZERONET(i) (((in_addr_t)(i) & 0xff000000) == 0) #define IN_PRIVATE(i) ((((in_addr_t)(i) & 0xff000000) == 0x0a000000) || \ (((in_addr_t)(i) & 0xfff00000) == 0xac100000) || \ (((in_addr_t)(i) & 0xffff0000) == 0xc0a80000)) #define IN_LOCAL_GROUP(i) (((in_addr_t)(i) & 0xffffff00) == 0xe0000000) #define IN_ANY_LOCAL(i) (IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i)) #define INADDR_LOOPBACK ((in_addr_t)0x7f000001) #ifndef _KERNEL #define INADDR_NONE ((in_addr_t)0xffffffff) /* -1 return */ #endif #define INADDR_UNSPEC_GROUP ((in_addr_t)0xe0000000) /* 224.0.0.0 */ #define INADDR_ALLHOSTS_GROUP ((in_addr_t)0xe0000001) /* 224.0.0.1 */ #define INADDR_ALLRTRS_GROUP ((in_addr_t)0xe0000002) /* 224.0.0.2 */ #define INADDR_ALLRPTS_GROUP ((in_addr_t)0xe0000016) /* 224.0.0.22, IGMPv3 */ #define INADDR_CARP_GROUP ((in_addr_t)0xe0000012) /* 224.0.0.18 */ #define INADDR_PFSYNC_GROUP ((in_addr_t)0xe00000f0) /* 224.0.0.240 */ #define INADDR_ALLMDNS_GROUP ((in_addr_t)0xe00000fb) /* 224.0.0.251 */ #define INADDR_MAX_LOCAL_GROUP ((in_addr_t)0xe00000ff) /* 224.0.0.255 */ #define IN_LOOPBACKNET 127 /* official! */ #define IN_RFC3021_MASK ((in_addr_t)0xfffffffe) /* * Options for use with [gs]etsockopt at the IP level. * First word of comment is data type; bool is stored in int. */ #define IP_OPTIONS 1 /* buf/ip_opts; set/get IP options */ #define IP_HDRINCL 2 /* int; header is included with data */ #define IP_TOS 3 /* int; IP type of service and preced. */ #define IP_TTL 4 /* int; IP time to live */ #define IP_RECVOPTS 5 /* bool; receive all IP opts w/dgram */ #define IP_RECVRETOPTS 6 /* bool; receive IP opts for response */ #define IP_RECVDSTADDR 7 /* bool; receive IP dst addr w/dgram */ #define IP_SENDSRCADDR IP_RECVDSTADDR /* cmsg_type to set src addr */ #define IP_RETOPTS 8 /* ip_opts; set/get IP options */ #define IP_MULTICAST_IF 9 /* struct in_addr *or* struct ip_mreqn; * set/get IP multicast i/f */ #define IP_MULTICAST_TTL 10 /* u_char; set/get IP multicast ttl */ #define IP_MULTICAST_LOOP 11 /* u_char; set/get IP multicast loopback */ #define IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */ #define IP_DROP_MEMBERSHIP 13 /* ip_mreq; drop an IP group membership */ #define IP_MULTICAST_VIF 14 /* set/get IP mcast virt. iface */ #define IP_RSVP_ON 15 /* enable RSVP in kernel */ #define IP_RSVP_OFF 16 /* disable RSVP in kernel */ #define IP_RSVP_VIF_ON 17 /* set RSVP per-vif socket */ #define IP_RSVP_VIF_OFF 18 /* unset RSVP per-vif socket */ #define IP_PORTRANGE 19 /* int; range to choose for unspec port */ #define IP_RECVIF 20 /* bool; receive reception if w/dgram */ /* for IPSEC */ #define IP_IPSEC_POLICY 21 /* int; set/get security policy */ /* unused; was IP_FAITH */ #define IP_ONESBCAST 23 /* bool: send all-ones broadcast */ #define IP_BINDANY 24 /* bool: allow bind to any address */ #define IP_BINDMULTI 25 /* bool: allow multiple listeners on a tuple */ #define IP_RSS_LISTEN_BUCKET 26 /* int; set RSS listen bucket */ /* * Options for controlling the firewall and dummynet. * Historical options (from 40 to 64) will eventually be * replaced by only two options, IP_FW3 and IP_DUMMYNET3. */ #define IP_FW_TABLE_ADD 40 /* add entry */ #define IP_FW_TABLE_DEL 41 /* delete entry */ #define IP_FW_TABLE_FLUSH 42 /* flush table */ #define IP_FW_TABLE_GETSIZE 43 /* get table size */ #define IP_FW_TABLE_LIST 44 /* list table contents */ #define IP_FW3 48 /* generic ipfw v.3 sockopts */ #define IP_DUMMYNET3 49 /* generic dummynet v.3 sockopts */ #define IP_FW_ADD 50 /* add a firewall rule to chain */ #define IP_FW_DEL 51 /* delete a firewall rule from chain */ #define IP_FW_FLUSH 52 /* flush firewall rule chain */ #define IP_FW_ZERO 53 /* clear single/all firewall counter(s) */ #define IP_FW_GET 54 /* get entire firewall rule chain */ #define IP_FW_RESETLOG 55 /* reset logging counters */ #define IP_FW_NAT_CFG 56 /* add/config a nat rule */ #define IP_FW_NAT_DEL 57 /* delete a nat rule */ #define IP_FW_NAT_GET_CONFIG 58 /* get configuration of a nat rule */ #define IP_FW_NAT_GET_LOG 59 /* get log of a nat rule */ #define IP_DUMMYNET_CONFIGURE 60 /* add/configure a dummynet pipe */ #define IP_DUMMYNET_DEL 61 /* delete a dummynet pipe from chain */ #define IP_DUMMYNET_FLUSH 62 /* flush dummynet */ #define IP_DUMMYNET_GET 64 /* get entire dummynet pipes */ #define IP_RECVTTL 65 /* bool; receive IP TTL w/dgram */ #define IP_MINTTL 66 /* minimum TTL for packet or drop */ #define IP_DONTFRAG 67 /* don't fragment packet */ #define IP_RECVTOS 68 /* bool; receive IP TOS w/dgram */ /* IPv4 Source Filter Multicast API [RFC3678] */ #define IP_ADD_SOURCE_MEMBERSHIP 70 /* join a source-specific group */ #define IP_DROP_SOURCE_MEMBERSHIP 71 /* drop a single source */ #define IP_BLOCK_SOURCE 72 /* block a source */ #define IP_UNBLOCK_SOURCE 73 /* unblock a source */ /* The following option is private; do not use it from user applications. */ #define IP_MSFILTER 74 /* set/get filter list */ /* Protocol Independent Multicast API [RFC3678] */ #define MCAST_JOIN_GROUP 80 /* join an any-source group */ #define MCAST_LEAVE_GROUP 81 /* leave all sources for group */ #define MCAST_JOIN_SOURCE_GROUP 82 /* join a source-specific group */ #define MCAST_LEAVE_SOURCE_GROUP 83 /* leave a single source */ #define MCAST_BLOCK_SOURCE 84 /* block a source */ #define MCAST_UNBLOCK_SOURCE 85 /* unblock a source */ /* Flow and RSS definitions */ #define IP_FLOWID 90 /* get flow id for the given socket/inp */ #define IP_FLOWTYPE 91 /* get flow type (M_HASHTYPE) */ #define IP_RSSBUCKETID 92 /* get RSS flowid -> bucket mapping */ #define IP_RECVFLOWID 93 /* bool; receive IP flowid/flowtype w/ datagram */ #define IP_RECVRSSBUCKETID 94 /* bool; receive IP RSS bucket id w/ datagram */ /* * Defaults and limits for options */ #define IP_DEFAULT_MULTICAST_TTL 1 /* normally limit m'casts to 1 hop */ #define IP_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ /* * The imo_membership vector for each socket is now dynamically allocated at * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized * according to a power-of-two increment. */ #define IP_MIN_MEMBERSHIPS 31 #define IP_MAX_MEMBERSHIPS 4095 #define IP_MAX_SOURCE_FILTER 1024 /* XXX to be unused */ /* * Default resource limits for IPv4 multicast source filtering. * These may be modified by sysctl. */ #define IP_MAX_GROUP_SRC_FILTER 512 /* sources per group */ #define IP_MAX_SOCK_SRC_FILTER 128 /* sources per socket/group */ #define IP_MAX_SOCK_MUTE_FILTER 128 /* XXX no longer used */ /* * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP. */ struct ip_mreq { struct in_addr imr_multiaddr; /* IP multicast address of group */ struct in_addr imr_interface; /* local IP address of interface */ }; /* * Modified argument structure for IP_MULTICAST_IF, obtained from Linux. * This is used to specify an interface index for multicast sends, as * the IPv4 legacy APIs do not support this (unless IP_SENDIF is available). */ struct ip_mreqn { struct in_addr imr_multiaddr; /* IP multicast address of group */ struct in_addr imr_address; /* local IP address of interface */ int imr_ifindex; /* Interface index; cast to uint32_t */ }; /* * Argument structure for IPv4 Multicast Source Filter APIs. [RFC3678] */ struct ip_mreq_source { struct in_addr imr_multiaddr; /* IP multicast address of group */ struct in_addr imr_sourceaddr; /* IP address of source */ struct in_addr imr_interface; /* local IP address of interface */ }; /* * Argument structures for Protocol-Independent Multicast Source * Filter APIs. [RFC3678] */ struct group_req { uint32_t gr_interface; /* interface index */ struct sockaddr_storage gr_group; /* group address */ }; struct group_source_req { uint32_t gsr_interface; /* interface index */ struct sockaddr_storage gsr_group; /* group address */ struct sockaddr_storage gsr_source; /* source address */ }; #ifndef __MSFILTERREQ_DEFINED #define __MSFILTERREQ_DEFINED /* * The following structure is private; do not use it from user applications. * It is used to communicate IP_MSFILTER/IPV6_MSFILTER information between * the RFC 3678 libc functions and the kernel. */ struct __msfilterreq { uint32_t msfr_ifindex; /* interface index */ uint32_t msfr_fmode; /* filter mode for group */ uint32_t msfr_nsrcs; /* # of sources in msfr_srcs */ struct sockaddr_storage msfr_group; /* group address */ struct sockaddr_storage *msfr_srcs; /* pointer to the first member * of a contiguous array of * sources to filter in full. */ }; #endif struct sockaddr; /* * Advanced (Full-state) APIs [RFC3678] * The RFC specifies uint_t for the 6th argument to [sg]etsourcefilter(). * We use uint32_t here to be consistent. */ int setipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t, uint32_t, struct in_addr *); int getipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t *, uint32_t *, struct in_addr *); int setsourcefilter(int, uint32_t, struct sockaddr *, socklen_t, uint32_t, uint32_t, struct sockaddr_storage *); int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t, uint32_t *, uint32_t *, struct sockaddr_storage *); /* * Filter modes; also used to represent per-socket filter mode internally. */ #define MCAST_UNDEFINED 0 /* fmode: not yet defined */ #define MCAST_INCLUDE 1 /* fmode: include these source(s) */ #define MCAST_EXCLUDE 2 /* fmode: exclude these source(s) */ /* * Argument for IP_PORTRANGE: * - which range to search when port is unspecified at bind() or connect() */ #define IP_PORTRANGE_DEFAULT 0 /* default range */ #define IP_PORTRANGE_HIGH 1 /* "high" - request firewall bypass */ #define IP_PORTRANGE_LOW 2 /* "low" - vouchsafe security */ /* * Identifiers for IP sysctl nodes */ #define IPCTL_FORWARDING 1 /* act as router */ #define IPCTL_SENDREDIRECTS 2 /* may send redirects when forwarding */ #define IPCTL_DEFTTL 3 /* default TTL */ #ifdef notyet #define IPCTL_DEFMTU 4 /* default MTU */ #endif /* IPCTL_RTEXPIRE 5 deprecated */ /* IPCTL_RTMINEXPIRE 6 deprecated */ /* IPCTL_RTMAXCACHE 7 deprecated */ #define IPCTL_SOURCEROUTE 8 /* may perform source routes */ #define IPCTL_DIRECTEDBROADCAST 9 /* may re-broadcast received packets */ #define IPCTL_INTRQMAXLEN 10 /* max length of netisr queue */ #define IPCTL_INTRQDROPS 11 /* number of netisr q drops */ #define IPCTL_STATS 12 /* ipstat structure */ #define IPCTL_ACCEPTSOURCEROUTE 13 /* may accept source routed packets */ #define IPCTL_FASTFORWARDING 14 /* use fast IP forwarding code */ /* 15, unused, was: IPCTL_KEEPFAITH */ #define IPCTL_GIF_TTL 16 /* default TTL for gif encap packet */ #endif /* __BSD_VISIBLE */ #ifdef _KERNEL struct ifnet; struct mbuf; /* forward declarations for Standard C */ int in_broadcast(struct in_addr, struct ifnet *); int in_canforward(struct in_addr); int in_localaddr(struct in_addr); int in_localip(struct in_addr); +int in_ifhasaddr(struct ifnet *, struct in_addr); int inet_aton(const char *, struct in_addr *); /* in libkern */ char *inet_ntoa(struct in_addr); /* in libkern */ char *inet_ntoa_r(struct in_addr ina, char *buf); /* in libkern */ char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern */ int inet_pton(int af, const char *, void *); /* in libkern */ void in_ifdetach(struct ifnet *); #define in_hosteq(s, t) ((s).s_addr == (t).s_addr) #define in_nullhost(x) ((x).s_addr == INADDR_ANY) #define in_allhosts(x) ((x).s_addr == htonl(INADDR_ALLHOSTS_GROUP)) #define satosin(sa) ((struct sockaddr_in *)(sa)) #define sintosa(sin) ((struct sockaddr *)(sin)) #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) #endif /* _KERNEL */ /* INET6 stuff */ #if __POSIX_VISIBLE >= 200112 #define __KAME_NETINET_IN_H_INCLUDED_ #include #undef __KAME_NETINET_IN_H_INCLUDED_ #endif #endif /* !_NETINET_IN_H_*/ Index: projects/ifnet/sys/netinet6/in6.c =================================================================== --- projects/ifnet/sys/netinet6/in6.c (revision 281649) +++ projects/ifnet/sys/netinet6/in6.c (revision 281650) @@ -1,2400 +1,2430 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.c 8.2 (Berkeley) 11/15/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix); #define V_icmp6_nodeinfo_oldmcprefix VNET(icmp6_nodeinfo_oldmcprefix) /* * Definitions of some costant IP6 addresses. */ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_nodelocal_allnodes = IN6ADDR_NODELOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; const struct in6_addr in6addr_linklocal_allv2routers = IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT; const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask32 = IN6MASK32; const struct in6_addr in6mask64 = IN6MASK64; const struct in6_addr in6mask96 = IN6MASK96; const struct in6_addr in6mask128 = IN6MASK128; const struct sockaddr_in6 sa6_any = { sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 }; static int in6_notify_ifa(struct ifnet *, struct in6_ifaddr *, struct in6_aliasreq *, int); static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); static int in6_validate_ifra(struct ifnet *, struct in6_aliasreq *, struct in6_ifaddr *, int); static struct in6_ifaddr *in6_alloc_ifa(struct ifnet *, struct in6_aliasreq *, int flags); static int in6_update_ifa_internal(struct ifnet *, struct in6_aliasreq *, struct in6_ifaddr *, int, int); static int in6_broadcast_ifa(struct ifnet *, struct in6_aliasreq *, struct in6_ifaddr *, int); #define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) #define ia62ifa(ia6) (&((ia6)->ia_ifa)) void in6_newaddrmsg(struct in6_ifaddr *ia, int cmd) { struct sockaddr_dl gateway; struct sockaddr_in6 mask, addr; struct rtentry rt; /* * initialize for rtmsg generation */ bzero(&gateway, sizeof(gateway)); gateway.sdl_len = sizeof(gateway); gateway.sdl_family = AF_LINK; bzero(&rt, sizeof(rt)); rt.rt_gateway = (struct sockaddr *)&gateway; memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); rt_mask(&rt) = (struct sockaddr *)&mask; rt_key(&rt) = (struct sockaddr *)&addr; rt.rt_flags = RTF_HOST | RTF_STATIC; if (cmd == RTM_ADD) rt.rt_flags |= RTF_UP; /* Announce arrival of local address to all FIBs. */ rt_newaddrmsg(cmd, &ia->ia_ifa, 0, &rt); } int in6_mask2len(struct in6_addr *mask, u_char *lim0) { int x = 0, y; u_char *lim = lim0, *p; /* ignore the scope_id part */ if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask)) lim = (u_char *)mask + sizeof(*mask); for (p = (u_char *)mask; p < lim; x++, p++) { if (*p != 0xff) break; } y = 0; if (p < lim) { for (y = 0; y < 8; y++) { if ((*p & (0x80 >> y)) == 0) break; } } /* * when the limit pointer is given, do a stricter check on the * remaining bits. */ if (p < lim) { if (y != 0 && (*p & (0x00ff >> y)) != 0) return (-1); for (p = p + 1; p < lim; p++) if (*p != 0) return (-1); } return x * 8 + y; } #ifdef COMPAT_FREEBSD32 struct in6_ndifreq32 { char ifname[IFNAMSIZ]; uint32_t ifindex; }; #define SIOCGDEFIFACE32_IN6 _IOWR('i', 86, struct in6_ndifreq32) #endif int in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct in6_ifreq *ifr = (struct in6_ifreq *)data; struct in6_ifaddr *ia = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; struct sockaddr_in6 *sa6; int carp_attached = 0; int error; u_long ocmd = cmd; /* * Compat to make pre-10.x ifconfig(8) operable. */ if (cmd == OSIOCAIFADDR_IN6) cmd = SIOCAIFADDR_IN6; switch (cmd) { case SIOCGETSGCNT_IN6: case SIOCGETMIFCNT_IN6: /* * XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c. * We cannot see how that would be needed, so do not adjust the * KPI blindly; more likely should clean up the IPv4 variant. */ return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP); } switch (cmd) { case SIOCAADDRCTL_POLICY: case SIOCDADDRCTL_POLICY: if (td != NULL) { error = priv_check(td, PRIV_NETINET_ADDRCTRL6); if (error) return (error); } return (in6_src_ioctl(cmd, data)); } if (ifp == NULL) return (EOPNOTSUPP); switch (cmd) { case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCSDEFIFACE_IN6: case SIOCSIFINFO_FLAGS: case SIOCSIFINFO_IN6: if (td != NULL) { error = priv_check(td, PRIV_NETINET_ND6); if (error) return (error); } /* FALLTHROUGH */ case OSIOCGIFINFO_IN6: case SIOCGIFINFO_IN6: case SIOCGDRLST_IN6: case SIOCGPRLST_IN6: case SIOCGNBRINFO_IN6: case SIOCGDEFIFACE_IN6: return (nd6_ioctl(cmd, data, ifp)); #ifdef COMPAT_FREEBSD32 case SIOCGDEFIFACE32_IN6: { struct in6_ndifreq ndif; struct in6_ndifreq32 *ndif32; error = nd6_ioctl(SIOCGDEFIFACE_IN6, (caddr_t)&ndif, ifp); if (error) return (error); ndif32 = (struct in6_ndifreq32 *)data; ndif32->ifindex = ndif.ifindex; return (0); } #endif } switch (cmd) { case SIOCSIFPREFIX_IN6: case SIOCDIFPREFIX_IN6: case SIOCAIFPREFIX_IN6: case SIOCCIFPREFIX_IN6: case SIOCSGIFPREFIX_IN6: case SIOCGIFPREFIX_IN6: log(LOG_NOTICE, "prefix ioctls are now invalidated. " "please use ifconfig.\n"); return (EOPNOTSUPP); } switch (cmd) { case SIOCSSCOPE6: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SCOPE6); if (error) return (error); } /* FALLTHROUGH */ case SIOCGSCOPE6: case SIOCGSCOPE6DEF: return (scope6_ioctl(cmd, data, ifp)); } /* * Find address for this interface, if it exists. * * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation * only, and used the first interface address as the target of other * operations (without checking ifra_addr). This was because netinet * code/API assumed at most 1 interface address per interface. * Since IPv6 allows a node to assign multiple addresses * on a single interface, we almost always look and check the * presence of ifra_addr, and reject invalid ones here. * It also decreases duplicated code among SIOC*_IN6 operations. */ switch (cmd) { case SIOCAIFADDR_IN6: case SIOCSIFPHYADDR_IN6: sa6 = &ifra->ifra_addr; break; case SIOCSIFADDR_IN6: case SIOCGIFADDR_IN6: case SIOCSIFDSTADDR_IN6: case SIOCSIFNETMASK_IN6: case SIOCGIFDSTADDR_IN6: case SIOCGIFNETMASK_IN6: case SIOCDIFADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: case SIOCGIFAFLAG_IN6: case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCGIFALIFETIME_IN6: case SIOCSIFALIFETIME_IN6: case SIOCGIFSTAT_IN6: case SIOCGIFSTAT_ICMP6: sa6 = &ifr->ifr_addr; break; case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFDSTADDR: case SIOCSIFNETMASK: /* * Although we should pass any non-INET6 ioctl requests * down to driver, we filter some legacy INET requests. * Drivers trust SIOCSIFADDR et al to come from an already * privileged layer, and do not perform any credentials * checks or input validation. */ return (EINVAL); default: sa6 = NULL; break; } if (sa6 && sa6->sin6_family == AF_INET6) { if (sa6->sin6_scope_id != 0) error = sa6_embedscope(sa6, 0); else error = in6_setscope(&sa6->sin6_addr, ifp, NULL); if (error != 0) return (error); if (td != NULL && (error = prison_check_ip6(td->td_ucred, &sa6->sin6_addr)) != 0) return (error); ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr); } else ia = NULL; switch (cmd) { case SIOCSIFADDR_IN6: case SIOCSIFDSTADDR_IN6: case SIOCSIFNETMASK_IN6: /* * Since IPv6 allows a node to assign multiple addresses * on a single interface, SIOCSIFxxx ioctls are deprecated. */ /* we decided to obsolete this command (20000704) */ error = EINVAL; goto out; case SIOCDIFADDR_IN6: /* * for IPv4, we look for existing in_ifaddr here to allow * "ifconfig if0 delete" to remove the first IPv4 address on * the interface. For IPv6, as the spec allows multiple * interface address from the day one, we consider "remove the * first one" semantics to be not preferable. */ if (ia == NULL) { error = EADDRNOTAVAIL; goto out; } /* FALLTHROUGH */ case SIOCAIFADDR_IN6: /* * We always require users to specify a valid IPv6 address for * the corresponding operation. */ if (ifra->ifra_addr.sin6_family != AF_INET6 || ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) { error = EAFNOSUPPORT; goto out; } if (td != NULL) { error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ? PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR); if (error) goto out; } /* FALLTHROUGH */ case SIOCGIFSTAT_IN6: case SIOCGIFSTAT_ICMP6: if (ifp->if_afdata[AF_INET6] == NULL) { error = EPFNOSUPPORT; goto out; } break; case SIOCGIFADDR_IN6: /* This interface is basically deprecated. use SIOCGIFCONF. */ /* FALLTHROUGH */ case SIOCGIFAFLAG_IN6: case SIOCGIFNETMASK_IN6: case SIOCGIFDSTADDR_IN6: case SIOCGIFALIFETIME_IN6: /* must think again about its semantics */ if (ia == NULL) { error = EADDRNOTAVAIL; goto out; } break; case SIOCSIFALIFETIME_IN6: { struct in6_addrlifetime *lt; if (td != NULL) { error = priv_check(td, PRIV_NETINET_ALIFETIME6); if (error) goto out; } if (ia == NULL) { error = EADDRNOTAVAIL; goto out; } /* sanity for overflow - beware unsigned */ lt = &ifr->ifr_ifru.ifru_lifetime; if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && lt->ia6t_vltime + time_uptime < time_uptime) { error = EINVAL; goto out; } if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && lt->ia6t_pltime + time_uptime < time_uptime) { error = EINVAL; goto out; } break; } } switch (cmd) { case SIOCGIFADDR_IN6: ifr->ifr_addr = ia->ia_addr; if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0) goto out; break; case SIOCGIFDSTADDR_IN6: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { error = EINVAL; goto out; } /* * XXX: should we check if ifa_dstaddr is NULL and return * an error? */ ifr->ifr_dstaddr = ia->ia_dstaddr; if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0) goto out; break; case SIOCGIFNETMASK_IN6: ifr->ifr_addr = ia->ia_prefixmask; break; case SIOCGIFAFLAG_IN6: ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags; break; case SIOCGIFSTAT_IN6: COUNTER_ARRAY_COPY(((struct in6_ifextra *) ifp->if_afdata[AF_INET6])->in6_ifstat, &ifr->ifr_ifru.ifru_stat, sizeof(struct in6_ifstat) / sizeof(uint64_t)); break; case SIOCGIFSTAT_ICMP6: COUNTER_ARRAY_COPY(((struct in6_ifextra *) ifp->if_afdata[AF_INET6])->icmp6_ifstat, &ifr->ifr_ifru.ifru_icmp6stat, sizeof(struct icmp6_ifstat) / sizeof(uint64_t)); break; case SIOCGIFALIFETIME_IN6: ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime; if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { time_t maxexpire; struct in6_addrlifetime *retlt = &ifr->ifr_ifru.ifru_lifetime; /* * XXX: adjust expiration time assuming time_t is * signed. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (ia->ia6_lifetime.ia6t_vltime < maxexpire - ia->ia6_updatetime) { retlt->ia6t_expire = ia->ia6_updatetime + ia->ia6_lifetime.ia6t_vltime; } else retlt->ia6t_expire = maxexpire; } if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { time_t maxexpire; struct in6_addrlifetime *retlt = &ifr->ifr_ifru.ifru_lifetime; /* * XXX: adjust expiration time assuming time_t is * signed. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (ia->ia6_lifetime.ia6t_pltime < maxexpire - ia->ia6_updatetime) { retlt->ia6t_preferred = ia->ia6_updatetime + ia->ia6_lifetime.ia6t_pltime; } else retlt->ia6t_preferred = maxexpire; } break; case SIOCSIFALIFETIME_IN6: ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; /* for sanity */ if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = time_uptime + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = time_uptime + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; break; case SIOCAIFADDR_IN6: { struct nd_prefixctl pr0; struct nd_prefix *pr; /* * first, make or update the interface address structure, * and link it to the list. */ if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0) goto out; if (ia != NULL) ifa_free(&ia->ia_ifa); if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) == NULL) { /* * this can happen when the user specify the 0 valid * lifetime. */ break; } if (cmd == ocmd && ifra->ifra_vhid > 0) { if (carp_attach_p != NULL) error = (*carp_attach_p)(&ia->ia_ifa, ifra->ifra_vhid); else error = EPROTONOSUPPORT; if (error) goto out; else carp_attached = 1; } /* * then, make the prefix on-link on the interface. * XXX: we'd rather create the prefix before the address, but * we need at least one address to install the corresponding * interface route, so we configure the address first. */ /* * convert mask to prefix length (prefixmask has already * been validated in in6_update_ifa(). */ bzero(&pr0, sizeof(pr0)); pr0.ndpr_ifp = ifp; pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL); if (pr0.ndpr_plen == 128) { /* we don't need to install a host route. */ goto aifaddr_out; } pr0.ndpr_prefix = ifra->ifra_addr; /* apply the mask for safety. */ IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr, &ifra->ifra_prefixmask.sin6_addr); /* * XXX: since we don't have an API to set prefix (not address) * lifetimes, we just use the same lifetimes as addresses. * The (temporarily) installed lifetimes can be overridden by * later advertised RAs (when accept_rtadv is non 0), which is * an intended behavior. */ pr0.ndpr_raf_onlink = 1; /* should be configurable? */ pr0.ndpr_raf_auto = ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0); pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime; pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime; /* add the prefix if not yet. */ if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { /* * nd6_prelist_add will install the corresponding * interface route. */ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) { if (carp_attached) (*carp_detach_p)(&ia->ia_ifa); goto out; } if (pr == NULL) { if (carp_attached) (*carp_detach_p)(&ia->ia_ifa); log(LOG_ERR, "nd6_prelist_add succeeded but " "no prefix\n"); error = EINVAL; goto out; } } /* relate the address to the prefix */ if (ia->ia6_ndpr == NULL) { ia->ia6_ndpr = pr; pr->ndpr_refcnt++; /* * If this is the first autoconf address from the * prefix, create a temporary address as well * (when required). */ if ((ia->ia6_flags & IN6_IFF_AUTOCONF) && V_ip6_use_tempaddr && pr->ndpr_refcnt == 1) { int e; if ((e = in6_tmpifadd(ia, 1, 0)) != 0) { log(LOG_NOTICE, "in6_control: failed " "to create a temporary address, " "errno=%d\n", e); } } } /* * this might affect the status of autoconfigured addresses, * that is, this address might make other addresses detached. */ pfxlist_onlink_check(); aifaddr_out: if (error != 0 || ia == NULL) break; /* * Try to clear the flag when a new IPv6 address is added * onto an IFDISABLED interface and it succeeds. */ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { struct in6_ndireq nd; memset(&nd, 0, sizeof(nd)); nd.ndi.flags = ND_IFINFO(ifp)->flags; nd.ndi.flags &= ~ND6_IFF_IFDISABLED; if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0) log(LOG_NOTICE, "SIOCAIFADDR_IN6: " "SIOCSIFINFO_FLAGS for -ifdisabled " "failed."); /* * Ignore failure of clearing the flag intentionally. * The failure means address duplication was detected. */ } EVENTHANDLER_INVOKE(ifaddr_event, ifp); break; } case SIOCDIFADDR_IN6: { struct nd_prefix *pr; /* * If the address being deleted is the only one that owns * the corresponding prefix, expire the prefix as well. * XXX: theoretically, we don't have to worry about such * relationship, since we separate the address management * and the prefix management. We do this, however, to provide * as much backward compatibility as possible in terms of * the ioctl operation. * Note that in6_purgeaddr() will decrement ndpr_refcnt. */ pr = ia->ia6_ndpr; in6_purgeaddr(&ia->ia_ifa); if (pr && pr->ndpr_refcnt == 0) prelist_remove(pr); EVENTHANDLER_INVOKE(ifaddr_event, ifp); break; } default: error = if_ioctl(ifp, cmd, data, td); goto out; } error = 0; out: if (ia != NULL) ifa_free(&ia->ia_ifa); return (error); } /* * Join necessary multicast groups. Factored out from in6_update_ifa(). * This entire work should only be done once, for the default FIB. */ static int in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol) { char ip6buf[INET6_ADDRSTRLEN]; struct in6_addr mltaddr; struct in6_multi_mship *imm; int delay, error; KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__)); /* Join solicited multicast addr for new host id. */ bzero(&mltaddr, sizeof(struct in6_addr)); mltaddr.s6_addr32[0] = IPV6_ADDR_INT32_MLL; mltaddr.s6_addr32[2] = htonl(1); mltaddr.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; mltaddr.s6_addr8[12] = 0xff; if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) { /* XXX: should not happen */ log(LOG_ERR, "%s: in6_setscope failed\n", __func__); goto cleanup; } delay = error = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * We need a random delay for DAD on the address being * configured. It also means delaying transmission of the * corresponding MLD report to avoid report collision. * [RFC 4861, Section 6.3.7] */ delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); } imm = in6_joingroup(ifp, &mltaddr, &error, delay); if (imm == NULL) { nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); *in6m_sol = imm->i6mm_maddr; /* * Join link-local all-nodes address. */ mltaddr = in6addr_linklocal_allnodes; if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ imm = in6_joingroup(ifp, &mltaddr, &error, 0); if (imm == NULL) { nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); /* * Join node information group address. */ delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * The spec does not say anything about delay for this group, * but the same logic should apply. */ delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); } if (in6_nigroup(ifp, NULL, -1, &mltaddr) == 0) { /* XXX jinmei */ imm = in6_joingroup(ifp, &mltaddr, &error, delay); if (imm == NULL) nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr), if_name(ifp), error)); /* XXX not very fatal, go on... */ else LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); } if (V_icmp6_nodeinfo_oldmcprefix && in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr) == 0) { imm = in6_joingroup(ifp, &mltaddr, &error, delay); if (imm == NULL) nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr), if_name(ifp), error)); /* XXX not very fatal, go on... */ else LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); } /* * Join interface-local all-nodes address. * (ff01::1%ifN, and ff01::%ifN/32) */ mltaddr = in6addr_nodelocal_allnodes; if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ imm = in6_joingroup(ifp, &mltaddr, &error, 0); if (imm == NULL) { nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); cleanup: return (error); } /* * Update parameters of an IPv6 interface address. * If necessary, a new entry is created and linked into address chains. * This function is separated from in6_control(). */ int in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags) { int error, hostIsNew = 0; if ((error = in6_validate_ifra(ifp, ifra, ia, flags)) != 0) return (error); if (ia == NULL) { hostIsNew = 1; if ((ia = in6_alloc_ifa(ifp, ifra, flags)) == NULL) return (ENOBUFS); } error = in6_update_ifa_internal(ifp, ifra, ia, hostIsNew, flags); if (error != 0) { if (hostIsNew != 0) { in6_unlink_ifa(ia, ifp); ifa_free(&ia->ia_ifa); } return (error); } if (hostIsNew) error = in6_broadcast_ifa(ifp, ifra, ia, flags); return (error); } /* * Fill in basic IPv6 address request info. */ void in6_prepare_ifra(struct in6_aliasreq *ifra, const struct in6_addr *addr, const struct in6_addr *mask) { memset(ifra, 0, sizeof(struct in6_aliasreq)); ifra->ifra_addr.sin6_family = AF_INET6; ifra->ifra_addr.sin6_len = sizeof(struct sockaddr_in6); if (addr != NULL) ifra->ifra_addr.sin6_addr = *addr; ifra->ifra_prefixmask.sin6_family = AF_INET6; ifra->ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); if (mask != NULL) ifra->ifra_prefixmask.sin6_addr = *mask; } static int in6_validate_ifra(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags) { int plen = -1; struct sockaddr_in6 dst6; struct in6_addrlifetime *lt; char ip6buf[INET6_ADDRSTRLEN]; /* Validate parameters */ if (ifp == NULL || ifra == NULL) /* this maybe redundant */ return (EINVAL); /* * The destination address for a p2p link must have a family * of AF_UNSPEC or AF_INET6. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ifra->ifra_dstaddr.sin6_family != AF_INET6 && ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) return (EAFNOSUPPORT); /* * Validate address */ if (ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6) || ifra->ifra_addr.sin6_family != AF_INET6) return (EINVAL); /* * validate ifra_prefixmask. don't check sin6_family, netmask * does not carry fields other than sin6_len. */ if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6)) return (EINVAL); /* * Because the IPv6 address architecture is classless, we require * users to specify a (non 0) prefix length (mask) for a new address. * We also require the prefix (when specified) mask is valid, and thus * reject a non-consecutive mask. */ if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0) return (EINVAL); if (ifra->ifra_prefixmask.sin6_len != 0) { plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, (u_char *)&ifra->ifra_prefixmask + ifra->ifra_prefixmask.sin6_len); if (plen <= 0) return (EINVAL); } else { /* * In this case, ia must not be NULL. We just use its prefix * length. */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); } /* * If the destination address on a p2p interface is specified, * and the address is a scoped one, validate/set the scope * zone identifier. */ dst6 = ifra->ifra_dstaddr; if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 && (dst6.sin6_family == AF_INET6)) { struct in6_addr in6_tmp; u_int32_t zoneid; in6_tmp = dst6.sin6_addr; if (in6_setscope(&in6_tmp, ifp, &zoneid)) return (EINVAL); /* XXX: should be impossible */ if (dst6.sin6_scope_id != 0) { if (dst6.sin6_scope_id != zoneid) return (EINVAL); } else /* user omit to specify the ID. */ dst6.sin6_scope_id = zoneid; /* convert into the internal form */ if (sa6_embedscope(&dst6, 0)) return (EINVAL); /* XXX: should be impossible */ } /* Modify original ifra_dstaddr to reflect changes */ ifra->ifra_dstaddr = dst6; /* * The destination address can be specified only for a p2p or a * loopback interface. If specified, the corresponding prefix length * must be 128. */ if (ifra->ifra_dstaddr.sin6_family == AF_INET6) { if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) { /* XXX: noisy message */ nd6log((LOG_INFO, "in6_update_ifa: a destination can " "be specified for a p2p or a loopback IF only\n")); return (EINVAL); } if (plen != 128) { nd6log((LOG_INFO, "in6_update_ifa: prefixlen should " "be 128 when dstaddr is specified\n")); return (EINVAL); } } /* lifetime consistency check */ lt = &ifra->ifra_lifetime; if (lt->ia6t_pltime > lt->ia6t_vltime) return (EINVAL); if (lt->ia6t_vltime == 0) { /* * the following log might be noisy, but this is a typical * configuration mistake or a tool's bug. */ nd6log((LOG_INFO, "in6_update_ifa: valid lifetime is 0 for %s\n", ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr))); if (ia == NULL) return (0); /* there's nothing to do */ } /* Check prefix mask */ if (ia != NULL && ifra->ifra_prefixmask.sin6_len != 0) { /* * We prohibit changing the prefix length of an existing * address, because * + such an operation should be rare in IPv6, and * + the operation would confuse prefix management. */ if (ia->ia_prefixmask.sin6_len != 0 && in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) { nd6log((LOG_INFO, "in6_validate_ifa: the prefix length " "of an existing %s address should not be changed\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); return (EINVAL); } } return (0); } /* * Allocate a new ifaddr and link it into chains. */ static struct in6_ifaddr * in6_alloc_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int flags) { struct in6_ifaddr *ia; /* * When in6_alloc_ifa() is called in a process of a received * RA, it is called under an interrupt context. So, we should * call malloc with M_NOWAIT. */ ia = (struct in6_ifaddr *)ifa_alloc(sizeof(*ia), M_NOWAIT); if (ia == NULL) return (NULL); LIST_INIT(&ia->ia6_memberships); /* Initialize the address and masks, and put time stamp */ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; ia->ia_addr.sin6_family = AF_INET6; ia->ia_addr.sin6_len = sizeof(ia->ia_addr); /* XXX: Can we assign ,sin6_addr and skip the rest? */ ia->ia_addr = ifra->ifra_addr; ia->ia6_createtime = time_uptime; if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { /* * Some functions expect that ifa_dstaddr is not * NULL for p2p interfaces. */ ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; } else { ia->ia_ifa.ifa_dstaddr = NULL; } /* set prefix mask if any */ ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; if (ifra->ifra_prefixmask.sin6_len != 0) { ia->ia_prefixmask.sin6_family = AF_INET6; ia->ia_prefixmask.sin6_len = ifra->ifra_prefixmask.sin6_len; ia->ia_prefixmask.sin6_addr = ifra->ifra_prefixmask.sin6_addr; } ia->ia_ifp = ifp; ifa_ref(&ia->ia_ifa); /* if_addrhead */ IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */ IN6_IFADDR_WLOCK(); TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link); LIST_INSERT_HEAD(IN6ADDR_HASH(&ia->ia_addr.sin6_addr), ia, ia6_hash); IN6_IFADDR_WUNLOCK(); return (ia); } /* * Update/configure interface address parameters: * * 1) Update lifetime * 2) Update interface metric ad flags * 3) Notify other subsystems */ static int in6_update_ifa_internal(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int hostIsNew, int flags) { int error; /* update timestamp */ ia->ia6_updatetime = time_uptime; /* * Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred * to see if the address is deprecated or invalidated, but initialize * these members for applications. */ ia->ia6_lifetime = ifra->ifra_lifetime; if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = time_uptime + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = time_uptime + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; /* * backward compatibility - if IN6_IFF_DEPRECATED is set from the * userland, make it deprecated. */ if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) { ia->ia6_lifetime.ia6t_pltime = 0; ia->ia6_lifetime.ia6t_preferred = time_uptime; } /* * configure address flags. */ ia->ia6_flags = ifra->ifra_flags; /* * Make the address tentative before joining multicast addresses, * so that corresponding MLD responses would not have a tentative * source address. */ ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */ if (hostIsNew && in6if_do_dad(ifp)) ia->ia6_flags |= IN6_IFF_TENTATIVE; /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) ia->ia6_flags |= IN6_IFF_TENTATIVE; /* notify other subsystems */ error = in6_notify_ifa(ifp, ia, ifra, hostIsNew); return (error); } /* * Do link-level ifa job: * 1) Add lle entry for added address * 2) Notifies routing socket users about new address * 3) join appropriate multicast group * 4) start DAD if enabled */ static int in6_broadcast_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags) { struct in6_multi *in6m_sol; int error = 0; /* Add local address to lltable, if necessary (ex. on p2p link). */ if ((error = nd6_add_ifa_lle(ia)) != 0) { in6_purgeaddr(&ia->ia_ifa); ifa_free(&ia->ia_ifa); return (error); } /* Join necessary multicast groups. */ in6m_sol = NULL; if ((ifp->if_flags & IFF_MULTICAST) != 0) { error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol); if (error != 0) { in6_purgeaddr(&ia->ia_ifa); ifa_free(&ia->ia_ifa); return (error); } } /* * Perform DAD, if needed. * XXX It may be of use, if we can administratively disable DAD. */ if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && (ia->ia6_flags & IN6_IFF_TENTATIVE)) { int delay, mindelay, maxdelay; delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * We need to impose a delay before sending an NS * for DAD. Check if we also needed a delay for the * corresponding MLD message. If we did, the delay * should be larger than the MLD delay (this could be * relaxed a bit, but this simple logic is at least * safe). * XXX: Break data hiding guidelines and look at * state for the solicited multicast group. */ mindelay = 0; if (in6m_sol != NULL && in6m_sol->in6m_state == MLD_REPORTING_MEMBER) { mindelay = in6m_sol->in6m_timer; } maxdelay = MAX_RTR_SOLICITATION_DELAY * hz; if (maxdelay - mindelay == 0) delay = 0; else { delay = (arc4random() % (maxdelay - mindelay)) + mindelay; } } nd6_dad_start((struct ifaddr *)ia, delay); } ifa_free(&ia->ia_ifa); return (error); } void in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; struct in6_multi_mship *imm; int plen, error; if (ifa->ifa_carp) (*carp_detach_p)(ifa); /* * Remove the loopback route to the interface address. * The check for the current setting of "nd6_useloopback" * is not needed. */ if (ia->ia_flags & IFA_RTSELF) { error = ifa_del_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error == 0) ia->ia_flags &= ~IFA_RTSELF; } /* stop DAD processing */ nd6_dad_stop(ifa); /* Remove local address entry from lltable. */ nd6_rem_ifa_lle(ia); /* Leave multicast groups. */ while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { LIST_REMOVE(imm, i6mm_chain); in6_leavegroup(imm); } plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if ((ia->ia_flags & IFA_ROUTE) && plen == 128) { error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags | (ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0); if (error != 0) log(LOG_INFO, "%s: err=%d, destination address delete " "failed\n", __func__, error); ia->ia_flags &= ~IFA_ROUTE; } in6_unlink_ifa(ia, ifp); } static void in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) { char ip6buf[INET6_ADDRSTRLEN]; IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ /* * Defer the release of what might be the last reference to the * in6_ifaddr so that it can't be freed before the remainder of the * cleanup. */ IN6_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link); LIST_REMOVE(ia, ia6_hash); IN6_IFADDR_WUNLOCK(); /* * Release the reference to the base prefix. There should be a * positive reference. */ if (ia->ia6_ndpr == NULL) { nd6log((LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address " "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia)))); } else { ia->ia6_ndpr->ndpr_refcnt--; ia->ia6_ndpr = NULL; } /* * Also, if the address being removed is autoconf'ed, call * pfxlist_onlink_check() since the release might affect the status of * other (detached) addresses. */ if ((ia->ia6_flags & IN6_IFF_AUTOCONF)) { pfxlist_onlink_check(); } ifa_free(&ia->ia_ifa); /* in6_ifaddrhead */ } /* * Notifies other other subsystems about address change/arrival: * 1) Notifies device handler on first IPv6 address assignment * 2) Handle routing table changes for P2P links and route * 3) Handle routing table changes for address host route */ static int in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia, struct in6_aliasreq *ifra, int hostIsNew) { int error = 0, plen, ifacount = 0; struct ifaddr *ifa; struct sockaddr_in6 *pdst; char ip6buf[INET6_ADDRSTRLEN]; /* * Give the interface a chance to initialize * if this is its first address, */ if (hostIsNew != 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifacount++; } IF_ADDR_RUNLOCK(ifp); } if (ifacount <= 1) { error = if_ioctl(ifp, SIOCSIFADDR, ia, curthread); if (error != 0 && error != EOPNOTSUPP) return (error); } /* * If a new destination address is specified, scrub the old one and * install the new destination. Note that the interface must be * p2p or loopback. */ pdst = &ifra->ifra_dstaddr; if (pdst->sin6_family == AF_INET6 && !IN6_ARE_ADDR_EQUAL(&pdst->sin6_addr, &ia->ia_dstaddr.sin6_addr)) { if ((ia->ia_flags & IFA_ROUTE) != 0 && (rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST) != 0)) { nd6log((LOG_ERR, "in6_update_ifa_internal: failed to " "remove a route to the old destination: %s\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); /* proceed anyway... */ } else ia->ia_flags &= ~IFA_ROUTE; ia->ia_dstaddr = *pdst; } /* * If a new destination address is specified for a point-to-point * interface, install a route to the destination as an interface * direct route. * XXX: the logic below rejects assigning multiple addresses on a p2p * interface that share the same destination. */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { int rtflags = RTF_UP | RTF_HOST; /* * Handle the case for ::1 . */ if (ifp->if_flags & IFF_LOOPBACK) ia->ia_flags |= IFA_RTSELF; error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags); if (error) return (error); ia->ia_flags |= IFA_ROUTE; } /* * add a loopback route to self if not exists */ if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) { error = ifa_add_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error == 0) ia->ia_flags |= IFA_RTSELF; } return (error); } /* * Find an IPv6 interface link-local address specific to an interface. * ifaddr is returned referenced. */ struct in6_ifaddr * in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) { struct ifaddr *ifa; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) { if ((((struct in6_ifaddr *)ifa)->ia6_flags & ignoreflags) != 0) continue; ifa_ref(ifa); break; } } IF_ADDR_RUNLOCK(ifp); return ((struct in6_ifaddr *)ifa); } /* * find the internet address corresponding to a given address. * ifaddr is returned referenced. */ struct in6_ifaddr * in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid) { struct in6_ifaddr *ia; IN6_IFADDR_RLOCK(); LIST_FOREACH(ia, IN6ADDR_HASH(addr), ia6_hash) { if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), addr)) { if (zoneid != 0 && zoneid != ia->ia_addr.sin6_scope_id) continue; ifa_ref(&ia->ia_ifa); break; } } IN6_IFADDR_RUNLOCK(); return (ia); } /* * find the internet address corresponding to a given interface and address. * ifaddr is returned referenced. */ struct in6_ifaddr * in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr) { struct ifaddr *ifa; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) { ifa_ref(ifa); break; } } IF_ADDR_RUNLOCK(ifp); return ((struct in6_ifaddr *)ifa); } /* * Find a link-local scoped address on ifp and return it if any. */ struct in6_ifaddr * in6ifa_llaonifp(struct ifnet *ifp) { struct sockaddr_in6 *sin6; struct ifaddr *ifa; if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) return (NULL); IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) || IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr) || IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr)) break; } IF_ADDR_RUNLOCK(ifp); return ((struct in6_ifaddr *)ifa); } /* * Convert IP6 address to printable (loggable) representation. Caller * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long. */ static char digits[] = "0123456789abcdef"; char * ip6_sprintf(char *ip6buf, const struct in6_addr *addr) { int i, cnt = 0, maxcnt = 0, idx = 0, index = 0; char *cp; const u_int16_t *a = (const u_int16_t *)addr; const u_int8_t *d; int dcolon = 0, zero = 0; cp = ip6buf; for (i = 0; i < 8; i++) { if (*(a + i) == 0) { cnt++; if (cnt == 1) idx = i; } else if (maxcnt < cnt) { maxcnt = cnt; index = idx; cnt = 0; } } if (maxcnt < cnt) { maxcnt = cnt; index = idx; } for (i = 0; i < 8; i++) { if (dcolon == 1) { if (*a == 0) { if (i == 7) *cp++ = ':'; a++; continue; } else dcolon = 2; } if (*a == 0) { if (dcolon == 0 && *(a + 1) == 0 && i == index) { if (i == 0) *cp++ = ':'; *cp++ = ':'; dcolon = 1; } else { *cp++ = '0'; *cp++ = ':'; } a++; continue; } d = (const u_char *)a; /* Try to eliminate leading zeros in printout like in :0001. */ zero = 1; *cp = digits[*d >> 4]; if (*cp != '0') { zero = 0; cp++; } *cp = digits[*d++ & 0xf]; if (zero == 0 || (*cp != '0')) { zero = 0; cp++; } *cp = digits[*d >> 4]; if (zero == 0 || (*cp != '0')) { zero = 0; cp++; } *cp++ = digits[*d & 0xf]; *cp++ = ':'; a++; } *--cp = '\0'; return (ip6buf); } int in6_localaddr(struct in6_addr *in6) { struct in6_ifaddr *ia; if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6)) return 1; IN6_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr, &ia->ia_prefixmask.sin6_addr)) { IN6_IFADDR_RUNLOCK(); return 1; } } IN6_IFADDR_RUNLOCK(); return (0); } /* * Return 1 if an internet address is for the local host and configured * on one of its interfaces. */ int in6_localip(struct in6_addr *in6) { struct in6_ifaddr *ia; IN6_IFADDR_RLOCK(); LIST_FOREACH(ia, IN6ADDR_HASH(in6), ia6_hash) { if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) { IN6_IFADDR_RUNLOCK(); return (1); } } IN6_IFADDR_RUNLOCK(); return (0); } + +/* + * Return 1 if an internet address is configured on an interface. + */ +int +in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr) +{ + struct in6_addr in6; + struct ifaddr *ifa; + struct in6_ifaddr *ia6; + + in6 = *addr; + if (in6_clearscope(&in6) || in6_clearscope(&in6)) + return (0); + in6_setscope(&in6, ifp, NULL); + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ia6 = (struct in6_ifaddr *)ifa; + if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6)) { + IF_ADDR_RUNLOCK(ifp); + return (1); + } + } + IF_ADDR_RUNLOCK(ifp); + + return (0); +} int in6_is_addr_deprecated(struct sockaddr_in6 *sa6) { struct in6_ifaddr *ia; IN6_IFADDR_RLOCK(); LIST_FOREACH(ia, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) { if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), &sa6->sin6_addr)) { if (ia->ia6_flags & IN6_IFF_DEPRECATED) { IN6_IFADDR_RUNLOCK(); return (1); /* true */ } break; } } IN6_IFADDR_RUNLOCK(); return (0); /* false */ } /* * return length of part which dst and src are equal * hard coding... */ int in6_matchlen(struct in6_addr *src, struct in6_addr *dst) { int match = 0; u_char *s = (u_char *)src, *d = (u_char *)dst; u_char *lim = s + 16, r; while (s < lim) if ((r = (*d++ ^ *s++)) != 0) { while (r < 128) { match++; r <<= 1; } break; } else match += 8; return match; } /* XXX: to be scope conscious */ int in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len) { int bytelen, bitlen; /* sanity check */ if (0 > len || len > 128) { log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n", len); return (0); } bytelen = len / 8; bitlen = len % 8; if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen)) return (0); if (bitlen != 0 && p1->s6_addr[bytelen] >> (8 - bitlen) != p2->s6_addr[bytelen] >> (8 - bitlen)) return (0); return (1); } void in6_prefixlen2mask(struct in6_addr *maskp, int len) { u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; int bytelen, bitlen, i; /* sanity check */ if (0 > len || len > 128) { log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n", len); return; } bzero(maskp, sizeof(*maskp)); bytelen = len / 8; bitlen = len % 8; for (i = 0; i < bytelen; i++) maskp->s6_addr[i] = 0xff; if (bitlen) maskp->s6_addr[bytelen] = maskarray[bitlen - 1]; } /* * return the best address out of the same scope. if no address was * found, return the first valid address from designated IF. */ struct in6_ifaddr * in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) { int dst_scope = in6_addrscope(dst), blen = -1, tlen; struct ifaddr *ifa; struct in6_ifaddr *besta = 0; struct in6_ifaddr *dep[2]; /* last-resort: deprecated */ dep[0] = dep[1] = NULL; /* * We first look for addresses in the same scope. * If there is one, return it. * If two or more, return one which matches the dst longest. * If none, return one of global addresses assigned other ifs. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; /* XXX: is there any case to allow anycast? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* don't use this interface */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { if (V_ip6_use_deprecated) dep[0] = (struct in6_ifaddr *)ifa; continue; } if (dst_scope == in6_addrscope(IFA_IN6(ifa))) { /* * call in6_matchlen() as few as possible */ if (besta) { if (blen == -1) blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst); tlen = in6_matchlen(IFA_IN6(ifa), dst); if (tlen > blen) { blen = tlen; besta = (struct in6_ifaddr *)ifa; } } else besta = (struct in6_ifaddr *)ifa; } } if (besta) { ifa_ref(&besta->ia_ifa); IF_ADDR_RUNLOCK(ifp); return (besta); } TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; /* XXX: is there any case to allow anycast? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* don't use this interface */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { if (V_ip6_use_deprecated) dep[1] = (struct in6_ifaddr *)ifa; continue; } if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); return (struct in6_ifaddr *)ifa; } /* use the last-resort values, that are, deprecated addresses */ if (dep[0]) { ifa_ref((struct ifaddr *)dep[0]); IF_ADDR_RUNLOCK(ifp); return dep[0]; } if (dep[1]) { ifa_ref((struct ifaddr *)dep[1]); IF_ADDR_RUNLOCK(ifp); return dep[1]; } IF_ADDR_RUNLOCK(ifp); return NULL; } /* * perform DAD when interface becomes IFF_UP. */ void in6_if_up(struct ifnet *ifp) { struct ifaddr *ifa; struct in6_ifaddr *ia; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (ia->ia6_flags & IN6_IFF_TENTATIVE) { /* * The TENTATIVE flag was likely set by hand * beforehand, implicitly indicating the need for DAD. * We may be able to skip the random delay in this * case, but we impose delays just in case. */ nd6_dad_start(ifa, arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz)); } } IF_ADDR_RUNLOCK(ifp); /* * special cases, like 6to4, are handled in in6_ifattach */ in6_ifattach(ifp, NULL); } int in6if_do_dad(struct ifnet *ifp) { if ((ifp->if_flags & IFF_LOOPBACK) != 0) return (0); if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) || (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD)) return (0); /* * Our DAD routine requires the interface up and running. * However, some interfaces can be up before the RUNNING * status. Additionaly, users may try to assign addresses * before the interface becomes up (or running). * We simply skip DAD in such a case as a work around. * XXX: we should rather mark "tentative" on such addresses, * and do DAD after the interface becomes ready. */ if ((ifp->if_flags & IFF_UP) == 0) return (0); return (1); } /* * Calculate max IPv6 MTU through all the interfaces and store it * to in6_maxmtu. */ void in6_setmaxmtu(void) { unsigned long maxmtu = 0; struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { /* this function can be called during ifnet initialization */ if (!ifp->if_afdata[AF_INET6]) continue; if ((ifp->if_flags & IFF_LOOPBACK) == 0 && IN6_LINKMTU(ifp) > maxmtu) maxmtu = IN6_LINKMTU(ifp); } IFNET_RUNLOCK_NOSLEEP(); if (maxmtu) /* update only when maxmtu is positive */ V_in6_maxmtu = maxmtu; } /* * Provide the length of interface identifiers to be used for the link attached * to the given interface. The length should be defined in "IPv6 over * xxx-link" document. Note that address architecture might also define * the length for a particular set of address prefixes, regardless of the * link type. As clarified in rfc2462bis, those two definitions should be * consistent, and those really are as of August 2004. */ int in6_if2idlen(struct ifnet *ifp) { switch (if_type(ifp)) { case IFT_ETHER: /* RFC2464 */ #ifdef IFT_PROPVIRTUAL case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */ #endif #ifdef IFT_L2VLAN case IFT_L2VLAN: /* ditto */ #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: /* ditto */ #endif #ifdef IFT_MIP case IFT_MIP: /* ditto */ #endif case IFT_INFINIBAND: return (64); case IFT_FDDI: /* RFC2467 */ return (64); case IFT_ISO88025: /* RFC2470 (IPv6 over Token Ring) */ return (64); case IFT_PPP: /* RFC2472 */ return (64); case IFT_ARCNET: /* RFC2497 */ return (64); case IFT_FRELAY: /* RFC2590 */ return (64); case IFT_IEEE1394: /* RFC3146 */ return (64); case IFT_GIF: return (64); /* draft-ietf-v6ops-mech-v2-07 */ case IFT_LOOP: return (64); /* XXX: is this really correct? */ default: /* * Unknown link type: * It might be controversial to use the today's common constant * of 64 for these cases unconditionally. For full compliance, * we should return an error in this case. On the other hand, * if we simply miss the standard for the link type or a new * standard is defined for a new link type, the IFID length * is very likely to be the common constant. As a compromise, * we always use the constant, but make an explicit notice * indicating the "unknown" case. */ printf("%s: unknown link type (%d)\n", __func__, if_type(ifp)); return (64); } } #include struct in6_llentry { struct llentry base; struct sockaddr_in6 l3_addr6; }; /* * Deletes an address from the address table. * This function is called by the timer functions * such as arptimer() and nd6_llinfo_timer(), and * the caller does the locking. */ static void in6_lltable_free(struct lltable *llt, struct llentry *lle) { LLE_WUNLOCK(lle); LLE_LOCK_DESTROY(lle); free(lle, M_LLTABLE); } static struct llentry * in6_lltable_new(const struct sockaddr *l3addr, u_int flags) { struct in6_llentry *lle; lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); if (lle == NULL) /* NB: caller generates msg */ return NULL; lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr; lle->base.lle_refcnt = 1; lle->base.lle_free = in6_lltable_free; LLE_LOCK_INIT(&lle->base); callout_init(&lle->base.ln_timer_ch, 1); return (&lle->base); } static void in6_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, const struct sockaddr *mask, u_int flags) { const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix; const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask; struct llentry *lle, *next; int i; /* * (flags & LLE_STATIC) means deleting all entries * including static ND6 entries. */ IF_AFDATA_WLOCK(llt->llt_ifp); for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { if (IN6_ARE_MASKED_ADDR_EQUAL( &satosin6(L3_ADDR(lle))->sin6_addr, &pfx->sin6_addr, &msk->sin6_addr) && ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { LLE_WLOCK(lle); if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); llentry_free(lle); } } } IF_AFDATA_WUNLOCK(llt->llt_ifp); } static int in6_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) { struct rtentry *rt; char ip6buf[INET6_ADDRSTRLEN]; KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); /* Our local addresses are always only installed on the default FIB. */ /* XXX rtalloc1 should take a const param */ rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0, RT_DEFAULT_FIB); if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { struct ifaddr *ifa; /* * Create an ND6 cache for an IPv6 neighbor * that is not covered by our own prefix. */ /* XXX ifaof_ifpforaddr should take a const param */ ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp); if (ifa != NULL) { ifa_free(ifa); if (rt != NULL) RTFREE_LOCKED(rt); return 0; } log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr)); if (rt != NULL) RTFREE_LOCKED(rt); return EINVAL; } RTFREE_LOCKED(rt); return 0; } static struct llentry * in6_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; struct llentries *lleh; u_int hashkey; IF_AFDATA_LOCK_ASSERT(ifp); KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); hashkey = sin6->sin6_addr.s6_addr32[3]; lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; LIST_FOREACH(lle, lleh, lle_next) { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle); if (lle->la_flags & LLE_DELETED) continue; if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr, sizeof(struct in6_addr)) == 0) break; } if (lle == NULL) { if (!(flags & LLE_CREATE)) return (NULL); IF_AFDATA_WLOCK_ASSERT(ifp); /* * A route that covers the given address must have * been installed 1st because we are doing a resolution, * verify this. */ if (!(flags & LLE_IFADDR) && in6_lltable_rtcheck(ifp, flags, l3addr) != 0) return NULL; lle = in6_lltable_new(l3addr, flags); if (lle == NULL) { log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); return NULL; } lle->la_flags = flags & ~LLE_CREATE; if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { bcopy(if_lladdr(ifp), &lle->ll_addr, if_addrlen(ifp)); lle->la_flags |= (LLE_VALID | LLE_STATIC); } lle->lle_tbl = llt; lle->lle_head = lleh; lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); } else if (flags & LLE_DELETE) { if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { LLE_WLOCK(lle); lle->la_flags |= LLE_DELETED; #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif if ((lle->la_flags & (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC) llentry_free(lle); else LLE_WUNLOCK(lle); } lle = (void *)-1; } if (LLE_IS_VALID(lle)) { if (flags & LLE_EXCLUSIVE) LLE_WLOCK(lle); else LLE_RLOCK(lle); } return (lle); } static int in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) { struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; /* XXX stack use */ struct { struct rt_msghdr rtm; struct sockaddr_in6 sin6; /* * ndp.c assumes that sdl is word aligned */ #ifdef __LP64__ uint32_t pad; #endif struct sockaddr_dl sdl; } ndpc; int i, error; if (ifp->if_flags & IFF_LOOPBACK) return 0; LLTABLE_LOCK_ASSERT(); error = 0; for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { struct sockaddr_dl *sdl; /* skip deleted or invalid entries */ if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID) continue; /* Skip if jailed and not a valid IP of the prison. */ if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0) continue; /* * produce a msg made of: * struct rt_msghdr; * struct sockaddr_in6 (IPv6) * struct sockaddr_dl; */ bzero(&ndpc, sizeof(ndpc)); ndpc.rtm.rtm_msglen = sizeof(ndpc); ndpc.rtm.rtm_version = RTM_VERSION; ndpc.rtm.rtm_type = RTM_GET; ndpc.rtm.rtm_flags = RTF_UP; ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; ndpc.sin6.sin6_family = AF_INET6; ndpc.sin6.sin6_len = sizeof(ndpc.sin6); bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle)); if (V_deembed_scopeid) sa6_recoverscope(&ndpc.sin6); /* publish */ if (lle->la_flags & LLE_PUB) ndpc.rtm.rtm_flags |= RTF_ANNOUNCE; sdl = &ndpc.sdl; sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(*sdl); sdl->sdl_alen = if_addrlen(ifp); sdl->sdl_index = ifp->if_index; sdl->sdl_type = if_type(ifp); bcopy(&lle->ll_addr, LLADDR(sdl), if_addrlen(ifp)); ndpc.rtm.rtm_rmx.rmx_expire = lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) ndpc.rtm.rtm_flags |= RTF_STATIC; ndpc.rtm.rtm_index = ifp->if_index; error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); if (error) break; } } return error; } void * in6_domifattach(struct ifnet *ifp) { struct in6_ifextra *ext; /* There are not IPv6-capable interfaces. */ switch (if_type(ifp)) { case IFT_PFLOG: case IFT_PFSYNC: case IFT_USB: return (NULL); default: break; } ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK); bzero(ext, sizeof(*ext)); ext->in6_ifstat = malloc(sizeof(counter_u64_t) * sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK); COUNTER_ARRAY_ALLOC(ext->in6_ifstat, sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK); ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) * sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK); COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat, sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK); ext->nd_ifinfo = nd6_ifattach(ifp); ext->scope6_id = scope6_ifattach(ifp); ext->lltable = lltable_init(ifp, AF_INET6); if (ext->lltable != NULL) { ext->lltable->llt_prefix_free = in6_lltable_prefix_free; ext->lltable->llt_lookup = in6_lltable_lookup; ext->lltable->llt_dump = in6_lltable_dump; } ext->mld_ifinfo = mld_domifattach(ifp); return ext; } int in6_domifmtu(struct ifnet *ifp) { return (IN6_LINKMTU(ifp)); } void in6_domifdetach(struct ifnet *ifp, void *aux) { struct in6_ifextra *ext = (struct in6_ifextra *)aux; mld_domifdetach(ifp); scope6_ifdetach(ext->scope6_id); nd6_ifdetach(ext->nd_ifinfo); lltable_free(ext->lltable); COUNTER_ARRAY_FREE(ext->in6_ifstat, sizeof(struct in6_ifstat) / sizeof(uint64_t)); free(ext->in6_ifstat, M_IFADDR); COUNTER_ARRAY_FREE(ext->icmp6_ifstat, sizeof(struct icmp6_ifstat) / sizeof(uint64_t)); free(ext->icmp6_ifstat, M_IFADDR); free(ext, M_IFADDR); } /* * Convert sockaddr_in6 to sockaddr_in. Original sockaddr_in6 must be * v4 mapped addr or v4 compat addr */ void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { bzero(sin, sizeof(*sin)); sin->sin_len = sizeof(struct sockaddr_in); sin->sin_family = AF_INET; sin->sin_port = sin6->sin6_port; sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3]; } /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */ void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(struct sockaddr_in6); sin6->sin6_family = AF_INET6; sin6->sin6_port = sin->sin_port; sin6->sin6_addr.s6_addr32[0] = 0; sin6->sin6_addr.s6_addr32[1] = 0; sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr; } /* Convert sockaddr_in6 into sockaddr_in. */ void in6_sin6_2_sin_in_sock(struct sockaddr *nam) { struct sockaddr_in *sin_p; struct sockaddr_in6 sin6; /* * Save original sockaddr_in6 addr and convert it * to sockaddr_in. */ sin6 = *(struct sockaddr_in6 *)nam; sin_p = (struct sockaddr_in *)nam; in6_sin6_2_sin(sin_p, &sin6); } /* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. */ void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) { struct sockaddr_in *sin_p; struct sockaddr_in6 *sin6_p; sin6_p = malloc(sizeof *sin6_p, M_SONAME, M_WAITOK); sin_p = (struct sockaddr_in *)*nam; in6_sin_2_v4mapsin6(sin_p, sin6_p); free(*nam, M_SONAME); *nam = (struct sockaddr *)sin6_p; } Index: projects/ifnet/sys/netinet6/in6.h =================================================================== --- projects/ifnet/sys/netinet6/in6.h (revision 281649) +++ projects/ifnet/sys/netinet6/in6.h (revision 281650) @@ -1,730 +1,731 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.h 8.3 (Berkeley) 1/3/94 * $FreeBSD$ */ #ifndef __KAME_NETINET_IN_H_INCLUDED_ #error "do not include netinet6/in6.h directly, include netinet/in.h. see RFC2553" #endif #ifndef _NETINET6_IN6_H_ #define _NETINET6_IN6_H_ /* * Identification of the network protocol stack * for *BSD-current/release: http://www.kame.net/dev/cvsweb.cgi/kame/COVERAGE * has the table of implementation/integration differences. */ #define __KAME__ #define __KAME_VERSION "FreeBSD" /* * IPv6 port allocation rules should mirror the IPv4 rules and are controlled * by the net.inet.ip.portrange sysctl tree. The following defines exist * for compatibility with userland applications that need them. */ #if __BSD_VISIBLE #define IPV6PORT_RESERVED 1024 #define IPV6PORT_ANONMIN 49152 #define IPV6PORT_ANONMAX 65535 #define IPV6PORT_RESERVEDMIN 600 #define IPV6PORT_RESERVEDMAX (IPV6PORT_RESERVED-1) #endif /* * IPv6 address */ struct in6_addr { union { uint8_t __u6_addr8[16]; uint16_t __u6_addr16[8]; uint32_t __u6_addr32[4]; } __u6_addr; /* 128-bit IP6 address */ }; #define s6_addr __u6_addr.__u6_addr8 #ifdef _KERNEL /* XXX nonstandard */ #define s6_addr8 __u6_addr.__u6_addr8 #define s6_addr16 __u6_addr.__u6_addr16 #define s6_addr32 __u6_addr.__u6_addr32 #endif #define INET6_ADDRSTRLEN 46 /* * XXX missing POSIX.1-2001 macro IPPROTO_IPV6. */ /* * Socket address for IPv6 */ #if __BSD_VISIBLE #define SIN6_LEN #endif struct sockaddr_in6 { uint8_t sin6_len; /* length of this struct */ sa_family_t sin6_family; /* AF_INET6 */ in_port_t sin6_port; /* Transport layer port # */ uint32_t sin6_flowinfo; /* IP6 flow information */ struct in6_addr sin6_addr; /* IP6 address */ uint32_t sin6_scope_id; /* scope zone index */ }; /* * Local definition for masks */ #ifdef _KERNEL /* XXX nonstandard */ #define IN6MASK0 {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}} #define IN6MASK32 {{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK64 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK96 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK128 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}} #endif #ifdef _KERNEL extern const struct sockaddr_in6 sa6_any; extern const struct in6_addr in6mask0; extern const struct in6_addr in6mask32; extern const struct in6_addr in6mask64; extern const struct in6_addr in6mask96; extern const struct in6_addr in6mask128; #endif /* _KERNEL */ /* * Macros started with IPV6_ADDR is KAME local */ #ifdef _KERNEL /* XXX nonstandard */ #if _BYTE_ORDER == _BIG_ENDIAN #define IPV6_ADDR_INT32_ONE 1 #define IPV6_ADDR_INT32_TWO 2 #define IPV6_ADDR_INT32_MNL 0xff010000 #define IPV6_ADDR_INT32_MLL 0xff020000 #define IPV6_ADDR_INT32_SMP 0x0000ffff #define IPV6_ADDR_INT16_ULL 0xfe80 #define IPV6_ADDR_INT16_USL 0xfec0 #define IPV6_ADDR_INT16_MLL 0xff02 #elif _BYTE_ORDER == _LITTLE_ENDIAN #define IPV6_ADDR_INT32_ONE 0x01000000 #define IPV6_ADDR_INT32_TWO 0x02000000 #define IPV6_ADDR_INT32_MNL 0x000001ff #define IPV6_ADDR_INT32_MLL 0x000002ff #define IPV6_ADDR_INT32_SMP 0xffff0000 #define IPV6_ADDR_INT16_ULL 0x80fe #define IPV6_ADDR_INT16_USL 0xc0fe #define IPV6_ADDR_INT16_MLL 0x02ff #endif #endif /* * Definition of some useful macros to handle IP6 addresses */ #if __BSD_VISIBLE #define IN6ADDR_ANY_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6ADDR_LOOPBACK_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_NODELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_INTFACELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}} #define IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16 }}} #endif extern const struct in6_addr in6addr_any; extern const struct in6_addr in6addr_loopback; #if __BSD_VISIBLE extern const struct in6_addr in6addr_nodelocal_allnodes; extern const struct in6_addr in6addr_linklocal_allnodes; extern const struct in6_addr in6addr_linklocal_allrouters; extern const struct in6_addr in6addr_linklocal_allv2routers; #endif /* * Equality * NOTE: Some of kernel programming environment (for example, openbsd/sparc) * does not supply memcmp(). For userland memcmp() is preferred as it is * in ANSI standard. */ #ifdef _KERNEL #define IN6_ARE_ADDR_EQUAL(a, b) \ (bcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) #else #if __BSD_VISIBLE #define IN6_ARE_ADDR_EQUAL(a, b) \ (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) #endif #endif /* * Unspecified */ #define IN6_IS_ADDR_UNSPECIFIED(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] == 0) /* * Loopback */ #define IN6_IS_ADDR_LOOPBACK(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] == ntohl(1)) /* * IPv4 compatible */ #define IN6_IS_ADDR_V4COMPAT(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] != 0 && \ (a)->__u6_addr.__u6_addr32[3] != ntohl(1)) /* * Mapped */ #define IN6_IS_ADDR_V4MAPPED(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == ntohl(0x0000ffff)) /* * KAME Scope Values */ #ifdef _KERNEL /* XXX nonstandard */ #define IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 #define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ #define IPV6_ADDR_SCOPE_GLOBAL 0x0e #else #define __IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define __IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 #define __IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define __IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define __IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ #define __IPV6_ADDR_SCOPE_GLOBAL 0x0e #endif /* * Unicast Scope * Note that we must check topmost 10 bits only, not 16 bits (see RFC2373). */ #define IN6_IS_ADDR_LINKLOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80)) #define IN6_IS_ADDR_SITELOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0)) /* * Multicast */ #define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff) #ifdef _KERNEL /* XXX nonstandard */ #define IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) #else #define __IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) #endif /* * Multicast Scope */ #ifdef _KERNEL /* refers nonstandard items */ #define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_NODELOCAL)) #define IN6_IS_ADDR_MC_INTFACELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_INTFACELOCAL)) #define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL)) #define IN6_IS_ADDR_MC_SITELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL)) #define IN6_IS_ADDR_MC_ORGLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_ORGLOCAL)) #define IN6_IS_ADDR_MC_GLOBAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_GLOBAL)) #else #define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_NODELOCAL)) #define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL)) #define IN6_IS_ADDR_MC_SITELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL)) #define IN6_IS_ADDR_MC_ORGLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL)) #define IN6_IS_ADDR_MC_GLOBAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_GLOBAL)) #endif #ifdef _KERNEL /* nonstandard */ /* * KAME Scope */ #define IN6_IS_SCOPE_LINKLOCAL(a) \ ((IN6_IS_ADDR_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_LINKLOCAL(a))) #define IN6_IS_SCOPE_EMBED(a) \ ((IN6_IS_ADDR_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_INTFACELOCAL(a))) #define IFA6_IS_DEPRECATED(a) \ ((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \ (a)->ia6_lifetime.ia6t_pltime) #define IFA6_IS_INVALID(a) \ ((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \ (a)->ia6_lifetime.ia6t_vltime) #endif /* _KERNEL */ /* * IP6 route structure */ #if __BSD_VISIBLE struct route_in6 { struct rtentry *ro_rt; struct llentry *ro_lle; struct in6_addr *ro_ia6; int ro_flags; struct sockaddr_in6 ro_dst; }; #endif /* * Options for use with [gs]etsockopt at the IPV6 level. * First word of comment is data type; bool is stored in int. */ /* no hdrincl */ #if 0 /* the followings are relic in IPv4 and hence are disabled */ #define IPV6_OPTIONS 1 /* buf/ip6_opts; set/get IP6 options */ #define IPV6_RECVOPTS 5 /* bool; receive all IP6 opts w/dgram */ #define IPV6_RECVRETOPTS 6 /* bool; receive IP6 opts for response */ #define IPV6_RECVDSTADDR 7 /* bool; receive IP6 dst addr w/dgram */ #define IPV6_RETOPTS 8 /* ip6_opts; set/get IP6 options */ #endif #define IPV6_SOCKOPT_RESERVED1 3 /* reserved for future use */ #define IPV6_UNICAST_HOPS 4 /* int; IP6 hops */ #define IPV6_MULTICAST_IF 9 /* u_int; set/get IP6 multicast i/f */ #define IPV6_MULTICAST_HOPS 10 /* int; set/get IP6 multicast hops */ #define IPV6_MULTICAST_LOOP 11 /* u_int; set/get IP6 multicast loopback */ #define IPV6_JOIN_GROUP 12 /* ipv6_mreq; join a group membership */ #define IPV6_LEAVE_GROUP 13 /* ipv6_mreq; leave a group membership */ #define IPV6_PORTRANGE 14 /* int; range to choose for unspec port */ #define ICMP6_FILTER 18 /* icmp6_filter; icmp6 filter */ /* RFC2292 options */ #ifdef _KERNEL #define IPV6_2292PKTINFO 19 /* bool; send/recv if, src/dst addr */ #define IPV6_2292HOPLIMIT 20 /* bool; hop limit */ #define IPV6_2292NEXTHOP 21 /* bool; next hop addr */ #define IPV6_2292HOPOPTS 22 /* bool; hop-by-hop option */ #define IPV6_2292DSTOPTS 23 /* bool; destinaion option */ #define IPV6_2292RTHDR 24 /* bool; routing header */ #define IPV6_2292PKTOPTIONS 25 /* buf/cmsghdr; set/get IPv6 options */ #endif #define IPV6_CHECKSUM 26 /* int; checksum offset for raw socket */ #define IPV6_V6ONLY 27 /* bool; make AF_INET6 sockets v6 only */ #ifndef _KERNEL #define IPV6_BINDV6ONLY IPV6_V6ONLY #endif #if 1 /* IPSEC */ #define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ #endif /* IPSEC */ /* 29; unused; was IPV6_FAITH */ #if 1 /* IPV6FIREWALL */ #define IPV6_FW_ADD 30 /* add a firewall rule to chain */ #define IPV6_FW_DEL 31 /* delete a firewall rule from chain */ #define IPV6_FW_FLUSH 32 /* flush firewall rule chain */ #define IPV6_FW_ZERO 33 /* clear single/all firewall counter(s) */ #define IPV6_FW_GET 34 /* get entire firewall rule chain */ #endif /* new socket options introduced in RFC3542 */ #define IPV6_RTHDRDSTOPTS 35 /* ip6_dest; send dst option before rthdr */ #define IPV6_RECVPKTINFO 36 /* bool; recv if, dst addr */ #define IPV6_RECVHOPLIMIT 37 /* bool; recv hop limit */ #define IPV6_RECVRTHDR 38 /* bool; recv routing header */ #define IPV6_RECVHOPOPTS 39 /* bool; recv hop-by-hop option */ #define IPV6_RECVDSTOPTS 40 /* bool; recv dst option after rthdr */ #ifdef _KERNEL #define IPV6_RECVRTHDRDSTOPTS 41 /* bool; recv dst option before rthdr */ #endif #define IPV6_USE_MIN_MTU 42 /* bool; send packets at the minimum MTU */ #define IPV6_RECVPATHMTU 43 /* bool; notify an according MTU */ #define IPV6_PATHMTU 44 /* mtuinfo; get the current path MTU (sopt), 4 bytes int; MTU notification (cmsg) */ #if 0 /*obsoleted during 2292bis -> 3542*/ #define IPV6_REACHCONF 45 /* no data; ND reachability confirm (cmsg only/not in of RFC3542) */ #endif /* more new socket options introduced in RFC3542 */ #define IPV6_PKTINFO 46 /* in6_pktinfo; send if, src addr */ #define IPV6_HOPLIMIT 47 /* int; send hop limit */ #define IPV6_NEXTHOP 48 /* sockaddr; next hop addr */ #define IPV6_HOPOPTS 49 /* ip6_hbh; send hop-by-hop option */ #define IPV6_DSTOPTS 50 /* ip6_dest; send dst option befor rthdr */ #define IPV6_RTHDR 51 /* ip6_rthdr; send routing header */ #if 0 #define IPV6_PKTOPTIONS 52 /* buf/cmsghdr; set/get IPv6 options */ /* obsoleted by RFC3542 */ #endif #define IPV6_RECVTCLASS 57 /* bool; recv traffic class values */ #define IPV6_AUTOFLOWLABEL 59 /* bool; attach flowlabel automagically */ #define IPV6_TCLASS 61 /* int; send traffic class value */ #define IPV6_DONTFRAG 62 /* bool; disable IPv6 fragmentation */ #define IPV6_PREFER_TEMPADDR 63 /* int; prefer temporary addresses as * the source address. */ #define IPV6_BINDANY 64 /* bool: allow bind to any address */ #define IPV6_BINDMULTI 65 /* bool; allow multibind to same addr/port */ #define IPV6_RSS_LISTEN_BUCKET 66 /* int; set RSS listen bucket */ #define IPV6_FLOWID 67 /* int; flowid of given socket */ #define IPV6_FLOWTYPE 68 /* int; flowtype of given socket */ #define IPV6_RSSBUCKETID 69 /* int; RSS bucket ID of given socket */ /* * The following option is private; do not use it from user applications. * It is deliberately defined to the same value as IP_MSFILTER. */ #define IPV6_MSFILTER 74 /* struct __msfilterreq; * set/get multicast source filter list. */ /* to define items, should talk with KAME guys first, for *BSD compatibility */ #define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. XXX old spec */ #define IPV6_RTHDR_STRICT 1 /* this hop must be a neighbor. XXX old spec */ #define IPV6_RTHDR_TYPE_0 0 /* IPv6 routing header type 0 */ /* * Defaults and limits for options */ #define IPV6_DEFAULT_MULTICAST_HOPS 1 /* normally limit m'casts to 1 hop */ #define IPV6_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ /* * The im6o_membership vector for each socket is now dynamically allocated at * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized * according to a power-of-two increment. */ #define IPV6_MIN_MEMBERSHIPS 31 #define IPV6_MAX_MEMBERSHIPS 4095 /* * Default resource limits for IPv6 multicast source filtering. * These may be modified by sysctl. */ #define IPV6_MAX_GROUP_SRC_FILTER 512 /* sources per group */ #define IPV6_MAX_SOCK_SRC_FILTER 128 /* sources per socket/group */ /* * Argument structure for IPV6_JOIN_GROUP and IPV6_LEAVE_GROUP. */ struct ipv6_mreq { struct in6_addr ipv6mr_multiaddr; unsigned int ipv6mr_interface; }; /* * IPV6_PKTINFO: Packet information(RFC2292 sec 5) */ struct in6_pktinfo { struct in6_addr ipi6_addr; /* src/dst IPv6 address */ unsigned int ipi6_ifindex; /* send/recv interface index */ }; /* * Control structure for IPV6_RECVPATHMTU socket option. */ struct ip6_mtuinfo { struct sockaddr_in6 ip6m_addr; /* or sockaddr_storage? */ uint32_t ip6m_mtu; }; /* * Argument for IPV6_PORTRANGE: * - which range to search when port is unspecified at bind() or connect() */ #define IPV6_PORTRANGE_DEFAULT 0 /* default range */ #define IPV6_PORTRANGE_HIGH 1 /* "high" - request firewall bypass */ #define IPV6_PORTRANGE_LOW 2 /* "low" - vouchsafe security */ #if __BSD_VISIBLE /* * Definitions for inet6 sysctl operations. * * Third level is protocol number. * Fourth level is desired variable within that protocol. */ #define IPV6PROTO_MAXID (IPPROTO_PIM + 1) /* don't list to IPV6PROTO_MAX */ /* * Names for IP sysctl objects */ #define IPV6CTL_FORWARDING 1 /* act as router */ #define IPV6CTL_SENDREDIRECTS 2 /* may send redirects when forwarding*/ #define IPV6CTL_DEFHLIM 3 /* default Hop-Limit */ #ifdef notyet #define IPV6CTL_DEFMTU 4 /* default MTU */ #endif #define IPV6CTL_FORWSRCRT 5 /* forward source-routed dgrams */ #define IPV6CTL_STATS 6 /* stats */ #define IPV6CTL_MRTSTATS 7 /* multicast forwarding stats */ #define IPV6CTL_MRTPROTO 8 /* multicast routing protocol */ #define IPV6CTL_MAXFRAGPACKETS 9 /* max packets reassembly queue */ #define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */ #define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimume logging interval */ #define IPV6CTL_ACCEPT_RTADV 12 /* 13; unused; was: IPV6CTL_KEEPFAITH */ #define IPV6CTL_LOG_INTERVAL 14 #define IPV6CTL_HDRNESTLIMIT 15 #define IPV6CTL_DAD_COUNT 16 #define IPV6CTL_AUTO_FLOWLABEL 17 #define IPV6CTL_DEFMCASTHLIM 18 #define IPV6CTL_GIF_HLIM 19 /* default HLIM for gif encap packet */ #define IPV6CTL_KAME_VERSION 20 #define IPV6CTL_USE_DEPRECATED 21 /* use deprecated addr (RFC2462 5.5.4) */ #define IPV6CTL_RR_PRUNE 22 /* walk timer for router renumbering */ #if 0 /* obsolete */ #define IPV6CTL_MAPPED_ADDR 23 #endif #define IPV6CTL_V6ONLY 24 /* IPV6CTL_RTEXPIRE 25 deprecated */ /* IPV6CTL_RTMINEXPIRE 26 deprecated */ /* IPV6CTL_RTMAXCACHE 27 deprecated */ #define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */ #define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ #define IPV6CTL_TEMPVLTIME 34 /* valid lifetime for tmpaddrs */ #define IPV6CTL_AUTO_LINKLOCAL 35 /* automatic link-local addr assign */ #define IPV6CTL_RIP6STATS 36 /* raw_ip6 stats */ #define IPV6CTL_PREFER_TEMPADDR 37 /* prefer temporary addr as src */ #define IPV6CTL_ADDRCTLPOLICY 38 /* get/set address selection policy */ #define IPV6CTL_USE_DEFAULTZONE 39 /* use default scope zone */ #define IPV6CTL_MAXFRAGS 41 /* max fragments */ #if 0 #define IPV6CTL_IFQ 42 /* ip6intrq node */ #define IPV6CTL_ISATAPRTR 43 /* isatap router */ #endif #define IPV6CTL_MCAST_PMTU 44 /* enable pMTU discovery for multicast? */ /* New entries should be added here from current IPV6CTL_MAXID value. */ /* to define items, should talk with KAME guys first, for *BSD compatibility */ #define IPV6CTL_STEALTH 45 #define ICMPV6CTL_ND6_ONLINKNSRFC4861 47 #define IPV6CTL_NO_RADR 48 /* No defroute from RA */ #define IPV6CTL_NORBIT_RAIF 49 /* Disable R-bit in NA on RA * receiving IF. */ #define IPV6CTL_RFC6204W3 50 /* Accept defroute even when forwarding enabled */ #define IPV6CTL_MAXID 51 #endif /* __BSD_VISIBLE */ /* * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/, * the protocol specific mbuf flags are shared between them. */ #define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */ #define M_IP6_NEXTHOP M_PROTO2 /* explicit ip nexthop */ #define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */ #define M_SKIP_FIREWALL M_PROTO3 /* skip firewall processing */ #define M_AUTHIPHDR M_PROTO4 #define M_DECRYPTED M_PROTO5 #define M_LOOP M_PROTO6 #define M_AUTHIPDGM M_PROTO7 #define M_RTALERT_MLD M_PROTO8 #ifdef _KERNEL struct cmsghdr; struct ip6_hdr; int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t); int in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t); int in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t, u_int32_t); int in6_localaddr(struct in6_addr *); int in6_localip(struct in6_addr *); +int in6_ifhasaddr(struct ifnet *, struct in6_addr *); int in6_addrscope(const struct in6_addr *); char *ip6_sprintf(char *, const struct in6_addr *); struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *); extern void in6_if_up(struct ifnet *); struct sockaddr; extern u_char ip6_protox[]; void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); void in6_sin6_2_sin_in_sock(struct sockaddr *nam); void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam); extern void addrsel_policy_init(void); #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) #endif /* _KERNEL */ #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #ifndef _SOCKLEN_T_DECLARED typedef __socklen_t socklen_t; #define _SOCKLEN_T_DECLARED #endif #if __BSD_VISIBLE __BEGIN_DECLS struct cmsghdr; extern int inet6_option_space(int); extern int inet6_option_init(void *, struct cmsghdr **, int); extern int inet6_option_append(struct cmsghdr *, const uint8_t *, int, int); extern uint8_t *inet6_option_alloc(struct cmsghdr *, int, int, int); extern int inet6_option_next(const struct cmsghdr *, uint8_t **); extern int inet6_option_find(const struct cmsghdr *, uint8_t **, int); extern size_t inet6_rthdr_space(int, int); extern struct cmsghdr *inet6_rthdr_init(void *, int); extern int inet6_rthdr_add(struct cmsghdr *, const struct in6_addr *, unsigned int); extern int inet6_rthdr_lasthop(struct cmsghdr *, unsigned int); #if 0 /* not implemented yet */ extern int inet6_rthdr_reverse(const struct cmsghdr *, struct cmsghdr *); #endif extern int inet6_rthdr_segments(const struct cmsghdr *); extern struct in6_addr *inet6_rthdr_getaddr(struct cmsghdr *, int); extern int inet6_rthdr_getflags(const struct cmsghdr *, int); extern int inet6_opt_init(void *, socklen_t); extern int inet6_opt_append(void *, socklen_t, int, uint8_t, socklen_t, uint8_t, void **); extern int inet6_opt_finish(void *, socklen_t, int); extern int inet6_opt_set_val(void *, int, void *, socklen_t); extern int inet6_opt_next(void *, socklen_t, int, uint8_t *, socklen_t *, void **); extern int inet6_opt_find(void *, socklen_t, int, uint8_t, socklen_t *, void **); extern int inet6_opt_get_val(void *, int, void *, socklen_t); extern socklen_t inet6_rth_space(int, int); extern void *inet6_rth_init(void *, socklen_t, int, int); extern int inet6_rth_add(void *, const struct in6_addr *); extern int inet6_rth_reverse(const void *, void *); extern int inet6_rth_segments(const void *); extern struct in6_addr *inet6_rth_getaddr(const void *, int); __END_DECLS #endif /* __BSD_VISIBLE */ #endif /* !_NETINET6_IN6_H_ */ Index: projects/ifnet/sys =================================================================== --- projects/ifnet/sys (revision 281649) +++ projects/ifnet/sys (revision 281650) Property changes on: projects/ifnet/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r281640-281649 Index: projects/ifnet =================================================================== --- projects/ifnet (revision 281649) +++ projects/ifnet (revision 281650) Property changes on: projects/ifnet ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r281640-281649