Index: projects/clang500-import/bin/date/date.c =================================================================== --- projects/clang500-import/bin/date/date.c (revision 321306) +++ projects/clang500-import/bin/date/date.c (revision 321307) @@ -1,331 +1,335 @@ /*- * Copyright (c) 1985, 1987, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static char const copyright[] = "@(#) Copyright (c) 1985, 1987, 1988, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)date.c 8.2 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/stat.h> #include <sys/time.h> #include <ctype.h> #include <err.h> #include <errno.h> #include <locale.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <syslog.h> #include <unistd.h> #include <utmpx.h> #include "extern.h" #include "vary.h" #ifndef TM_YEAR_BASE #define TM_YEAR_BASE 1900 #endif static time_t tval; int retval; static void setthetime(const char *, const char *, int, int); static void badformat(void); static void usage(void); static const char *rfc2822_format = "%a, %d %b %Y %T %z"; int main(int argc, char *argv[]) { struct timezone tz; int ch, rflag; int jflag, nflag, Rflag; const char *format; char buf[1024]; char *endptr, *fmt; char *tmp; int set_timezone; struct vary *v; const struct vary *badv; - struct tm lt; + struct tm *lt; struct stat sb; v = NULL; fmt = NULL; (void) setlocale(LC_TIME, ""); tz.tz_dsttime = tz.tz_minuteswest = 0; rflag = 0; jflag = nflag = Rflag = 0; set_timezone = 0; while ((ch = getopt(argc, argv, "d:f:jnRr:t:uv:")) != -1) switch((char)ch) { case 'd': /* daylight savings time */ tz.tz_dsttime = strtol(optarg, &endptr, 10) ?
1 : 0; if (endptr == optarg || *endptr != '\0') usage(); set_timezone = 1; break; case 'f': fmt = optarg; break; case 'j': jflag = 1; /* don't set time */ break; case 'n': /* don't set network */ nflag = 1; break; case 'R': /* RFC 2822 datetime format */ Rflag = 1; break; case 'r': /* user specified seconds */ rflag = 1; tval = strtoq(optarg, &tmp, 0); if (*tmp != 0) { if (stat(optarg, &sb) == 0) tval = sb.st_mtim.tv_sec; else usage(); } break; case 't': /* minutes west of UTC */ /* error check; don't allow "PST" */ tz.tz_minuteswest = strtol(optarg, &endptr, 10); if (endptr == optarg || *endptr != '\0') usage(); set_timezone = 1; break; case 'u': /* do everything in UTC */ (void)setenv("TZ", "UTC0", 1); break; case 'v': v = vary_append(v, optarg); break; default: usage(); } argc -= optind; argv += optind; /* * If -d or -t, set the timezone or daylight savings time; this * doesn't belong here; the kernel should not know about either. */ if (set_timezone && settimeofday(NULL, &tz) != 0) err(1, "settimeofday (timezone)"); if (!rflag && time(&tval) == -1) err(1, "time"); format = "%+"; if (Rflag) format = rfc2822_format; /* allow the operands in any order */ if (*argv && **argv == '+') { format = *argv + 1; ++argv; } if (*argv) { setthetime(fmt, *argv, jflag, nflag); ++argv; } else if (fmt != NULL) usage(); if (*argv && **argv == '+') format = *argv + 1; - lt = *localtime(&tval); - badv = vary_apply(v, <); + lt = localtime(&tval); + if (lt == NULL) + errx(1, "invalid time"); + badv = vary_apply(v, lt); if (badv) { fprintf(stderr, "%s: Cannot apply date adjustment\n", badv->arg); vary_destroy(v); usage(); } vary_destroy(v); if (format == rfc2822_format) /* * When using RFC 2822 datetime format, don't honor the * locale. */ setlocale(LC_TIME, "C"); - (void)strftime(buf, sizeof(buf), format, <); + (void)strftime(buf, sizeof(buf), format, lt); (void)printf("%s\n", buf); if (fflush(stdout)) err(1, "stdout"); exit(retval); } #define ATOI2(s) ((s) += 2, ((s)[-2] - '0') * 10 + ((s)[-1] - '0')) static void setthetime(const char *fmt, const char *p, int jflag, int nflag) { struct utmpx utx; struct tm *lt; struct timeval tv; const char *dot, *t; int century; lt = localtime(&tval); + if (lt == NULL) + errx(1, "invalid time"); lt->tm_isdst = -1; /* divine correct DST */ if (fmt != NULL) { t = strptime(p, fmt, lt); if (t == NULL) { fprintf(stderr, "Failed conversion of ``%s''" " using format ``%s''\n", p, fmt); badformat(); } else if (*t != '\0') fprintf(stderr, "Warning: Ignoring %ld extraneous" " characters in date string (%s)\n", (long) strlen(t), t); } else { for (t = p, dot = NULL; *t; ++t) { if (isdigit(*t)) continue; if (*t == '.' && dot == NULL) { dot = t; continue; } badformat(); } if (dot != NULL) { /* .ss */ dot++; /* *dot++ = '\0'; */ if (strlen(dot) != 2) badformat(); lt->tm_sec = ATOI2(dot); if (lt->tm_sec > 61) badformat(); } else lt->tm_sec = 0; century = 0; /* if p has a ".ss" field then let's pretend it's not there */ switch (strlen(p) - ((dot != NULL) ? 
3 : 0)) { case 12: /* cc */ lt->tm_year = ATOI2(p) * 100 - TM_YEAR_BASE; century = 1; /* FALLTHROUGH */ case 10: /* yy */ if (century) lt->tm_year += ATOI2(p); else { lt->tm_year = ATOI2(p); if (lt->tm_year < 69) /* hack for 2000 ;-} */ lt->tm_year += 2000 - TM_YEAR_BASE; else lt->tm_year += 1900 - TM_YEAR_BASE; } /* FALLTHROUGH */ case 8: /* mm */ lt->tm_mon = ATOI2(p); if (lt->tm_mon > 12) badformat(); --lt->tm_mon; /* time struct is 0 - 11 */ /* FALLTHROUGH */ case 6: /* dd */ lt->tm_mday = ATOI2(p); if (lt->tm_mday > 31) badformat(); /* FALLTHROUGH */ case 4: /* HH */ lt->tm_hour = ATOI2(p); if (lt->tm_hour > 23) badformat(); /* FALLTHROUGH */ case 2: /* MM */ lt->tm_min = ATOI2(p); if (lt->tm_min > 59) badformat(); break; default: badformat(); } } /* convert broken-down time to GMT clock time */ if ((tval = mktime(lt)) == -1) errx(1, "nonexistent time"); if (!jflag) { /* set the time */ if (nflag || netsettime(tval)) { utx.ut_type = OLD_TIME; (void)gettimeofday(&utx.ut_tv, NULL); pututxline(&utx); tv.tv_sec = tval; tv.tv_usec = 0; if (settimeofday(&tv, NULL) != 0) err(1, "settimeofday (timeval)"); utx.ut_type = NEW_TIME; (void)gettimeofday(&utx.ut_tv, NULL); pututxline(&utx); } if ((p = getlogin()) == NULL) p = "???"; syslog(LOG_AUTH | LOG_NOTICE, "date set by %s", p); } } static void badformat(void) { warnx("illegal time format"); usage(); } static void usage(void) { (void)fprintf(stderr, "%s\n%s\n", "usage: date [-jnRu] [-d dst] [-r seconds] [-t west] " "[-v[+|-]val[ymwdHMS]] ... ", " " "[-f fmt date | [[[[[cc]yy]mm]dd]HH]MM[.ss]] [+format]"); exit(1); } Index: projects/clang500-import/lib/libc/sys/Symbol.map =================================================================== --- projects/clang500-import/lib/libc/sys/Symbol.map (revision 321306) +++ projects/clang500-import/lib/libc/sys/Symbol.map (revision 321307) @@ -1,1034 +1,1025 @@ /* * $FreeBSD$ */ /* * It'd be nice to automatically generate the syscall symbols, but we * don't know to what version they will eventually belong to, so for now * it has to be manual. 
*/ FBSD_1.0 { __acl_aclcheck_fd; __acl_aclcheck_file; __acl_aclcheck_link; __acl_delete_fd; __acl_delete_file; __acl_delete_link; __acl_get_fd; __acl_get_file; __acl_get_link; __acl_set_fd; __acl_set_file; __acl_set_link; __getcwd; __mac_execve; __mac_get_fd; __mac_get_file; __mac_get_link; __mac_get_pid; __mac_get_proc; __mac_set_fd; __mac_set_file; __mac_set_link; __mac_set_proc; __setugid; __syscall; __sysctl; _umtx_op; abort2; accept; access; acct; adjtime; aio_cancel; aio_error; aio_fsync; aio_read; aio_return; aio_suspend; aio_waitcomplete; aio_write; audit; auditctl; auditon; bind; chdir; chflags; chmod; chown; chroot; clock_getres; clock_gettime; clock_settime; close; connect; dup; dup2; eaccess; execve; extattr_delete_fd; extattr_delete_file; extattr_delete_link; extattr_get_fd; extattr_get_file; extattr_get_link; extattr_list_fd; extattr_list_file; extattr_list_link; extattr_set_fd; extattr_set_file; extattr_set_link; extattrctl; fchdir; fchflags; fchmod; fchown; fcntl; fhopen; flock; fork; fpathconf; fsync; futimes; getaudit; getaudit_addr; getauid; getcontext; getdtablesize; getegid; geteuid; getfh; getgid; getgroups; getitimer; getpeername; getpgid; getpgrp; getpid; getppid; getpriority; getresgid; getresuid; getrlimit; getrusage; getsid; getsockname; getsockopt; gettimeofday; getuid; ioctl; issetugid; jail; jail_attach; kenv; kill; kldfind; kldfirstmod; kldload; kldnext; kldstat; kldsym; kldunload; kldunloadf; kqueue; kmq_notify; /* Do we want these to be public interfaces? */ kmq_open; /* librt uses them to provide mq_xxx. */ kmq_setattr; kmq_timedreceive; kmq_timedsend; kmq_unlink; ksem_close; ksem_destroy; ksem_getvalue; ksem_init; ksem_open; ksem_post; ksem_timedwait; ksem_trywait; ksem_unlink; ksem_wait; ktrace; lchflags; lchmod; lchown; lgetfh; link; lio_listio; listen; lutimes; mac_syscall; madvise; mincore; minherit; mkdir; mkfifo; mlock; mlockall; modfind; modfnext; modnext; modstat; mount; mprotect; msgget; msgrcv; msgsnd; msgsys; msync; munlock; munlockall; munmap; nanosleep; netbsd_lchown; netbsd_msync; nfssvc; nmount; ntp_adjtime; ntp_gettime; open; pathconf; pipe; poll; posix_openpt; preadv; profil; pselect; ptrace; pwritev; quotactl; read; readlink; readv; reboot; recvfrom; recvmsg; rename; revoke; rfork; rmdir; rtprio; rtprio_thread; sched_get_priority_max; sched_get_priority_min; sched_getparam; sched_getscheduler; sched_rr_get_interval; sched_setparam; sched_setscheduler; sched_yield; select; semget; semop; semsys; sendfile; sendmsg; sendto; setaudit; setaudit_addr; setauid; setegid; seteuid; setgid; setgroups; setitimer; setlogin; setpgid; setpriority; setregid; setresgid; setresuid; setreuid; setrlimit; setsid; setsockopt; settimeofday; setuid; shm_open; shm_unlink; shmat; shmdt; shmget; shmsys; shutdown; sigaction; sigaltstack; sigpending; sigprocmask; sigqueue; sigreturn; sigsuspend; sigtimedwait; sigwait; sigwaitinfo; socket; socketpair; swapoff; swapon; symlink; sync; sysarch; syscall; thr_create; thr_exit; thr_kill; thr_kill2; thr_new; thr_self; thr_set_name; thr_suspend; thr_wake; ktimer_create; /* Do we want these to be public interfaces? */ ktimer_delete; /* librt uses them to provide timer_xxx. 
*/ ktimer_getoverrun; ktimer_gettime; ktimer_settime; umask; undelete; unlink; unmount; utimes; utrace; uuidgen; vadvise; wait4; write; writev; __error; ftruncate; lseek; mmap; pread; pwrite; truncate; }; FBSD_1.1 { __semctl; closefrom; cpuset; cpuset_getid; cpuset_setid; cpuset_getaffinity; cpuset_setaffinity; faccessat; fchmodat; fchownat; fexecve; futimesat; jail_get; jail_set; jail_remove; linkat; lpathconf; mkdirat; mkfifoat; msgctl; readlinkat; renameat; setfib; shmctl; symlinkat; unlinkat; }; FBSD_1.2 { cap_enter; cap_getmode; getloginclass; pdfork; pdgetpid; pdkill; posix_fallocate; rctl_get_racct; rctl_get_rules; rctl_get_limits; rctl_add_rule; rctl_remove_rule; setloginclass; }; FBSD_1.3 { accept4; aio_mlock; bindat; cap_fcntls_get; cap_fcntls_limit; cap_ioctls_get; cap_ioctls_limit; __cap_rights_get; cap_rights_limit; cap_sandboxed; chflagsat; clock_getcpuclockid2; connectat; ffclock_getcounter; ffclock_getestimate; ffclock_setestimate; pipe2; posix_fadvise; procctl; wait6; }; FBSD_1.4 { futimens; ppoll; utimensat; numa_setaffinity; numa_getaffinity; sendmmsg; recvmmsg; }; FBSD_1.5 { clock_nanosleep; fdatasync; fhstat; fhstatfs; fstat; fstatat; fstatfs; getdents; getdirentries; getfsstat; kevent; lstat; mknod; mknodat; stat; statfs; }; FBSDprivate_1.0 { ___acl_aclcheck_fd; __sys___acl_aclcheck_fd; ___acl_aclcheck_file; __sys___acl_aclcheck_file; ___acl_aclcheck_link; __sys___acl_aclcheck_link; ___acl_delete_fd; __sys___acl_delete_fd; ___acl_delete_file; __sys___acl_delete_file; ___acl_delete_link; __sys___acl_delete_link; ___acl_get_fd; __sys___acl_get_fd; ___acl_get_file; __sys___acl_get_file; ___acl_get_link; __sys___acl_get_link; ___acl_set_fd; __sys___acl_set_fd; ___acl_set_file; __sys___acl_set_file; ___acl_set_link; __sys___acl_set_link; ___getcwd; __sys___getcwd; ___mac_execve; __sys___mac_execve; ___mac_get_fd; __sys___mac_get_fd; ___mac_get_file; __sys___mac_get_file; ___mac_get_link; __sys___mac_get_link; ___mac_get_pid; __sys___mac_get_pid; ___mac_get_proc; __sys___mac_get_proc; ___mac_set_fd; __sys___mac_set_fd; ___mac_set_file; __sys___mac_set_file; ___mac_set_link; __sys___mac_set_link; ___mac_set_proc; __sys___mac_set_proc; ___semctl; __sys___semctl; ___setugid; __sys___setugid; ___syscall; __sys___syscall; ___sysctl; __sys___sysctl; __umtx_op; __sys__umtx_op; _abort2; __sys_abort2; _accept; __sys_accept; _accept4; __sys_accept4; _access; __sys_access; _acct; __sys_acct; _adjtime; __sys_adjtime; - _aio_cancel; __sys_aio_cancel; - _aio_error; __sys_aio_error; - _aio_fsync; __sys_aio_fsync; - _aio_read; __sys_aio_read; - _aio_return; __sys_aio_return; - _aio_suspend; __sys_aio_suspend; - _aio_waitcomplete; __sys_aio_waitcomplete; - _aio_write; __sys_aio_write; _audit; __sys_audit; _auditctl; __sys_auditctl; _auditon; __sys_auditon; _bind; __sys_bind; _chdir; __sys_chdir; _chflags; __sys_chflags; _chmod; __sys_chmod; _chown; __sys_chown; _chroot; __sys_chroot; _clock_getcpuclockid2; __sys_clock_getcpuclockid2; _clock_getres; __sys_clock_getres; _clock_gettime; __sys_clock_gettime; __sys_clock_nanosleep; _clock_settime; __sys_clock_settime; _close; __sys_close; _closefrom; __sys_closefrom; _connect; __sys_connect; _cpuset; __sys_cpuset; _cpuset_getid; __sys_cpuset_getid; _cpuset_setid; __sys_cpuset_setid; _cpuset_getaffinity; __sys_cpuset_getaffinity; _cpuset_setaffinity; __sys_cpuset_setaffinity; _dup; __sys_dup; _dup2; __sys_dup2; _eaccess; __sys_eaccess; _execve; __sys_execve; _extattr_delete_fd; __sys_extattr_delete_fd; _extattr_delete_file; 
__sys_extattr_delete_file; _extattr_delete_link; __sys_extattr_delete_link; _extattr_get_fd; __sys_extattr_get_fd; _extattr_get_file; __sys_extattr_get_file; _extattr_get_link; __sys_extattr_get_link; _extattr_list_fd; __sys_extattr_list_fd; _extattr_list_file; __sys_extattr_list_file; _extattr_list_link; __sys_extattr_list_link; _extattr_set_fd; __sys_extattr_set_fd; _extattr_set_file; __sys_extattr_set_file; _extattr_set_link; __sys_extattr_set_link; _extattrctl; __sys_extattrctl; _fchdir; __sys_fchdir; _fchflags; __sys_fchflags; _fchmod; __sys_fchmod; _fchown; __sys_fchown; _fcntl; __sys_fcntl; __fcntl_compat; _fhopen; __sys_fhopen; _fhstat; __sys_fhstat; _fhstatfs; __sys_fhstatfs; _flock; __sys_flock; _fork; __sys_fork; _fpathconf; __sys_fpathconf; _fstat; __sys_fstat; _fstatfs; __sys_fstatfs; _fsync; __sys_fsync; _fdatasync; __sys_fdatasync; _futimes; __sys_futimes; _getaudit; __sys_getaudit; _getaudit_addr; __sys_getaudit_addr; _getauid; __sys_getauid; _getcontext; __sys_getcontext; _getdirentries; __sys_getdirentries; _getdtablesize; __sys_getdtablesize; _getegid; __sys_getegid; _geteuid; __sys_geteuid; _getfh; __sys_getfh; _getfsstat; __sys_getfsstat; _getgid; __sys_getgid; _getgroups; __sys_getgroups; _getitimer; __sys_getitimer; _getpeername; __sys_getpeername; _getpgid; __sys_getpgid; _getpgrp; __sys_getpgrp; _getpid; __sys_getpid; _getppid; __sys_getppid; _getpriority; __sys_getpriority; _getresgid; __sys_getresgid; _getresuid; __sys_getresuid; _getrlimit; __sys_getrlimit; _getrusage; __sys_getrusage; _getsid; __sys_getsid; _getsockname; __sys_getsockname; _getsockopt; __sys_getsockopt; _gettimeofday; __sys_gettimeofday; _getuid; __sys_getuid; _ioctl; __sys_ioctl; _issetugid; __sys_issetugid; _jail; __sys_jail; _jail_attach; __sys_jail_attach; _kenv; __sys_kenv; _kevent; __sys_kevent; _kill; __sys_kill; _kldfind; __sys_kldfind; _kldfirstmod; __sys_kldfirstmod; _kldload; __sys_kldload; _kldnext; __sys_kldnext; _kldstat; __sys_kldstat; _kldsym; __sys_kldsym; _kldunload; __sys_kldunload; _kldunloadf; __sys_kldunloadf; _kmq_notify; __sys_kmq_notify; _kmq_open; __sys_kmq_open; _kmq_setattr; __sys_kmq_setattr; _kmq_timedreceive; __sys_kmq_timedreceive; _kmq_timedsend; __sys_kmq_timedsend; _kmq_unlink; __sys_kmq_unlink; _kqueue; __sys_kqueue; _ksem_close; __sys_ksem_close; _ksem_destroy; __sys_ksem_destroy; _ksem_getvalue; __sys_ksem_getvalue; _ksem_init; __sys_ksem_init; _ksem_open; __sys_ksem_open; _ksem_post; __sys_ksem_post; _ksem_timedwait; __sys_ksem_timedwait; _ksem_trywait; __sys_ksem_trywait; _ksem_unlink; __sys_ksem_unlink; _ksem_wait; __sys_ksem_wait; _ktrace; __sys_ktrace; _lchflags; __sys_lchflags; _lchmod; __sys_lchmod; _lchown; __sys_lchown; _lgetfh; __sys_lgetfh; _link; __sys_link; - _lio_listio; __sys_lio_listio; _listen; __sys_listen; _lutimes; __sys_lutimes; _mac_syscall; __sys_mac_syscall; _madvise; __sys_madvise; _mincore; __sys_mincore; _minherit; __sys_minherit; _mkdir; __sys_mkdir; _mkfifo; __sys_mkfifo; _mknod; __sys_mknod; _mlock; __sys_mlock; _mlockall; __sys_mlockall; _modfind; __sys_modfind; _modfnext; __sys_modfnext; _modnext; __sys_modnext; _modstat; __sys_modstat; _mount; __sys_mount; _mprotect; __sys_mprotect; _msgctl; __sys_msgctl; _msgget; __sys_msgget; _msgrcv; __sys_msgrcv; _msgsnd; __sys_msgsnd; _msgsys; __sys_msgsys; _msync; __sys_msync; _munlock; __sys_munlock; _munlockall; __sys_munlockall; _munmap; __sys_munmap; _nanosleep; __sys_nanosleep; _netbsd_lchown; __sys_netbsd_lchown; _netbsd_msync; __sys_netbsd_msync; _nfssvc; __sys_nfssvc; _nmount; 
__sys_nmount; _ntp_adjtime; __sys_ntp_adjtime; _ntp_gettime; __sys_ntp_gettime; _open; __sys_open; _openat; __sys_openat; _pathconf; __sys_pathconf; _pipe; __sys_pipe; _poll; __sys_poll; _ppoll; __sys_ppoll; _preadv; __sys_preadv; _procctl; __sys_procctl; _profil; __sys_profil; _pselect; __sys_pselect; _ptrace; __sys_ptrace; _pwritev; __sys_pwritev; _quotactl; __sys_quotactl; _read; __sys_read; _readlink; __sys_readlink; _readv; __sys_readv; _reboot; __sys_reboot; _recvfrom; __sys_recvfrom; _recvmsg; __sys_recvmsg; _rename; __sys_rename; _revoke; __sys_revoke; _rfork; __sys_rfork; _rmdir; __sys_rmdir; _rtprio; __sys_rtprio; _rtprio_thread; __sys_rtprio_thread; _sched_get_priority_max; __sys_sched_get_priority_max; _sched_get_priority_min; __sys_sched_get_priority_min; _sched_getparam; __sys_sched_getparam; _sched_getscheduler; __sys_sched_getscheduler; _sched_rr_get_interval; __sys_sched_rr_get_interval; _sched_setparam; __sys_sched_setparam; _sched_setscheduler; __sys_sched_setscheduler; _sched_yield; __sys_sched_yield; _select; __sys_select; _semget; __sys_semget; _semop; __sys_semop; _semsys; __sys_semsys; _sendfile; __sys_sendfile; _sendmsg; __sys_sendmsg; _sendto; __sys_sendto; _setaudit; __sys_setaudit; _setaudit_addr; __sys_setaudit_addr; _setauid; __sys_setauid; _setcontext; __sys_setcontext; _setegid; __sys_setegid; _seteuid; __sys_seteuid; _setgid; __sys_setgid; _setgroups; __sys_setgroups; _setitimer; __sys_setitimer; _setlogin; __sys_setlogin; _setpgid; __sys_setpgid; _setpriority; __sys_setpriority; _setregid; __sys_setregid; _setresgid; __sys_setresgid; _setresuid; __sys_setresuid; _setreuid; __sys_setreuid; _setrlimit; __sys_setrlimit; _setsid; __sys_setsid; _setsockopt; __sys_setsockopt; _settimeofday; __sys_settimeofday; _setuid; __sys_setuid; _shm_open; __sys_shm_open; _shm_unlink; __sys_shm_unlink; _shmat; __sys_shmat; _shmctl; __sys_shmctl; _shmdt; __sys_shmdt; _shmget; __sys_shmget; _shmsys; __sys_shmsys; _shutdown; __sys_shutdown; _sigaction; __sys_sigaction; _sigaltstack; __sys_sigaltstack; _sigpending; __sys_sigpending; _sigprocmask; __sys_sigprocmask; _sigqueue; __sys_sigqueue; _sigreturn; __sys_sigreturn; _sigsuspend; __sys_sigsuspend; _sigtimedwait; __sys_sigtimedwait; _sigwait; __sigwait; __sys_sigwait; _sigwaitinfo; __sys_sigwaitinfo; _socket; __sys_socket; _socketpair; __sys_socketpair; _statfs; __sys_statfs; _swapcontext; __sys_swapcontext; _swapoff; __sys_swapoff; _swapon; __sys_swapon; _symlink; __sys_symlink; _sync; __sys_sync; _sysarch; __sys_sysarch; _syscall; __sys_syscall; _thr_create; __sys_thr_create; _thr_exit; __sys_thr_exit; _thr_kill; __sys_thr_kill; _thr_kill2; __sys_thr_kill2; _thr_new; __sys_thr_new; _thr_self; __sys_thr_self; _thr_set_name; __sys_thr_set_name; _thr_suspend; __sys_thr_suspend; _thr_wake; __sys_thr_wake; _ktimer_create; __sys_ktimer_create; _ktimer_delete; __sys_ktimer_delete; _ktimer_getoverrun; __sys_ktimer_getoverrun; _ktimer_gettime; __sys_ktimer_gettime; _ktimer_settime; __sys_ktimer_settime; _umask; __sys_umask; _undelete; __sys_undelete; _unlink; __sys_unlink; _unmount; __sys_unmount; _utimes; __sys_utimes; _utrace; __sys_utrace; _uuidgen; __sys_uuidgen; _vadvise; __sys_vadvise; _wait4; __sys_wait4; _wait6; __sys_wait6; _write; __sys_write; _writev; __sys_writev; __set_error_selector; nlm_syscall; gssd_syscall; __libc_interposing_slot; __libc_sigwait; }; Index: projects/clang500-import/lib/librt/Symbol.map =================================================================== --- 
projects/clang500-import/lib/librt/Symbol.map (revision 321306) +++ projects/clang500-import/lib/librt/Symbol.map (revision 321307) @@ -1,74 +1,64 @@ /* * $FreeBSD$ */ FBSD_1.0 { aio_read; aio_write; aio_return; aio_waitcomplete; aio_fsync; mq_open; mq_close; mq_notify; mq_getattr; mq_setattr; mq_timedreceive; mq_timedsend; mq_unlink; mq_send; mq_receive; timer_create; timer_delete; timer_gettime; timer_settime; timer_getoverrun; }; FBSD_1.5 { mq_getfd_np; timer_oshandle_np; }; FBSDprivate_1.0 { - _aio_read; - _aio_write; - _aio_return; - _aio_waitcomplete; - _aio_fsync; - __aio_read; - __aio_write; - __aio_return; - __aio_waitcomplete; - __aio_fsync; _mq_open; _mq_close; _mq_notify; _mq_getattr; _mq_setattr; _mq_timedreceive; _mq_timedsend; _mq_unlink; _mq_send; _mq_receive; __mq_open; __mq_close; __mq_notify; __mq_getattr; __mq_setattr; __mq_timedreceive; __mq_timedsend; __mq_unlink; __mq_send; __mq_receive; _timer_create; _timer_delete; _timer_gettime; _timer_settime; _timer_getoverrun; __timer_create; __timer_delete; __timer_gettime; __timer_settime; __timer_getoverrun; }; Index: projects/clang500-import/lib/librt/aio.c =================================================================== --- projects/clang500-import/lib/librt/aio.c (revision 321306) +++ projects/clang500-import/lib/librt/aio.c (revision 321307) @@ -1,204 +1,199 @@ /* * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ * */ #include #include #include #include #include "namespace.h" #include #include #include #include "sigev_thread.h" #include "un-namespace.h" -__weak_reference(__aio_read, _aio_read); __weak_reference(__aio_read, aio_read); -__weak_reference(__aio_write, _aio_write); __weak_reference(__aio_write, aio_write); -__weak_reference(__aio_return, _aio_return); __weak_reference(__aio_return, aio_return); -__weak_reference(__aio_waitcomplete, _aio_waitcomplete); __weak_reference(__aio_waitcomplete, aio_waitcomplete); -__weak_reference(__aio_fsync, _aio_fsync); __weak_reference(__aio_fsync, aio_fsync); typedef void (*aio_func)(union sigval val, struct aiocb *iocb); extern int __sys_aio_read(struct aiocb *iocb); extern int __sys_aio_write(struct aiocb *iocb); extern ssize_t __sys_aio_waitcomplete(struct aiocb **iocbp, struct timespec *timeout); extern ssize_t __sys_aio_return(struct aiocb *iocb); extern int __sys_aio_error(struct aiocb *iocb); extern int __sys_aio_fsync(int op, struct aiocb *iocb); static void aio_dispatch(struct sigev_node *sn) { aio_func f = sn->sn_func; f(sn->sn_value, (struct aiocb *)sn->sn_id); } static int aio_sigev_alloc(struct aiocb *iocb, struct sigev_node **sn, struct sigevent *saved_ev) { if (__sigev_check_init()) { /* This might be that thread library is not enabled. */ errno = EINVAL; return (-1); } *sn = __sigev_alloc(SI_ASYNCIO, &iocb->aio_sigevent, NULL, 1); if (*sn == NULL) { errno = EAGAIN; return (-1); } *saved_ev = iocb->aio_sigevent; (*sn)->sn_id = (sigev_id_t)iocb; __sigev_get_sigevent(*sn, &iocb->aio_sigevent, (*sn)->sn_id); (*sn)->sn_dispatch = aio_dispatch; __sigev_list_lock(); __sigev_register(*sn); __sigev_list_unlock(); return (0); } static int aio_io(struct aiocb *iocb, int (*sysfunc)(struct aiocb *iocb)) { struct sigev_node *sn; struct sigevent saved_ev; int ret, err; if (iocb->aio_sigevent.sigev_notify != SIGEV_THREAD) { ret = sysfunc(iocb); return (ret); } ret = aio_sigev_alloc(iocb, &sn, &saved_ev); if (ret) return (ret); ret = sysfunc(iocb); iocb->aio_sigevent = saved_ev; if (ret != 0) { err = errno; __sigev_list_lock(); __sigev_delete_node(sn); __sigev_list_unlock(); errno = err; } return (ret); } int __aio_read(struct aiocb *iocb) { return aio_io(iocb, &__sys_aio_read); } int __aio_write(struct aiocb *iocb) { return aio_io(iocb, &__sys_aio_write); } ssize_t __aio_waitcomplete(struct aiocb **iocbp, struct timespec *timeout) { ssize_t ret; int err; ret = __sys_aio_waitcomplete(iocbp, timeout); if (*iocbp) { if ((*iocbp)->aio_sigevent.sigev_notify == SIGEV_THREAD) { err = errno; __sigev_list_lock(); __sigev_delete(SI_ASYNCIO, (sigev_id_t)(*iocbp)); __sigev_list_unlock(); errno = err; } } return (ret); } ssize_t __aio_return(struct aiocb *iocb) { if (iocb->aio_sigevent.sigev_notify == SIGEV_THREAD) { if (__sys_aio_error(iocb) == EINPROGRESS) { /* * Fail with EINVAL to match the semantics of * __sys_aio_return() for an in-progress * request. 
*/ errno = EINVAL; return (-1); } __sigev_list_lock(); __sigev_delete(SI_ASYNCIO, (sigev_id_t)iocb); __sigev_list_unlock(); } return __sys_aio_return(iocb); } int __aio_fsync(int op, struct aiocb *iocb) { struct sigev_node *sn; struct sigevent saved_ev; int ret, err; if (iocb->aio_sigevent.sigev_notify != SIGEV_THREAD) return __sys_aio_fsync(op, iocb); ret = aio_sigev_alloc(iocb, &sn, &saved_ev); if (ret) return (ret); ret = __sys_aio_fsync(op, iocb); iocb->aio_sigevent = saved_ev; if (ret != 0) { err = errno; __sigev_list_lock(); __sigev_delete_node(sn); __sigev_list_unlock(); errno = err; } return (ret); } Index: projects/clang500-import/share/misc/committers-src.dot =================================================================== --- projects/clang500-import/share/misc/committers-src.dot (revision 321306) +++ projects/clang500-import/share/misc/committers-src.dot (revision 321307) @@ -1,809 +1,811 @@ # $FreeBSD$ # This file is meant to list all FreeBSD src committers and describe the # mentor-mentee relationships between them. # The graphical output can be generated from this file with the following # command: # $ dot -T png -o file.png committers-src.dot # # The dot binary is part of the graphics/graphviz port. digraph src { # Node definitions follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??"] # # ????/??/?? is the date when the commit bit was obtained, usually the one you # can find looking at svn logs for the svnadmin/conf/access file. # Use YYYY/MM/DD format. # # For returned commit bits, the node definition will follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??\n????/??/??"] # # The first date is the same as for an active committer, the second date is # the date when the commit bit has been returned. Again, check svn logs. node [color=grey62, style=filled, bgcolor=black]; # Alumni go here.. Try to keep things sorted. alm [label="Andrew Moore\nalm@FreeBSD.org\n1993/06/12\n????/??/??"] anholt [label="Eric Anholt\nanholt@FreeBSD.org\n2002/04/22\n2008/08/07"] archie [label="Archie Cobbs\narchie@FreeBSD.org\n1998/11/06\n2006/06/09"] arr [label="Andrew R. Reiter\narr@FreeBSD.org\n2001/11/02\n2005/05/25"] arun [label="Arun Sharma\narun@FreeBSD.org\n2003/03/06\n2006/12/16"] asmodai [label="Jeroen Ruigrok\nasmodai@FreeBSD.org\n1999/12/16\n2001/11/16"] benjsc [label="Benjamin Close\nbenjsc@FreeBSD.org\n2007/02/09\n2010/09/15"] billf [label="Bill Fumerola\nbillf@FreeBSD.org\n1998/11/11\n2008/11/10"] bmah [label="Bruce A. Mah\nbmah@FreeBSD.org\n2002/01/29\n2009/09/13"] bmilekic [label="Bosko Milekic\nbmilekic@FreeBSD.org\n2000/09/21\n2008/11/10"] bushman [label="Michael Bushkov\nbushman@FreeBSD.org\n2007/03/10\n2010/04/29"] carl [label="Carl Delsey\ncarl@FreeBSD.org\n2013/01/14\n2014/03/06"] ceri [label="Ceri Davies\nceri@FreeBSD.org\n2006/11/07\n2012/03/07"] cjc [label="Crist J. Clark\ncjc@FreeBSD.org\n2001/06/01\n2006/12/29"] davidxu [label="David Xu\ndavidxu@FreeBSD.org\n2002/09/02\n2014/04/14"] dds [label="Diomidis Spinellis\ndds@FreeBSD.org\n2003/06/20\n2010/09/22"] dhartmei [label="Daniel Hartmeier\ndhartmei@FreeBSD.org\n2004/04/06\n2008/12/08"] dmlb [label="Duncan Barclay\ndmlb@FreeBSD.org\n2001/12/14\n2008/11/10"] dougb [label="Doug Barton\ndougb@FreeBSD.org\n2000/10/26\n2012/10/08"] eik [label="Oliver Eikemeier\neik@FreeBSD.org\n2004/05/20\n2008/11/10"] furuta [label="Atsushi Furuta\nfuruta@FreeBSD.org\n2000/06/21\n2003/03/08"] gj [label="Gary L. 
Jennejohn\ngj@FreeBSD.org\n1994/??/??\n2006/04/28"] groudier [label="Gerard Roudier\ngroudier@FreeBSD.org\n1999/12/30\n2006/04/06"] jake [label="Jake Burkholder\njake@FreeBSD.org\n2000/05/16\n2008/11/10"] jayanth [label="Jayanth Vijayaraghavan\njayanth@FreeBSD.org\n2000/05/08\n2008/11/10"] jb [label="John Birrell\njb@FreeBSD.org\n1997/03/27\n2009/12/15"] jdp [label="John Polstra\njdp@FreeBSD.org\n1995/12/07\n2008/02/26"] jedgar [label="Chris D. Faulhaber\njedgar@FreeBSD.org\n1999/12/15\n2006/04/07"] jkh [label="Jordan K. Hubbard\njkh@FreeBSD.org\n1993/06/12\n2008/06/13"] jlemon [label="Jonathan Lemon\njlemon@FreeBSD.org\n1997/08/14\n2008/11/10"] joe [label="Josef Karthauser\njoe@FreeBSD.org\n1999/10/22\n2008/08/10"] jtc [label="J.T. Conklin\njtc@FreeBSD.org\n1993/06/12\n????/??/??"] kargl [label="Steven G. Kargl\nkargl@FreeBSD.org\n2011/01/17\n2015/06/28"] kbyanc [label="Kelly Yancey\nkbyanc@FreeBSD.org\n2000/07/11\n2006/07/25"] keichii [label="Michael Wu\nkeichii@FreeBSD.org\n2001/03/07\n2006/04/28"] linimon [label="Mark Linimon\nlinimon@FreeBSD.org\n2006/09/30\n2008/05/04"] lulf [label="Ulf Lilleengen\nlulf@FreeBSD.org\n2007/10/24\n2012/01/19"] mb [label="Maxim Bolotin\nmb@FreeBSD.org\n2000/04/06\n2003/03/08"] marks [label="Mark Santcroos\nmarks@FreeBSD.org\n2004/03/18\n2008/09/29"] mike [label="Mike Barcroft\nmike@FreeBSD.org\n2001/07/17\n2006/04/28"] msmith [label="Mike Smith\nmsmith@FreeBSD.org\n1996/10/22\n2003/12/15"] murray [label="Murray Stokely\nmurray@FreeBSD.org\n2000/04/05\n2010/07/25"] mux [label="Maxime Henrion\nmux@FreeBSD.org\n2002/03/03\n2011/06/22"] nate [label="Nate Williams\nnate@FreeBSD.org\n1993/06/12\n2003/12/15"] njl [label="Nate Lawson\nnjl@FreeBSD.org\n2002/08/07\n2008/02/16"] non [label="Noriaki Mitsunaga\nnon@FreeBSD.org\n2000/06/19\n2007/03/06"] onoe [label="Atsushi Onoe\nonoe@FreeBSD.org\n2000/07/21\n2008/11/10"] rafan [label="Rong-En Fan\nrafan@FreeBSD.org\n2007/01/31\n2012/07/23"] randi [label="Randi Harper\nrandi@FreeBSD.org\n2010/04/20\n2012/05/10"] rink [label="Rink Springer\nrink@FreeBSD.org\n2006/01/16\n2010/11/04"] robert [label="Robert Drehmel\nrobert@FreeBSD.org\n2001/08/23\n2006/05/13"] sah [label="Sam Hopkins\nsah@FreeBSD.org\n2004/12/15\n2008/11/10"] shafeeq [label="Shafeeq Sinnamohideen\nshafeeq@FreeBSD.org\n2000/06/19\n2006/04/06"] sheldonh [label="Sheldon Hearn\nsheldonh@FreeBSD.org\n1999/06/14\n2006/05/13"] shiba [label="Takeshi Shibagaki\nshiba@FreeBSD.org\n2000/06/19\n2008/11/10"] shin [label="Yoshinobu Inoue\nshin@FreeBSD.org\n1999/07/29\n2003/03/08"] snb [label="Nick Barkas\nsnb@FreeBSD.org\n2009/05/05\n2010/11/04"] tmm [label="Thomas Moestl\ntmm@FreeBSD.org\n2001/03/07\n2006/07/12"] toshi [label="Toshihiko Arai\ntoshi@FreeBSD.org\n2000/07/06\n2003/03/08"] tshiozak [label="Takuya SHIOZAKI\ntshiozak@FreeBSD.org\n2001/04/25\n2003/03/08"] uch [label="UCHIYAMA Yasushi\nuch@FreeBSD.org\n2000/06/21\n2002/04/24"] wilko [label="Wilko Bulte\nwilko@FreeBSD.org\n2000/01/13\n2013/01/17"] yar [label="Yar Tikhiy\nyar@FreeBSD.org\n2001/03/25\n2012/05/23"] zack [label="Zack Kirsch\nzack@FreeBSD.org\n2010/11/05\n2012/09/08"] node [color=lightblue2, style=filled, bgcolor=black]; # Current src committers go here. Try to keep things sorted. ache [label="Andrey Chernov\nache@FreeBSD.org\n1993/10/31"] achim [label="Achim Leubner\nachim@FreeBSD.org\n2013/01/23"] adrian [label="Adrian Chadd\nadrian@FreeBSD.org\n2000/07/03"] ae [label="Andrey V.
Elsukov\nae@FreeBSD.org\n2010/06/03"] akiyama [label="Shunsuke Akiyama\nakiyama@FreeBSD.org\n2000/06/19"] alc [label="Alan Cox\nalc@FreeBSD.org\n1999/02/23"] allanjude [label="Allan Jude\nallanjude@FreeBSD.org\n2015/07/30"] ambrisko [label="Doug Ambrisko\nambrisko@FreeBSD.org\n2001/12/19"] anchie [label="Ana Kukec\nanchie@FreeBSD.org\n2010/04/14"] andre [label="Andre Oppermann\nandre@FreeBSD.org\n2003/11/12"] andreast [label="Andreas Tobler\nandreast@FreeBSD.org\n2010/09/05"] andrew [label="Andrew Turner\nandrew@FreeBSD.org\n2010/07/19"] antoine [label="Antoine Brodin\nantoine@FreeBSD.org\n2008/02/03"] araujo [label="Marcelo Araujo\naraujo@FreeBSD.org\n2015/08/04"] ariff [label="Ariff Abdullah\nariff@FreeBSD.org\n2005/11/14"] art [label="Artem Belevich\nart@FreeBSD.org\n2011/03/29"] arybchik [label="Andrew Rybchenko\narybchik@FreeBSD.org\n2014/10/12"] asomers [label="Alan Somers\nasomers@FreeBSD.org\n2013/04/24"] avg [label="Andriy Gapon\navg@FreeBSD.org\n2009/02/18"] avos [label="Andriy Voskoboinyk\navos@FreeBSD.org\n2015/09/24"] badger [label="Eric Badger\nbadger@FreeBSD.org\n2016/07/01"] bapt [label="Baptiste Daroussin\nbapt@FreeBSD.org\n2011/12/23"] bdrewery [label="Bryan Drewery\nbdrewery@FreeBSD.org\n2013/12/14"] benl [label="Ben Laurie\nbenl@FreeBSD.org\n2011/05/18"] benno [label="Benno Rice\nbenno@FreeBSD.org\n2000/11/02"] bms [label="Bruce M Simpson\nbms@FreeBSD.org\n2003/08/06"] br [label="Ruslan Bukin\nbr@FreeBSD.org\n2013/09/02"] brian [label="Brian Somers\nbrian@FreeBSD.org\n1996/12/16"] brooks [label="Brooks Davis\nbrooks@FreeBSD.org\n2001/06/21"] brucec [label="Bruce Cran\nbrucec@FreeBSD.org\n2010/01/29"] brueffer [label="Christian Brueffer\nbrueffer@FreeBSD.org\n2006/02/28"] bruno [label="Bruno Ducrot\nbruno@FreeBSD.org\n2005/07/18"] bryanv [label="Bryan Venteicher\nbryanv@FreeBSD.org\n2012/11/03"] bschmidt [label="Bernhard Schmidt\nbschmidt@FreeBSD.org\n2010/02/06"] bz [label="Bjoern A. Zeeb\nbz@FreeBSD.org\n2004/07/27"] cem [label="Conrad Meyer\ncem@FreeBSD.org\n2015/07/05"] cognet [label="Olivier Houchard\ncognet@FreeBSD.org\n2002/10/09"] cokane [label="Coleman Kane\ncokane@FreeBSD.org\n2000/06/19"] cperciva [label="Colin Percival\ncperciva@FreeBSD.org\n2004/01/20"] csjp [label="Christian S.J. 
Peron\ncsjp@FreeBSD.org\n2004/05/04"] dab [label="David Bright\ndab@FreeBSD.org\n2016/10/24"] das [label="David Schultz\ndas@FreeBSD.org\n2003/02/21"] davide [label="Davide Italiano\ndavide@FreeBSD.org\n2012/01/27"] dchagin [label="Dmitry Chagin\ndchagin@FreeBSD.org\n2009/02/28"] def [label="Konrad Witaszczyk\ndef@FreeBSD.org\n2016/11/02"] delphij [label="Xin Li\ndelphij@FreeBSD.org\n2004/09/14"] des [label="Dag-Erling Smorgrav\ndes@FreeBSD.org\n1998/04/03"] dexuan [label="Dexuan Cui\ndexuan@FreeBSD.org\n2016/10/24"] dfr [label="Doug Rabson\ndfr@FreeBSD.org\n????/??/??"] dg [label="David Greenman\ndg@FreeBSD.org\n1993/06/14"] dim [label="Dimitry Andric\ndim@FreeBSD.org\n2010/08/30"] dteske [label="Devin Teske\ndteske@FreeBSD.org\n2012/04/10"] dumbbell [label="Jean-Sebastien Pedron\ndumbbell@FreeBSD.org\n2004/11/29"] dwmalone [label="David Malone\ndwmalone@FreeBSD.org\n2000/07/11"] eadler [label="Eitan Adler\neadler@FreeBSD.org\n2012/01/18"] ed [label="Ed Schouten\ned@FreeBSD.org\n2008/05/22"] edavis [label="Eric Davis\nedavis@FreeBSD.org\n2013/10/09"] edwin [label="Edwin Groothuis\nedwin@FreeBSD.org\n2007/06/25"] eivind [label="Eivind Eklund\neivind@FreeBSD.org\n1997/02/02"] emaste [label="Ed Maste\nemaste@FreeBSD.org\n2005/10/04"] emax [label="Maksim Yevmenkin\nemax@FreeBSD.org\n2003/10/12"] eri [label="Ermal Luci\neri@FreeBSD.org\n2008/06/11"] erj [label="Eric Joyner\nerj@FreeBSD.org\n2014/12/14"] fabient [label="Fabien Thomas\nfabient@FreeBSD.org\n2009/03/16"] fanf [label="Tony Finch\nfanf@FreeBSD.org\n2002/05/05"] fjoe [label="Max Khon\nfjoe@FreeBSD.org\n2001/08/06"] flz [label="Florent Thoumie\nflz@FreeBSD.org\n2006/03/30"] gabor [label="Gabor Kovesdan\ngabor@FreeBSD.org\n2010/02/02"] gad [label="Garance A. Drosehn\ngad@FreeBSD.org\n2000/10/27"] gallatin [label="Andrew Gallatin\ngallatin@FreeBSD.org\n1999/01/15"] ganbold [label="Ganbold Tsagaankhuu\nganbold@FreeBSD.org\n2013/12/18"] gavin [label="Gavin Atkinson\ngavin@FreeBSD.org\n2009/12/07"] gibbs [label="Justin T. Gibbs\ngibbs@FreeBSD.org\n????/??/??"] gjb [label="Glen Barber\ngjb@FreeBSD.org\n2013/06/04"] gleb [label="Gleb Kurtsou\ngleb@FreeBSD.org\n2011/09/19"] glebius [label="Gleb Smirnoff\nglebius@FreeBSD.org\n2004/07/14"] gnn [label="George V. Neville-Neil\ngnn@FreeBSD.org\n2004/10/11"] gordon [label="Gordon Tetlow\ngordon@FreeBSD.org\n2002/05/17"] grehan [label="Peter Grehan\ngrehan@FreeBSD.org\n2002/08/08"] grog [label="Greg Lehey\ngrog@FreeBSD.org\n1998/08/30"] gshapiro [label="Gregory Shapiro\ngshapiro@FreeBSD.org\n2000/07/12"] harti [label="Hartmut Brandt\nharti@FreeBSD.org\n2003/01/29"] hiren [label="Hiren Panchasara\nhiren@FreeBSD.org\n2013/04/12"] hmp [label="Hiten Pandya\nhmp@FreeBSD.org\n2004/03/23"] ian [label="Ian Lepore\nian@FreeBSD.org\n2013/01/07"] iedowse [label="Ian Dowse\niedowse@FreeBSD.org\n2000/12/01"] imp [label="Warner Losh\nimp@FreeBSD.org\n1996/09/20"] ivoras [label="Ivan Voras\nivoras@FreeBSD.org\n2008/06/10"] jah [label="Jason A. 
Harmening\njah@FreeBSD.org\n2015/03/08"] jamie [label="Jamie Gritton\njamie@FreeBSD.org\n2009/01/28"] jasone [label="Jason Evans\njasone@FreeBSD.org\n1999/03/03"] jceel [label="Jakub Klama\njceel@FreeBSD.org\n2011/09/25"] jch [label="Julien Charbon\njch@FreeBSD.org\n2014/09/24"] jchandra [label="Jayachandran C.\njchandra@FreeBSD.org\n2010/05/19"] jeff [label="Jeff Roberson\njeff@FreeBSD.org\n2002/02/21"] jh [label="Jaakko Heinonen\njh@FreeBSD.org\n2009/10/02"] jhb [label="John Baldwin\njhb@FreeBSD.org\n1999/08/23"] jhibbits [label="Justin Hibbits\njhibbits@FreeBSD.org\n2011/11/30"] jilles [label="Jilles Tjoelker\njilles@FreeBSD.org\n2009/05/22"] jimharris [label="Jim Harris\njimharris@FreeBSD.org\n2011/12/09"] jinmei [label="JINMEI Tatuya\njinmei@FreeBSD.org\n2007/03/17"] jkim [label="Jung-uk Kim\njkim@FreeBSD.org\n2005/07/06"] jkoshy [label="A. Joseph Koshy\njkoshy@FreeBSD.org\n1998/05/13"] jlh [label="Jeremie Le Hen\njlh@FreeBSD.org\n2012/04/22"] jls [label="Jordan Sissel\njls@FreeBSD.org\n2006/12/06"] jmcneill [label="Jared McNeill\njmcneill@FreeBSD.org\n2016/02/24"] jmg [label="John-Mark Gurney\njmg@FreeBSD.org\n1997/02/13"] jmmv [label="Julio Merino\njmmv@FreeBSD.org\n2013/11/02"] joerg [label="Joerg Wunsch\njoerg@FreeBSD.org\n1993/11/14"] jon [label="Jonathan Chen\njon@FreeBSD.org\n2000/10/17"] jonathan [label="Jonathan Anderson\njonathan@FreeBSD.org\n2010/10/07"] jpaetzel [label="Josh Paetzel\njpaetzel@FreeBSD.org\n2011/01/21"] jtl [label="Jonathan T. Looney\njtl@FreeBSD.org\n2015/10/26"] julian [label="Julian Elischer\njulian@FreeBSD.org\n1993/04/19"] jwd [label="John De Boskey\njwd@FreeBSD.org\n2000/05/19"] kaiw [label="Kai Wang\nkaiw@FreeBSD.org\n2007/09/26"] kan [label="Alexander Kabaev\nkan@FreeBSD.org\n2002/07/21"] karels [label="Mike Karels\nkarels@FreeBSD.org\n2016/06/09"] ken [label="Ken Merry\nken@FreeBSD.org\n1998/09/08"] kensmith [label="Ken Smith\nkensmith@FreeBSD.org\n2004/01/23"] kevans [label="Kyle Evans\nkevans@FreeBSD.org\n2017/06/20"] kevlo [label="Kevin Lo\nkevlo@FreeBSD.org\n2006/07/23"] kib [label="Konstantin Belousov\nkib@FreeBSD.org\n2006/06/03"] kmacy [label="Kip Macy\nkmacy@FreeBSD.org\n2005/06/01"] kp [label="Kristof Provost\nkp@FreeBSD.org\n2015/03/22"] landonf [label="Landon Fuller\nlandonf@FreeBSD.org\n2016/05/31"] le [label="Lukas Ertl\nle@FreeBSD.org\n2004/02/02"] lidl [label="Kurt Lidl\nlidl@FreeBSD.org\n2015/10/21"] loos [label="Luiz Otavio O Souza\nloos@FreeBSD.org\n2013/07/03"] lstewart [label="Lawrence Stewart\nlstewart@FreeBSD.org\n2008/10/06"] manu [label="Emmanuel Vadot\nmanu@FreeBSD.org\n2016/04/24"] marcel [label="Marcel Moolenaar\nmarcel@FreeBSD.org\n1999/07/03"] marius [label="Marius Strobl\nmarius@FreeBSD.org\n2004/04/17"] markj [label="Mark Johnston\nmarkj@FreeBSD.org\n2012/12/18"] markm [label="Mark Murray\nmarkm@FreeBSD.org\n1995/04/24"] markus [label="Markus Brueffer\nmarkus@FreeBSD.org\n2006/06/01"] matteo [label="Matteo Riondato\nmatteo@FreeBSD.org\n2006/01/18"] mav [label="Alexander Motin\nmav@FreeBSD.org\n2007/04/12"] maxim [label="Maxim Konovalov\nmaxim@FreeBSD.org\n2002/02/07"] mdf [label="Matthew Fleming\nmdf@FreeBSD.org\n2010/06/04"] mdodd [label="Matthew N. Dodd\nmdodd@FreeBSD.org\n1999/07/27"] melifaro [label="Alexander V. 
Chernikov\nmelifaro@FreeBSD.org\n2011/10/04"] mizhka [label="Michael Zhilin\nmizhka@FreeBSD.org\n2016/07/19"] mjacob [label="Matt Jacob\nmjacob@FreeBSD.org\n1997/08/13"] mjg [label="Mateusz Guzik\nmjg@FreeBSD.org\n2012/06/04"] +mjoras [label="Matt Joras\nmjoras@FreeBSD.org\n2017/07/12"] mlaier [label="Max Laier\nmlaier@FreeBSD.org\n2004/02/10"] mmel [label="Michal Meloun\nmmel@FreeBSD.org\n2015/11/01"] monthadar [label="Monthadar Al Jaberi\nmonthadar@FreeBSD.org\n2012/04/02"] mp [label="Mark Peek\nmp@FreeBSD.org\n2001/07/27"] mr [label="Michael Reifenberger\nmr@FreeBSD.org\n2001/09/30"] neel [label="Neel Natu\nneel@FreeBSD.org\n2009/09/20"] netchild [label="Alexander Leidinger\nnetchild@FreeBSD.org\n2005/03/31"] ngie [label="Ngie Cooper\nngie@FreeBSD.org\n2014/07/27"] nork [label="Norikatsu Shigemura\nnork@FreeBSD.org\n2009/06/09"] np [label="Navdeep Parhar\nnp@FreeBSD.org\n2009/06/05"] nwhitehorn [label="Nathan Whitehorn\nnwhitehorn@FreeBSD.org\n2008/07/03"] n_hibma [label="Nick Hibma\nn_hibma@FreeBSD.org\n1998/11/26"] obrien [label="David E. O'Brien\nobrien@FreeBSD.org\n1996/10/29"] olli [label="Oliver Fromme\nolli@FreeBSD.org\n2008/02/14"] oshogbo [label="Mariusz Zaborski\noshogbo@FreeBSD.org\n2015/04/15"] peadar [label="Peter Edwards\npeadar@FreeBSD.org\n2004/03/08"] peter [label="Peter Wemm\npeter@FreeBSD.org\n1995/07/04"] peterj [label="Peter Jeremy\npeterj@FreeBSD.org\n2012/09/14"] pfg [label="Pedro Giffuni\npfg@FreeBSD.org\n2011/12/01"] phil [label="Phil Shafer\nphil@FreeBSD.org\n2015/12/30"] philip [label="Philip Paeps\nphilip@FreeBSD.org\n2004/01/21"] phk [label="Poul-Henning Kamp\nphk@FreeBSD.org\n1994/02/21"] pho [label="Peter Holm\npho@FreeBSD.org\n2008/11/16"] pjd [label="Pawel Jakub Dawidek\npjd@FreeBSD.org\n2004/02/02"] pkelsey [label="Patrick Kelsey\npkelsey@FreeBSD.org\n2014/05/29"] pluknet [label="Sergey Kandaurov\npluknet@FreeBSD.org\n2010/10/05"] ps [label="Paul Saab\nps@FreeBSD.org\n2000/02/23"] qingli [label="Qing Li\nqingli@FreeBSD.org\n2005/04/13"] ray [label="Aleksandr Rybalko\nray@FreeBSD.org\n2011/05/25"] rdivacky [label="Roman Divacky\nrdivacky@FreeBSD.org\n2008/03/13"] remko [label="Remko Lodder\nremko@FreeBSD.org\n2007/02/23"] rgrimes [label="Rodney W. Grimes\nrgrimes@FreeBSD.org\n1993/06/12\n2017/03/03"] rik [label="Roman Kurakin\nrik@FreeBSD.org\n2003/12/18"] rlibby [label="Ryan Libby\nrlibby@FreeBSD.org\n2017/06/07"] rmacklem [label="Rick Macklem\nrmacklem@FreeBSD.org\n2009/03/27"] rmh [label="Robert Millan\nrmh@FreeBSD.org\n2011/09/18"] rnoland [label="Robert Noland\nrnoland@FreeBSD.org\n2008/09/15"] roberto [label="Ollivier Robert\nroberto@FreeBSD.org\n1995/02/22"] rodrigc [label="Craig Rodrigues\nrodrigc@FreeBSD.org\n2005/05/14"] royger [label="Roger Pau Monne\nroyger@FreeBSD.org\n2013/11/26"] rpaulo [label="Rui Paulo\nrpaulo@FreeBSD.org\n2007/09/25"] rpokala [label="Ravi Pokala\nrpokala@FreeBSD.org\n2015/11/19"] rrs [label="Randall R Stewart\nrrs@FreeBSD.org\n2007/02/08"] rse [label="Ralf S. Engelschall\nrse@FreeBSD.org\n1997/07/31"] rstone [label="Ryan Stone\nrstone@FreeBSD.org\n2010/04/19"] ru [label="Ruslan Ermilov\nru@FreeBSD.org\n1999/05/27"] rwatson [label="Robert N. M. Watson\nrwatson@FreeBSD.org\n1999/12/16"] sam [label="Sam Leffler\nsam@FreeBSD.org\n2002/07/02"] sanpei [label="MIHIRA Sanpei Yoshiro\nsanpei@FreeBSD.org\n2000/06/19"] sbruno [label="Sean Bruno\nsbruno@FreeBSD.org\n2008/08/02"] scf [label="Sean C.
Farley\nscf@FreeBSD.org\n2007/06/24"] schweikh [label="Jens Schweikhardt\nschweikh@FreeBSD.org\n2001/04/06"] scottl [label="Scott Long\nscottl@FreeBSD.org\n2000/09/28"] se [label="Stefan Esser\nse@FreeBSD.org\n1994/08/26"] sephe [label="Sepherosa Ziehau\nsephe@FreeBSD.org\n2007/03/28"] sepotvin [label="Stephane E. Potvin\nsepotvin@FreeBSD.org\n2007/02/15"] sgalabov [label="Stanislav Galabov\nsgalabov@FreeBSD.org\n2016/02/24"] simon [label="Simon L. Nielsen\nsimon@FreeBSD.org\n2006/03/07"] sjg [label="Simon J. Gerraty\nsjg@FreeBSD.org\n2012/10/23"] skra [label="Svatopluk Kraus\nskra@FreeBSD.org\n2015/10/28"] slm [label="Stephen McConnell\nslm@FreeBSD.org\n2014/05/07"] smh [label="Steven Hartland\nsmh@FreeBSD.org\n2012/11/12"] sobomax [label="Maxim Sobolev\nsobomax@FreeBSD.org\n2001/07/25"] sos [label="Soren Schmidt\nsos@FreeBSD.org\n????/??/??"] sson [label="Stacey Son\nsson@FreeBSD.org\n2008/07/08"] stas [label="Stanislav Sedov\nstas@FreeBSD.org\n2008/08/22"] stevek [label="Stephen J. Kiernan\nstevek@FreeBSD.org\n2016/07/18"] suz [label="SUZUKI Shinsuke\nsuz@FreeBSD.org\n2002/03/26"] syrinx [label="Shteryana Shopova\nsyrinx@FreeBSD.org\n2006/10/07"] takawata [label="Takanori Watanabe\ntakawata@FreeBSD.org\n2000/07/06"] theraven [label="David Chisnall\ntheraven@FreeBSD.org\n2011/11/11"] thompsa [label="Andrew Thompson\nthompsa@FreeBSD.org\n2005/05/25"] ticso [label="Bernd Walter\nticso@FreeBSD.org\n2002/01/31"] tijl [label="Tijl Coosemans\ntijl@FreeBSD.org\n2010/07/16"] tsoome [label="Toomas Soome\ntsoome@FreeBSD.org\n2016/08/10"] trasz [label="Edward Tomasz Napierala\ntrasz@FreeBSD.org\n2008/08/22"] trhodes [label="Tom Rhodes\ntrhodes@FreeBSD.org\n2002/05/28"] trociny [label="Mikolaj Golub\ntrociny@FreeBSD.org\n2011/03/10"] tuexen [label="Michael Tuexen\ntuexen@FreeBSD.org\n2009/06/06"] tychon [label="Tycho Nightingale\ntychon@FreeBSD.org\n2014/01/21"] ume [label="Hajimu UMEMOTO\nume@FreeBSD.org\n2000/02/26"] uqs [label="Ulrich Spoerlein\nuqs@FreeBSD.org\n2010/01/28"] vangyzen [label="Eric van Gyzen\nvangyzen@FreeBSD.org\n2015/03/08"] vanhu [label="Yvan Vanhullebus\nvanhu@FreeBSD.org\n2008/07/21"] versus [label="Konrad Jankowski\nversus@FreeBSD.org\n2008/10/27"] weongyo [label="Weongyo Jeong\nweongyo@FreeBSD.org\n2007/12/21"] wes [label="Wes Peters\nwes@FreeBSD.org\n1998/11/25"] whu [label="Wei Hu\nwhu@FreeBSD.org\n2015/02/11"] wkoszek [label="Wojciech A. Koszek\nwkoszek@FreeBSD.org\n2006/02/21"] wma [label="Wojciech Macek\nwma@FreeBSD.org\n2016/01/18"] wollman [label="Garrett Wollman\nwollman@FreeBSD.org\n????/??/??"] wsalamon [label="Wayne Salamon\nwsalamon@FreeBSD.org\n2005/06/25"] wulf [label="Vladimir Kondratyev\nwulf@FreeBSD.org\n2017/04/27"] yongari [label="Pyun YongHyeon\nyongari@FreeBSD.org\n2004/08/01"] zbb [label="Zbigniew Bodek\nzbb@FreeBSD.org\n2013/09/02"] zec [label="Marko Zec\nzec@FreeBSD.org\n2008/06/22"] zml [label="Zachary Loafman\nzml@FreeBSD.org\n2009/05/27"] zont [label="Andrey Zonov\nzont@FreeBSD.org\n2012/08/21"] # Pseudo target representing rev 1.1 of commit.allow day1 [label="Birth of FreeBSD"] # Here are the mentor/mentee relationships. # Group together all the mentees for a particular mentor. # Keep the list sorted by mentor login. 
day1 -> jtc day1 -> jkh day1 -> nate day1 -> rgrimes day1 -> alm day1 -> dg adrian -> avos adrian -> jmcneill adrian -> landonf adrian -> lidl adrian -> loos adrian -> mizhka adrian -> monthadar adrian -> ray adrian -> rmh adrian -> sephe adrian -> sgalabov ae -> melifaro allanjude -> tsoome alc -> davide andre -> qingli andrew -> manu anholt -> jkim avg -> art avg -> pluknet avg -> smh bapt -> allanjude bapt -> araujo bapt -> bdrewery bapt -> wulf benno -> grehan billf -> dougb billf -> gad billf -> jedgar billf -> jhb billf -> shafeeq bmilekic -> csjp bms -> dhartmei bms -> mlaier bms -> thompsa brian -> joe brooks -> bushman brooks -> jamie brooks -> theraven bz -> anchie bz -> jamie bz -> syrinx cognet -> br cognet -> jceel cognet -> kevlo cognet -> ian cognet -> manu cognet -> wkoszek cognet -> wma cognet -> zbb cperciva -> eadler cperciva -> flz cperciva -> randi cperciva -> simon csjp -> bushman das -> kargl das -> rodrigc delphij -> gabor delphij -> rafan delphij -> sephe des -> anholt des -> hmp des -> mike des -> olli des -> ru des -> bapt dds -> versus dfr -> gallatin dfr -> zml dg -> peter dim -> theraven dwmalone -> fanf dwmalone -> peadar dwmalone -> snb ed -> dim ed -> gavin ed -> jilles ed -> rdivacky ed -> uqs eivind -> des eivind -> rwatson emaste -> achim emaste -> dteske emaste -> kevans emaste -> markj emaste -> rstone emax -> markus fjoe -> versus gallatin -> ticso gavin -> versus gibbs -> mjacob gibbs -> njl gibbs -> royger gibbs -> whu glebius -> mav gnn -> jinmei gnn -> rrs gnn -> ivoras gnn -> vanhu gnn -> lstewart gnn -> np gnn -> davide gnn -> arybchik gnn -> erj gnn -> kp gnn -> jtl gnn -> karels gonzo -> jmcneill gonzo -> wulf grehan -> bryanv grehan -> rgrimes grog -> edwin grog -> le grog -> peterj imp -> akiyama imp -> ambrisko imp -> andrew imp -> bmah imp -> bruno imp -> dmlb imp -> emax imp -> furuta imp -> joe imp -> jon imp -> keichii imp -> mb imp -> mr imp -> neel imp -> non imp -> nork imp -> onoe imp -> remko imp -> rik imp -> rink imp -> sanpei imp -> shiba imp -> takawata imp -> toshi imp -> tsoome imp -> uch jake -> bms jake -> gordon jake -> harti jake -> jeff jake -> kmacy jake -> robert jake -> yongari jb -> sson jdp -> fjoe jfv -> erj jhb -> arr jhb -> avg jhb -> jch jhb -> jeff jhb -> kbyanc jhb -> peterj jhb -> pfg jhb -> rnoland jhb -> rpokala jimharris -> carl jkh -> dfr jkh -> gj jkh -> grog jkh -> imp jkh -> jlemon jkh -> joerg jkh -> jwd jkh -> msmith jkh -> murray jkh -> phk jkh -> wes jkh -> yar jkoshy -> kaiw jkoshy -> fabient jkoshy -> rstone jlemon -> bmilekic jlemon -> brooks jmallett -> pkelsey jmmv -> ngie joerg -> brian joerg -> eik joerg -> jmg joerg -> le joerg -> netchild joerg -> schweikh julian -> glebius julian -> davidxu julian -> archie julian -> adrian julian -> zec julian -> mp kan -> kib ken -> asomers ken -> slm kib -> ae kib -> badger kib -> dchagin kib -> gjb kib -> jah kib -> jlh kib -> jpaetzel kib -> lulf kib -> melifaro kib -> mmel kib -> pho kib -> pluknet kib -> rdivacky kib -> rmacklem kib -> rmh kib -> skra kib -> stas kib -> tijl kib -> trociny kib -> vangyzen kib -> zont kmacy -> lstewart marcel -> allanjude marcel -> art marcel -> arun marcel -> marius marcel -> nwhitehorn marcel -> sjg markj -> cem markj -> rlibby markm -> jasone markm -> sheldonh mav -> ae mdf -> gleb mdodd -> jake mike -> das mlaier -> benjsc mlaier -> dhartmei mlaier -> thompsa mlaier -> eri msmith -> cokane msmith -> jasone msmith -> scottl murray -> delphij mux -> cognet mux -> dumbbell netchild -> ariff njl -> marks njl -> 
philip njl -> rpaulo njl -> sepotvin nwhitehorn -> andreast nwhitehorn -> jhibbits obrien -> benno obrien -> groudier obrien -> gshapiro obrien -> kan obrien -> sam peter -> asmodai peter -> jayanth peter -> ps philip -> benl philip -> ed philip -> jls philip -> matteo philip -> uqs philip -> kp phk -> jkoshy phk -> mux pjd -> def pjd -> kib pjd -> lulf pjd -> oshogbo pjd -> smh pjd -> trociny rgrimes -> markm rmacklem -> jwd royger -> whu rpaulo -> avg rpaulo -> bschmidt rpaulo -> dim rpaulo -> jmmv rpaulo -> lidl rpaulo -> ngie rrs -> brucec rrs -> jchandra rrs -> tuexen rstone -> markj +rstone -> mjoras ru -> ceri ru -> cjc ru -> eik ru -> maxim ru -> sobomax rwatson -> adrian rwatson -> antoine rwatson -> bmah rwatson -> brueffer rwatson -> bz rwatson -> cperciva rwatson -> emaste rwatson -> gnn rwatson -> jh rwatson -> jonathan rwatson -> kensmith rwatson -> kmacy rwatson -> linimon rwatson -> rmacklem rwatson -> shafeeq rwatson -> tmm rwatson -> trasz rwatson -> trhodes rwatson -> wsalamon rodrigc -> araujo sam -> andre sam -> benjsc sam -> sephe sbruno -> hiren sbruno -> jimharris schweikh -> dds scottl -> achim scottl -> jimharris scottl -> pjd scottl -> sah scottl -> sbruno scottl -> slm scottl -> yongari sephe -> dexuan sheldonh -> dwmalone sheldonh -> iedowse shin -> ume simon -> benl sjg -> phil sjg -> stevek sos -> marcel stas -> ganbold theraven -> phil thompsa -> weongyo thompsa -> eri trasz -> jh trasz -> mjg ume -> jinmei ume -> suz ume -> tshiozak vangyzen -> badger vangyzen -> dab wes -> scf wkoszek -> jceel wollman -> gad zml -> mdf zml -> zack } Index: projects/clang500-import/sys/amd64/include/efi.h =================================================================== --- projects/clang500-import/sys/amd64/include/efi.h (revision 321306) +++ projects/clang500-import/sys/amd64/include/efi.h (revision 321307) @@ -1,59 +1,65 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __AMD64_INCLUDE_EFI_H_ #define __AMD64_INCLUDE_EFI_H_ /* * XXX: from gcc 6.2 manual: * Note, the ms_abi attribute for Microsoft Windows 64-bit targets * currently requires the -maccumulate-outgoing-args option. 
+ * + * Avoid EFIABI_ATTR declarations for compilers that don't support it. + * GCC support began in version 4.4. */ +#if defined(__clang__) || defined(__GNUC__) && \ + (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4) #define EFIABI_ATTR __attribute__((ms_abi)) +#endif #ifdef _KERNEL struct uuid; struct efi_tm; int efi_get_table(struct uuid *uuid, void **ptr); int efi_get_time(struct efi_tm *tm); int efi_get_time_locked(struct efi_tm *tm); int efi_reset_system(void); int efi_set_time(struct efi_tm *tm); int efi_set_time_locked(struct efi_tm *tm); int efi_var_get(uint16_t *name, struct uuid *vendor, uint32_t *attrib, size_t *datasize, void *data); int efi_var_nextname(size_t *namesize, uint16_t *name, struct uuid *vendor); int efi_var_set(uint16_t *name, struct uuid *vendor, uint32_t attrib, size_t datasize, void *data); #endif #endif /* __AMD64_INCLUDE_EFI_H_ */ Index: projects/clang500-import/sys/boot/efi/boot1/zfs_module.c =================================================================== --- projects/clang500-import/sys/boot/efi/boot1/zfs_module.c (revision 321306) +++ projects/clang500-import/sys/boot/efi/boot1/zfs_module.c (revision 321307) @@ -1,243 +1,243 @@ /*- * Copyright (c) 2015 Eric McCorkle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include "boot_module.h" #include "libzfs.h" #include "zfsimpl.c" static dev_info_t *devices; uint64_t ldi_get_size(void *priv) { dev_info_t *devinfo = priv; return (devinfo->dev->Media->BlockSize * (devinfo->dev->Media->LastBlock + 1)); } static int vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) { dev_info_t *devinfo; uint64_t lba; size_t size, remainder, rb_size, blksz; char *bouncebuf = NULL, *rb_buf; EFI_STATUS status; devinfo = (dev_info_t *)priv; lba = off / devinfo->dev->Media->BlockSize; remainder = off % devinfo->dev->Media->BlockSize; rb_buf = buf; rb_size = bytes; /* * If we have remainder from off, we need to add remainder part. * Since buffer must be multiple of the BlockSize, round it all up. 
*/ size = roundup2(bytes + remainder, devinfo->dev->Media->BlockSize); blksz = size; if (remainder != 0 || size != bytes) { rb_size = devinfo->dev->Media->BlockSize; bouncebuf = malloc(rb_size); if (bouncebuf == NULL) { printf("vdev_read: out of memory\n"); return (-1); } rb_buf = bouncebuf; blksz = rb_size - remainder; } while (bytes > 0) { status = devinfo->dev->ReadBlocks(devinfo->dev, devinfo->dev->Media->MediaId, lba, rb_size, rb_buf); if (EFI_ERROR(status)) goto error; if (bytes < blksz) blksz = bytes; if (bouncebuf != NULL) memcpy(buf, rb_buf + remainder, blksz); buf = (void *)((uintptr_t)buf + blksz); bytes -= blksz; lba++; remainder = 0; blksz = rb_size; } free(bouncebuf); return (0); error: free(bouncebuf); DPRINTF("vdev_read: failed dev: %p, id: %u, lba: %ju, size: %zu," " rb_size: %zu, status: %lu\n", devinfo->dev, devinfo->dev->Media->MediaId, (uintmax_t)lba, bytes, rb_size, EFI_ERROR_CODE(status)); return (-1); } static EFI_STATUS probe(dev_info_t *dev) { spa_t *spa; dev_info_t *tdev; EFI_STATUS status; /* ZFS consumes the dev on success so we need a copy. */ if ((status = bs->AllocatePool(EfiLoaderData, sizeof(*dev), (void**)&tdev)) != EFI_SUCCESS) { DPRINTF("Failed to allocate tdev (%lu)\n", EFI_ERROR_CODE(status)); return (status); } memcpy(tdev, dev, sizeof(*dev)); if (vdev_probe(vdev_read, tdev, &spa) != 0) { (void)bs->FreePool(tdev); return (EFI_UNSUPPORTED); } dev->devdata = spa; add_device(&devices, dev); return (EFI_SUCCESS); } static EFI_STATUS load(const char *filepath, dev_info_t *devinfo, void **bufp, size_t *bufsize) { spa_t *spa; struct zfsmount zfsmount; dnode_phys_t dn; struct stat st; int err; void *buf; EFI_STATUS status; spa = devinfo->devdata; DPRINTF("load: '%s' spa: '%s', devpath: %s\n", filepath, spa->spa_name, devpath_str(devinfo->devpath)); if ((err = zfs_spa_init(spa)) != 0) { DPRINTF("Failed to load pool '%s' (%d)\n", spa->spa_name, err); return (EFI_NOT_FOUND); } if ((err = zfs_mount(spa, 0, &zfsmount)) != 0) { DPRINTF("Failed to mount pool '%s' (%d)\n", spa->spa_name, err); return (EFI_NOT_FOUND); } if ((err = zfs_lookup(&zfsmount, filepath, &dn)) != 0) { if (err == ENOENT) { DPRINTF("Failed to find '%s' on pool '%s' (%d)\n", filepath, spa->spa_name, err); return (EFI_NOT_FOUND); } printf("Failed to lookup '%s' on pool '%s' (%d)\n", filepath, spa->spa_name, err); return (EFI_INVALID_PARAMETER); } if ((err = zfs_dnode_stat(spa, &dn, &st)) != 0) { printf("Failed to stat '%s' on pool '%s' (%d)\n", filepath, spa->spa_name, err); return (EFI_INVALID_PARAMETER); } if ((status = bs->AllocatePool(EfiLoaderData, (UINTN)st.st_size, &buf)) != EFI_SUCCESS) { - printf("Failed to allocate load buffer %zd for pool '%s' for '%s' " - "(%lu)\n", st.st_size, spa->spa_name, filepath, EFI_ERROR_CODE(status)); + printf("Failed to allocate load buffer %jd for pool '%s' for '%s' " + "(%lu)\n", (intmax_t)st.st_size, spa->spa_name, filepath, EFI_ERROR_CODE(status)); return (EFI_INVALID_PARAMETER); } if ((err = dnode_read(spa, &dn, 0, buf, st.st_size)) != 0) { printf("Failed to read node from %s (%d)\n", spa->spa_name, err); (void)bs->FreePool(buf); return (EFI_INVALID_PARAMETER); } *bufsize = st.st_size; *bufp = buf; return (EFI_SUCCESS); } static void status(void) { spa_t *spa; spa = STAILQ_FIRST(&zfs_pools); if (spa == NULL) { printf("%s found no pools\n", zfs_module.name); return; } printf("%s found the following pools:", zfs_module.name); STAILQ_FOREACH(spa, &zfs_pools, spa_link) printf(" %s", spa->spa_name); printf("\n"); } static void init(void) { zfs_init(); } 
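/*
 * The load() hunk above replaces %zd with %jd plus an (intmax_t) cast:
 * %zd expects a (s)size_t, but st_size is an off_t whose width is not
 * tied to size_t, so casting to the widest signed type and printing
 * with %jd is the portable C99 idiom. A minimal standalone sketch
 * (print_size and the sample value are illustrative only, not part of
 * this file):
 */
#include <stdio.h>
#include <stdint.h>
#include <sys/types.h>

static void
print_size(off_t sz)
{
	/* intmax_t is the widest signed integer type, so %jd always matches. */
	printf("size = %jd\n", (intmax_t)sz);
}

int
main(void)
{
	print_size((off_t)123456789);
	return (0);
}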
static dev_info_t * _devices(void) { return (devices); } const boot_module_t zfs_module = { .name = "ZFS", .init = init, .probe = probe, .load = load, .status = status, .devices = _devices }; Index: projects/clang500-import/sys/conf/kern.pre.mk =================================================================== --- projects/clang500-import/sys/conf/kern.pre.mk (revision 321306) +++ projects/clang500-import/sys/conf/kern.pre.mk (revision 321307) @@ -1,264 +1,266 @@ # $FreeBSD$ # Part of a unified Makefile for building kernels. This part contains all # of the definitions that need to be before %BEFORE_DEPEND. # Allow user to configure things that only affect src tree builds. # Note: This is duplicated from src.sys.mk to ensure that we include # /etc/src.conf when building the kernel. Kernels can be built without # the rest of /usr/src, but they still always process SRCCONF even though # the normal mechanisms to prevent that (compiling out of tree) won't # work. To ensure they do work, we have to duplicate these few lines here. SRCCONF?= /etc/src.conf .if (exists(${SRCCONF}) || ${SRCCONF} != "/etc/src.conf") && !target(_srcconf_included_) .include "${SRCCONF}" _srcconf_included_: .endif .include .include .include "kern.opts.mk" # The kernel build always occurs in the object directory which is .CURDIR. .if ${.MAKE.MODE:Unormal:Mmeta} .MAKE.MODE+= curdirOk=yes .endif .if defined(NO_OBJ) || ${MK_AUTO_OBJ} == "yes" NO_OBJ= t NO_MODULES_OBJ= t .endif .if !defined(NO_OBJ) _obj= obj .endif # Can be overridden by makeoptions or /etc/make.conf KERNEL_KO?= kernel KERNEL?= kernel KODIR?= /boot/${KERNEL} LDSCRIPT_NAME?= ldscript.$M LDSCRIPT?= $S/conf/${LDSCRIPT_NAME} M= ${MACHINE} AWK?= awk CP?= cp LINT?= lint NM?= nm OBJCOPY?= objcopy SIZE?= size .if defined(DEBUG) _MINUS_O= -O CTFFLAGS+= -g .else .if ${MACHINE_CPUARCH} == "powerpc" _MINUS_O= -O # gcc miscompiles some code at -O2 .else _MINUS_O= -O2 .endif .endif .if ${MACHINE_CPUARCH} == "amd64" .if ${COMPILER_TYPE} == "clang" COPTFLAGS?=-O2 -pipe .else COPTFLAGS?=-O2 -frename-registers -pipe .endif .else COPTFLAGS?=${_MINUS_O} -pipe .endif .if !empty(COPTFLAGS:M-O[23s]) && empty(COPTFLAGS:M-fno-strict-aliasing) COPTFLAGS+= -fno-strict-aliasing .endif .if !defined(NO_CPU_COPTFLAGS) COPTFLAGS+= ${_CPUCFLAGS} .endif NOSTDINC= -nostdinc INCLUDES= ${NOSTDINC} ${INCLMAGIC} -I. -I$S CFLAGS= ${COPTFLAGS} ${DEBUG} CFLAGS+= ${INCLUDES} -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h CFLAGS_PARAM_INLINE_UNIT_GROWTH?=100 CFLAGS_PARAM_LARGE_FUNCTION_GROWTH?=1000 .if ${MACHINE_CPUARCH} == "mips" CFLAGS_ARCH_PARAMS?=--param max-inline-insns-single=1000 -DMACHINE_ARCH='"${MACHINE_ARCH}"' .endif CFLAGS.gcc+= -fno-common -fms-extensions -finline-limit=${INLINE_LIMIT} CFLAGS.gcc+= --param inline-unit-growth=${CFLAGS_PARAM_INLINE_UNIT_GROWTH} CFLAGS.gcc+= --param large-function-growth=${CFLAGS_PARAM_LARGE_FUNCTION_GROWTH} .if defined(CFLAGS_ARCH_PARAMS) CFLAGS.gcc+=${CFLAGS_ARCH_PARAMS} .endif WERROR?= -Werror # XXX LOCORE means "don't declare C stuff" not "for locore.s". ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${CFLAGS} ${ASM_CFLAGS.${.IMPSRC:T}} .if defined(PROFLEVEL) && ${PROFLEVEL} >= 1 CFLAGS+= -DGPROF CFLAGS.gcc+= -falign-functions=16 .if ${PROFLEVEL} >= 2 CFLAGS+= -DGPROF4 -DGUPROF PROF= -pg .if ${COMPILER_TYPE} == "gcc" PROF+= -mprofiler-epilogue .endif .else PROF= -pg .endif .endif DEFINED_PROF= ${PROF} # Put configuration-specific C flags last (except for ${PROF}) so that they # can override the others.
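# For example, a kernel config line such as
#	makeoptions	CONF_CFLAGS=-fno-builtin
# is emitted by config(8) as a CONF_CFLAGS assignment and is appended
# below, after COPTFLAGS, so it wins when flags conflict.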
CFLAGS+= ${CONF_CFLAGS} .if defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mbuild-id} LDFLAGS+= -Wl,--build-id=sha1 .endif # Optional linting. This can be overridden in /etc/make.conf. LINTFLAGS= ${LINTOBJKERNFLAGS} NORMAL_C= ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} NORMAL_S= ${CC:N${CCACHE_BIN}} -c ${ASM_CFLAGS} ${WERROR} ${.IMPSRC} PROFILE_C= ${CC} -c ${CFLAGS} ${WERROR} ${.IMPSRC} NORMAL_C_NOWERROR= ${CC} -c ${CFLAGS} ${PROF} ${.IMPSRC} NORMAL_M= ${AWK} -f $S/tools/makeobjops.awk ${.IMPSRC} -c ; \ ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.PREFIX}.c NORMAL_FW= uudecode -o ${.TARGET} ${.ALLSRC} NORMAL_FWO= ${LD} -b binary --no-warn-mismatch -d -warn-common -r \ -m ${LD_EMULATION} -o ${.TARGET} ${.ALLSRC:M*.fw} # Common for dtrace / zfs CDDL_CFLAGS= -DFREEBSD_NAMECACHE -nostdinc -I$S/cddl/compat/opensolaris -I$S/cddl/contrib/opensolaris/uts/common -I$S -I$S/cddl/contrib/opensolaris/common ${CFLAGS} -Wno-unknown-pragmas -Wno-missing-prototypes -Wno-undef -Wno-strict-prototypes -Wno-cast-qual -Wno-parentheses -Wno-redundant-decls -Wno-missing-braces -Wno-uninitialized -Wno-unused -Wno-inline -Wno-switch -Wno-pointer-arith -Wno-unknown-pragmas CDDL_CFLAGS+= -include $S/cddl/compat/opensolaris/sys/debug_compat.h CDDL_C= ${CC} -c ${CDDL_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} # Special flags for managing the compat compiles for ZFS ZFS_CFLAGS= -DBUILDING_ZFS -I$S/cddl/contrib/opensolaris/uts/common/fs/zfs -I$S/cddl/contrib/opensolaris/uts/common/zmod -I$S/cddl/contrib/opensolaris/common/zfs ${CDDL_CFLAGS} ZFS_ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${ZFS_CFLAGS} ZFS_C= ${CC} -c ${ZFS_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} ZFS_S= ${CC} -c ${ZFS_ASM_CFLAGS} ${WERROR} ${.IMPSRC} # Special flags for managing the compat compiles for DTrace DTRACE_CFLAGS= -DBUILDING_DTRACE ${CDDL_CFLAGS} -I$S/cddl/dev/dtrace -I$S/cddl/dev/dtrace/${MACHINE_CPUARCH} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" DTRACE_CFLAGS+= -I$S/cddl/contrib/opensolaris/uts/intel -I$S/cddl/dev/dtrace/x86 .endif DTRACE_CFLAGS+= -I$S/cddl/contrib/opensolaris/common/util -I$S -DDIS_MEM -DSMP DTRACE_ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${DTRACE_CFLAGS} DTRACE_C= ${CC} -c ${DTRACE_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} DTRACE_S= ${CC} -c ${DTRACE_ASM_CFLAGS} ${WERROR} ${.IMPSRC} # Special flags for managing the compat compiles for DTrace/FBT FBT_CFLAGS= -DBUILDING_DTRACE -nostdinc -I$S/cddl/dev/fbt/${MACHINE_CPUARCH} -I$S/cddl/dev/fbt -I$S/cddl/compat/opensolaris -I$S/cddl/contrib/opensolaris/uts/common -I$S ${CDDL_CFLAGS} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" FBT_CFLAGS+= -I$S/cddl/dev/fbt/x86 .endif FBT_C= ${CC} -c ${FBT_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} .if ${MK_CTF} != "no" NORMAL_CTFCONVERT= ${CTFCONVERT} ${CTFFLAGS} ${.TARGET} .elif ${MAKE_VERSION} >= 5201111300 NORMAL_CTFCONVERT= .else NORMAL_CTFCONVERT= @: .endif NORMAL_LINT= ${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC} # Linux Kernel Programming Interface C-flags LINUXKPI_INCLUDES= -I$S/compat/linuxkpi/common/include LINUXKPI_C= ${NORMAL_C} ${LINUXKPI_INCLUDES} # Infiniband C flags. Correct include paths and omit errors that linux # does not honor. 
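# (In OFEDCFLAGS below, ${CFLAGS:N-I*} keeps every word of CFLAGS except
# the -I options while ${CFLAGS:M-I*} keeps only the -I options, so the
# OFED include directories are searched ahead of the stock kernel ones.)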
OFEDINCLUDES= -I$S/ofed/include ${LINUXKPI_INCLUDES} OFEDNOERR= -Wno-cast-qual -Wno-pointer-arith OFEDCFLAGS= ${CFLAGS:N-I*} ${OFEDINCLUDES} ${CFLAGS:M-I*} ${OFEDNOERR} OFED_C_NOIMP= ${CC} -c -o ${.TARGET} ${OFEDCFLAGS} ${WERROR} ${PROF} OFED_C= ${OFED_C_NOIMP} ${.IMPSRC} GEN_CFILES= $S/$M/$M/genassym.c ${MFILES:T:S/.m$/.c/} SYSTEM_CFILES= config.c env.c hints.c vnode_if.c SYSTEM_DEP= Makefile ${SYSTEM_OBJS} SYSTEM_OBJS= locore.o ${MDOBJS} ${OBJS} SYSTEM_OBJS+= ${SYSTEM_CFILES:.c=.o} SYSTEM_OBJS+= hack.pico MD_ROOT_SIZE_CONFIGURED!= grep MD_ROOT_SIZE opt_md.h || true ; echo .if ${MFS_IMAGE:Uno} != "no" .if empty(MD_ROOT_SIZE_CONFIGURED) SYSTEM_OBJS+= embedfs_${MFS_IMAGE:T:R}.o .endif .endif SYSTEM_LD= @${LD} -m ${LD_EMULATION} -Bdynamic -T ${LDSCRIPT} ${_LDFLAGS} \ --no-warn-mismatch --warn-common --export-dynamic \ --dynamic-linker /red/herring \ -o ${.TARGET} -X ${SYSTEM_OBJS} vers.o SYSTEM_LD_TAIL= @${OBJCOPY} --strip-symbol gcc2_compiled. ${.TARGET} ; \ ${SIZE} ${.TARGET} ; chmod 755 ${.TARGET} SYSTEM_DEP+= ${LDSCRIPT} # Calculate path for .m files early, if needed. .if !defined(NO_MODULES) && !defined(__MPATH) && !make(install) __MPATH!=find ${S:tA}/ -name \*_if.m .endif # MKMODULESENV is set here so that port makefiles can augment # them. MKMODULESENV+= MAKEOBJDIRPREFIX=${.OBJDIR}/modules KMODDIR=${KODIR} MKMODULESENV+= MACHINE_CPUARCH=${MACHINE_CPUARCH} MKMODULESENV+= MACHINE=${MACHINE} MACHINE_ARCH=${MACHINE_ARCH} MKMODULESENV+= MODULES_EXTRA="${MODULES_EXTRA}" WITHOUT_MODULES="${WITHOUT_MODULES}" +MKMODULESENV+= ARCH_FLAGS="${ARCH_FLAGS}" .if (${KERN_IDENT} == LINT) MKMODULESENV+= ALL_MODULES=LINT .endif .if defined(MODULES_OVERRIDE) MKMODULESENV+= MODULES_OVERRIDE="${MODULES_OVERRIDE}" .endif .if defined(DEBUG) MKMODULESENV+= DEBUG_FLAGS="${DEBUG}" .endif .if !defined(NO_MODULES) MKMODULESENV+= __MPATH="${__MPATH}" .endif # Architecture and output format arguments for objdump to convert image to # object file .if ${MFS_IMAGE:Uno} != "no" .if empty(MD_ROOT_SIZE_CONFIGURED) .if !defined(EMBEDFS_FORMAT.${MACHINE_ARCH}) EMBEDFS_FORMAT.${MACHINE_ARCH}!= awk -F'"' '/OUTPUT_FORMAT/ {print $$2}' ${LDSCRIPT} .if empty(EMBEDFS_FORMAT.${MACHINE_ARCH}) .undef EMBEDFS_FORMAT.${MACHINE_ARCH} .endif .endif .if !defined(EMBEDFS_ARCH.${MACHINE_ARCH}) EMBEDFS_ARCH.${MACHINE_ARCH}!= sed -n '/OUTPUT_ARCH/s/.*(\(.*\)).*/\1/p' ${LDSCRIPT} .if empty(EMBEDFS_ARCH.${MACHINE_ARCH}) .undef EMBEDFS_ARCH.${MACHINE_ARCH} .endif .endif EMBEDFS_FORMAT.arm?= elf32-littlearm EMBEDFS_FORMAT.armv6?= elf32-littlearm +EMBEDFS_FORMAT.aarch64?= elf64-littleaarch64 EMBEDFS_FORMAT.mips?= elf32-tradbigmips EMBEDFS_FORMAT.mipsel?= elf32-tradlittlemips EMBEDFS_FORMAT.mips64?= elf64-tradbigmips EMBEDFS_FORMAT.mips64el?= elf64-tradlittlemips EMBEDFS_FORMAT.riscv?= elf64-littleriscv .endif .endif # Detect kernel config options that force stack frames to be turned on. DDB_ENABLED!= grep DDB opt_ddb.h || true ; echo DTR_ENABLED!= grep KDTRACE_FRAME opt_kdtrace.h || true ; echo HWPMC_ENABLED!= grep HWPMC opt_hwpmc_hooks.h || true ; echo Index: projects/clang500-import/sys/conf/kmod.mk =================================================================== --- projects/clang500-import/sys/conf/kmod.mk (revision 321306) +++ projects/clang500-import/sys/conf/kmod.mk (revision 321307) @@ -1,485 +1,485 @@ # From: @(#)bsd.prog.mk 5.26 (Berkeley) 6/25/91 # $FreeBSD$ # # The include file handles building and installing loadable # kernel modules. 
# # # +++ variables +++ # # CLEANFILES Additional files to remove for the clean and cleandir targets. # # EXPORT_SYMS A list of symbols that should be exported from the module, # or the name of a file containing a list of symbols, or YES # to export all symbols. If not defined, no symbols are # exported. # # KMOD The name of the kernel module to build. # # KMODDIR Base path for kernel modules (see kld(4)). [/boot/kernel] # # KMODOWN Module file owner. [${BINOWN}] # # KMODGRP Module file group. [${BINGRP}] # # KMODMODE Module file mode. [${BINMODE}] # # KMODLOAD Command to load a kernel module [/sbin/kldload] # # KMODUNLOAD Command to unload a kernel module [/sbin/kldunload] # # KMODISLOADED Command to check whether a kernel module is # loaded [/sbin/kldstat -q -n] # # PROG The name of the kernel module to build. # If not supplied, ${KMOD}.ko is used. # # SRCS List of source files. # # FIRMWS List of firmware images in format filename:shortname:version # # FIRMWARE_LICENSE # Set to the name of the license the user has to agree on in # order to use this firmware. See /usr/share/doc/legal # # DESTDIR The tree where the module gets installed. [not set] # # +++ targets +++ # # install: # install the kernel module; if the Makefile # does not itself define the target install, the targets # beforeinstall and afterinstall may also be used to cause # actions immediately before and after the install target # is executed. # # load: # Load a module. # # unload: # Unload a module. # # reload: # Unload if loaded, then load. # AWK?= awk KMODLOAD?= /sbin/kldload KMODUNLOAD?= /sbin/kldunload KMODISLOADED?= /sbin/kldstat -q -n OBJCOPY?= objcopy .include # Grab all the options for a kernel build. For backwards compat, we need to # do this after bsd.own.mk. .include "kern.opts.mk" .include .include "config.mk" # Search for kernel source tree in standard places. .for _dir in ${.CURDIR}/../.. ${.CURDIR}/../../.. /sys /usr/src/sys .if !defined(SYSDIR) && exists(${_dir}/kern/) SYSDIR= ${_dir:tA} .endif .endfor .if !defined(SYSDIR) || !exists(${SYSDIR}/kern/) .error "can't find kernel source tree" .endif .SUFFIXES: .out .o .c .cc .cxx .C .y .l .s .S .m # amd64 and mips use direct linking for kmod, all others use shared binaries .if ${MACHINE_CPUARCH} != amd64 && ${MACHINE_CPUARCH} != mips __KLD_SHARED=yes .else __KLD_SHARED=no .endif .if !empty(CFLAGS:M-O[23s]) && empty(CFLAGS:M-fno-strict-aliasing) CFLAGS+= -fno-strict-aliasing .endif WERROR?= -Werror CFLAGS+= ${WERROR} CFLAGS+= -D_KERNEL CFLAGS+= -DKLD_MODULE # Don't use any standard or source-relative include directories. NOSTDINC= -nostdinc CFLAGS:= ${CFLAGS:N-I*} ${NOSTDINC} ${INCLMAGIC} ${CFLAGS:M-I*} .if defined(KERNBUILDDIR) CFLAGS+= -DHAVE_KERNEL_OPTION_HEADERS -include ${KERNBUILDDIR}/opt_global.h .endif # Add -I paths for system headers. Individual module makefiles don't # need any -I paths for this. Similar defaults for .PATH can't be # set because there are no standard paths for non-headers. CFLAGS+= -I. -I${SYSDIR} CFLAGS.gcc+= -finline-limit=${INLINE_LIMIT} CFLAGS.gcc+= -fms-extensions CFLAGS.gcc+= --param inline-unit-growth=100 CFLAGS.gcc+= --param large-function-growth=1000 # Disallow common variables, and if we end up with commons from # somewhere unexpected, allocate storage for them in the module itself. 
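# (Without -fno-common a tentative definition such as "int foo;" present
# in two objects would be silently merged by the linker; with it, each
# object allocates real storage and the duplicate becomes a hard
# multiple-definition error at link time.)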
CFLAGS+= -fno-common LDFLAGS+= -d -warn-common .if defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mbuild-id} LDFLAGS+= -Wl,--build-id=sha1 .endif CFLAGS+= ${DEBUG_FLAGS} .if ${MACHINE_CPUARCH} == amd64 CFLAGS+= -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv" CFLAGS+= -fPIC .endif # Temporary workaround for PR 196407, which contains the fascinating details. # Don't allow clang to use fpu instructions or registers in kernel modules. .if ${MACHINE_CPUARCH} == arm .if ${COMPILER_VERSION} < 30800 CFLAGS.clang+= -mllvm -arm-use-movt=0 .else CFLAGS.clang+= -mno-movt .endif CFLAGS.clang+= -mfpu=none CFLAGS+= -funwind-tables .endif .if ${MACHINE_CPUARCH} == powerpc CFLAGS+= -mlongcall -fno-omit-frame-pointer .endif .if ${MACHINE_CPUARCH} == mips CFLAGS+= -G0 -fno-pic -mno-abicalls -mlong-calls .endif .if defined(DEBUG) || defined(DEBUG_FLAGS) CTFFLAGS+= -g .endif .if defined(FIRMWS) ${KMOD:S/$/.c/}: ${SYSDIR}/tools/fw_stub.awk ${AWK} -f ${SYSDIR}/tools/fw_stub.awk ${FIRMWS} -m${KMOD} -c${KMOD:S/$/.c/g} \ ${FIRMWARE_LICENSE:C/.+/-l/}${FIRMWARE_LICENSE} SRCS+= ${KMOD:S/$/.c/} CLEANFILES+= ${KMOD:S/$/.c/} .for _firmw in ${FIRMWS} ${_firmw:C/\:.*$/.fwo/:T}: ${_firmw:C/\:.*$//} @${ECHO} ${_firmw:C/\:.*$//} ${.ALLSRC:M*${_firmw:C/\:.*$//}} @if [ -e ${_firmw:C/\:.*$//} ]; then \ ${LD} -b binary --no-warn-mismatch ${_LDFLAGS} \ -m ${LD_EMULATION} -r -d \ -o ${.TARGET} ${_firmw:C/\:.*$//}; \ else \ ln -s ${.ALLSRC:M*${_firmw:C/\:.*$//}} ${_firmw:C/\:.*$//}; \ ${LD} -b binary --no-warn-mismatch ${_LDFLAGS} \ -m ${LD_EMULATION} -r -d \ -o ${.TARGET} ${_firmw:C/\:.*$//}; \ rm ${_firmw:C/\:.*$//}; \ fi OBJS+= ${_firmw:C/\:.*$/.fwo/:T} .endfor .endif # Conditionally include SRCS based on kernel config options. 
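# For example, a module Makefile may set
#	SRCS.INET= foo_inet.c
# (a hypothetical file), and foo_inet.c is then compiled only when INET
# appears in KERN_OPTS for this kernel.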
.for _o in ${KERN_OPTS} SRCS+=${SRCS.${_o}} .endfor OBJS+= ${SRCS:N*.h:R:S/$/.o/g} .if !defined(PROG) PROG= ${KMOD}.ko .endif .if !defined(DEBUG_FLAGS) FULLPROG= ${PROG} .else FULLPROG= ${PROG}.full ${PROG}: ${FULLPROG} ${PROG}.debug ${OBJCOPY} --strip-debug --add-gnu-debuglink=${PROG}.debug \ ${FULLPROG} ${.TARGET} ${PROG}.debug: ${FULLPROG} ${OBJCOPY} --only-keep-debug ${FULLPROG} ${.TARGET} .endif .if ${__KLD_SHARED} == yes ${FULLPROG}: ${KMOD}.kld ${LD} -m ${LD_EMULATION} -Bshareable -znotext ${_LDFLAGS} \ -o ${.TARGET} ${KMOD}.kld .if !defined(DEBUG_FLAGS) ${OBJCOPY} --strip-debug ${.TARGET} .endif .endif EXPORT_SYMS?= NO .if ${EXPORT_SYMS} != YES CLEANFILES+= export_syms .endif .if ${__KLD_SHARED} == yes ${KMOD}.kld: ${OBJS} .else ${FULLPROG}: ${OBJS} .endif ${LD} -m ${LD_EMULATION} ${_LDFLAGS} -r -d -o ${.TARGET} ${OBJS} .if ${MK_CTF} != "no" ${CTFMERGE} ${CTFFLAGS} -o ${.TARGET} ${OBJS} .endif .if defined(EXPORT_SYMS) .if ${EXPORT_SYMS} != YES .if ${EXPORT_SYMS} == NO :> export_syms .elif !exists(${.CURDIR}/${EXPORT_SYMS}) echo ${EXPORT_SYMS} > export_syms .else grep -v '^#' < ${EXPORT_SYMS} > export_syms .endif ${AWK} -f ${SYSDIR}/conf/kmod_syms.awk ${.TARGET} \ export_syms | xargs -J% ${OBJCOPY} % ${.TARGET} .endif .endif .if defined(PREFIX_SYMS) ${AWK} -v prefix=${PREFIX_SYMS} -f ${SYSDIR}/conf/kmod_syms_prefix.awk \ ${.TARGET} /dev/null | xargs -J% ${OBJCOPY} % ${.TARGET} .endif .if !defined(DEBUG_FLAGS) && ${__KLD_SHARED} == no ${OBJCOPY} --strip-debug ${.TARGET} .endif _ILINKS=machine .if ${MACHINE} != ${MACHINE_CPUARCH} && ${MACHINE} != "arm64" _ILINKS+=${MACHINE_CPUARCH} .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _ILINKS+=x86 .endif CLEANFILES+=${_ILINKS} all: ${PROG} beforedepend: ${_ILINKS} beforebuild: ${_ILINKS} # Ensure that the links exist without depending on it when it exists which # causes all the modules to be rebuilt when the directory pointed to changes. 
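# (The guard below adds each link to OBJS_DEPEND_GUESS only while it does
# not yet exist: the first build creates the link, later builds no longer
# depend on it, and retargeting the link does not force a rebuild of
# every module.)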
.for _link in ${_ILINKS} .if !exists(${.OBJDIR}/${_link}) OBJS_DEPEND_GUESS+= ${_link} .endif .endfor .NOPATH: ${_ILINKS} ${_ILINKS}: @case ${.TARGET} in \ machine) \ path=${SYSDIR}/${MACHINE}/include ;; \ *) \ path=${SYSDIR}/${.TARGET:T}/include ;; \ esac ; \ path=`(cd $$path && /bin/pwd)` ; \ ${ECHO} ${.TARGET:T} "->" $$path ; \ ln -fhs $$path ${.TARGET:T} CLEANFILES+= ${PROG} ${KMOD}.kld ${OBJS} .if defined(DEBUG_FLAGS) CLEANFILES+= ${FULLPROG} ${PROG}.debug .endif .if !target(install) _INSTALLFLAGS:= ${INSTALLFLAGS} .for ie in ${INSTALLFLAGS_EDIT} _INSTALLFLAGS:= ${_INSTALLFLAGS${ie}} .endfor .if !target(realinstall) KERN_DEBUGDIR?= ${DEBUGDIR} realinstall: _kmodinstall .ORDER: beforeinstall _kmodinstall _kmodinstall: .PHONY ${INSTALL} -T release -o ${KMODOWN} -g ${KMODGRP} -m ${KMODMODE} \ ${_INSTALLFLAGS} ${PROG} ${DESTDIR}${KMODDIR}/ .if defined(DEBUG_FLAGS) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no" ${INSTALL} -T debug -o ${KMODOWN} -g ${KMODGRP} -m ${KMODMODE} \ ${_INSTALLFLAGS} ${PROG}.debug ${DESTDIR}${KERN_DEBUGDIR}${KMODDIR}/ .endif .include .if !defined(NO_XREF) afterinstall: _kldxref .ORDER: realinstall _kldxref .ORDER: _installlinks _kldxref _kldxref: .PHONY @if type kldxref >/dev/null 2>&1; then \ ${ECHO} kldxref ${DESTDIR}${KMODDIR}; \ kldxref ${DESTDIR}${KMODDIR}; \ fi .endif .endif # !target(realinstall) .endif # !target(install) .if !target(load) load: ${PROG} .PHONY ${KMODLOAD} -v ${.OBJDIR}/${PROG} .endif .if !target(unload) unload: .PHONY if ${KMODISLOADED} ${PROG} ; then ${KMODUNLOAD} -v ${PROG} ; fi .endif .if !target(reload) reload: unload load .PHONY .endif .if defined(KERNBUILDDIR) .PATH: ${KERNBUILDDIR} CFLAGS+= -I${KERNBUILDDIR} .for _src in ${SRCS:Mopt_*.h} CLEANFILES+= ${_src} .if !target(${_src}) ${_src}: ln -sf ${KERNBUILDDIR}/${_src} ${.TARGET} .endif .endfor .else .for _src in ${SRCS:Mopt_*.h} CLEANFILES+= ${_src} .if !target(${_src}) ${_src}: :> ${.TARGET} .endif .endfor .endif # Respect configuration-specific C flags. -CFLAGS+= ${CONF_CFLAGS} +CFLAGS+= ${ARCH_FLAGS} ${CONF_CFLAGS} .if !empty(SRCS:Mvnode_if.c) CLEANFILES+= vnode_if.c vnode_if.c: ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src ${AWK} -f ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src -c .endif .if !empty(SRCS:Mvnode_if.h) CLEANFILES+= vnode_if.h vnode_if_newproto.h vnode_if_typedef.h vnode_if.h vnode_if_newproto.h vnode_if_typedef.h: ${SYSDIR}/tools/vnode_if.awk \ ${SYSDIR}/kern/vnode_if.src vnode_if.h: vnode_if_newproto.h vnode_if_typedef.h ${AWK} -f ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src -h vnode_if_newproto.h: ${AWK} -f ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src -p vnode_if_typedef.h: ${AWK} -f ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src -q .endif # Build _if.[ch] from _if.m, and clean them when we're done. 
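# For example, bus_if.m yields bus_if.c and bus_if.h through
# makeobjops.awk via the .m.c and .m.h suffix rules below.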
# __MPATH defined in config.mk _MFILES=${__MPATH:T:O} _MPATH=${__MPATH:H:O:u} .PATH.m: ${_MPATH} .for _i in ${SRCS:M*_if.[ch]} _MATCH=M${_i:R:S/$/.m/} _MATCHES=${_MFILES:${_MATCH}} .if !empty(_MATCHES) CLEANFILES+= ${_i} .endif .endfor # _i .m.c: ${SYSDIR}/tools/makeobjops.awk ${AWK} -f ${SYSDIR}/tools/makeobjops.awk ${.IMPSRC} -c .m.h: ${SYSDIR}/tools/makeobjops.awk ${AWK} -f ${SYSDIR}/tools/makeobjops.awk ${.IMPSRC} -h .for _i in mii pccard .if !empty(SRCS:M${_i}devs.h) CLEANFILES+= ${_i}devs.h ${_i}devs.h: ${SYSDIR}/tools/${_i}devs2h.awk ${SYSDIR}/dev/${_i}/${_i}devs ${AWK} -f ${SYSDIR}/tools/${_i}devs2h.awk ${SYSDIR}/dev/${_i}/${_i}devs .endif .endfor # _i .if !empty(SRCS:Mbhnd_nvram_map.h) CLEANFILES+= bhnd_nvram_map.h bhnd_nvram_map.h: ${SYSDIR}/dev/bhnd/tools/nvram_map_gen.awk \ ${SYSDIR}/dev/bhnd/tools/nvram_map_gen.sh \ ${SYSDIR}/dev/bhnd/nvram/nvram_map bhnd_nvram_map.h: sh ${SYSDIR}/dev/bhnd/tools/nvram_map_gen.sh \ ${SYSDIR}/dev/bhnd/nvram/nvram_map -h .endif .if !empty(SRCS:Mbhnd_nvram_map_data.h) CLEANFILES+= bhnd_nvram_map_data.h bhnd_nvram_map_data.h: ${SYSDIR}/dev/bhnd/tools/nvram_map_gen.awk \ ${SYSDIR}/dev/bhnd/tools/nvram_map_gen.sh \ ${SYSDIR}/dev/bhnd/nvram/nvram_map bhnd_nvram_map_data.h: sh ${SYSDIR}/dev/bhnd/tools/nvram_map_gen.sh \ ${SYSDIR}/dev/bhnd/nvram/nvram_map -d .endif .if !empty(SRCS:Musbdevs.h) CLEANFILES+= usbdevs.h usbdevs.h: ${SYSDIR}/tools/usbdevs2h.awk ${SYSDIR}/dev/usb/usbdevs ${AWK} -f ${SYSDIR}/tools/usbdevs2h.awk ${SYSDIR}/dev/usb/usbdevs -h .endif .if !empty(SRCS:Musbdevs_data.h) CLEANFILES+= usbdevs_data.h usbdevs_data.h: ${SYSDIR}/tools/usbdevs2h.awk ${SYSDIR}/dev/usb/usbdevs ${AWK} -f ${SYSDIR}/tools/usbdevs2h.awk ${SYSDIR}/dev/usb/usbdevs -d .endif .if !empty(SRCS:Macpi_quirks.h) CLEANFILES+= acpi_quirks.h acpi_quirks.h: ${SYSDIR}/tools/acpi_quirks2h.awk ${SYSDIR}/dev/acpica/acpi_quirks ${AWK} -f ${SYSDIR}/tools/acpi_quirks2h.awk ${SYSDIR}/dev/acpica/acpi_quirks .endif .if !empty(SRCS:Massym.s) || !empty(DPSRCS:Massym.s) CLEANFILES+= assym.s genassym.o DEPENDOBJS+= genassym.o assym.s: genassym.o .if defined(KERNBUILDDIR) genassym.o: opt_global.h .endif assym.s: ${SYSDIR}/kern/genassym.sh sh ${SYSDIR}/kern/genassym.sh genassym.o > ${.TARGET} genassym.o: ${SYSDIR}/${MACHINE}/${MACHINE}/genassym.c genassym.o: ${SRCS:Mopt_*.h} ${CC} -c ${CFLAGS:N-flto:N-fno-common} \ ${SYSDIR}/${MACHINE}/${MACHINE}/genassym.c .endif lint: ${SRCS} ${LINT} ${LINTKERNFLAGS} ${CFLAGS:M-[DILU]*} ${.ALLSRC:M*.c} .if defined(KERNBUILDDIR) ${OBJS}: opt_global.h .endif CLEANDEPENDFILES+= ${_ILINKS} # .depend needs include links so we remove them only together. cleanilinks: rm -f ${_ILINKS} OBJS_DEPEND_GUESS+= ${SRCS:M*.h} .include .include .include .include "kern.mk" Index: projects/clang500-import/sys/dev/e1000/if_em.c =================================================================== --- projects/clang500-import/sys/dev/e1000/if_em.c (revision 321306) +++ projects/clang500-import/sys/dev/e1000/if_em.c (revision 321307) @@ -1,4382 +1,4530 @@ /*- * Copyright (c) 2016 Matt Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #include #include #define em_mac_min e1000_82547 #define igb_mac_min e1000_82575 /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static pci_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - Legacy em*/ PVID(0x8086, E1000_DEV_ID_82540EM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EM_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82542, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_FIBER, 
"Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_PCIE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547GI, "Intel(R) PRO/1000 Network Connection"), /* Intel(R) PRO/1000 Network Connection - em */ PVID(0x8086, E1000_DEV_ID_82571EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E_IAMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82583V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_82567V_3, "Intel(R) 
PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_BM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574LA, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, "Intel(R) PRO/1000 Network Connection"), /* required last entry */ PVID_END }; static pci_vendor_info_t igb_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - igb */ PVID(0x8086, E1000_DEV_ID_82575EB_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, 
E1000_DEV_ID_82575EB_FIBER_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES_QUAD, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER_DUAL, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_QUAD_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SFP, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_IT, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_OEM1, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), - PVID(0x8086, E1000_DEV_ID_I210_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I211_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), /* required last entry */ 
PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ static void *em_register(device_t dev); static void *igb_register(device_t dev); static int em_if_attach_pre(if_ctx_t ctx); static int em_if_attach_post(if_ctx_t ctx); static int em_if_detach(if_ctx_t ctx); static int em_if_shutdown(if_ctx_t ctx); static int em_if_suspend(if_ctx_t ctx); static int em_if_resume(if_ctx_t ctx); static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void em_if_queues_free(if_ctx_t ctx); static uint64_t em_if_get_counter(if_ctx_t, ift_counter); -static void em_if_init(if_ctx_t ctx); -static void em_if_stop(if_ctx_t ctx); +static void em_if_init(if_ctx_t ctx); +static void em_if_stop(if_ctx_t ctx); static void em_if_media_status(if_ctx_t, struct ifmediareq *); static int em_if_media_change(if_ctx_t ctx); static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void em_if_timer(if_ctx_t ctx, uint16_t qid); static void em_if_vlan_register(if_ctx_t ctx, u16 vtag); static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void em_identify_hardware(if_ctx_t ctx); static int em_allocate_pci_resources(if_ctx_t ctx); static void em_free_pci_resources(if_ctx_t ctx); static void em_reset(if_ctx_t ctx); static int em_setup_interface(if_ctx_t ctx); static int em_setup_msix(if_ctx_t ctx); static void em_initialize_transmit_unit(if_ctx_t ctx); static void em_initialize_receive_unit(if_ctx_t ctx); static void em_if_enable_intr(if_ctx_t ctx); static void em_if_disable_intr(if_ctx_t ctx); static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static void em_if_multi_set(if_ctx_t ctx); static void em_if_update_admin_status(if_ctx_t ctx); static void em_if_debug(if_ctx_t ctx); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static int em_if_set_promisc(if_ctx_t ctx, int flags); static void em_setup_vlan_hw_support(struct adapter *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int em_get_rs(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(if_ctx_t ctx); static void em_enable_wakeup(if_ctx_t ctx); static int em_enable_phy_wakeup(struct adapter *); static void em_disable_aspm(struct adapter *); int em_intr(void *arg); static void em_disable_promisc(if_ctx_t ctx); /* MSIX handlers */ static int em_if_msix_intr_assign(if_ctx_t, int); static int em_msix_link(void *); static void em_handle_link(void *context); static void em_enable_vectors_82574(if_ctx_t); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static void 
em_if_led_func(if_ctx_t ctx, int onoff); static int em_get_regs(SYSCTL_HANDLER_ARGS); static void lem_smartspeed(struct adapter *adapter); static void igb_configure_queues(struct adapter *adapter); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_register, em_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_register, igb_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; static devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); MODULE_DEPEND(em, iflib, 1, 1, 1); static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); MODULE_DEPEND(igb, iflib, 1, 1, 1); static device_method_t em_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), - DEVMETHOD(ifdi_resume, em_if_resume), + DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), - DEVMETHOD(ifdi_intr_enable, em_if_enable_intr), + DEVMETHOD(ifdi_intr_enable, em_if_enable_intr), DEVMETHOD(ifdi_intr_disable, em_if_disable_intr), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, em_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, em_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD_END }; /* * note that if (adapter->msix_mem) is replaced by: * if (adapter->intr_type == IFLIB_INTR_MSIX) */ static driver_t em_if_driver = { "em_if", em_if_methods, sizeof(struct adapter) }; /********************************************************************* * Tunable default values. 
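 * EM_TICKS_TO_USECS and EM_USECS_TO_TICKS below convert between
 * microseconds and the adapter's 1.024 us interrupt-delay ticks, with
 * rounding; e.g. EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66.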
*********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif #define TSO_WORKAROUND 4 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* ** Tuneable Interrupt rate */ static int em_max_interrupt_rate = 8000; SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &em_max_interrupt_rate, 0, "Maximum interrupts per second"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; extern struct if_txrx igb_txrx; extern struct if_txrx em_txrx; extern struct if_txrx lem_txrx; static struct if_shared_ctx em_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE, .isc_tx_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {EM_MAX_RXD}, .isc_ntxd_max = {EM_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; if_shared_ctx_t em_sctx = &em_sctx_init; static struct if_shared_ctx igb_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE, .isc_tx_maxsegsize = 
PAGE_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {EM_MAX_RXD}, .isc_ntxd_max = {EM_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; if_shared_ctx_t igb_sctx = &igb_sctx_init; /***************************************************************** * * Dump Registers * ****************************************************************/ #define IGB_REGS_LEN 739 static int em_get_regs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct e1000_hw *hw = &adapter->hw; struct sbuf *sb; u32 *regs_buff = (u32 *)malloc(sizeof(u32) * IGB_REGS_LEN, M_DEVBUF, M_NOWAIT); int rc; memset(regs_buff, 0, IGB_REGS_LEN * sizeof(u32)); rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 32*400, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); /* General Registers */ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); regs_buff[3] = E1000_READ_REG(hw, E1000_ICR); regs_buff[4] = E1000_READ_REG(hw, E1000_RCTL); regs_buff[5] = E1000_READ_REG(hw, E1000_RDLEN(0)); regs_buff[6] = E1000_READ_REG(hw, E1000_RDH(0)); regs_buff[7] = E1000_READ_REG(hw, E1000_RDT(0)); regs_buff[8] = E1000_READ_REG(hw, E1000_RXDCTL(0)); regs_buff[9] = E1000_READ_REG(hw, E1000_RDBAL(0)); regs_buff[10] = E1000_READ_REG(hw, E1000_RDBAH(0)); regs_buff[11] = E1000_READ_REG(hw, E1000_TCTL); regs_buff[12] = E1000_READ_REG(hw, E1000_TDBAL(0)); regs_buff[13] = E1000_READ_REG(hw, E1000_TDBAH(0)); regs_buff[14] = E1000_READ_REG(hw, E1000_TDLEN(0)); regs_buff[15] = E1000_READ_REG(hw, E1000_TDH(0)); regs_buff[16] = E1000_READ_REG(hw, E1000_TDT(0)); regs_buff[17] = E1000_READ_REG(hw, E1000_TXDCTL(0)); regs_buff[18] = E1000_READ_REG(hw, E1000_TDFH); regs_buff[19] = E1000_READ_REG(hw, E1000_TDFT); regs_buff[20] = E1000_READ_REG(hw, E1000_TDFHS); regs_buff[21] = E1000_READ_REG(hw, E1000_TDFPC); sbuf_printf(sb, "General Registers\n"); sbuf_printf(sb, "\tCTRL\t %08x\n", regs_buff[0]); sbuf_printf(sb, "\tSTATUS\t %08x\n", regs_buff[1]); sbuf_printf(sb, "\tCTRL_EXIT\t %08x\n\n", regs_buff[2]); sbuf_printf(sb, "Interrupt Registers\n"); sbuf_printf(sb, "\tICR\t %08x\n\n", regs_buff[3]); sbuf_printf(sb, "RX Registers\n"); sbuf_printf(sb, "\tRCTL\t %08x\n", regs_buff[4]); sbuf_printf(sb, "\tRDLEN\t %08x\n", regs_buff[5]); sbuf_printf(sb, "\tRDH\t %08x\n", regs_buff[6]); sbuf_printf(sb, "\tRDT\t %08x\n", regs_buff[7]); sbuf_printf(sb, "\tRXDCTL\t %08x\n", regs_buff[8]); sbuf_printf(sb, "\tRDBAL\t %08x\n", regs_buff[9]); sbuf_printf(sb, "\tRDBAH\t %08x\n\n", regs_buff[10]); sbuf_printf(sb, "TX Registers\n"); sbuf_printf(sb, "\tTCTL\t %08x\n", regs_buff[11]); sbuf_printf(sb, "\tTDBAL\t %08x\n", regs_buff[12]); sbuf_printf(sb, "\tTDBAH\t %08x\n", regs_buff[13]); sbuf_printf(sb, "\tTDLEN\t %08x\n", regs_buff[14]); sbuf_printf(sb, "\tTDH\t %08x\n", regs_buff[15]); sbuf_printf(sb, "\tTDT\t %08x\n", regs_buff[16]); sbuf_printf(sb, "\tTXDCTL\t %08x\n", regs_buff[17]); sbuf_printf(sb, "\tTDFH\t %08x\n", regs_buff[18]); sbuf_printf(sb, "\tTDFT\t %08x\n", regs_buff[19]); sbuf_printf(sb, "\tTDFHS\t 
%08x\n", regs_buff[20]); sbuf_printf(sb, "\tTDFPC\t %08x\n\n", regs_buff[21]); #ifdef DUMP_DESCS { if_softc_ctx_t scctx = adapter->shared; struct rx_ring *rxr = &rx_que->rxr; struct tx_ring *txr = &tx_que->txr; int ntxd = scctx->isc_ntxd[0]; int nrxd = scctx->isc_nrxd[0]; int j; for (j = 0; j < nrxd; j++) { u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error); u32 length = le32toh(rxr->rx_base[j].wb.upper.length); sbuf_printf(sb, "\tReceive Descriptor Address %d: %08" PRIx64 " Error:%d Length:%d\n", j, rxr->rx_base[j].read.buffer_addr, staterr, length); } for (j = 0; j < min(ntxd, 256); j++) { unsigned int *ptr = (unsigned int *)&txr->tx_base[j]; sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x eop: %d DD=%d\n", j, ptr[0], ptr[1], ptr[2], ptr[3], buf->eop, buf->eop != -1 ? txr->tx_base[buf->eop].upper.fields.status & E1000_TXD_STAT_DD : 0); } } #endif rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } static void * em_register(device_t dev) { return (em_sctx); } static void * igb_register(device_t dev) { return (igb_sctx); } static int em_set_num_queues(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); int maxqueues; /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: case e1000_82575: maxqueues = 4; break; case e1000_i211: case e1000_82574: maxqueues = 2; break; default: maxqueues = 1; break; } return (maxqueues); } #define EM_CAPS \ IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \ IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; #define IGB_CAPS \ IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \ IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \ IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_TXCSUM_IPV6 | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU; /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. 
* * return 0 on success, positive on failure *********************************************************************/ static int em_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; if_softc_ctx_t scctx; device_t dev; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_if_attach_pre begin"); dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); if (resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter->ctx = ctx; adapter->dev = adapter->osdep.dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; adapter->tx_process_limit = scctx->isc_ntxd[0]; /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "reg_dump", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, em_get_regs, "A", "Dump Registers"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rs_dump", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, em_get_rs, "I", "Dump RS indexes"); /* Determine hardware and mac info */ em_identify_hardware(ctx); /* Set isc_msix_bar */ scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR); scctx->isc_tx_nsegments = EM_MAX_SCATTER; scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; if (adapter->hw.mac.type >= igb_mac_min) { int try_second_bar; scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(union e1000_adv_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_adv_rx_desc); scctx->isc_txrx = &igb_txrx; scctx->isc_capenable = IGB_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP \ | CSUM_IP6_UDP | CSUM_IP6_TCP; if (adapter->hw.mac.type != e1000_82575) scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. 
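** If the BAR at isc_msix_bar reads back as all zeroes it is not
** implemented on this part, so the probe below falls back to the
** next 32-bit BAR slot (hence the += 4).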
*/ try_second_bar = pci_read_config(dev, scctx->isc_msix_bar, 4); if (try_second_bar == 0) scctx->isc_msix_bar += 4; } else if (adapter->hw.mac.type >= em_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_rx_desc_extended); scctx->isc_txrx = &em_txrx; scctx->isc_capenable = EM_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; } else { scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(struct e1000_rx_desc); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; scctx->isc_txrx = &lem_txrx; scctx->isc_capenable = EM_CAPS; if (adapter->hw.mac.type < e1000_82543) scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; scctx->isc_msix_bar = 0; } /* Setup PCI resources */ if (em_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* ** In the new SPT device flash is not a ** separate BAR, rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. 
*/ else if (hw->mac.type == e1000_pch_spt) { adapter->osdep.flash_bus_space_tag = adapter->osdep.mem_bus_space_tag; adapter->osdep.flash_bus_space_handle = adapter->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, TRUE); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } em_setup_msix(ctx); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; if (adapter->hw.mac.type < em_mac_min) { e1000_init_script_state_82541(&adapter->hw, TRUE); e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); } /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. 
*/ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(ctx); iflib_set_mac(ctx, hw->mac.addr); return (0); err_late: em_release_hw_control(adapter); err_pci: em_free_pci_resources(ctx); free(adapter->mta, M_DEVBUF); return (error); } static int em_if_attach_post(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; int error = 0; /* Setup OS specific network interface */ error = em_setup_interface(ctx); if (error != 0) { goto err_late; } em_reset(ctx); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_if_update_admin_status(ctx); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); - + INIT_DEBUGOUT("em_if_attach_post: end"); return (error); err_late: em_release_hw_control(adapter); em_free_pci_resources(ctx); em_if_queues_free(ctx); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_detach(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_detach: begin"); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); em_free_pci_resources(ctx); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_if_shutdown(if_ctx_t ctx) { return em_if_suspend(ctx); } /* * Suspend/resume device methods. 
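 * These are dispatched through iflib (the IFDI_SUSPEND and
 * IFDI_RESUME driver methods) on system sleep and wake.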
*/ static int em_if_suspend(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(ctx); return (0); } static int em_if_resume(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_if_init(ctx); em_init_manageability(adapter); return(0); } static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { int max_frame_size; struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = iflib_get_softc_ctx(ctx); IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; case e1000_82542: case e1000_ich8lan: /* Adapters that do not support jumbo frames */ max_frame_size = ETHER_MAX_LEN; break; default: if (adapter->hw.mac.type >= igb_mac_min) max_frame_size = 9234; else /* lem */ max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { return (EINVAL); } scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return (0); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct em_tx_queue *tx_que; int i; INIT_DEBUGOUT("em_if_init: begin"); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. 
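 * (E1000_RAR_ENTRIES is 15 on this hardware, so the last entry
 * written below, E1000_RAR_ENTRIES - 1, is RAR[14].)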
*/ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(ctx); em_if_update_admin_status(ctx); for (i = 0, tx_que = adapter->tx_queues; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0; } /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Clear bad data from Rx FIFOs */ if (adapter->hw.mac.type >= igb_mac_min) e1000_rx_fifo_flush_82575(&adapter->hw); /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_initialize_transmit_unit(ctx); /* Setup Multicast table */ em_if_multi_set(ctx); /* * Figure out the desired mbuf * pool for doing jumbos */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; #ifndef CONTIGMALLOC_WORKS else adapter->rx_mbuf_sz = MJUMPAGESIZE; #else else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; #endif em_initialize_receive_unit(ctx); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_if_set_promisc(ctx, IFF_PROMISC); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } else if (adapter->intr_type == IFLIB_INTR_MSIX) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); /* Set Energy Efficient Ethernet */ if (adapter->hw.mac.type >= igb_mac_min && adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); else e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); } } /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ int em_intr(void *arg) { struct adapter *adapter = arg; if_ctx_t ctx = adapter->ctx; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (adapter->intr_type != IFLIB_INTR_LEGACY) goto skip_stray; /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. 
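 * That bit is E1000_ICR_INT_ASSERTED, tested just below; if it
 * reads back clear on those parts the interrupt is not ours.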
*/ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; skip_stray: /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return (FILTER_SCHEDULE_THREAD); } static void igb_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq) { E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxq->eims); } static void em_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq) { E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxq->eims); } static void igb_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq) { E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txq->eims); } static void em_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq) { E1000_WRITE_REG(&adapter->hw, E1000_IMS, txq->eims); } static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rxq = &adapter->rx_queues[rxqid]; if (adapter->hw.mac.type >= igb_mac_min) igb_rx_enable_queue(adapter, rxq); else em_rx_enable_queue(adapter, rxq); return (0); } static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *txq = &adapter->tx_queues[txqid]; if (adapter->hw.mac.type >= igb_mac_min) igb_tx_enable_queue(adapter, txq); else em_tx_enable_queue(adapter, txq); return (0); } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static int em_msix_que(void *arg) { struct em_rx_queue *que = arg; ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static int em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; - MPASS(adapter->hw.back != NULL); + MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { em_handle_link(adapter->ctx); } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); if (adapter->hw.mac.type >= igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); } /* * Because we must read the ICR for this interrupt * it may clear other causes using autoclear, for * this reason we simply create a soft interrupt * for all these vectors. */ if (reg_icr && adapter->hw.mac.type < igb_mac_min) { E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } return (FILTER_HANDLED); } static void em_handle_link(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); adapter->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. 
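 * (the SIOCGIFMEDIA ioctl path, which reaches the driver via iflib)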
 *
 **********************************************************************/
static void
em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
{
	struct adapter *adapter = iflib_get_softc(ctx);
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_if_media_status: begin");

	iflib_admin_intr_deferred(ctx);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
}

/*********************************************************************
 *
 * Media Ioctl callback
 *
 * This routine is called when the user changes speed/duplex using
 * the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_if_media_change(if_ctx_t ctx)
{
	struct adapter *adapter = iflib_get_softc(ctx);
	struct ifmedia *ifm = iflib_get_media(ctx);

	INIT_DEBUGOUT("em_if_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	em_if_init(ctx);

	return (0);
}

static int
em_if_set_promisc(if_ctx_t ctx, int flags)
{
	struct adapter *adapter = iflib_get_softc(ctx);
	u32 reg_rctl;

	em_disable_promisc(ctx);

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		/* Turn this on if you want to see bad packets */
		if (em_debug_sbp)
			reg_rctl |= E1000_RCTL_SBP;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
	return (0);
}

static void
em_disable_promisc(if_ctx_t ctx)
{
	struct adapter *adapter = iflib_get_softc(ctx);
	struct ifnet *ifp = iflib_get_ifp(ctx);
	u32 reg_rctl;
	int mcnt = 0;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	reg_rctl &= (~E1000_RCTL_UPE);
	if (if_getflags(ifp) & IFF_ALLMULTI)
		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
	else
		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
	/* Don't disable if in MAX groups */
	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
		reg_rctl &= (~E1000_RCTL_MPE);
	reg_rctl &= (~E1000_RCTL_SBP);
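	/*
	 * At this point UPE is clear, MPE is clear unless the interface
	 * is at the multicast table limit, and SBP is clear; the single
	 * RCTL write below commits all of it.
	 */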
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_if_multi_set(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. * **********************************************************************/ static void em_if_timer(if_ctx_t ctx, uint16_t qid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *que; int i; int trigger = 0; if (qid != 0) return; em_if_update_admin_status(ctx); em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if ((adapter->hw.mac.type == e1000_82571) && e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); if (adapter->hw.mac.type < em_mac_min) lem_smartspeed(adapter); /* Mask to use in the irq trigger */ if (adapter->intr_type == IFLIB_INTR_MSIX) { for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) trigger |= que->eims; } else { trigger = E1000_ICS_RXDMT0; } } static void em_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); - u32 link_check = 0; + u32 link_check, thstat, ctrl; + link_check = thstat = ctrl = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else { link_check = TRUE; } break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; - default: + /* VF device is type_unknown */ case e1000_media_type_unknown: + 
e1000_check_for_link(hw); + link_check = !hw->mac.get_link_status; + /* FALLTHROUGH */ + default: break; } + /* Check for thermal downshift or shutdown */ + if (hw->mac.type == e1000_i350) { + thstat = E1000_READ_REG(hw, E1000_THSTAT); + ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); + } + /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if_setbaudrate(ifp, adapter->link_speed * 1000000); + if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && + (thstat & E1000_THSTAT_LINK_THROTTLE)) + device_printf(dev, "Link: thermal downshift\n"); + /* Delay Link Up for Phy update */ + if (((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) && + (hw->phy.id == I210_I_PHY_ID)) + msec_delay(I210_LINK_DELAY); + /* Reset if the media type changed. */ + if ((hw->dev_spec._82575.media_changed) && + (adapter->hw.mac.type >= igb_mac_min)) { + hw->dev_spec._82575.media_changed = false; + adapter->flags |= IGB_MEDIA_RESET; + em_reset(ctx); + } iflib_link_state_change(ctx, LINK_STATE_UP, ifp->if_baudrate); printf("Link state changed to up\n"); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, ifp->if_baudrate); printf("link state changed to down\n"); } E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_stop: begin"); e1000_reset_hw(&adapter->hw); if (adapter->hw.mac.type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. 
* **********************************************************************/ static void em_identify_hardware(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); /* Make sure our PCI config space has the necessary stuff set */ adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid, val; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ if (adapter->hw.mac.type < em_mac_min && adapter->hw.mac.type > e1000_82543) { /* Figure our where our IO BAR is ? */ for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { adapter->io_rid = rid; break; } rid += 4; /* check for 64bit BAR */ if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) rid += 4; } if (rid >= PCIR_CIS) { device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); return (ENXIO); } adapter->hw.io_base = 0; adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->ioport); adapter->osdep.io_bus_space_handle = rman_get_bushandle(adapter->ioport); } adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers * **********************************************************************/ static int em_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rx_que = adapter->rx_queues; struct em_tx_queue *tx_que = adapter->tx_queues; int error, rid, i, vector = 0, rx_vectors; char buf[16]; /* First set up ring resources */ for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->rx_num_queues = i + 1; goto fail; } rx_que->msix = vector; /* * Set the bit to enable interrupt * in E1000_IMS -- bits 20 and 21 * are for RX0 and RX1, note this has * NOTHING to do with the MSIX vector */ if (adapter->hw.mac.type == e1000_82574) { rx_que->eims = 1 << (20 + i); adapter->ims |= rx_que->eims; adapter->ivars |= (8 | rx_que->msix) << (i * 4); } else if (adapter->hw.mac.type == 
e1000_82575) rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; else rx_que->eims = 1 << vector; } rx_vectors = vector; vector = 0; for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); tx_que->msix = (vector % adapter->tx_num_queues); /* * Set the bit to enable interrupt * in E1000_IMS -- bits 22 and 23 * are for TX0 and TX1, note this has * NOTHING to do with the MSIX vector */ if (adapter->hw.mac.type == e1000_82574) { tx_que->eims = 1 << (22 + i); adapter->ims |= tx_que->eims; adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (adapter->hw.mac.type == e1000_82575) { tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); } else { tx_que->eims = 1 << (i % adapter->tx_num_queues); } } /* Link interrupt */ rid = rx_vectors + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } adapter->linkvec = rx_vectors; if (adapter->hw.mac.type < igb_mac_min) { adapter->ivars |= (8 | rx_vectors) << 16; adapter->ivars |= 0x80000000; } return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *rx_que; struct em_tx_queue *tx_que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSIX */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= rx_que->eims; } /* TX entries */ for (int i = 0; i < 
adapter->tx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; rx_que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, rx_que->eims); adapter->que_mask |= rx_que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (em_max_interrupt_rate > 0) newitr = (4000000 / em_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; E1000_WRITE_REG(hw, E1000_EITR(rx_que->msix), newitr); } return; } static void em_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all msix queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); for (int i = 0; i < adapter->rx_num_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } /* First release all the interrupt resources */ if (adapter->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); adapter->memory = NULL; } if (adapter->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); adapter->flash = NULL; } if (adapter->ioport != NULL) bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid, adapter->ioport); } /* Setup MSI or MSI/X */ static int em_setup_msix(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_82574) { em_enable_vectors_82574(ctx); } return (0); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. 
* **********************************************************************/ static void lem_smartspeed(struct adapter *adapter) { u16 phy_tmp; if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || adapter->hw.mac.autoneg == 0 || (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; if (adapter->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); if(phy_tmp & CR_1000T_MS_ENABLE) { phy_tmp &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); adapter->smartspeed++; if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } } return; } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); phy_tmp |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } /* Restart process after EM_SMARTSPEED_MAX iterations */ if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) adapter->smartspeed = 0; } +/********************************************************************* + * + * Initialize the DMA Coalescing feature + * + **********************************************************************/ +static void +igb_init_dmac(struct adapter *adapter, u32 pba) +{ + device_t dev = adapter->dev; + struct e1000_hw *hw = &adapter->hw; + u32 dmac, reg = ~E1000_DMACR_DMAC_EN; + u16 hwm; + u16 max_frame_size; + if (hw->mac.type == e1000_i211) + return; + + max_frame_size = adapter->shared->isc_max_frame_size; + if (hw->mac.type > e1000_82580) { + + if (adapter->dmac == 0) { /* Disabling it */ + E1000_WRITE_REG(hw, E1000_DMACR, reg); + return; + } else + device_printf(dev, "DMA Coalescing enabled\n"); + + /* Set starting threshold */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); + + hwm = 64 * pba - max_frame_size / 16; + if (hwm < 64 * (pba - 6)) + hwm = 64 * (pba - 6); + reg = E1000_READ_REG(hw, E1000_FCRTC); + reg &= ~E1000_FCRTC_RTH_COAL_MASK; + reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) + & E1000_FCRTC_RTH_COAL_MASK); + E1000_WRITE_REG(hw, E1000_FCRTC, reg); + + + dmac = pba - max_frame_size / 512; + if (dmac < pba - 10) + dmac = pba - 10; + reg = E1000_READ_REG(hw, E1000_DMACR); + reg &= ~E1000_DMACR_DMACTHR_MASK; + reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) + & E1000_DMACR_DMACTHR_MASK); + + /* transition to L0x or L1 if available..*/ + reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); + + /* Check if status is 2.5Gb backplane connection + * before configuration of watchdog timer, which is + * in msec values in 12.8usec intervals + * watchdog timer= msec values in 32usec intervals + * for non 2.5Gb connection + */ + if (hw->mac.type == e1000_i354) { + int status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & 
E1000_STATUS_2P5_SKU_OVER))) + reg |= ((adapter->dmac * 5) >> 6); + else + reg |= (adapter->dmac >> 5); + } else { + reg |= (adapter->dmac >> 5); + } + + E1000_WRITE_REG(hw, E1000_DMACR, reg); + + E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); + + /* Set the interval before transition */ + reg = E1000_READ_REG(hw, E1000_DMCTLX); + if (hw->mac.type == e1000_i350) + reg |= IGB_DMCTLX_DCFLUSH_DIS; + /* + ** in 2.5Gb connection, TTLX unit is 0.4 usec + ** which is 0x4*2 = 0xA. But delay is still 4 usec + */ + if (hw->mac.type == e1000_i354) { + int status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= 0xA; + else + reg |= 0x4; + } else { + reg |= 0x4; + } + + E1000_WRITE_REG(hw, E1000_DMCTLX, reg); + + /* free space in tx packet buffer to wake from DMA coal */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - + (2 * max_frame_size)) >> 6); + + /* make low power state decision controlled by DMA coal */ + reg = E1000_READ_REG(hw, E1000_PCIEMISC); + reg &= ~E1000_PCIEMISC_LX_DECISION; + E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); + + } else if (hw->mac.type == e1000_82580) { + u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); + E1000_WRITE_REG(hw, E1000_PCIEMISC, + reg & ~E1000_PCIEMISC_LX_DECISION); + E1000_WRITE_REG(hw, E1000_DMACR, 0); + } +} + static void em_reset(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); + /* Let the firmware know the OS is in control */ + em_get_hw_control(adapter); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. 
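 *
 * For example, the 48K parts below use E1000_PBA_32K: 32K goes to
 * the receive buffer and the remaining 16K is left for transmit.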
*/ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: pba = E1000_PBA_26K; break; case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_READ_REG(hw, E1000_RXPBS); pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_vfadapt_i350: pba = E1000_READ_REG(hw, E1000_RXPBS); pba = e1000_rxpbs_adjust_82580(pba); break; case e1000_i210: case e1000_i211: pba = E1000_PBA_34K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } /* Special needs in case of Jumbo frames */ if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (adapter->hw.mac.max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = adapter->hw.mac.max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } if (hw->mac.type < igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); INIT_DEBUGOUT1("em_reset: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = (pba & 0xffff) << 10; hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? 
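   (set through the "fc" sysctl registered in em_if_attach_pre)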
*/ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_82575: case e1000_82576: /* 8-byte granularity */ hw->fc.low_water = hw->fc.high_water - 8; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* 16-byte granularity */ hw->fc.low_water = hw->fc.high_water - 16; - break; - case e1000_ich9lan: - case e1000_ich10lan: + break; + case e1000_ich9lan: + case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } - /* else fall thru */ + /* FALLTHROUGH */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* Issue a global reset */ e1000_reset_hw(hw); - E1000_WRITE_REG(hw, E1000_WUFC, 0); - em_disable_aspm(adapter); + if (adapter->hw.mac.type >= igb_mac_min) { + E1000_WRITE_REG(hw, E1000_WUC, 0); + } else { + E1000_WRITE_REG(hw, E1000_WUFC, 0); + em_disable_aspm(adapter); + } + if (adapter->flags & IGB_MEDIA_RESET) { + e1000_setup_init_funcs(hw, TRUE); + e1000_get_bus_info(hw); + adapter->flags &= ~IGB_MEDIA_RESET; + } /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } + if (adapter->hw.mac.type >= igb_mac_min) + igb_init_dmac(adapter, pba); E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); } #define RSSKEYLEN 10 static void em_initialize_rss_mapping(struct adapter *adapter) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; struct e1000_hw *hw = &adapter->hw; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % adapter->rx_num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) E1000_WRITE_REG(hw, E1000_RETA(i), reta); - E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | + E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } static void igb_initialize_rss_mapping(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? 
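   the 82575 keeps the queue index in the upper bits of each RETA
   byte, hence the shift of 6 applied below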
*/ if (adapter->hw.mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. * * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % adapter->rx_num_queues; #else queue_id = (i % adapter->rx_num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* * MRQC: Multiple Receive Queues Command * Set queuing to RSS control, number depends on the device. */ mrqc = E1000_MRQC_ENABLE_RSS_8Q; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) - E1000_WRITE_REG_ARRAY(hw, - E1000_RSSRK(0), i, rss_key[i]); + E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int em_setup_interface(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; uint64_t cap = 0; INIT_DEBUGOUT("em_setup_interface: begin"); /* TSO parameters */ if_sethwtsomax(ifp, IP_MAXPACKET); /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */ if_sethwtsomaxsegcount(ifp, EM_MAX_SCATTER - 5); if_sethwtsomaxsegsize(ifp, EM_TSO_SEG_SIZE); /* Single Queue */ if (adapter->tx_num_queues == 1) { if_setsendqlen(ifp, scctx->isc_ntxd[0] - 1); if_setsendqready(ifp); } cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4; cap |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; /* * Tell the upper layer(s) we * support full VLAN capability */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapabilitiesbit(ifp, cap, 0); /* * Don't turn this on by default, if vlans are * created on another pseudo device (eg. lagg) * then vlan events are not passed thru, breaking * operation, but with HW FILTER off it works. If * using vlans directly on the em driver you can * enable this and get full hardware tag filtering. 
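 * Assuming the stock capability names in ifconfig(8), something
 * like "ifconfig em0 vlanhwfilter" would enable it per interface.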
*/ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); /* Enable only WOL MAGIC by default */ if (adapter->wol) { if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, - IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); + IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); } else { if_setcapenablebit(ifp, 0, IFCAP_WOL_MAGIC | IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmedia_add(adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; int error = E1000_SUCCESS; struct em_tx_queue *que; int i, j; MPASS(adapter->tx_num_queues > 0); MPASS(adapter->tx_num_queues == ntxqsets); /* First allocate the top level queue structs */ if (!(adapter->tx_queues = (struct em_tx_queue *) malloc(sizeof(struct em_tx_queue) * adapter->tx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); return(ENOMEM); } for (i = 0, que = adapter->tx_queues; i < adapter->tx_num_queues; i++, que++) { /* Set up some basics */ struct tx_ring *txr = &que->txr; txr->adapter = que->adapter = adapter; que->me = txr->me = i; /* Allocate report status array */ if (!(txr->tx_rsq = (qidx_t *) malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "failed to allocate rs_idxs memory\n"); error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tx_base = (struct e1000_tx_desc *)vaddrs[i*ntxqs]; txr->tx_paddr = paddrs[i*ntxqs]; } - + device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", adapter->tx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); int error = E1000_SUCCESS; struct em_rx_queue *que; int i; MPASS(adapter->rx_num_queues > 0); MPASS(adapter->rx_num_queues == nrxqsets); /* First allocate the top level queue structs */ if (!(adapter->rx_queues = (struct em_rx_queue *) malloc(sizeof(struct em_rx_queue) * adapter->rx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); error = ENOMEM; - goto fail; + goto fail; } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { /* Set up some basics */ struct 
rx_ring *rxr = &que->rxr; rxr->adapter = que->adapter = adapter; rxr->que = que; que->me = rxr->me = i; /* get the virtual and physical address of the hardware queues */ rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; rxr->rx_paddr = paddrs[i*nrxqs]; } device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static void em_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; if (tx_que != NULL) { for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_DEVBUF); txr->tx_rsq = NULL; } free(adapter->tx_queues, M_DEVBUF); - adapter->tx_queues = NULL; + adapter->tx_queues = NULL; } if (rx_que != NULL) { free(adapter->rx_queues, M_DEVBUF); adapter->rx_queues = NULL; } em_release_hw_control(adapter); if (adapter->mta != NULL) { free(adapter->mta, M_DEVBUF); } } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que; struct tx_ring *txr; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { u64 bus_addr; caddr_t offp, endp; que = &adapter->tx_queues[i]; txr = &que->txr; bus_addr = txr->tx_paddr; /* Clear checksum offload context. 
*/ offp = (caddr_t)&txr->csum_flags; endp = (caddr_t)(txr + 1); bzero(offp, endp - offp); /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), scctx->isc_ntxd[0] * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->tx_num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. 
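 */

/*
 * A minimal sketch of the offp/endp idiom used above to clear the saved
 * checksum-offload context: zero a structure from one member through to its
 * end. The struct layout here is illustrative, not the driver's; the trick
 * only requires that all scratch fields follow csum_flags.
 */
#include <stdio.h>
#include <string.h>

struct demo_ring {
	int		me;		/* preserved */
	int		csum_flags;	/* scratch state starts here */
	int		csum_lhlen;
	unsigned int	csum_txd_upper;
};

int main(void)
{
	struct demo_ring r = { 1, 2, 3, 4 };
	char *offp = (char *)&r.csum_flags;
	char *endp = (char *)(&r + 1);

	memset(offp, 0, (size_t)(endp - offp));	/* userland bzero() */
	printf("me=%d csum_flags=%d csum_lhlen=%d upper=%u\n",
	    r.me, r.csum_flags, r.csum_lhlen, r.csum_txd_upper);
	return 0;	/* prints: me=1 csum_flags=0 csum_lhlen=0 upper=0 */
}

/*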
*/ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); reg = E1000_READ_REG(hw, E1000_TARC(0)); reg |= E1000_TARC0_CB_MULTIQ_3_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void em_initialize_receive_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *que; int i; u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; if (adapter->hw.mac.type >= e1000_82540) { E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); } E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* * When using MSIX interrupts we need to throttle * using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM && adapter->hw.mac.type >= e1000_82543) { if (adapter->tx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) { rxcsum |= E1000_RXCSUM_PCSD; if (hw->mac.type != e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; } else { if (adapter->hw.mac.type >= igb_mac_min) rxcsum |= E1000_RXCSUM_IPPCSE; else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; if (adapter->hw.mac.type > e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); if (adapter->rx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) igb_initialize_rss_mapping(adapter); else em_initialize_rss_mapping(adapter); } /* * XXX TEMPORARY WORKAROUND: on some systems with 82573 * long latencies are observed, like Lenovo X60. This * change eliminates the problem, but since having positive * values in RDTR is a known source of problems on other * platforms another solution is being sought. 
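 */

/*
 * Worked example of the DEFAULT_ITR formula quoted above: the ITR register
 * counts 256 ns units between interrupts, so a target of 8000 interrupts/s
 * (an assumed MAX_INTS_PER_SEC, matching the IGB_INTS_PER_SEC tunable added
 * to if_em.h in this commit) gives 1e9 / (8000 * 256) = 488.
 */
#include <stdio.h>

int main(void)
{
	unsigned int max_ints_per_sec = 8000;	/* assumed tunable */
	unsigned int itr = 1000000000u / (max_ints_per_sec * 256u);

	printf("ITR = %u (0x%x)\n", itr, itr);	/* 488, i.e. ~125 us spacing */
	return 0;
}

/*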
*/ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rx_paddr; #if 0 u32 rdt = adapter->rx_num_queues -1; /* default */ -#endif +#endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); E1000_WRITE_REG(hw, E1000_RDT(i), 0); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->rx_num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= igb_mac_min) { u32 psize, srrctl = 0; if (if_getmtu(ifp) > ETHERMTU) { /* Set maximum packet len */ if (adapter->rx_mbuf_sz <= 4096) { srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; } else if (adapter->rx_mbuf_sz > 4096) { srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } psize = scctx->isc_max_frame_size; /* are we on a vlan? */ if (ifp->if_vlantrunk != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } - + /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((adapter->rx_num_queues > 1) && (adapter->fc == e1000_fc_none || adapter->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 bus_addr = rxr->rx_paddr; u32 rxdctl; #ifdef notyet /* Configure for header split? 
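 */

/*
 * Illustration of the SRRCTL buffer-size encoding used above: the BSIZEPKT
 * field takes the receive buffer size in 1 KB units, assuming the
 * conventional E1000_SRRCTL_BSIZEPKT_SHIFT value of 10.
 */
#include <stdio.h>

int main(void)
{
	const unsigned int bsizepkt_shift = 10;	/* assumed shift */

	for (unsigned int bytes = 2048; bytes <= 8192; bytes *= 2)
		printf("%u-byte buffer -> BSIZEPKT field %u\n",
		    bytes, bytes >> bsizepkt_shift);
	return 0;	/* 2048 -> 2, 4096 -> 4, 8192 -> 8 */
}

/*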
-- ignore for now */ rxr->hdr_split = igb_header_split; #else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; - rxdctl |= IGB_RX_WTHRESH << 16; + rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (adapter->hw.mac.type < igb_mac_min) { if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; /* ensure we clear use DTYPE of 00 here */ rctl &= ~0x00000C00; } /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } static void em_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; } static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* * We get here thru init_locked, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. 
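 */

/*
 * The index/bit arithmetic in em_if_vlan_register() above maps the 4096
 * possible VLAN IDs onto a 128-word bitmap: word = vtag / 32, bit =
 * vtag % 32. A standalone sketch:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t vfta[128] = { 0 };
	uint16_t vtag = 100;			/* example VLAN ID */
	uint32_t index = (vtag >> 5) & 0x7F;	/* 100 / 32 = 3 */
	uint32_t bit = vtag & 0x1F;		/* 100 % 32 = 4 */

	vfta[index] |= 1u << bit;
	printf("VLAN %u -> vfta[%u] bit %u (word now 0x%08x)\n",
	    vtag, index, bit, vfta[index]);
	return 0;	/* prints: VLAN 100 -> vfta[3] bit 4 (word now 0x00000010) */
}

/*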
*/ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_if_enable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); ims_mask |= adapter->ims; } else if (adapter->intr_type == IFLIB_INTR_MSIX && hw->mac.type >= igb_mac_min) { u32 mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); ims_mask = E1000_IMS_LSC; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_if_disable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; if (adapter->intr_type == IFLIB_INTR_MSIX) { if (hw->mac.type >= igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); } E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka - * to disable special hardware management features + * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; + if (adapter->vf_ifp) + return; + if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); - return; } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. 
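 */

/*
 * The em_get_hw_control()/em_release_hw_control() pair above is a one-bit
 * handshake with the management firmware: set DRV_LOAD to claim the
 * interface, clear it to hand it back. A minimal model; the bit position is
 * an assumed value for E1000_CTRL_EXT_DRV_LOAD, not taken from this diff.
 */
#include <stdint.h>
#include <stdio.h>

#define DRV_LOAD	(1u << 28)	/* assumed CTRL_EXT:DRV_LOAD bit */

static uint32_t ctrl_ext;		/* stand-in for the CTRL_EXT register */

int main(void)
{
	ctrl_ext |= DRV_LOAD;		/* models em_get_hw_control() */
	printf("claimed:  0x%08x\n", ctrl_ext);
	ctrl_ext &= ~DRV_LOAD;		/* models em_release_hw_control() */
	printf("released: 0x%08x\n", ctrl_ext);
	return 0;
}

/*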
*/ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. */ static void em_get_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82542: case e1000_82543: break; case e1000_82544: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL2_REG, 1, &eeprom_data); apme_mask = EM_82544_APME; break; case e1000_82546: case e1000_82546_rev_3: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* FALLTHROUGH */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82575: /* listing all igb devices */ case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82546GB_PCIE: adapter->wol = 0; break; case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, 
disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On LAN capability */ static void em_enable_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); if_t ifp = iflib_get_ifp(ctx); u32 pmc, ctrl, ctrl_ext, rctl, wuc; u16 status; if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0) return; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); wuc = E1000_READ_REG(&adapter->hw, E1000_WUC); wuc |= (E1000_WUC_PME_EN | E1000_WUC_APME); E1000_WRITE_REG(&adapter->hw, E1000_WUC, wuc); if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } /* * Determine type of Wakeup: note that wol * is set with all bits on by default. */ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_UCAST) == 0) adapter->wol &= ~E1000_WUFC_EX; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if (adapter->hw.mac.type >= e1000_pchlan) { if (em_enable_phy_wakeup(adapter)) return; } else { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); /* Request PME */ status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (if_getcapenable(ifp) & IFCAP_WOL) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* * WOL in the newer chipset interfaces (pchlan) * requires settings to be copied into the PHY */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE;
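/*
 * Sketch of the MTA copy in the loop above: each 32-bit multicast-table
 * word is mirrored into two consecutive 16-bit PHY registers, low half
 * first, which is exactly what the BM_MTA(i) / BM_MTA(i) + 1 writes do.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mreg = 0xDEADBEEF;	/* example MTA word */
	uint16_t lo = (uint16_t)(mreg & 0xFFFF);
	uint16_t hi = (uint16_t)((mreg >> 16) & 0xFFFF);

	printf("MTA 0x%08x -> BM_MTA(i) 0x%04x, BM_MTA(i)+1 0x%04x\n",
	    mreg, lo, hi);
	return 0;	/* lo = 0xbeef, hi = 0xdead */
}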
e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN | E1000_WUC_APME); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_if_led_func(if_ctx_t ctx, int onoff) { struct adapter *adapter = iflib_get_softc(ctx); - + if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } } /* * Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap,link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. 
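 */

/*
 * The statistics registers read in em_update_stats_counters() behave as
 * clear-on-read counters (the comment below says so explicitly for the
 * 64-bit GORC/GOTC pair), which is why every read is accumulated into a
 * wider software counter rather than stored. A toy model of that pattern:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t hw_mpc = 7;	/* pretend Missed Packet Count register */

static uint32_t
read_clear(uint32_t *reg)	/* models a clear-on-read register */
{
	uint32_t v = *reg;

	*reg = 0;
	return (v);
}

int main(void)
{
	uint64_t mpc = 0;

	mpc += read_clear(&hw_mpc);	/* first poll picks up 7 */
	hw_mpc = 3;			/* more misses accrue in hardware */
	mpc += read_clear(&hw_mpc);	/* second poll adds 3 */
	printf("missed packets total: %llu\n", (unsigned long long)mpc);
	return 0;	/* prints 10 */
}

/*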
*/ adapter->shared->isc_pause_frames = adapter->stats.xoffrxc; adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_if_get_counter(if_ctx_t ctx, ift_counter cnt) { 
struct adapter *adapter = iflib_get_softc(ctx); - struct ifnet *ifp = iflib_get_ifp(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. */ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); - struct em_tx_queue *tx_que = adapter->tx_queues; + struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, 
"tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); } for (int j = 0; j < adapter->rx_num_queues; j++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", j); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception 
Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ - int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", + int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", 
CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *) arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. 
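 */

/*
 * Worked example of the unit conversions in em_sysctl_int_delay(): the
 * delay registers count 1.024 us ticks, and the ITR register counts 256 ns
 * units, hence the extra *4 above. The rounding macro below is an
 * assumption for illustration, not the driver's exact EM_USECS_TO_TICKS().
 */
#include <stdio.h>

#define USECS_TO_TICKS(us)	(((us) * 1000 + 512) / 1024)

int main(void)
{
	int usecs = 64;		/* the EM_TIDV/EM_TADV default */
	int ticks = USECS_TO_TICKS(usecs);

	printf("%d us -> %d ticks; as ITR 256 ns units: %d\n",
	    usecs, ticks, ticks * 4);
	return 0;	/* 64 us -> 63 ticks; as ITR 256 ns units: 252 */
}

/*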
*/ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } /* * Set flow control using sysctl: * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? */ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; - break; + break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* * Manage Energy Efficient Ethernet: * Control values: * 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_if_init(adapter->ctx); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *) arg1; em_print_debug_info(adapter); - } + } return (error); } static int em_get_rs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; int result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr || result != 1) return (error); em_dump_rs(adapter); return (error); } static void em_if_debug(if_ctx_t ctx) { em_dump_rs(iflib_get_softc(ctx)); } /* * This routine is meant to be fluid, add whatever is * needed for debugging a problem. 
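 */

/*
 * The 0-3 values accepted by em_set_flowcntl() follow the e1000_fc_mode
 * enumeration order (e1000_fc_none through e1000_fc_full), which is what
 * the "0 - off .. 3 - full" legend above encodes:
 */
#include <stdio.h>

int main(void)
{
	const char *fc_names[] = { "none", "rx pause", "tx pause", "full" };

	for (int v = 0; v < 4; v++)
		printf("fc=%d -> %s\n", v, fc_names[v]);
	return 0;
}

/*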
-jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ifnet *ifp = iflib_get_ifp(adapter->ctx); struct tx_ring *txr = &adapter->tx_queues->txr; struct rx_ring *rxr = &adapter->rx_queues->rxr; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); } for (int j=0; j < adapter->rx_num_queues; j++, rxr++) { device_printf(dev, "RX Queue %d ------\n", j); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(j)), E1000_READ_REG(&adapter->hw, E1000_RDT(j))); } } /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSIX * vectors from 3 to 5, for proper multiqueue support. */ static void em_enable_vectors_82574(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSIX vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } Index: projects/clang500-import/sys/dev/e1000/if_em.h =================================================================== --- projects/clang500-import/sys/dev/e1000/if_em.h (revision 321306) +++ projects/clang500-import/sys/dev/e1000/if_em.h (revision 321307) @@ -1,561 +1,583 @@ /*- * Copyright (c) 2016 Matt Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /*$FreeBSD$*/ #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #ifdef DDB #include #include #endif #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "ifdi_if.h" #ifndef _EM_H_DEFINED_ #define _EM_H_DEFINED_ /* Tunables */ /* * EM_TXD: Maximum number of Transmit Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 1024 * This value is the number of transmit descriptors allocated by the driver. * Increasing this value allows the driver to queue more transmits. Each * descriptor is 16 bytes. * Since TDLEN should be a multiple of 128 bytes, the number of transmit * descriptors should meet the following condition: * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_TXD 128 #define EM_MAX_TXD 4096 #define EM_DEFAULT_TXD 1024 #define EM_DEFAULT_MULTI_TXD 4096 /* * EM_RXD - Maximum number of Receive Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 1024 * This value is the number of receive descriptors allocated by the driver. * Increasing this value allows the driver to buffer more incoming packets. * Each descriptor is 16 bytes. A receive buffer is also allocated for each * descriptor. The maximum MTU size is 16110. * Since RDLEN should be a multiple of 128 bytes, the number of receive * descriptors should meet the following condition: * (num_rx_desc * sizeof(union e1000_rx_desc_extended)) % 128 == 0 */ #define EM_MIN_RXD 128 #define EM_MAX_RXD 4096 #define EM_DEFAULT_RXD 1024 #define EM_DEFAULT_MULTI_RXD 4096 /* * EM_TIDV - Transmit Interrupt Delay Value * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value delays the generation of transmit interrupts in units of * 1.024 microseconds. Transmit interrupt reduction can improve CPU * efficiency if properly tuned for specific network traffic. If the * system is reporting dropped transmits, this value may be set too high * causing the driver to run out of available transmit descriptors. */ #define EM_TIDV 64 /* * EM_TADV - Transmit Absolute Interrupt Delay Value * (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * transmit interrupt is generated. Useful only if EM_TIDV is non-zero, * this value ensures that an interrupt is generated after the initial * packet is sent on the wire within the set amount of time. Proper tuning, * along with EM_TIDV, may improve traffic throughput in specific * network conditions. */ #define EM_TADV 64 /* * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer) * Valid Range: 0-65535 (0=off) * Default Value: 0 * This value delays the generation of receive interrupts in units of 1.024 * microseconds. Receive interrupt reduction can improve CPU efficiency if * properly tuned for specific network traffic. Increasing this value adds * extra latency to frame reception and can end up decreasing the throughput * of TCP traffic.
If the system is reporting dropped receives, this value * may be set too high, causing the driver to run out of available receive * descriptors. * * CAUTION: When setting EM_RDTR to a value other than 0, adapters * may hang (stop transmitting) under certain network conditions. * If this occurs a WATCHDOG message is logged in the system * event log. In addition, the controller is automatically reset, * restoring the network connection. To eliminate the potential * for the hang, ensure that EM_RDTR is set to 0. */ #define EM_RDTR 0 /* * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * receive interrupt is generated. Useful only if EM_RDTR is non-zero, * this value ensures that an interrupt is generated after the initial * packet is received within the set amount of time. Proper tuning, * along with EM_RDTR, may improve traffic throughput in specific network * conditions. */ #define EM_RADV 64 /* * This parameter controls whether or not autonegotiation is enabled. * 0 - Disable autonegotiation * 1 - Enable autonegotiation */ #define DO_AUTO_NEG 1 /* * This parameter controls whether or not the driver will wait for * autonegotiation to complete. * 1 - Wait for autonegotiation to complete * 0 - Don't wait for autonegotiation to complete */ #define WAIT_FOR_AUTO_NEG_DEFAULT 0 /* Tunables -- End */ #define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ADVERTISE_1000_FULL) #define AUTO_ALL_MODES 0 /* PHY master/slave setting */ #define EM_MASTER_SLAVE e1000_ms_hw_default /* * Miscellaneous constants */ #define EM_VENDOR_ID 0x8086 #define EM_FLASH 0x0014 #define EM_JUMBO_PBA 0x00000028 #define EM_DEFAULT_PBA 0x00000030 #define EM_SMARTSPEED_DOWNSHIFT 3 #define EM_SMARTSPEED_MAX 15 #define EM_MAX_LOOP 10 #define MAX_NUM_MULTICAST_ADDRESSES 128 #define PCI_ANY_ID (~0U) #define ETHER_ALIGN 2 #define EM_FC_PAUSE_TIME 0x0680 #define EM_EEPROM_APME 0x400 #define EM_82544_APME 0x0004 + +/* Support AutoMediaDetect for Marvell M88 PHY in i354 */ +#define IGB_MEDIA_RESET (1 << 0) + +/* Define the starting Interrupt rate per Queue */ +#define IGB_INTS_PER_SEC 8000 +#define IGB_DEFAULT_ITR ((1000000/IGB_INTS_PER_SEC) << 2) + +#define IGB_LINK_ITR 2000 +#define I210_LINK_DELAY 1000 + +#define IGB_MAX_SCATTER 40 +#define IGB_VFTA_SIZE 128 +#define IGB_BR_SIZE 4096 /* ring buf size */ +#define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) +#define IGB_TSO_SEG_SIZE 4096 /* Max dma segment size */ +#define IGB_TXPBSIZE 20408 +#define IGB_HDR_BUF 128 +#define IGB_PKTTYPE_MASK 0x0000FFF0 +#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ + /* * Driver state logic for the detection of a hung state * in hardware. Set TX_HUNG whenever a TX packet is used * (data is sent) and clear it when txeof() is invoked if * any descriptors from the ring are cleaned/reclaimed. * Increment internal counter if no descriptors are cleaned * and compare to TX_MAXTRIES. When counter > TX_MAXTRIES, * reset adapter. */ #define EM_TX_IDLE 0x00000000 #define EM_TX_BUSY 0x00000001 #define EM_TX_HUNG 0x80000000 #define EM_TX_MAXTRIES 10 #define PCICFG_DESC_RING_STATUS 0xe4 #define FLUSH_DESC_REQUIRED 0x100 #define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ ((hw->mac.type <= e1000_82576) ?
#define PCICFG_DESC_RING_STATUS	0xe4
#define FLUSH_DESC_REQUIRED	0x100

#define IGB_RX_PTHRESH	((hw->mac.type == e1000_i354) ? 12 : \
			    ((hw->mac.type <= e1000_82576) ? 16 : 8))
#define IGB_RX_HTHRESH	8
#define IGB_RX_WTHRESH	((hw->mac.type == e1000_82576 && \
			    (adapter->intr_type == IFLIB_INTR_MSIX)) ? 1 : 4)

#define IGB_TX_PTHRESH	((hw->mac.type == e1000_i354) ? 20 : 8)
#define IGB_TX_HTHRESH	1
#define IGB_TX_WTHRESH	((hw->mac.type != e1000_82575 && \
			    (adapter->intr_type == IFLIB_INTR_MSIX)) ? 1 : 16)

/*
 * TDBA/RDBA should be aligned on a 16 byte boundary, but TDLEN/RDLEN should
 * be a multiple of 128 bytes. So we align TDBA/RDBA on a 128 byte boundary.
 * This will also optimize cache line size effects. H/W supports up to a
 * cache line size of 128.
 */
#define EM_DBA_ALIGN		128

/*
 * See Intel 82574 Driver Programming Interface Manual, Section 10.2.6.9
 */
#define TARC_COMPENSATION_MODE	(1 << 7)	/* Compensation Mode */
#define TARC_SPEED_MODE_BIT	(1 << 21)	/* On PCI-E MACs only */
#define TARC_MQ_FIX		(1 << 23) | \
				(1 << 24) | \
				(1 << 25)	/* Handle errata in MQ mode */
#define TARC_ERRATA_BIT		(1 << 26)	/* Note from errata on 82574 */

/* PCI Config defines */
#define EM_BAR_TYPE(v)		((v) & EM_BAR_TYPE_MASK)
#define EM_BAR_TYPE_MASK	0x00000001
#define EM_BAR_TYPE_MMEM	0x00000000
#define EM_BAR_TYPE_IO		0x00000001
#define EM_BAR_TYPE_FLASH	0x0014
#define EM_BAR_MEM_TYPE(v)	((v) & EM_BAR_MEM_TYPE_MASK)
#define EM_BAR_MEM_TYPE_MASK	0x00000006
#define EM_BAR_MEM_TYPE_32BIT	0x00000000
#define EM_BAR_MEM_TYPE_64BIT	0x00000004
#define EM_MSIX_BAR		3	/* On 82575 */

/* More backward compatibility */
#if __FreeBSD_version < 900000
#define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD
#endif

/* Defines for printing debug information */
#define DEBUG_INIT  0
#define DEBUG_IOCTL 0
#define DEBUG_HW    0

#define INIT_DEBUGOUT(S)		if (DEBUG_INIT) printf(S "\n")
#define INIT_DEBUGOUT1(S, A)		if (DEBUG_INIT) printf(S "\n", A)
#define INIT_DEBUGOUT2(S, A, B)		if (DEBUG_INIT) printf(S "\n", A, B)
#define IOCTL_DEBUGOUT(S)		if (DEBUG_IOCTL) printf(S "\n")
#define IOCTL_DEBUGOUT1(S, A)		if (DEBUG_IOCTL) printf(S "\n", A)
#define IOCTL_DEBUGOUT2(S, A, B)	if (DEBUG_IOCTL) printf(S "\n", A, B)
#define HW_DEBUGOUT(S)			if (DEBUG_HW) printf(S "\n")
#define HW_DEBUGOUT1(S, A)		if (DEBUG_HW) printf(S "\n", A)
#define HW_DEBUGOUT2(S, A, B)		if (DEBUG_HW) printf(S "\n", A, B)

#define EM_MAX_SCATTER		40
#define EM_VFTA_SIZE		128
#define EM_TSO_SIZE		(65535 + sizeof(struct ether_vlan_header))
#define EM_TSO_SEG_SIZE		4096	/* Max dma segment size */
#define EM_MSIX_MASK		0x01F00000	/* For 82574 use */
#define EM_MSIX_LINK		0x01000000	/* For 82574 use */
#define ETH_ZLEN		60
#define ETH_ADDR_LEN		6
#define EM_CSUM_OFFLOAD		7	/* Offload bits in mbuf flag */
#define IGB_CSUM_OFFLOAD	0x0E0F	/* Offload bits in mbuf flag */

#define IGB_PKTTYPE_MASK	0x0000FFF0
#define IGB_DMCTLX_DCFLUSH_DIS	0x80000000	/* Disable DMA Coalesce Flush */

/*
 * 82574 has a nonstandard address for EIAC,
 * and since it's only used in MSI-X, and in
 * the em driver only 82574 uses MSI-X, we can
 * solve it just using this define.
 */
#define EM_EIAC 0x000DC

/*
 * 82574 only reports 3 MSI-X vectors by default;
 * defines assisting with making it report 5 are
 * located here.
*/ #define EM_NVM_PCIE_CTRL 0x1B #define EM_NVM_MSIX_N_MASK (0x7 << EM_NVM_MSIX_N_SHIFT) #define EM_NVM_MSIX_N_SHIFT 7 struct adapter; struct em_int_delay_info { struct adapter *adapter; /* Back-pointer to the adapter struct */ int offset; /* Register offset to read/write */ int value; /* Current value in usecs */ }; /* * The transmit ring, one per tx queue */ struct tx_ring { struct adapter *adapter; struct e1000_tx_desc *tx_base; uint64_t tx_paddr; qidx_t *tx_rsq; bool tx_tso; /* last tx was tso */ uint8_t me; qidx_t tx_rs_cidx; qidx_t tx_rs_pidx; qidx_t tx_cidx_processed; /* Interrupt resources */ void *tag; struct resource *res; unsigned long tx_irq; /* Saved csum offloading context information */ int csum_flags; int csum_lhlen; int csum_iphlen; int csum_thlen; int csum_mss; int csum_pktlen; uint32_t csum_txd_upper; uint32_t csum_txd_lower; /* last field */ }; /* * The Receive ring, one per rx queue */ struct rx_ring { struct adapter *adapter; struct em_rx_queue *que; u32 me; u32 payload; union e1000_rx_desc_extended *rx_base; uint64_t rx_paddr; /* Interrupt resources */ void *tag; struct resource *res; bool discard; /* Soft stats */ unsigned long rx_irq; unsigned long rx_discarded; unsigned long rx_packets; unsigned long rx_bytes; }; struct em_tx_queue { struct adapter *adapter; u32 msix; u32 eims; /* This queue's EIMS bit */ u32 me; struct tx_ring txr; }; struct em_rx_queue { struct adapter *adapter; u32 me; u32 msix; u32 eims; struct rx_ring rxr; u64 irqs; struct if_irq que_irq; }; /* Our adapter structure */ struct adapter { struct ifnet *ifp; struct e1000_hw hw; if_softc_ctx_t shared; if_ctx_t ctx; #define tx_num_queues shared->isc_ntxqsets #define rx_num_queues shared->isc_nrxqsets #define intr_type shared->isc_intr /* FreeBSD operating-system-specific structures. */ struct e1000_osdep osdep; device_t dev; struct cdev *led_dev; struct em_tx_queue *tx_queues; struct em_rx_queue *rx_queues; struct if_irq irq; struct resource *memory; struct resource *flash; struct resource *ioport; int io_rid; struct resource *res; void *tag; u32 linkvec; u32 ivars; struct ifmedia *media; int msix; int if_flags; - int min_frame_size; int em_insert_vlan_header; u32 ims; bool in_detach; + u32 flags; /* Task for FAST handling */ struct grouptask link_task; u16 num_vlans; u32 txd_cmd; u32 tx_process_limit; u32 rx_process_limit; u32 rx_mbuf_sz; /* Management and WOL features */ u32 wol; bool has_manage; bool has_amt; /* Multicast array memory */ u8 *mta; /* ** Shadow VFTA table, this is needed because ** the real vlan filter table gets cleared during ** a soft reset and the driver needs to be able ** to repopulate it. */ u32 shadow_vfta[EM_VFTA_SIZE]; /* Info about the interface */ u16 link_active; u16 fc; u16 link_speed; u16 link_duplex; u32 smartspeed; u32 dmac; int link_mask; u64 que_mask; struct em_int_delay_info tx_int_delay; struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; struct em_int_delay_info tx_itr; /* Misc stats maintained by the driver */ unsigned long dropped_pkts; unsigned long link_irq; unsigned long mbuf_defrag_failed; unsigned long no_tx_dma_setup; unsigned long no_tx_map_avail; unsigned long rx_overruns; unsigned long watchdog_events; struct e1000_hw_stats stats; + u16 vf_ifp; }; /******************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. 
 *
 ********************************************************************************/

typedef struct _em_vendor_info_t {
	unsigned int vendor_id;
	unsigned int device_id;
	unsigned int subvendor_id;
	unsigned int subdevice_id;
	unsigned int index;
} em_vendor_info_t;

void em_dump_rs(struct adapter *);

#define EM_CORE_LOCK_INIT(_sc, _name) \
	mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF)
#define EM_TX_LOCK_INIT(_sc, _name) \
	mtx_init(&(_sc)->tx_mtx, _name, "EM TX Lock", MTX_DEF)
#define EM_RX_LOCK_INIT(_sc, _name) \
	mtx_init(&(_sc)->rx_mtx, _name, "EM RX Lock", MTX_DEF)
#define EM_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->core_mtx)
#define EM_TX_LOCK_DESTROY(_sc)		mtx_destroy(&(_sc)->tx_mtx)
#define EM_RX_LOCK_DESTROY(_sc)		mtx_destroy(&(_sc)->rx_mtx)
#define EM_CORE_LOCK(_sc)		mtx_lock(&(_sc)->core_mtx)
#define EM_TX_LOCK(_sc)			mtx_lock(&(_sc)->tx_mtx)
#define EM_TX_TRYLOCK(_sc)		mtx_trylock(&(_sc)->tx_mtx)
#define EM_RX_LOCK(_sc)			mtx_lock(&(_sc)->rx_mtx)
#define EM_CORE_UNLOCK(_sc)		mtx_unlock(&(_sc)->core_mtx)
#define EM_TX_UNLOCK(_sc)		mtx_unlock(&(_sc)->tx_mtx)
#define EM_RX_UNLOCK(_sc)		mtx_unlock(&(_sc)->rx_mtx)
#define EM_CORE_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->core_mtx, MA_OWNED)
#define EM_TX_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->tx_mtx, MA_OWNED)
#define EM_RX_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->rx_mtx, MA_OWNED)

#define EM_RSSRK_SIZE	4
#define EM_RSSRK_VAL(key, i)	(key[(i) * EM_RSSRK_SIZE] | \
				 key[(i) * EM_RSSRK_SIZE + 1] << 8 | \
				 key[(i) * EM_RSSRK_SIZE + 2] << 16 | \
				 key[(i) * EM_RSSRK_SIZE + 3] << 24)

#endif /* _EM_H_DEFINED_ */

Index: projects/clang500-import/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
===================================================================
--- projects/clang500-import/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	(revision 321306)
+++ projects/clang500-import/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	(revision 321307)
@@ -1,2384 +1,2401 @@
/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs)
 * are converted into VSCSI protocol messages which are delivered to the
 * parent partition StorVSP driver over the Hyper-V VMBUS.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/* (a long run of system header #includes, names lost in extraction) */
#include "hv_vstorage.h"
#include "vmbus_if.h"

#define STORVSC_MAX_LUNS_PER_TARGET	(64)
#define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
#define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
#define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
#define STORVSC_MAX_TARGETS		(2)

#define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)

/*
 * 33 segments are needed to allow 128KB maxio, in case the data
 * in the first page is _not_ PAGE_SIZE aligned, e.g.
 *
 *     |<----------- 128KB ----------->|
 *     |                               |
 *  0       2K 4K    8K   16K    124K  128K 130K
 *  |       |  |     |     |       |     |  |
 *  +--+--+-----+-----+.......+-----+--+--+
 *  |  |  |     |     |       |     |  |  | DATA
 *  |  |  |     |     |       |     |  |  |
 *  +--+--+-----+-----+.......------+--+--+
 *     |  |                         |  |
 *     | 1|            31           | 1| ...... # of segments
 */
#define STORVSC_DATA_SEGCNT_MAX		33
#define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
#define STORVSC_DATA_SIZE_MAX		\
	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)

struct storvsc_softc;

struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;
	struct sglist *sgl_data;
};

struct hv_sgl_page_pool {
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
	LIST_HEAD(, hv_sgl_node) free_sgl_list;
	boolean_t                is_init;
} g_hv_sgl_page_pool;

enum storvsc_request_type {
	WRITE_TYPE,
	READ_TYPE,
	UNKNOWN_TYPE
};

SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	"Hyper-V storage interface");

static u_int hv_storvsc_use_win8ext_flags = 1;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
	&hv_storvsc_use_win8ext_flags, 0,
	"Use win8 extension flags or not");

static u_int hv_storvsc_use_pim_unmapped = 1;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
	&hv_storvsc_use_pim_unmapped, 0,
	"Optimize storvsc by using unmapped I/O");

static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");

static u_int hv_storvsc_max_io = 512;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");

static int hv_storvsc_chan_cnt = 0;
SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
	&hv_storvsc_chan_cnt, 0, "# of channels to use");

#define STORVSC_MAX_IO						\
	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)

struct hv_storvsc_sysctl {
	u_long		data_bio_cnt;
	u_long		data_vaddr_cnt;
	u_long		data_sg_cnt;
	u_long		chan_send_cnt[MAXCPU];
};

struct storvsc_gpa_range {
	struct vmbus_gpa_range	gpa_range;
	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
} __packed;

struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request)	link;
	struct vstor_packet		vstor_packet;
	int				prp_cnt;
	struct storvsc_gpa_range	prp_list;
	void				*sense_data;
	uint8_t				sense_info_len;
	uint8_t				retries;
	union ccb			*ccb;
	struct storvsc_softc		*softc;
	struct callout			callout;
	struct sema			synch_sema;	/* synchronize the request/response if needed */
	struct sglist			*bounce_sgl;
	unsigned int			bounce_sgl_count;
	uint64_t			not_aligned_seg_bits;
	bus_dmamap_t			data_dmap;
};
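/*
 * A quick standalone check (not driver code) of the segment budget in the
 * diagram above: with 4 KB pages, 33 segments cover a 128 KB transfer whose
 * first byte is not page aligned -- one partial leading page, 31 full pages,
 * and one partial trailing page, i.e. 32 pages of payload across 33 slots.
 */
#include <assert.h>
#include <stdio.h>

#define PGSZ		4096
#define SEGCNT_MAX	33
#define DATA_SIZE_MAX	((SEGCNT_MAX - 1) * PGSZ)  /* cf. STORVSC_DATA_SIZE_MAX */

int
main(void)
{
    assert(DATA_SIZE_MAX == 128 * 1024);
    printf("maxio %d bytes across at most %d segments\n",
        DATA_SIZE_MAX, SEGCNT_MAX);
    return (0);
}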
struct storvsc_softc { struct vmbus_channel *hs_chan; LIST_HEAD(, hv_storvsc_request) hs_free_list; struct mtx hs_lock; struct storvsc_driver_props *hs_drv_props; int hs_unit; uint32_t hs_frozen; struct cam_sim *hs_sim; struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; device_t hs_dev; bus_dma_tag_t storvsc_req_dtag; struct hv_storvsc_sysctl sysctl_data; uint32_t hs_nchan; struct vmbus_channel *hs_sel_chan[MAXCPU]; }; static eventhandler_tag storvsc_handler_tag; /* * The size of the vmscsi_request has changed in win8. The * additional size is for the newly added elements in the * structure. These elements are valid only when we are talking * to a win8 host. * Track the correct size we need to apply. */ static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension); /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. */ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; uint32_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ static const struct hyperv_guid gStorVscDeviceType={ .hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ static const struct hyperv_guid gBlkVscDeviceType={ .hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, 20*PAGE_SIZE}, {"storvsc", "Hyper-V SCSI", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, 20*PAGE_SIZE} }; /* * Sense buffer size changed in win8; have a run-time * variable to track the size we should use. */ static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; /* * The storage protocol version is determined during the * initial exchange with the host. It will indicate which * storage functionality is available in the host. 
*/ static int vmstor_proto_version; struct vmstor_proto { int proto_version; int sense_buffer_size; int vmscsi_size_delta; }; static const struct vmstor_proto vmstor_proto_list[] = { { VMSTOR_PROTOCOL_VERSION_WIN10, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8_1, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN7, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), }, { VMSTOR_PROTOCOL_VERSION_WIN6, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), } }; /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_rescan_target(struct storvsc_softc *sc); static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct storvsc_softc *); static void storvsc_io_done(struct hv_storvsc_request *reqp); static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits); void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); static void storvsc_subchan_attach(struct storvsc_softc *sc, struct vmbus_channel *new_channel) { struct vmstor_chan_props props; int ret = 0; memset(&props, 0, sizeof(props)); vmbus_chan_cpu_rr(new_channel); ret = vmbus_chan_open(new_channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, sc); } /** * @brief Send multi-channel creation request to host * * @param device a Hyper-V device pointer * @param max_chans the max channels supported by vmbus */ static void storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch) { struct vmbus_channel **subchan; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; int request_subch; int ret, i; /* get sub-channel count that need to create */ request_subch = MIN(max_subch, mp_ncpus - 1); request = &sc->hs_init_req; /* request the host to create multi-channel */ memset(request, 0, sizeof(struct hv_storvsc_request)); sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet = &request->vstor_packet; vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; 
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
	vstor_packet->u.multi_channels_cnt = request_subch;

	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);

	sema_wait(&request->synch_sema);

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		printf("Storvsc_error: create multi-channel invalid operation "
		    "(%d) or status (%u)\n",
		    vstor_packet->operation, vstor_packet->status);
		return;
	}

	/* Update channel count */
	sc->hs_nchan = request_subch + 1;

	/* Wait for sub-channels setup to complete. */
	subchan = vmbus_subchan_get(sc->hs_chan, request_subch);

	/* Attach the sub-channels. */
	for (i = 0; i < request_subch; ++i)
		storvsc_subchan_attach(sc, subchan[i]);

	/* Release the sub-channels. */
	vmbus_subchan_rel(subchan, request_subch);

	if (bootverbose)
		printf("Storvsc create multi-channel success!\n");
}
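/*
 * A standalone sketch (not driver code) of the fan-out arithmetic used
 * above and in the I/O issue path: the guest asks the host for at most
 * min(host_max_subch, ncpus - 1) sub-channels, then spreads requests with
 * (lun + curcpu) % nchan.  The cpu and channel counts here are fabricated.
 */
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int
main(void)
{
    int ncpus = 8;                  /* hypothetical mp_ncpus */
    int max_subch = 63;             /* hypothetical host limit */
    int request_subch = MIN(max_subch, ncpus - 1);
    int nchan = request_subch + 1;  /* primary + sub-channels */

    for (int cpu = 0; cpu < ncpus; cpu++)
        printf("lun 0 on cpu %d -> channel %d\n",
            cpu, (0 + cpu) % nchan);
    return (0);
}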
/**
 * @brief initialize channel connection to parent partition
 *
 * @param dev  a Hyper-V device pointer
 * @returns  0 on success, non-zero error on failure
 */
static int
hv_storvsc_channel_init(struct storvsc_softc *sc)
{
	int ret = 0, i;
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;
	uint16_t max_subch;
	boolean_t support_multichannel;
	uint32_t version;

	max_subch = 0;
	support_multichannel = FALSE;

	request = &sc->hs_init_req;
	memset(request, 0, sizeof(struct hv_storvsc_request));
	vstor_packet = &request->vstor_packet;
	request->softc = sc;

	/*
	 * Initiate the vsc/vsp initialization protocol on the open channel
	 */
	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));

	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);

	if (ret != 0)
		goto cleanup;

	sema_wait(&request->synch_sema);

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		goto cleanup;
	}

	for (i = 0; i < nitems(vmstor_proto_list); i++) {
		/* reuse the packet for the version range supported */

		memset(vstor_packet, 0, sizeof(struct vstor_packet));
		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
		vstor_packet->flags = REQUEST_COMPLETION_FLAG;

		vstor_packet->u.version.major_minor =
			vmstor_proto_list[i].proto_version;

		/* revision is only significant for Windows guests */
		vstor_packet->u.version.revision = 0;

		ret = vmbus_chan_send(sc->hs_chan,
		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);

		if (ret != 0)
			goto cleanup;

		sema_wait(&request->synch_sema);

		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
			ret = EINVAL;
			goto cleanup;
		}
		if (vstor_packet->status == 0) {
			vmstor_proto_version =
				vmstor_proto_list[i].proto_version;
			sense_buffer_size =
				vmstor_proto_list[i].sense_buffer_size;
			vmscsi_size_delta =
				vmstor_proto_list[i].vmscsi_size_delta;
			break;
		}
	}

	if (vstor_packet->status != 0) {
		ret = EINVAL;
		goto cleanup;
	}

	/*
	 * Query channel properties
	 */
	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);

	if (ret != 0)
		goto cleanup;

	sema_wait(&request->synch_sema);

	/* TODO: Check returned version */
	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		goto cleanup;
	}

	max_subch = vstor_packet->u.chan_props.max_channel_cnt;
	if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1))
		max_subch = hv_storvsc_chan_cnt - 1;

	/* the multi-channel feature is supported by WIN8 and above */
	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
	    (vstor_packet->u.chan_props.flags &
	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
		support_multichannel = TRUE;
	}
	if (bootverbose) {
		device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1,
		    support_multichannel ? ", multi-chan capable" : "");
	}

	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);

	if (ret != 0) {
		goto cleanup;
	}

	sema_wait(&request->synch_sema);

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0)
		goto cleanup;

	/*
	 * If multi-channel is supported, send a multichannel create
	 * request to the host.
	 */
	if (support_multichannel && max_subch > 0)
		storvsc_send_multichannel_request(sc, max_subch);

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}

/**
 * @brief Open channel connection to parent partition StorVSP driver
 *
 * Open and initialize channel connection to parent partition StorVSP driver.
 *
 * @param pointer to a Hyper-V device
 * @returns 0 on success, non-zero error on failure
 */
static int
hv_storvsc_connect_vsp(struct storvsc_softc *sc)
{
	int ret = 0;
	struct vmstor_chan_props props;

	memset(&props, 0, sizeof(struct vmstor_chan_props));

	/*
	 * Open the channel
	 */
	vmbus_chan_cpu_rr(sc->hs_chan);
	ret = vmbus_chan_open(
	    sc->hs_chan,
	    sc->hs_drv_props->drv_ringbuffer_size,
	    sc->hs_drv_props->drv_ringbuffer_size,
	    (void *)&props,
	    sizeof(struct vmstor_chan_props),
	    hv_storvsc_on_channel_callback, sc);
	if (ret != 0) {
		return ret;
	}

	ret = hv_storvsc_channel_init(sc);
	return (ret);
}

#if HVS_HOST_RESET
static int
hv_storvsc_host_reset(struct storvsc_softc *sc)
{
	int ret = 0;

	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	request = &sc->hs_reset_req;
	request->softc = sc;
	vstor_packet = &request->vstor_packet;

	sema_init(&request->synch_sema, 0, "stor synch sema");

	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = vmbus_chan_send(dev->channel,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE,
	    (uint64_t)(uintptr_t)&sc->hs_reset_req);

	if (ret != 0) {
		goto cleanup;
	}

	sema_wait(&request->synch_sema);

	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and returned to us
	 */

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
#endif /* HVS_HOST_RESET */

/**
 * @brief Function to initiate an I/O request
 *
 * @param device Hyper-V device pointer
 * @param request pointer to a request structure
 * @returns 0 on success, non-zero error on failure
 */
static int
hv_storvsc_io_request(struct storvsc_softc *sc,
		      struct hv_storvsc_request *request)
{
	struct vstor_packet *vstor_packet = &request->vstor_packet;
	struct vmbus_channel* outgoing_channel = NULL;
	int ret = 0, ch_sel;

	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;

	vstor_packet->u.vm_srb.length =
	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size; vstor_packet->u.vm_srb.transfer_len = request->prp_list.gpa_range.gpa_len; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan; outgoing_channel = sc->hs_sel_chan[ch_sel]; mtx_unlock(&request->softc->hs_lock); if (request->prp_list.gpa_range.gpa_len) { ret = vmbus_chan_send_prplist(outgoing_channel, &request->prp_list.gpa_range, request->prp_cnt, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } else { ret = vmbus_chan_send(outgoing_channel, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } /* statistic for successful request sending on each channel */ if (!ret) { sc->sysctl_data.chan_send_cnt[ch_sel]++; } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); } /** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. */ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; /* * Copy some fields of the host's response into the request structure, * because the fields will be used later in storvsc_io_done(). */ request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status; request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status; request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_rescan_target(struct storvsc_softc *sc) { path_id_t pathid; target_id_t targetid; union ccb *ccb; pathid = cam_sim_path(sc->hs_sim); targetid = CAM_TARGET_WILDCARD; /* * Allocate a CCB and schedule a rescan. 
*/ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { printf("unable to create path for rescan, pathid: %u," "targetid: %u\n", pathid, targetid); xpt_free_ccb(ccb); return; } if (targetid == CAM_TARGET_WILDCARD) ccb->ccb_h.func_code = XPT_SCAN_BUS; else ccb->ccb_h.func_code = XPT_SCAN_TGT; xpt_rescan(ccb); } static void hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc) { int ret = 0; struct storvsc_softc *sc = xsc; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8); ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id); KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough")); /* XXX check bytes_recvd to make sure that it contains enough data */ while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: if (request == NULL) panic("VMBUS: storvsc received a " "packet with NULL request id in " "COMPLETEIO operation."); hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: printf("VMBUS: storvsc operation %d not " "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; case VSTOR_OPERATION_ENUMERATE_BUS: hv_storvsc_rescan_target(sc); break; default: break; } } bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8), ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id); KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough")); /* * XXX check bytes_recvd to make sure that it contains * enough data */ } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, a ENXIO is returned. If the input device is * for BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. 
 *
 * @param a device
 * @returns 0 on success, ENXIO if not a matching StorVSC device
 */
static int
storvsc_probe(device_t dev)
{
	int ret = ENXIO;

	switch (storvsc_get_storage_type(dev)) {
	case DRIVER_BLKVSC:
		if (bootverbose)
			device_printf(dev,
			    "Enlightened ATA/IDE detected\n");
		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
		ret = BUS_PROBE_DEFAULT;
		break;
	case DRIVER_STORVSC:
		if (bootverbose)
			device_printf(dev, "Enlightened SCSI device detected\n");
		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
		ret = BUS_PROBE_DEFAULT;
		break;
	default:
		ret = ENXIO;
	}
	return (ret);
}

static void
storvsc_create_chan_sel(struct storvsc_softc *sc)
{
	struct vmbus_channel **subch;
	int i, nsubch;

	sc->hs_sel_chan[0] = sc->hs_chan;
	nsubch = sc->hs_nchan - 1;
	if (nsubch == 0)
		return;

	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
	for (i = 0; i < nsubch; i++)
		sc->hs_sel_chan[i + 1] = subch[i];
	vmbus_subchan_rel(subch, nsubch);
}

static int
storvsc_init_requests(device_t dev)
{
	struct storvsc_softc *sc = device_get_softc(dev);
	struct hv_storvsc_request *reqp;
	int error, i;

	LIST_INIT(&sc->hs_free_list);

	error = bus_dma_tag_create(
		bus_get_dma_tag(dev),		/* parent */
		1,				/* alignment */
		PAGE_SIZE,			/* boundary */
		BUS_SPACE_MAXADDR,		/* lowaddr */
		BUS_SPACE_MAXADDR,		/* highaddr */
		NULL, NULL,			/* filter, filterarg */
		STORVSC_DATA_SIZE_MAX,		/* maxsize */
		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
		0,				/* flags */
		NULL,				/* lockfunc */
		NULL,				/* lockfuncarg */
		&sc->storvsc_req_dtag);
	if (error) {
		device_printf(dev, "failed to create storvsc dma tag\n");
		return (error);
	}

	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
		reqp = malloc(sizeof(struct hv_storvsc_request),
		    M_DEVBUF, M_WAITOK|M_ZERO);
		reqp->softc = sc;
		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
		    &reqp->data_dmap);
		if (error) {
			device_printf(dev, "failed to allocate storvsc "
			    "data dmamap\n");
			goto cleanup;
		}
		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
	}
	return (0);

cleanup:
	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
		LIST_REMOVE(reqp, link);
		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
		free(reqp, M_DEVBUF);
	}
	return (error);
}

static void
storvsc_sysctl(device_t dev)
{
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *ch_tree, *chid_tree;
	struct storvsc_softc *sc;
	char name[16];
	int i;

	sc = device_get_softc(dev);
	ctx = device_get_sysctl_ctx(dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));

	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW,
	    &sc->sysctl_data.data_bio_cnt, "# of bio data blocks");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW,
	    &sc->sysctl_data.data_vaddr_cnt, "# of vaddr data blocks");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW,
	    &sc->sysctl_data.data_sg_cnt, "# of sg data blocks");

	/* dev.storvsc.UNIT.channel */
	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	if (ch_tree == NULL)
		return;

	for (i = 0; i < sc->hs_nchan; i++) {
		uint32_t ch_id;

		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
		snprintf(name, sizeof(name), "%d", ch_id);
		/* dev.storvsc.UNIT.channel.CHID */
		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
		    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		if (chid_tree == NULL)
			return;
		/* dev.storvsc.UNIT.channel.CHID.send_req */
		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
		    "send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
		    "# of requests sent on this
channel"); } } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. * * @param a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; struct hv_sgl_node *sgl_node = NULL; void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls. */ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); sc->hs_nchan = 1; sc->hs_chan = vmbus_get_channel(dev); stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size; sc->hs_drv_props->drv_max_ios_per_target = MIN(STORVSC_MAX_IO, hv_storvsc_max_io); if (bootverbose) { printf("storvsc ringbuffer size: %d, max_io: %d\n", sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_max_ios_per_target); } /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = dev; mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); ret = storvsc_init_requests(dev); if (ret != 0) goto cleanup; /* create sg-list page pool */ if (FALSE == g_hv_sgl_page_pool.is_init) { g_hv_sgl_page_pool.is_init = TRUE; LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); /* * Pre-create SG list, each SG list with * STORVSC_DATA_SEGCNT_MAX segments, each * segment has one page buffer */ for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) { sgl_node = malloc(sizeof(struct hv_sgl_node), M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data = sglist_alloc(STORVSC_DATA_SEGCNT_MAX, M_WAITOK|M_ZERO); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) { tmp_buff = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data->sg_segs[j].ss_paddr = (vm_paddr_t)tmp_buff; } LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } } sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(sc); if (ret != 0) { goto cleanup; } /* Construct cpu to channel mapping */ storvsc_create_chan_sel(sc); /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? 
*/ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); storvsc_sysctl(dev); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) { if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_sgl_node *sgl_node = NULL; int j = 0; sc->hs_destroy = TRUE; /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ vmbus_chan_close(sc->hs_chan); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){ if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. 
* * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ #ifdef notyet /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } #endif /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); hv_storvsc_on_channel_callback(sc->hs_chan, sc); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. The requests are in the form of * CAM control blocks which indicate the action being performed. 
* Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; if (hv_storvsc_use_pim_unmapped) cpi->hba_misc |= PIM_UNMAPPED; cpi->maxio = STORVSC_DATA_SIZE_MAX; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? 
"bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; bus_dmamap_t dmap_saved; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); /* Save the data_dmap before reset request */ dmap_saved = reqp->data_dmap; /* XXX this is ugly */ bzero(reqp, sizeof(struct hv_storvsc_request)); /* Restore necessary bits */ reqp->data_dmap = dmap_saved; reqp->softc = sc; ccb->ccb_h.status |= CAM_SIM_QUEUED; if ((res = create_storvsc_request(ccb, reqp)) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } #ifdef notyet if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, 1); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } #endif if ((res = hv_storvsc_io_request(sc, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** * @brief destroy bounce buffer * * This function is responsible for destroy a Scatter/Gather list * that create by storvsc_create_bounce_buffer() * * @param sgl- the Scatter/Gather need be destroy * @param sg_count- page count of the SG list. * */ static void storvsc_destroy_bounce_buffer(struct sglist *sgl) { struct hv_sgl_node *sgl_node = NULL; if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) { printf("storvsc error: not enough in use sgl\n"); return; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_REMOVE(sgl_node, link); sgl_node->sgl_data = sgl; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } /** * @brief create bounce buffer * * This function is responsible for create a Scatter/Gather list, * which hold several pages that can be aligned with page size. * * @param seg_count- SG-list segments count * @param write - if WRITE_TYPE, set SG list page used size to 0, * otherwise set used size to page size. * * return NULL if create failed */ static struct sglist * storvsc_create_bounce_buffer(uint16_t seg_count, int write) { int i = 0; struct sglist *bounce_sgl = NULL; unsigned int buf_len = ((write == WRITE_TYPE) ? 
/**
 * @brief create bounce buffer
 *
 * This function is responsible for creating a scatter/gather list
 * holding several pages, each aligned to the page size.
 *
 * @param seg_count number of segments in the SG list
 * @param write if WRITE_TYPE, set the used size of each SG page to 0,
 *              otherwise set the used size to the page size.
 *
 * return NULL if creation failed
 */
static struct sglist *
storvsc_create_bounce_buffer(uint16_t seg_count, int write)
{
	int i = 0;
	struct sglist *bounce_sgl = NULL;
	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
	struct hv_sgl_node *sgl_node = NULL;

	/* get struct sglist from free_sgl_list */
	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
		printf("storvsc error: not enough free sgl\n");
		return NULL;
	}
	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
	LIST_REMOVE(sgl_node, link);
	bounce_sgl = sgl_node->sgl_data;
	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);

	bounce_sgl->sg_maxseg = seg_count;

	if (write == WRITE_TYPE)
		bounce_sgl->sg_nseg = 0;
	else
		bounce_sgl->sg_nseg = seg_count;

	for (i = 0; i < seg_count; i++)
		bounce_sgl->sg_segs[i].ss_len = buf_len;

	return bounce_sgl;
}

/**
 * @brief copy data from an SG list to the bounce buffer
 *
 * This function is responsible for copying data from one SG list's
 * segments into another SG list that is used as a bounce buffer.
 *
 * @param bounce_sgl the destination SG list
 * @param orig_sgl the segments of the source SG list
 * @param orig_sgl_count the count of segments
 * @param seg_bits bitmask indicating which segments need the bounce
 *                 buffer; a set bit means the segment is copied.
 */
static void
storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
			       bus_dma_segment_t *orig_sgl,
			       unsigned int orig_sgl_count,
			       uint64_t seg_bits)
{
	int src_sgl_idx = 0;

	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
		if (seg_bits & (1 << src_sgl_idx)) {
			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
			    (void*)orig_sgl[src_sgl_idx].ds_addr,
			    orig_sgl[src_sgl_idx].ds_len);
			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
			    orig_sgl[src_sgl_idx].ds_len;
		}
	}
}

/**
 * @brief copy data from an SG list used as a bounce buffer to another SG list
 *
 * This function is responsible for copying data from an SG list with a
 * bounce buffer back into another SG list's segments.
 *
 * @param dest_sgl the destination SG list's segments
 * @param dest_sgl_count the count of the destination SG list's segments
 * @param src_sgl the source SG list
 * @param seg_bits bitmask indicating which segments of the source SG list
 *                 used the bounce buffer
 */
void
storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
				    unsigned int dest_sgl_count,
				    struct sglist* src_sgl,
				    uint64_t seg_bits)
{
	int sgl_idx = 0;

	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
		if (seg_bits & (1 << sgl_idx)) {
			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
			    src_sgl->sg_segs[sgl_idx].ss_len);
		}
	}
}
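/*
 * A simplified, standalone illustration (not the driver's exact predicate)
 * of what storvsc_check_bounce_buffer_sgl() below decides: a multi-segment
 * transfer can be sent as a page list only if consecutive segments are
 * physically contiguous and the interior joints fall on page boundaries;
 * otherwise the data must be bounced.  Addresses are fabricated and 4 KB
 * pages assumed.
 */
#include <stdint.h>
#include <stdio.h>

#define PGSZ 4096ULL

struct seg { uint64_t pa; uint64_t len; };

static int
needs_bounce(const struct seg *s, int n)
{
    for (int i = 1; i < n; i++) {
        if (s[i].pa != s[i - 1].pa + s[i - 1].len)
            return (1);     /* hole between segments */
        if ((s[i].pa % PGSZ) != 0)
            return (1);     /* joint not on a page boundary */
    }
    return (0);
}

int
main(void)
{
    struct seg good[] = { { 0x10800, 0x800 }, { 0x11000, 0x1000 } };
    struct seg bad[]  = { { 0x10800, 0x400 }, { 0x20000, 0x1000 } };

    printf("good: %s\n", needs_bounce(good, 2) ? "bounce" : "direct");
    printf("bad:  %s\n", needs_bounce(bad, 2) ? "bounce" : "direct");
    return (0);
}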
/**
 * @brief check whether an SG list needs a bounce buffer
 *
 * This function is responsible for checking whether a bounce buffer is
 * needed for the SG list.
 *
 * @param sgl the SG list's segments
 * @param sg_count the count of SG list segments
 * @param bits set to the mask of segment numbers that need the bounce buffer
 *
 * return -1 if the SG list does not need a bounce buffer
 */
static int
storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
				unsigned int sg_count,
				uint64_t *bits)
{
	int i = 0;
	int offset = 0;
	uint64_t phys_addr = 0;
	uint64_t tmp_bits = 0;
	boolean_t found_hole = FALSE;
	boolean_t pre_aligned = TRUE;

	if (sg_count < 2) {
		return -1;
	}

	*bits = 0;

	phys_addr = vtophys(sgl[0].ds_addr);
	offset = phys_addr - trunc_page(phys_addr);

	if (offset != 0) {
		pre_aligned = FALSE;
		tmp_bits |= 1;
	}

	for (i = 1; i < sg_count; i++) {
		phys_addr = vtophys(sgl[i].ds_addr);
		offset = phys_addr - trunc_page(phys_addr);

		if (offset == 0) {
			if (FALSE == pre_aligned) {
				/*
				 * This segment is aligned; if the previous
				 * one is not aligned, we found a hole.
				 */
				found_hole = TRUE;
			}
			pre_aligned = TRUE;
		} else {
			tmp_bits |= 1ULL << i;
			if (!pre_aligned) {
				if (phys_addr != vtophys(sgl[i-1].ds_addr +
				    sgl[i-1].ds_len)) {
					/*
					 * Check whether this segment connects
					 * to the previous one; if not, we
					 * found a hole.
					 */
					found_hole = TRUE;
				}
			} else {
				found_hole = TRUE;
			}
			pre_aligned = FALSE;
		}
	}

	if (!found_hole) {
		return (-1);
	} else {
		*bits = tmp_bits;
		return 0;
	}
}

/**
 * Copy bus_dma segments into the multi-page buffer, which requires
 * the pages to be compactly composed except for the 1st and last pages.
 */
static void
storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	struct hv_storvsc_request *reqp = arg;
	union ccb *ccb = reqp->ccb;
	struct ccb_scsiio *csio = &ccb->csio;
	struct storvsc_gpa_range *prplist;
	int i;

	prplist = &reqp->prp_list;
	prplist->gpa_range.gpa_len = csio->dxfer_len;
	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;

	for (i = 0; i < nsegs; i++) {
#ifdef INVARIANTS
		if (nsegs > 1) {
			if (i == 0) {
				KASSERT((segs[i].ds_addr & PAGE_MASK) +
				    segs[i].ds_len == PAGE_SIZE,
				    ("invalid 1st page, ofs 0x%jx, len %zu",
				     (uintmax_t)segs[i].ds_addr,
				     segs[i].ds_len));
			} else if (i == nsegs - 1) {
				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
				    ("invalid last page, ofs 0x%jx",
				     (uintmax_t)segs[i].ds_addr));
			} else {
				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
				    segs[i].ds_len == PAGE_SIZE,
				    ("not a full page, ofs 0x%jx, len %zu",
				     (uintmax_t)segs[i].ds_addr,
				     segs[i].ds_len));
			}
		}
#endif
		prplist->gpa_page[i] = atop(segs[i].ds_addr);
	}
	reqp->prp_cnt = nsegs;
}
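/*
 * The prepare callback above packs a transfer into an { offset, page[] }
 * pair: the byte offset within the first page plus one page frame number
 * per page touched.  A standalone sketch of that arithmetic, assuming a
 * physically contiguous buffer for brevity (4 KB pages, fabricated address):
 */
#include <stdint.h>
#include <stdio.h>

#define PGSHIFT 12
#define PGSZ    (1UL << PGSHIFT)
#define PGMASK  (PGSZ - 1)

int
main(void)
{
    uint64_t pa = 0x12345678;       /* hypothetical physical start address */
    uint32_t len = 20000;           /* transfer length in bytes */
    uint32_t ofs = pa & PGMASK;     /* cf. gpa_range.gpa_ofs */
    uint64_t first = pa >> PGSHIFT; /* cf. atop(pa) */
    int npages = (int)((ofs + len + PGSZ - 1) >> PGSHIFT);

    printf("ofs %u, %d pages:", ofs, npages);
    for (int i = 0; i < npages; i++)
        printf(" %#llx", (unsigned long long)(first + i));
    printf("\n");
    return (0);
}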
/**
 * @brief Fill in a request structure based on a CAM control block
 *
 * Fills in a request structure based on the contents of a CAM control
 * block.  The request structure holds the payload information for a
 * VSCSI protocol request.
 *
 * @param ccb pointer to a CAM control block
 * @param reqp pointer to a request structure
 */
static int
create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
{
	struct ccb_scsiio *csio = &ccb->csio;
	uint64_t phys_addr;
	uint32_t pfn;
	uint64_t not_aligned_seg_bits = 0;
	int error;

	/* refer to struct vmscsi_req for meanings of these two fields */
	reqp->vstor_packet.u.vm_srb.port =
		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
	reqp->vstor_packet.u.vm_srb.path_id =
		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));

	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;

	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
	if (ccb->ccb_h.flags & CAM_CDB_POINTER) {
		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
			csio->cdb_len);
	} else {
		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
			csio->cdb_len);
	}

	if (hv_storvsc_use_win8ext_flags) {
		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
	}
	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
	case CAM_DIR_OUT:
		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
		if (hv_storvsc_use_win8ext_flags) {
			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
				SRB_FLAGS_DATA_OUT;
		}
		break;
	case CAM_DIR_IN:
		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
		if (hv_storvsc_use_win8ext_flags) {
			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
				SRB_FLAGS_DATA_IN;
		}
		break;
	case CAM_DIR_NONE:
		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
		if (hv_storvsc_use_win8ext_flags) {
			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
				SRB_FLAGS_NO_DATA_TRANSFER;
		}
		break;
	default:
		printf("Error: unexpected data direction: 0x%x\n",
			ccb->ccb_h.flags & CAM_DIR_MASK);
		return (EINVAL);
	}

	reqp->sense_data     = &csio->sense_data;
	reqp->sense_info_len = csio->sense_len;

	reqp->ccb = ccb;

	if (0 == csio->dxfer_len) {
		return (0);
	}

	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
	case CAM_DATA_BIO:
	case CAM_DATA_VADDR:
		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
		    BUS_DMA_NOWAIT);
		if (error) {
			xpt_print(ccb->ccb_h.path,
			    "bus_dmamap_load_ccb failed: %d\n", error);
			return (error);
		}
		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
			reqp->softc->sysctl_data.data_bio_cnt++;
		else
			reqp->softc->sysctl_data.data_vaddr_cnt++;
		break;

	case CAM_DATA_SG:
	{
		struct storvsc_gpa_range *prplist;
		int i = 0;
		int offset = 0;
		int ret;

		bus_dma_segment_t *storvsc_sglist =
		    (bus_dma_segment_t *)ccb->csio.data_ptr;
		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;

		prplist = &reqp->prp_list;
		prplist->gpa_range.gpa_len = csio->dxfer_len;

		printf("Storvsc: get SG I/O operation, %d\n",
		    reqp->vstor_packet.u.vm_srb.data_in);

		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX) {
			printf("Storvsc: %d segments is too much, "
			    "only support %d segments\n",
			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
			return (EINVAL);
		}

		/*
		 * We create our own bounce buffer function currently.  Ideally
		 * we should use the BUS_DMA(9) framework, but with the current
		 * BUS_DMA code there is no callback API to check the page
		 * alignment of middle segments before busdma can decide
		 * whether a bounce buffer is needed for a particular segment.
		 * There is a callback, "bus_dma_filter_t *filter", but its
		 * parameters are not sufficient for the storvsc driver.
		 * TODO:
		 * Add a page alignment check in the BUS_DMA(9) callback.  Once
		 * this is complete, switch the following code to use
		 * BUS_DMA(9) for storvsc bounce buffer support.
		 */
/* check if we need to create bounce buffer */ ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, storvsc_sg_count, &not_aligned_seg_bits); if (ret != -1) { reqp->bounce_sgl = storvsc_create_bounce_buffer(storvsc_sg_count, reqp->vstor_packet.u.vm_srb.data_in); if (NULL == reqp->bounce_sgl) { printf("Storvsc_error: " "create bounce buffer failed.\n"); return (ENOMEM); } reqp->bounce_sgl_count = storvsc_sg_count; reqp->not_aligned_seg_bits = not_aligned_seg_bits; /* * If it is a write, we need to copy the original data * to the bounce buffer. */ if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_sgl_to_bounce_buf( reqp->bounce_sgl, storvsc_sglist, storvsc_sg_count, reqp->not_aligned_seg_bits); } /* translate virtual addresses to physical frame numbers */ if (reqp->not_aligned_seg_bits & 0x1) { phys_addr = vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); } else { phys_addr = vtophys(storvsc_sglist[0].ds_addr); } prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK; pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[0] = pfn; for (i = 1; i < storvsc_sg_count; i++) { if (reqp->not_aligned_seg_bits & (1ULL << i)) { phys_addr = vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); } else { phys_addr = vtophys(storvsc_sglist[i].ds_addr); } pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; } reqp->prp_cnt = i; } else { phys_addr = vtophys(storvsc_sglist[0].ds_addr); prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK; for (i = 0; i < storvsc_sg_count; i++) { phys_addr = vtophys(storvsc_sglist[i].ds_addr); pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; } reqp->prp_cnt = i; /* check whether the last segment crosses a page boundary */ offset = phys_addr & PAGE_MASK; if (offset) { /* Add one more PRP entry */ phys_addr = vtophys(storvsc_sglist[i-1].ds_addr + PAGE_SIZE - offset); pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; reqp->prp_cnt++; } reqp->bounce_sgl_count = 0; } reqp->softc->sysctl_data.data_sg_cnt++; break; } default: printf("Unknown flags: %d\n", ccb->ccb_h.flags); return (EINVAL); } return (0); } static uint32_t is_scsi_valid(const struct scsi_inquiry_data *inq_data) { u_int8_t type; type = SID_TYPE(inq_data); if (type == T_NODEVICE) return (0); if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU) return (0); return (1); } /** * @brief completion function before returning to CAM * * The I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. * * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; bus_dma_segment_t *ori_sglist = NULL; int ori_sg_count = 0; const struct scsi_generic *cmd; /* destroy bounce buffer if it is used */ if (reqp->bounce_sgl_count) { ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; ori_sg_count = ccb->csio.sglist_cnt; /* * If it is a READ operation, we should copy back the data * to the original SG list.
*/ if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, ori_sg_count, reqp->bounce_sgl, reqp->not_aligned_seg_bits); } storvsc_destroy_bounce_buffer(reqp->bounce_sgl); reqp->bounce_sgl_count = 0; } if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } #ifdef notyet /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed while the * timer handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } #endif cmd = (const struct scsi_generic *) ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; int srb_status = SRB_STATUS(vm_srb->srb_status); if (vm_srb->scsi_status == SCSI_STATUS_OK) { if (srb_status != SRB_STATUS_SUCCESS) { /* * If there are errors, for example, an invalid LUN, * the host will inform the VM through the SRB status. */ if (bootverbose) { if (srb_status == SRB_STATUS_INVALID_LUN) { xpt_print(ccb->ccb_h.path, "invalid LUN %d for op: %s\n", vm_srb->lun, scsi_op_desc(cmd->opcode, NULL)); } else { xpt_print(ccb->ccb_h.path, "Unknown SRB flag: %d for op: %s\n", srb_status, scsi_op_desc(cmd->opcode, NULL)); } } /* * XXX For a selection timeout, all of the LUNs * on the target will be gone. It works for SCSI * disks, but does not work for IDE disks. * * For CAM_DEV_NOT_THERE, CAM will only get * rid of the device(s) specified by the path. */ if (storvsc_get_storage_type(sc->hs_dev) == DRIVER_STORVSC) ccb->ccb_h.status |= CAM_SEL_TIMEOUT; else ccb->ccb_h.status |= CAM_DEV_NOT_THERE; } else { ccb->ccb_h.status |= CAM_REQ_CMP; } if (cmd->opcode == INQUIRY && srb_status == SRB_STATUS_SUCCESS) { int resp_xfer_len, resp_buf_len, data_len; uint8_t *resp_buf = (uint8_t *)csio->data_ptr; struct scsi_inquiry_data *inq_data = (struct scsi_inquiry_data *)csio->data_ptr; /* Get the buffer length reported by the host */ resp_xfer_len = vm_srb->transfer_len; /* Get the available buffer length */ resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0; data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len; if (bootverbose && data_len >= 5) { xpt_print(ccb->ccb_h.path, "storvsc inquiry " "(%d) [%x %x %x %x %x ... ]\n", data_len, resp_buf[0], resp_buf[1], resp_buf[2], resp_buf[3], resp_buf[4]); } /* + * XXX: Hyper-V (since win2012r2) responds to INQUIRY with + * an unknown version (0) for the GEN-2 DVD device. + * Manually set the version number to SPC3 in order to + * ask CAM to continue probing with "PROBE_REPORT_LUNS".
+ * see probedone() in scsi_xpt.c + */ + if (SID_TYPE(inq_data) == T_CDROM && + inq_data->version == 0 && + (vmstor_proto_version >= VMSTOR_PROTOCOL_VERSION_WIN8)) { + inq_data->version = SCSI_REV_SPC3; + if (bootverbose) { + xpt_print(ccb->ccb_h.path, + "set version from 0 to %d\n", + inq_data->version); + } + } + /* * XXX: Manually fix the wrong response returned from WS2012 */ if (!is_scsi_valid(inq_data) && (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 || vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 || vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) { if (data_len >= 4 && (resp_buf[2] == 0 || resp_buf[3] == 0)) { - resp_buf[2] = 5; // verion=5 means SPC-3 + resp_buf[2] = SCSI_REV_SPC3; resp_buf[3] = 2; // resp fmt must be 2 if (bootverbose) xpt_print(ccb->ccb_h.path, "fix version and resp fmt for 0x%x\n", vmstor_proto_version); } } else if (data_len >= SHORT_INQUIRY_LENGTH) { char vendor[16]; cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor), sizeof(vendor)); /* * XXX: Upgrade SPC2 to SPC3 if the host is WIN8 or * WIN2012 R2 in order to support the UNMAP feature. */ if (!strncmp(vendor, "Msft", 4) && SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 && (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 || vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8)) { inq_data->version = SCSI_REV_SPC3; if (bootverbose) { xpt_print(ccb->ccb_h.path, "storvsc upgrades " "SPC2 to SPC3\n"); } } } } } else { /** * On some Windows hosts the TEST_UNIT_READY command can return * SRB_STATUS_ERROR and sense data, for example, asc=0x3a,1 * "(Medium not present - tray closed)". This error can be * ignored since the command is sent to the host periodically. */ boolean_t unit_not_ready = vm_srb->scsi_status == SCSI_STATUS_CHECK_COND && cmd->opcode == TEST_UNIT_READY && srb_status == SRB_STATUS_ERROR; if (!unit_not_ready && bootverbose) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc scsi_status = %d, srb_status = %d\n", vm_srb->scsi_status, srb_status); mtx_unlock(&sc->hs_lock); } ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); mtx_unlock(&sc->hs_lock); xpt_done_direct(ccb); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list. * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine the type of storage device from the GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI) or BlkVSC (paravirtual IDE) device.
* * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { device_t parent = device_get_parent(dev); if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0) return DRIVER_BLKVSC; if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0) return DRIVER_STORVSC; return DRIVER_UNKNOWN; } #define PCI_VENDOR_INTEL 0x8086 #define PCI_PRODUCT_PIIX4 0x7111 static void storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path, struct ata_params *ident_buf __unused, int *veto) { /* * The ATA disks are shared with the controllers managed * by this driver, so veto the ATA disks' attachment; the * ATA disks will be attached as SCSI disks once this driver * attached. */ if (path->device->protocol == PROTO_ATA) { struct ccb_pathinq cpi; bzero(&cpi, sizeof(cpi)); xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); cpi.ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb *)&cpi); if (cpi.ccb_h.status == CAM_REQ_CMP && cpi.hba_vendor == PCI_VENDOR_INTEL && cpi.hba_device == PCI_PRODUCT_PIIX4) { (*veto)++; if (bootverbose) { xpt_print(path, "Disable ATA disks on " "simulated ATA controller (0x%04x%04x)\n", cpi.hba_device, cpi.hba_vendor); } } } } static void storvsc_sysinit(void *arg __unused) { if (vm_guest == VM_GUEST_HV) { storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto, storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY); } } SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit, NULL); static void storvsc_sysuninit(void *arg __unused) { if (storvsc_handler_tag != NULL) EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag); } SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysuninit, NULL); Index: projects/clang500-import/sys/net/iflib.c =================================================================== --- projects/clang500-import/sys/net/iflib.c (revision 321306) +++ projects/clang500-import/sys/net/iflib.c (revision 321307) @@ -1,5650 +1,5658 @@ /*- * Copyright (c) 2014-2017, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #if defined(__i386__) || defined(__amd64__) #include #include #include #include #include #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. */ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct mtx ifc_mtx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; int ifc_in_detach; int ifc_link_state; int ifc_link_irq; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return 
(ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (&ctx->ifc_media); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct cluster pointer for rx */ uint8_t *ifsd_flags; } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ uint8_t *ifsd_flags; } if_txsd_vec_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_MAX_RX_SEGS 32 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 /* The minimum descriptors per second before we start coalescing */ #define IFLIB_MIN_DESC_SEC 16384 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* maximum number of txqs that can share an rx interrupt */ #define IFLIB_MAX_TX_SHARED_INTR 4 /* this should really scale with ring size - this is a fairly arbitrary value */ #define TX_BATCH_SIZE 32 #define IFLIB_RESTART_BUDGET 8 #define IFC_LEGACY 0x001 #define IFC_QFLUSH 0x002 #define IFC_MULTISEG 0x004 #define IFC_DMAR 0x008 #define IFC_SC_ALLOCATED 0x010 #define IFC_INIT_DONE 0x020 #define IFC_PREFETCH 0x040 #define IFC_DO_RESET 0x080 #define IFC_CHECK_HUNG 0x100 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; qidx_t ift_cidx_processed; qidx_t ift_pidx; uint8_t ift_gen; uint8_t ift_br_offset; uint16_t ift_npending; uint16_t ift_db_pending; uint16_t ift_rs_pending; /* implicit pad */ uint8_t ift_txd_size[8]; uint64_t ift_processed; uint64_t ift_cleaned; uint64_t ift_cleaned_prev; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring *ift_br; struct grouptask ift_task; qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; if_txsd_vec_t ift_sds; uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; char ift_db_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS 
uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { qidx_t ifl_cidx; qidx_t ifl_pidx; qidx_t ifl_credits; uint8_t ifl_gen; uint8_t ifl_rxd_size; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_desc_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline qidx_t get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) { qidx_t used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { /* If there is a separate completion queue - * these are the cq cidx and pidx. Otherwise * these are unused. */ qidx_t ifr_size; qidx_t ifr_cq_cidx; qidx_t ifr_cq_pidx; uint8_t ifr_cq_gen; uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; uint16_t ifr_id; uint8_t ifr_lro_enabled; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); typedef struct if_rxsd { caddr_t *ifsd_cl; struct mbuf **ifsd_m; iflib_fl_t ifsd_fl; qidx_t ifsd_cidx; } *if_rxsd_t; /* multiple of word size */ #ifdef __LP64__ #define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else #define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) typedef struct if_pkt_info_pad { PKT_TYPE pkt_val[PKT_INFO_SIZE]; } *if_pkt_info_pad_t; typedef struct if_rxd_info_pad { PKT_TYPE rxd_val[RXD_INFO_SIZE]; } *if_rxd_info_pad_t; CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); static inline void pkt_info_zero(if_pkt_info_t pi) { if_pkt_info_pad_t pi_pad; pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; #ifndef __LP64__ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } static inline void rxd_info_zero(if_rxd_info_t ri) { if_rxd_info_pad_t ri_pad; int i; ri_pad = (if_rxd_info_pad_t)ri; for (i = 0; i < RXD_LOOP_BOUND; i += 4) { ri_pad->rxd_val[i] = 0; ri_pad->rxd_val[i+1] = 0; ri_pad->rxd_val[i+2] = 0; ri_pad->rxd_val[i+3] = 0; } #ifdef __LP64__ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; 
#endif } /* * Only allow a single packet to take up most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) #define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) #define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) #define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) /* Our boot-time initialization hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_no_tx_batch = 0; SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# tx mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# tx mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# tx mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# tx frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# rx allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; static int iflib_txq_drain_encapfail; SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD, &iflib_txq_drain_encapfail, 0, "# drain encap fails"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, 
&iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_intr_link; static int iflib_intr_msix; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_zero_len; static int iflib_rx_if_input; static int iflib_rx_mbuf_null; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD, &iflib_intr_link, 0, "# intr link calls"); SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD, &iflib_intr_msix, 0, "# intr msix calls"); SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# rx intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD, &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD, &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_txq_drain_encapfail = iflib_encap_load_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_intr_link = iflib_intr_msix = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input = iflib_rx_mbuf_null = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); static int iflib_register(if_ctx_t); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void 
_iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; int status; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } iflib_stop(ctx); iflib_init_locked(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; if (status) nm_clear_native_flags(na); CTX_UNLOCK(ctx); return (status); } /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; if (txq->ift_sds.ifsd_map) bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap ring, * nic_i is the corresponding index in the NIC ring. 
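 *
 * (Illustrative example, hypothetical values: on a 1024-slot ring with
 * nr_hwcur == 100 and rhead == 104, the loop below encapsulates slots
 * 100..103 and then advances nr_hwcur to 104.)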
* * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even when NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() call is especially expensive, * even when (as in this case) the tag is 0, so only do it * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; pkt_info_zero(&pi); pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); if (txq->ift_sds.ifsd_map) __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_len = len; pi.ipi_segs[0].ds_addr = paddr; pi.ipi_segs[0].ds_len = len; pi.ipi_nsegs = 1; pi.ipi_ndescs = 0; pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. */ ctx->isc_txd_encap(ctx->ifc_softc, &pi); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); if (txq->ift_sds.ifsd_map) { __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr); } /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; /* synchronize the NIC ring */ if (txq->ift_sds.ifsd_map) bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. */ if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocation, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt.
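 *
 * (Illustrative example, hypothetical values: on a 512-slot ring with
 * nkr_hwofs == 0 the two index spaces coincide, so a packet at NIC index
 * 37 appears at netmap slot 37; with a nonzero offset the relation is
 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size, as used below.)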
*/ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i, nic_i_start; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; struct if_rxd_update iru; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { if (fl->ifl_sds.ifsd_map == NULL) continue; bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; uint16_t slot_flags = kring->nkr_slot_flags; for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) { nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; ri.iri_cidx = nic_i; error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = slot_flags; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. 
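 * (Illustrative values: if nr_hwcur == 5 and head == 9, slots 5..8 have
 * been released by userspace and are handed back to the NIC below, in
 * batches of at most IFLIB_MAX_RX_REFRESH buffers.)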
* As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = kring->nr_hwcur; if (nm_i == head) return (0); iru.iru_paddrs = fl->ifl_bus_addrs; iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; iru.iru_idxs = fl->ifl_rxd_idxs; iru.iru_qsidx = rxq->ifr_id; iru.iru_buf_size = fl->ifl_buf_size; iru.iru_flidx = fl->ifl_id; nic_i_start = nic_i = netmap_idx_k2n(kring, nm_i); for (i = 0; nm_i != head; i++) { struct netmap_slot *slot = &ring->slot[nm_i]; void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ goto ring_reset; fl->ifl_vm_addrs[i] = addr; if (fl->ifl_sds.ifsd_map && (slot->flags & NS_BUF_CHANGED)) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], addr); } slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); fl->ifl_rxd_idxs[i] = nic_i = nm_next(nic_i, lim); if (nm_i != head && i < IFLIB_MAX_RX_REFRESH) continue; iru.iru_pidx = nic_i_start; iru.iru_count = i; i = 0; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); if (fl->ifl_sds.ifsd_map == NULL) { nic_i_start = nic_i; continue; } nic_i = nic_i_start; for (n = 0; n < iru.iru_count; n++) { bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_PREREAD); nic_i = nm_next(nic_i, lim); } nic_i_start = nic_i; } kring->nr_hwcur = head; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * IMPORTANT: we must leave one free slot in the ring, * so move nic_i back by one unit */ nic_i = nm_prev(nic_i, lim); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); return 0; ring_reset: return netmap_ring_reinit(kring); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return; if (txq->ift_sds.ifsd_map == NULL) return; for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. 
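 * (Illustrative note: netmap_idx_n2k() below maps NIC index i to its
 * netmap slot; when nkr_hwofs == 0 the mapping is the identity, so
 * buffer i is simply loaded at slot i.)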
* netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } } static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; struct if_rxd_update iru; iflib_fl_t fl; bus_dmamap_t *map; int nrxd; uint32_t i, j, pidx_start; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; fl = &rxq->ifr_fl[0]; map = fl->ifl_sds.ifsd_map; nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; iru.iru_paddrs = fl->ifl_bus_addrs; iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; iru.iru_idxs = fl->ifl_rxd_idxs; iru.iru_qsidx = rxq->ifr_id; iru.iru_buf_size = rxq->ifr_fl[0].ifl_buf_size; iru.iru_flidx = 0; for (pidx_start = i = j = 0; i < nrxd; i++, j++) { int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); void *addr; fl->ifl_rxd_idxs[j] = i; addr = fl->ifl_vm_addrs[j] = PNMB(na, slot + sj, &fl->ifl_bus_addrs[j]); if (map) { netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, *map, addr); map++; } if (j < IFLIB_MAX_RX_REFRESH && i < nrxd - 1) continue; iru.iru_pidx = pidx_start; pidx_start = i; iru.iru_count = j; j = 0; MPASS(pidx_start + j <= nrxd); /* Update descriptors and the cached value */ ctx->isc_rxd_refill(ctx->ifc_softc, &iru); } /* preserve queue */ if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t); } else ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) #define iflib_netmap_rxq_init(ctx, rxq) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #define netmap_tx_irq(ifp, qid) do {} while (0) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) #endif static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { int err; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ sctx->isc_q_align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", 
__func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else /* * We used to abuse the smp_started flag to decide if the queues have been * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). * That gave bad races, since the SYSINIT() runs strictly after smp_started * is set. Run a SYSINIT() strictly after that to just set a usable * completion flag. */ static int iflib_started; static void iflib_record_started(void *arg) { iflib_started = 1; } SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, iflib_record_started, NULL); #endif static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; if_ctx_t ctx; int i, cidx; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; ctx = rxq->ifr_ctx; if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, char *name) { int rc, flags; struct resource 
*res; void *tag = NULL; device_t dev = ctx->ifc_dev; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); irq->ii_rid = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int err, nsegments, ntsosegments; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); MPASS(ntsosegments > 0); /* * Setup DMA descriptor areas. */ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_desc_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); - device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n", - sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize); + device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", + (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ scctx->isc_tx_tso_size_max, /* maxsize */ ntsosegments, /* nsegments */ scctx->isc_tx_tso_segsize_max, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_desc_tag))) { device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err); goto fail; } if (!(txq->ift_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! 
(defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) return (0); if (!(txq->ift_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } #endif return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; map = NULL; if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[i]; if (map != NULL) { bus_dmamap_unload(txq->ift_desc_tag, map); bus_dmamap_destroy(txq->ift_desc_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_sds.ifsd_flags != NULL) { free(txq->ift_sds.ifsd_flags, M_IFLIB); txq->ift_sds.ifsd_flags = NULL; } if (txq->ift_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_desc_tag); txq->ift_desc_tag = NULL; } if (txq->ift_tso_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_desc_tag); txq->ift_tso_desc_tag = NULL; } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i]); } m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* XXX make configurable */ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; /* Reset indices */ txq->ift_cidx_processed = 0; txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. 
* **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_desc_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, err); goto fail; } if (!(fl->ifl_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) continue; if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create RX buffer DMA map\n"); goto fail; } } #endif } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #ifdef ACPI_DMAR #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR) #else #define IS_DMAR(ctx) (0) #endif /** * _iflib_fl_refill - refill a free-buffer list * @ctx: the iflib context * @fl: the free list to refill * @count: the number of new buffers to allocate * * (Re)populate an rx free-buffer list with up to @count new packet buffers. * The caller must ensure that @count does not exceed the list's capacity.
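 *
 * (Illustrative note: the rx path normally reaches this function through
 * __iflib_fl_refill_lt() below, which caps the request at
 * min(max, reclaimable) so that pidx never catches up with cidx.)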
*/ static void _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct mbuf *m; int idx, frag_idx = fl->ifl_fragidx; int pidx = fl->ifl_pidx; caddr_t cl, *sd_cl; struct mbuf **sd_m; uint8_t *sd_flags; struct if_rxd_update iru; bus_dmamap_t *sd_map; int n, i = 0; uint64_t bus_addr; int err; sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_flags = fl->ifl_sds.ifsd_flags; idx = pidx; n = count; MPASS(n > 0); MPASS(fl->ifl_credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (fl->ifl_credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); iru.iru_paddrs = fl->ifl_bus_addrs; iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; iru.iru_idxs = fl->ifl_rxd_idxs; iru.iru_qsidx = fl->ifl_rxq->ifr_id; iru.iru_buf_size = fl->ifl_buf_size; iru.iru_flidx = fl->ifl_id; while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized packet was received */ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); if ((frag_idx < 0) || (frag_idx >= fl->ifl_size)) bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); if ((cl = sd_cl[frag_idx]) == NULL) { if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); #if defined(__i386__) || defined(__amd64__) if (!IS_DMAR(ctx)) { bus_addr = pmap_kextract((vm_offset_t)cl); } else #endif { struct rxq_refill_cb_arg cb_arg; iflib_rxq_t q; cb_arg.error = 0; q = fl->ifl_rxq; MPASS(sd_map != NULL); MPASS(sd_map[frag_idx] != NULL); err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (err != 0 || cb_arg.error) { /* * !zone_pack ? 
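				 * If the load failed, only a zone_pack
				 * cluster is freed back to its zone here; any
				 * other cluster stays in sd_cl[frag_idx] and
				 * is retried on the next refill pass.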
*/ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); m_free(m); n = 0; goto done; } bus_addr = cb_arg.seg.ds_addr; } bit_set(fl->ifl_rx_bitmap, frag_idx); sd_flags[frag_idx] |= RX_SW_DESC_INUSE; MPASS(sd_m[frag_idx] == NULL); sd_cl[frag_idx] = cl; sd_m[frag_idx] = m; fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; fl->ifl_credits++; i++; MPASS(fl->ifl_credits <= fl->ifl_size); if (++idx == fl->ifl_size) { fl->ifl_gen = 1; idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); i = 0; pidx = idx; fl->ifl_pidx = idx; } } done: DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; if (sd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); fl->ifl_fragidx = frag_idx; } static __inline void __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_flags & RX_SW_DESC_INUSE) { if (fl->ifl_sds.ifsd_map != NULL) { bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_unload(fl->ifl_desc_tag, sd_map); bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); } if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); } if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { MPASS(fl->ifl_sds.ifsd_flags[i] == 0); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a receive ring and its buffers. 
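 *
 *  A sketch of the cluster-size ladder applied in iflib_fl_setup()
 *  below (when CONTIGMALLOC_WORKS is defined; without it everything
 *  above 2KB falls back to MJUMPAGESIZE):
 *
 *	isc_max_frame_size <= 2048  -> MCLBYTES
 *	isc_max_frame_size <= 4096  -> MJUMPAGESIZE
 *	isc_max_frame_size <= 9216  -> MJUM9BYTES
 *	otherwise                   -> MJUM16BYTES
 *
 *  e.g. a 9000-byte-MTU device ends up with 9KB clusters.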
 *
 **********************************************************************/
static int
iflib_fl_setup(iflib_fl_t fl)
{
	iflib_rxq_t rxq = fl->ifl_rxq;
	if_ctx_t ctx = rxq->ifr_ctx;
	if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;

	bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size);
	/*
	** Free current RX buffer structs and their mbufs
	*/
	iflib_fl_bufs_free(fl);
	/* Now replenish the mbufs */
	MPASS(fl->ifl_credits == 0);
	/*
	 * XXX don't set the max_frame_size to larger
	 * than the hardware can handle
	 */
	if (sctx->isc_max_frame_size <= 2048)
		fl->ifl_buf_size = MCLBYTES;
#ifndef CONTIGMALLOC_WORKS
	else
		fl->ifl_buf_size = MJUMPAGESIZE;
#else
	else if (sctx->isc_max_frame_size <= 4096)
		fl->ifl_buf_size = MJUMPAGESIZE;
	else if (sctx->isc_max_frame_size <= 9216)
		fl->ifl_buf_size = MJUM9BYTES;
	else
		fl->ifl_buf_size = MJUM16BYTES;
#endif
	if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
		ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
	fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
	fl->ifl_zone = m_getzone(fl->ifl_buf_size);

	/* avoid pre-allocating zillions of clusters to an idle card,
	 * potentially speeding up attach
	 */
	_iflib_fl_refill(ctx, fl, min(128, fl->ifl_size));
	MPASS(min(128, fl->ifl_size) == fl->ifl_credits);
	if (min(128, fl->ifl_size) != fl->ifl_credits)
		return (ENOBUFS);
	/*
	 * handle failure
	 */
	MPASS(rxq != NULL);
	MPASS(fl->ifl_ifdi != NULL);
	bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	return (0);
}

/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
static void
iflib_rx_sds_free(iflib_rxq_t rxq)
{
	iflib_fl_t fl;
	int i;

	if (rxq->ifr_fl != NULL) {
		for (i = 0; i < rxq->ifr_nfl; i++) {
			fl = &rxq->ifr_fl[i];
			if (fl->ifl_desc_tag != NULL) {
				bus_dma_tag_destroy(fl->ifl_desc_tag);
				fl->ifl_desc_tag = NULL;
			}
			free(fl->ifl_sds.ifsd_m, M_IFLIB);
			free(fl->ifl_sds.ifsd_cl, M_IFLIB);
			/* XXX destroy maps first */
			free(fl->ifl_sds.ifsd_map, M_IFLIB);
			fl->ifl_sds.ifsd_m = NULL;
			fl->ifl_sds.ifsd_cl = NULL;
			fl->ifl_sds.ifsd_map = NULL;
		}
		free(rxq->ifr_fl, M_IFLIB);
		rxq->ifr_fl = NULL;
		rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
	}
}

/*
 * MI (machine-independent) logic
 */
static void
iflib_timer(void *arg)
{
	iflib_txq_t txq = arg;
	if_ctx_t ctx = txq->ift_ctx;
	if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;

	if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
		return;
	/*
	** Check on the state of the TX queue(s); this can be done
	** without the lock because it's read-only and the HUNG state
	** will be static if set.
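	**
	** A sketch of the hang test applied below: a queue already
	** marked HUNG on the previous tick is reported hung only if no
	** descriptors were cleaned since then or no pause frames were
	** received, roughly
	**
	**	if (qstatus == IFLIB_QUEUE_HUNG &&
	**	    (cleaned_prev == cleaned || pause_frames == 0))
	**		goto hung;
	**
	** while a stalled mp ring merely (re)arms the HUNG mark for the
	** next tick.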
*/ IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && ((txq->ift_cleaned_prev == txq->ift_cleaned) || (sctx->isc_pause_frames == 0))) goto hung; if (ifmp_ring_is_stalled(txq->ift_br)) txq->ift_qstatus = IFLIB_QUEUE_HUNG; txq->ift_cleaned_prev = txq->ift_cleaned; /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: CTX_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); IFDI_WATCHDOG_RESET(ctx); ctx->ifc_watchdog_events++; ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); CTX_UNLOCK(ctx); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { MPASS(rxq->ifr_id == i); iflib_netmap_rxq_init(ctx, rxq); } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ if (if_getcapenable(ifp) & IFCAP_NETMAP) continue; for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } static void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 
	IFDI_INTR_DISABLE(ctx);
	DELAY(1000);
	IFDI_STOP(ctx);
	DELAY(1000);

	iflib_debug_reset();
	/* Wait for current tx queue users to exit to disarm watchdog timer. */
	for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) {
		/* make sure all transmitters have completed before proceeding XXX */

		/* clean any enqueued buffers */
		iflib_ifmp_purge(txq);
		/* Free any existing tx buffers. */
		for (j = 0; j < txq->ift_size; j++) {
			iflib_txsd_free(ctx, txq, j);
		}
		txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
		txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
		txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0;
		txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0;
		txq->ift_pullups = 0;
		ifmp_ring_reset_stats(txq->ift_br);
		for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++)
			bzero((void *)di->idi_vaddr, di->idi_size);
	}
	for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) {
		/* make sure all receivers have completed before proceeding XXX */

		/* note: zero the hardware ring state via the rxq's own DMA
		 * info, not via the txq pointer, which the loop above has
		 * already advanced past the last tx queue */
		for (j = 0, di = rxq->ifr_ifdi; j < ctx->ifc_nhwrxqs; j++, di++)
			bzero((void *)di->idi_vaddr, di->idi_size);
		/* also resets the free lists pidx/cidx */
		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
			iflib_fl_bufs_free(fl);
	}
}

static inline caddr_t
calc_next_rxd(iflib_fl_t fl, int cidx)
{
	qidx_t size;
	int nrxd;
	caddr_t start, end, cur, next;

	nrxd = fl->ifl_size;
	size = fl->ifl_rxd_size;
	start = fl->ifl_ifdi->idi_vaddr;

	if (__predict_false(size == 0))
		return (start);
	cur = start + size*cidx;
	end = start + size*nrxd;
	next = CACHE_PTR_NEXT(cur);
	return (next < end ? next : start);
}

static inline void
prefetch_pkts(iflib_fl_t fl, int cidx)
{
	int nextptr;
	int nrxd = fl->ifl_size;
	caddr_t next_rxd;

	nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1);
	prefetch(&fl->ifl_sds.ifsd_m[nextptr]);
	prefetch(&fl->ifl_sds.ifsd_cl[nextptr]);
	next_rxd = calc_next_rxd(fl, cidx);
	prefetch(next_rxd);
	prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]);
	prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]);
	prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]);
	prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]);
	prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]);
	prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]);
	prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]);
	prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]);
}

static void
rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd)
{
	int flid, cidx;
	bus_dmamap_t map;
	iflib_fl_t fl;
	iflib_dma_info_t di;
	int next;

	map = NULL;
	flid = irf->irf_flid;
	cidx = irf->irf_idx;
	fl = &rxq->ifr_fl[flid];
	sd->ifsd_fl = fl;
	sd->ifsd_cidx = cidx;
	sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx];
	sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx];
	fl->ifl_credits--;
#if MEMORY_LOGGING
	fl->ifl_m_dequeued++;
#endif
	if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH)
		prefetch_pkts(fl, cidx);
	if (fl->ifl_sds.ifsd_map != NULL) {
		next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1);
		prefetch(&fl->ifl_sds.ifsd_map[next]);
		map = fl->ifl_sds.ifsd_map[cidx];
		di = fl->ifl_ifdi;
		next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1);
		prefetch(&fl->ifl_sds.ifsd_flags[next]);
		bus_dmamap_sync(di->idi_tag, di->idi_map,
				BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		/* not valid assert if bxe really does SGE from non-contiguous elements */
		MPASS(fl->ifl_cidx == cidx);
		if (unload)
			bus_dmamap_unload(fl->ifl_desc_tag, map);
	}
	fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1);
	if (__predict_false(fl->ifl_cidx == 0))
		fl->ifl_gen = 0;
	if (map != NULL)
bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bit_clear(fl->ifl_rx_bitmap, cidx); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) { int i, padlen , flags; struct mbuf *m, *mh, *mt; caddr_t cl; i = 0; mh = NULL; do { rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd); MPASS(*sd->ifsd_cl != NULL); MPASS(*sd->ifsd_m != NULL); /* Don't include zero-length frags */ if (ri->iri_frags[i].irf_len == 0) { /* XXX we can save the cluster here, but not the mbuf */ m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0); m_free(*sd->ifsd_m); *sd->ifsd_m = NULL; continue; } m = *sd->ifsd_m; *sd->ifsd_m = NULL; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) { rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd); m = *sd.ifsd_m; *sd.ifsd_m = NULL; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m)) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri, &sd); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } static bool iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; int lro_enabled; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt; ifp = ctx->ifc_ifp; #ifdef DEV_NETMAP if (ifp->if_capenable & IFCAP_NETMAP) { u_int work = 0; if (netmap_rx_irq(ifp, rxq->ifr_id, &work)) return (FALSE); } #endif mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (false); } for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) 
goto err; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? */ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); continue; } /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; #endif DBG_COUNTER_INC(rx_if_input); ifp->if_input(ifp, m); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail) return true; return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: CTX_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); CTX_UNLOCK(ctx); return (false); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we should be setting this to something other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) static inline bool iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) { qidx_t dbval, max; bool rang; rang = false; max 
= TXQ_MAX_DB_DEFERRED(txq, in_use); if (ring || txq->ift_db_pending >= max) { dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; rang = true; } return (rang); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m, *n; n = m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); n = *mp = m; } } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; if (pi->ipi_csum_flags & CSUM_IP) ip->ip_sum = 0; if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = 
in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; } if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * The corresponding flag is set by the stack in the IPv4 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). * So, set it here because the rest of the flow requires it. */ pi->ipi_csum_flags |= CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } static __noinline struct mbuf * collapse_pkthdr(struct mbuf *m0) { struct mbuf *m, *m_next, *tmp; m = m0; m_next = m->m_next; while (m_next != NULL && m_next->m_len == 0) { m = m_next; m->m_next = NULL; m_free(m); m_next = m_next->m_next; } m = m0; m->m_next = m_next; if ((m_next->m_flags & M_EXT) == 0) { m = m_defrag(m, M_NOWAIT); } else { tmp = m_next->m_next; memcpy(m_next, m, MPKTHSIZE); m = m_next; m->m_next = tmp; } return (m); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif i = 1; while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; i++; } return (mh); } static int iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs, int max_segs, int flags) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; int i, next, pidx, err, ntxd, count; struct mbuf *m, *tmp, **ifsd_m; m = *m0; /* * Please don't ever do this */ if (__predict_false(m->m_len == 0)) *m0 = m = collapse_pkthdr(m); ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) return (err); ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; count = 0; m = *m0; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; 
m_free(tmp); continue; } + m = m->m_next; + count++; + } while (m != NULL); + if (count > *nsegs) + return (0); + m = *m0; + count = 0; + do { next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); ifsd_m[next] = m; count++; tmp = m; m = m->m_next; } while (m != NULL); } else { int buflen, sgsize, maxsegsz, max_sgsize; vm_offset_t vaddr; vm_paddr_t curaddr; count = i = 0; m = *m0; if (m->m_pkthdr.csum_flags & CSUM_TSO) maxsegsz = scctx->isc_tx_tso_segsize_max; else maxsegsz = sctx->isc_tx_maxsegsize; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } buflen = m->m_len; vaddr = (vm_offset_t)m->m_data; /* * see if we can't be smarter about physically * contiguous mappings */ next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); #if MEMORY_LOGGING txq->ift_enqueued++; #endif ifsd_m[next] = m; while (buflen > 0) { max_sgsize = MIN(buflen, maxsegsz); curaddr = pmap_kextract(vaddr); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); sgsize = MIN(sgsize, max_sgsize); segs[i].ds_addr = curaddr; segs[i].ds_len = sgsize; vaddr += sgsize; buflen -= sgsize; i++; if (i >= max_segs) goto err; } count++; tmp = m; m = m->m_next; } while (m != NULL); *nsegs = i; } return (0); err: *m0 = iflib_remove_mbuf(txq); return (EFBIG); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_segment_t *segs; struct mbuf *m_head; void *next_txd; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; bus_dma_tag_t desc_tag; segs = txq->ift_segs; ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; if (ctx->ifc_flags & IFC_PREFETCH) { next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { next_txd = calc_next_txd(txq, cidx, 0); prefetch(next_txd); } /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); if (txq->ift_sds.ifsd_map != NULL) { prefetch(&txq->ift_sds.ifsd_map[next]); next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } } else if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { desc_tag = txq->ift_tso_desc_tag; max_segs = scctx->isc_tx_tso_segments_max; } else { desc_tag = txq->ift_desc_tag; max_segs = scctx->isc_tx_nsegments; } m_head = *m_headp; pkt_info_zero(&pi); pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? 
m_head->m_pkthdr.ether_vtag : 0; pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; } retry: err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); if (remap == 1) m_head = m_defrag(*m_headp, M_NOWAIT); remap++; if (__predict_false(m_head == NULL)) goto defrag_failed; txq->ift_mbuf_defrag++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); return (err); } /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } /* * On Intel cards we can greatly reduce the number of TX interrupts * we see by only setting report status on every Nth descriptor. * However, this also means that the driver will need to keep track * of the descriptors that RS was set on to check them for the DD bit. */ txq->ift_rs_pending += nsegs + 1; if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs - 1) <= MAX_TX_DESC(ctx)) { pi.ipi_flags |= IPI_TX_INTR; txq->ift_rs_pending = 0; } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if (map != NULL) bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE); if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { if (map != NULL) bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } /* * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if (__predict_false(err == EFBIG && remap < 2)) { *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; } else DBG_COUNTER_INC(encap_txd_encap_fail); return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { int hasmap; uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; uint8_t *ifsd_flags; bus_dmamap_t *ifsd_map; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; ifsd_m = 
txq->ift_sds.ifsd_m; ifsd_map = txq->ift_sds.ifsd_map; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n--) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if (ifsd_m[cidx] != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]); if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) { /* * does it matter if it's not the TSO tag? If so we'll * have to add the type to flags */ bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]); ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED; } if ((m = ifsd_m[cidx]) != NULL) { /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); m_free(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { prefetch(&items[next]); prefetch(items[(cidx + offset + 1) & (size-1)]); prefetch(items[(cidx + offset + 2) & (size-1)]); prefetch(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) || ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; struct mbuf **mp, *m; int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail; int reclaimed, err, in_use_prev, desc_used; bool do_prefetch, ring, rang; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if 
(__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); avail = TXQ_AVAIL(txq); for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) { int pidx_prev, rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); if (__predict_false(*mp == (struct mbuf *)txq)) { consumed++; reclaimed++; continue; } in_use_prev = txq->ift_in_use; pidx_prev = txq->ift_pidx; err = iflib_encap(txq, mp); if (__predict_false(err)) { DBG_COUNTER_INC(txq_drain_encapfail); /* no room - bail out */ if (err == ENOBUFS) break; consumed++; DBG_COUNTER_INC(txq_drain_encapfail); /* we can't send this packet - skip it */ continue; } consumed++; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); avail = TXQ_AVAIL(txq); txq->ift_db_pending += (txq->ift_in_use - in_use_prev); desc_used += (txq->ift_in_use - in_use_prev); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", consumed); #endif return (consumed); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; int rc; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; if ((ifp->if_capenable & IFCAP_NETMAP)) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); else ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags 
& IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more) GROUPTASK_ENQUEUE(&rxq->ifr_task); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { return; } } CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); IFDI_LINK_INTR_ENABLE(ctx); if (ctx->ifc_flags & IFC_DO_RESET) { ctx->ifc_flags &= ~IFC_DO_RESET; iflib_if_init_locked(ctx); } CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENOBUFS); } MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) 
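	 *
	 * The mapping used today is the QIDX() macro defined earlier:
	 *
	 *	qidx = ((m->m_pkthdr.flowid & isc_rss_table_mask) %
	 *	    NTXQSETS(ctx)) + FIRST_QSET(ctx);
	 *
	 * so every packet of a given flow lands on the same txq.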
*/ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); if (err) { GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { GROUPTASK_ENQUEUE(&txq->ift_task); } return (err); } static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; CTX_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; CTX_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) iflib_txq_check_drain(txq, 0); CTX_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; CTX_UNLOCK(ctx); if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
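		**
		** (e.g. adding an address alias would otherwise bounce the
		** link).  For AF_INET/AF_INET6 we therefore just mark the
		** interface up and, if it is not yet running, schedule a
		** reinit below instead of falling into ether_ioctl().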
*/ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* falls thru */ case SIOCGIFMEDIA: err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC)); if_vlancap(ifp); /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING) iflib_stop(ctx); if_togglecapenable(ifp, setmask); if (bits & IFF_DRV_RUNNING) iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); } break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t 
ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { pci_vendor_info_t *ent; uint16_t pci_vendor_id, pci_device_id; uint16_t pci_subvendor_id, pci_subdevice_id; uint16_t pci_rev_id; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { int err, rid, msix, msix_bar; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); return (err); } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], 
sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; #ifdef INVARIANTS MPASS(scctx->isc_capenable); if (scctx->isc_capenable & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capenable); if_setcapenable(ifp, scctx->isc_capenable); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #elif !(defined(__i386__) || defined(__amd64__)) /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); err = EINVAL; goto fail; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware */ if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX) scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX; /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max; ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max; ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * we have a good value there that is known to work. 
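	 *
	 * Note that the else-branch below is the fully degraded path used
	 * when no MSI-X bar was advertised at all: one vector, one txq
	 * set, one rxq set, legacy interrupts (isc_intr = IFLIB_INTR_LEGACY).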
*/ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post * SI_SUB_SMP. * * XXX: disabling interrupts doesn't actually work, at least for * the non-MSI case. When they occur before SI_SUB_SMP completes, * we do null handling and depend on this not causing too large an * interrupt storm. */ IFDI_INTR_DISABLE(ctx); if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_intr_free; } if (msix <= 1) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI) pci_release_msi(ctx->ifc_dev); fail_queues: /* XXX free queues */ fail: IFDI_DETACH(ctx); return (err); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } CTX_LOCK(ctx); ctx->ifc_in_detach = 1; iflib_stop(ctx); CTX_UNLOCK(ctx); /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); iflib_netmap_detach(ifp); ether_ifdetach(ifp); /* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */ CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq !=
NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); IFDI_DETACH(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } bus_generic_detach(dev); if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (0); } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxd_min[0]); MPASS(sctx->isc_nrxd_max[0]); MPASS(sctx->isc_nrxd_default[0]); MPASS(sctx->isc_ntxd_min[0]); MPASS(sctx->isc_ntxd_max[0]); MPASS(sctx->isc_ntxd_default[0]); } static void _iflib_pre_assert(if_softc_ctx_t scctx) { MPASS(scctx->isc_txrx->ift_txd_encap); MPASS(scctx->isc_txrx->ift_txd_flush); MPASS(scctx->isc_txrx->ift_txd_credits_update); MPASS(scctx->isc_txrx->ift_rxd_available); MPASS(scctx->isc_txrx->ift_rxd_pkt_get); 
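	/*
	 * Illustrative sketch, not part of this commit: the checks in
	 * this function only pass if the driver published an if_txrx
	 * ops table from its IFDI_ATTACH_PRE method, along the lines
	 * of the hypothetical
	 *
	 *	static struct if_txrx hypo_txrx = {
	 *		.ift_txd_encap = hypo_txd_encap,
	 *		.ift_txd_flush = hypo_txd_flush,
	 *		.ift_txd_credits_update = hypo_txd_credits_update,
	 *		.ift_rxd_available = hypo_rxd_available,
	 *		.ift_rxd_pkt_get = hypo_rxd_pkt_get,
	 *		.ift_rxd_refill = hypo_rxd_refill,
	 *		.ift_rxd_flush = hypo_rxd_flush,
	 *	};
	 *	scctx->isc_txrx = &hypo_txrx;
	 *
	 * All hypo_* names are assumptions for illustration only.
	 */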
MPASS(scctx->isc_txrx->ift_rxd_refill); MPASS(scctx->isc_txrx->ift_rxd_flush); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; _iflib_assert(sctx); CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); driver->refs++; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); if_settransmitfn(ifp, iflib_if_transmit); if_setqflushfn(ifp, iflib_if_qflush); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); ifmedia_init(&ctx->ifc_media, IFM_IMASK, iflib_media_change, iflib_media_status); return (0); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? 
sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; struct ifmp_ring **brscp; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ if (!(txq = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(rxq = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", device_get_nameunit(dev), txq->ift_id); err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; } /* Allocate receive buffers for the ring*/ if 
(iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO); } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; rx_fail: if (brscp != NULL) free(brscp, M_IFLIB); if (rxq != NULL) free(rxq, M_IFLIB); if (txq != NULL) free(txq, M_IFLIB); fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { iflib_txq_destroy(txq); for (j = 0; j < ctx->ifc_nhwtxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int i, err; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free RX software descriptors allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so it's the terminus.
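 *
 * Worked example (illustration only): with isc_nrxqsets == 4 and a
 * failure at q == 2, rings 0 and 1 completed and are unwound by the
 * loop below, ring 2 cleaned up after itself, and ring 3 was never
 * touched.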
*/ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { iflib_rx_sds_free(rxq); rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } return (err); #endif } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; if ((err = iflib_tx_structures_setup(ctx)) != 0) return (err); if ((err = iflib_rx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); } return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } static int find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) { int i, cpuid, eqid, count; CPU_COPY(&ctx->ifc_cpus, cpus); count = CPU_COUNT(&ctx->ifc_cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { cpuid = CPU_FFS(cpus); MPASS(cpuid != 0); CPU_CLR(cpuid-1, cpus); } cpuid = CPU_FFS(cpus); MPASS(cpuid != 0); return (cpuid-1); } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; cpuset_t cpus; gtask_fn_t *fn; int tqrid, err, cpuid; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? 
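	 *
	 * Illustrative sketch, not part of this commit: a hypothetical
	 * MSI-X driver's IFDI_MSIX_INTR_ASSIGN would call this routine
	 * once per queue set, roughly
	 *
	 *	for (i = 0; i < scctx->isc_nrxqsets; i++) {
	 *		snprintf(buf, sizeof(buf), "rxq%d", i);
	 *		err = iflib_irq_alloc_generic(ctx, &hypo_irq[i],
	 *		    i + 1, IFLIB_INTR_RX, hypo_filter, hypo_arg,
	 *		    i, buf);
	 *	}
	 *
	 * hypo_* names and the rid arithmetic are assumptions; real
	 * drivers differ in how they map vectors to rids.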
*/ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: panic("unknown net intr type"); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { cpuid = find_nth(ctx, &cpus, qid); taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name); } else { taskqgroup_attach(tqg, gtask, q, tqrid, name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; rid = -1; fn = _task_fn_iov; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, rid, name); } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; void *q; int err; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = ctx; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, tqrid, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t 
ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { #ifdef INVARIANTS struct grouptask *gtask; gtask = &ctx->ifc_admin_task; MPASS(gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); } void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) ctx->ifc_flags |= IFC_PREFETCH; /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif if (ctx->isc_txd_credits_update == NULL) return (0); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } struct mtx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_mtx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs; int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; iflib_num_tx_queues = scctx->isc_ntxqsets; iflib_num_rx_queues = scctx->isc_nrxqsets; device_printf(dev, "msix_init qsets capped at %d\n", iflib_num_tx_queues); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* ** When used in a virtualized environment ** PCI BUSMASTER capability may not be set ** so explicitly set it here and rewrite ** the ENABLE in the MSIX control register ** at this point to cause the host to ** successfully initialize us.
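	**
	** Worked example (illustration only): if pci_find_cap() below
	** reports the MSI-X capability at offset 0x70, then
	** rid + PCIR_MSIX_CTRL is 0x72, and the 16-bit message control
	** word read from there gets PCIM_MSIXCTRL_MSIX_ENABLE forced
	** on before being written back.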
*/ { int msix_ctrl, rid; pci_enable_busmaster(dev); rid = 0; if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) { rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } else { device_printf(dev, "PCIY_MSIX capability not found; " "or rid %d == 0.\n", rid); goto msi; } } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use msix in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { /* May not be enabled */ device_printf(dev, "Unable to map MSIX table \n"); goto msi; } } /* First try MSI/X */ if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ device_printf(dev, "System has MSIX disabled \n"); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; goto msi; } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) == 0) { #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); } else { device_printf(dev, "Unable to fetch CPU list\n"); /* Figure out a reasonable auto config value */ queues = min(queuemsgs, mp_ncpus); } #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } char * ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; 
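	/*
	 * Example of what this handler renders (values assumed for
	 * illustration):
	 *
	 *	pidx_head: 0042 pidx_tail: 0042 cidx: 0040 state: BUSY
	 *
	 * state[0..2] are the 16-bit indices packed into the mp_ring
	 * state word; state[3] indexes ring_states[] above.
	 */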
char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSIX (default 0)"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of tx descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if 
(scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors processed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if
(scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif Index: projects/clang500-import/sys/netinet/sctp_indata.c =================================================================== --- projects/clang500-import/sys/netinet/sctp_indata.c (revision 321306) +++ projects/clang500-import/sys/netinet/sctp_indata.c (revision 321307) @@ -1,5737 +1,5731 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * NOTES: On the outbound side of things I need to check the sack timer to * see if I should generate a sack into the chunk queue (if I have data to * send that is and will be sending it .. for bundling. * * The callback in sctp_usrreq.c will get called when the socket is read from. * This will cause sctp_service_queues() to get called on the top entry in * the list. */ static uint32_t sctp_add_chk_to_control(struct sctp_queued_to_read *control, struct sctp_stream_in *strm, struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_tmit_chunk *chk, int lock_held); void sctp_set_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc) { asoc->my_rwnd = sctp_calc_rwnd(stcb, asoc); } /* Calculate what the rwnd would be */ uint32_t sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc) { uint32_t calc = 0; /* * This is really set wrong with respect to a 1-2-m socket. Since * the sb_cc is the count that everyone as put up. When we re-write * sctp_soreceive then we will fix this so that ONLY this * associations data is taken into account. */ if (stcb->sctp_socket == NULL) { return (calc); } if (stcb->asoc.sb_cc == 0 && asoc->size_on_reasm_queue == 0 && asoc->size_on_all_streams == 0) { /* Full rwnd granted */ KASSERT(asoc->cnt_on_reasm_queue == 0, ("cnt_on_reasm_queue is %u", asoc->cnt_on_reasm_queue)); KASSERT(asoc->cnt_on_all_streams == 0, ("cnt_on_all_streams is %u", asoc->cnt_on_all_streams)); calc = max(SCTP_SB_LIMIT_RCV(stcb->sctp_socket), SCTP_MINIMAL_RWND); return (calc); } /* get actual space */ calc = (uint32_t)sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv); /* * take out what has NOT been put on socket queue and we yet hold * for putting up. */ calc = sctp_sbspace_sub(calc, (uint32_t)(asoc->size_on_reasm_queue + asoc->cnt_on_reasm_queue * MSIZE)); calc = sctp_sbspace_sub(calc, (uint32_t)(asoc->size_on_all_streams + asoc->cnt_on_all_streams * MSIZE)); if (calc == 0) { /* out of space */ return (calc); } /* what is the overhead of all these rwnd's */ calc = sctp_sbspace_sub(calc, stcb->asoc.my_rwnd_control_len); /* * If the window gets too small due to ctrl-stuff, reduce it to 1, * even it is 0. SWS engaged */ if (calc < stcb->asoc.my_rwnd_control_len) { calc = 1; } return (calc); } /* * Build out our readq entry based on the incoming packet. 
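 *
 * Illustrative call, not a new call site added by this commit:
 *
 *	control = sctp_build_readq_entry(stcb, net, tsn, ppid,
 *	    context, sid, mid, flags, dm);
 *	if (control == NULL)
 *		(allocation failed; the caller drops the chunk)
 *
 * The argument names mirror the parameters of the function below.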
*/ struct sctp_queued_to_read * sctp_build_readq_entry(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t tsn, uint32_t ppid, uint32_t context, uint16_t sid, uint32_t mid, uint8_t flags, struct mbuf *dm) { struct sctp_queued_to_read *read_queue_e = NULL; sctp_alloc_a_readq(stcb, read_queue_e); if (read_queue_e == NULL) { goto failed_build; } memset(read_queue_e, 0, sizeof(struct sctp_queued_to_read)); read_queue_e->sinfo_stream = sid; read_queue_e->sinfo_flags = (flags << 8); read_queue_e->sinfo_ppid = ppid; read_queue_e->sinfo_context = context; read_queue_e->sinfo_tsn = tsn; read_queue_e->sinfo_cumtsn = tsn; read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb); read_queue_e->mid = mid; read_queue_e->top_fsn = read_queue_e->fsn_included = 0xffffffff; TAILQ_INIT(&read_queue_e->reasm); read_queue_e->whoFrom = net; atomic_add_int(&net->ref_count, 1); read_queue_e->data = dm; read_queue_e->stcb = stcb; read_queue_e->port_from = stcb->rport; failed_build: return (read_queue_e); } struct mbuf * sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo) { struct sctp_extrcvinfo *seinfo; struct sctp_sndrcvinfo *outinfo; struct sctp_rcvinfo *rcvinfo; struct sctp_nxtinfo *nxtinfo; struct cmsghdr *cmh; struct mbuf *ret; int len; int use_extended; int provide_nxt; if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT) && sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO) && sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) { /* user does not want any ancillary data */ return (NULL); } len = 0; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) { len += CMSG_SPACE(sizeof(struct sctp_rcvinfo)); } seinfo = (struct sctp_extrcvinfo *)sinfo; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO) && (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) { provide_nxt = 1; len += CMSG_SPACE(sizeof(struct sctp_nxtinfo)); } else { provide_nxt = 0; } if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { use_extended = 1; len += CMSG_SPACE(sizeof(struct sctp_extrcvinfo)); } else { use_extended = 0; len += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo)); } } else { use_extended = 0; } ret = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA); if (ret == NULL) { /* No space */ return (ret); } SCTP_BUF_LEN(ret) = 0; /* We need a CMSG header followed by the struct */ cmh = mtod(ret, struct cmsghdr *); /* * Make sure that there is no un-initialized padding between the * cmsg header and cmsg data and after the cmsg data. 
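 *
 * Worked example (illustration only): for struct sctp_rcvinfo,
 * CMSG_LEN() covers the cmsghdr plus payload while CMSG_SPACE()
 * also counts the trailing alignment padding; since 'len' above
 * was accumulated from CMSG_SPACE() values, the memset() below
 * zeroes those padding bytes too.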
*/ memset(cmh, 0, len); if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) { cmh->cmsg_level = IPPROTO_SCTP; cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_rcvinfo)); cmh->cmsg_type = SCTP_RCVINFO; rcvinfo = (struct sctp_rcvinfo *)CMSG_DATA(cmh); rcvinfo->rcv_sid = sinfo->sinfo_stream; rcvinfo->rcv_ssn = sinfo->sinfo_ssn; rcvinfo->rcv_flags = sinfo->sinfo_flags; rcvinfo->rcv_ppid = sinfo->sinfo_ppid; rcvinfo->rcv_tsn = sinfo->sinfo_tsn; rcvinfo->rcv_cumtsn = sinfo->sinfo_cumtsn; rcvinfo->rcv_context = sinfo->sinfo_context; rcvinfo->rcv_assoc_id = sinfo->sinfo_assoc_id; cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_rcvinfo))); SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_rcvinfo)); } if (provide_nxt) { cmh->cmsg_level = IPPROTO_SCTP; cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_nxtinfo)); cmh->cmsg_type = SCTP_NXTINFO; nxtinfo = (struct sctp_nxtinfo *)CMSG_DATA(cmh); nxtinfo->nxt_sid = seinfo->serinfo_next_stream; nxtinfo->nxt_flags = 0; if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) { nxtinfo->nxt_flags |= SCTP_UNORDERED; } if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) { nxtinfo->nxt_flags |= SCTP_NOTIFICATION; } if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) { nxtinfo->nxt_flags |= SCTP_COMPLETE; } nxtinfo->nxt_ppid = seinfo->serinfo_next_ppid; nxtinfo->nxt_length = seinfo->serinfo_next_length; nxtinfo->nxt_assoc_id = seinfo->serinfo_next_aid; cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_nxtinfo))); SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_nxtinfo)); } if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { cmh->cmsg_level = IPPROTO_SCTP; outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh); if (use_extended) { cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_extrcvinfo)); cmh->cmsg_type = SCTP_EXTRCV; memcpy(outinfo, sinfo, sizeof(struct sctp_extrcvinfo)); SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_extrcvinfo)); } else { cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); cmh->cmsg_type = SCTP_SNDRCV; *outinfo = *sinfo; SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo)); } } return (ret); } static void sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn) { uint32_t gap, i, cumackp1; int fnd = 0; int in_r = 0, in_nr = 0; if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) { return; } cumackp1 = asoc->cumulative_tsn + 1; if (SCTP_TSN_GT(cumackp1, tsn)) { /* * this tsn is behind the cum ack and thus we don't need to * worry about it being moved from one to the other. 
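 *
 * Worked example (illustration only): with cumulative_tsn 99,
 * cumackp1 is 100 and any tsn below 100 returns here; for
 * tsn 103 with mapping_array_base_tsn 100, SCTP_CALC_TSN_TO_GAP
 * below yields gap 3, i.e. bit 3 of the mapping arrays.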
*/ return; } SCTP_CALC_TSN_TO_GAP(gap, tsn, asoc->mapping_array_base_tsn); in_r = SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap); in_nr = SCTP_IS_TSN_PRESENT(asoc->nr_mapping_array, gap); if ((in_r == 0) && (in_nr == 0)) { #ifdef INVARIANTS panic("Things are really messed up now"); #else SCTP_PRINTF("gap:%x tsn:%x\n", gap, tsn); sctp_print_mapping_array(asoc); #endif } if (in_nr == 0) SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); if (in_r) SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap); if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } if (tsn == asoc->highest_tsn_inside_map) { /* We must back down to see what the new highest is */ for (i = tsn - 1; SCTP_TSN_GE(i, asoc->mapping_array_base_tsn); i--) { SCTP_CALC_TSN_TO_GAP(gap, i, asoc->mapping_array_base_tsn); if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) { asoc->highest_tsn_inside_map = i; fnd = 1; break; } } if (!fnd) { asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn - 1; } } } static int sctp_place_control_in_stream(struct sctp_stream_in *strm, struct sctp_association *asoc, struct sctp_queued_to_read *control) { struct sctp_queued_to_read *at; struct sctp_readhead *q; uint8_t flags, unordered; flags = (control->sinfo_flags >> 8); unordered = flags & SCTP_DATA_UNORDERED; if (unordered) { q = &strm->uno_inqueue; if (asoc->idata_supported == 0) { if (!TAILQ_EMPTY(q)) { /* * Only one stream can be here in old style * -- abort */ return (-1); } TAILQ_INSERT_TAIL(q, control, next_instrm); control->on_strm_q = SCTP_ON_UNORDERED; return (0); } } else { q = &strm->inqueue; } if ((flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) { control->end_added = 1; control->first_frag_seen = 1; control->last_frag_seen = 1; } if (TAILQ_EMPTY(q)) { /* Empty queue */ TAILQ_INSERT_HEAD(q, control, next_instrm); if (unordered) { control->on_strm_q = SCTP_ON_UNORDERED; } else { control->on_strm_q = SCTP_ON_ORDERED; } return (0); } else { TAILQ_FOREACH(at, q, next_instrm) { if (SCTP_MID_GT(asoc->idata_supported, at->mid, control->mid)) { /* * one in queue is bigger than the new one, * insert before this one */ TAILQ_INSERT_BEFORE(at, control, next_instrm); if (unordered) { control->on_strm_q = SCTP_ON_UNORDERED; } else { control->on_strm_q = SCTP_ON_ORDERED; } break; } else if (SCTP_MID_EQ(asoc->idata_supported, at->mid, control->mid)) { /* * Gak, He sent me a duplicate msg id * number?? return -1 to abort. 
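 *
 * Worked example (illustration only): with MIDs 2, 5 and 9
 * queued, an arriving MID 7 is inserted before 9 by the GT
 * check above, while a second MID 5 falls through to this
 * duplicate case and aborts the association.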
*/ return (-1); } else { if (TAILQ_NEXT(at, next_instrm) == NULL) { /* * We are at the end, insert it * after this one */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) { sctp_log_strm_del(control, at, SCTP_STR_LOG_FROM_INSERT_TL); } TAILQ_INSERT_AFTER(q, at, control, next_instrm); if (unordered) { control->on_strm_q = SCTP_ON_UNORDERED; } else { control->on_strm_q = SCTP_ON_ORDERED; } break; } } } } return (0); } static void sctp_abort_in_reasm(struct sctp_tcb *stcb, struct sctp_queued_to_read *control, struct sctp_tmit_chunk *chk, int *abort_flag, int opspot) { char msg[SCTP_DIAG_INFO_LEN]; struct mbuf *oper; if (stcb->asoc.idata_supported) { snprintf(msg, sizeof(msg), "Reass %x,CF:%x,TSN=%8.8x,SID=%4.4x,FSN=%8.8x,MID:%8.8x", opspot, control->fsn_included, chk->rec.data.tsn, chk->rec.data.sid, chk->rec.data.fsn, chk->rec.data.mid); } else { snprintf(msg, sizeof(msg), "Reass %x,CI:%x,TSN=%8.8x,SID=%4.4x,FSN=%4.4x,SSN:%4.4x", opspot, control->fsn_included, chk->rec.data.tsn, chk->rec.data.sid, chk->rec.data.fsn, (uint16_t)chk->rec.data.mid); } oper = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); sctp_m_freem(chk->data); chk->data = NULL; sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1; sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED); *abort_flag = 1; } static void sctp_clean_up_control(struct sctp_tcb *stcb, struct sctp_queued_to_read *control) { /* * The control could not be placed and must be cleaned. */ struct sctp_tmit_chunk *chk, *nchk; TAILQ_FOREACH_SAFE(chk, &control->reasm, sctp_next, nchk) { TAILQ_REMOVE(&control->reasm, chk, sctp_next); if (chk->data) sctp_m_freem(chk->data); chk->data = NULL; sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } sctp_free_a_readq(stcb, control); } /* * Queue the chunk either right into the socket buffer if it is the next one * to go OR put it in the correct place in the delivery queue. If we do * append to the so_buf, keep doing so until we are out of order as * long as the controls entered are non-fragmented. */ static void sctp_queue_data_to_stream(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_queued_to_read *control, int *abort_flag, int *need_reasm) { /* * FIX-ME maybe? What happens when the ssn wraps? If we are getting * all the data in one stream this could happen quite rapidly. One * could use the TSN to keep track of things, but this scheme breaks * down in the other type of stream usage that could occur. Send a * single msg to stream 0, send 4Billion messages to stream 1, now * send a message to stream 0. You have a situation where the TSN * has wrapped but not in the stream. Is this worth worrying about * or should we just change our queue sort at the bottom to be by * TSN. * * Could it also be legal for a peer to send ssn 1 with TSN 2 and * ssn 2 with TSN 1? If the peer is doing some sort of funky TSN/SSN * assignment this could happen... and I don't see how this would be * a violation. So for now I am undecided and will leave the sort by * SSN alone.
Maybe a hybrid approach is the answer * */ struct sctp_queued_to_read *at; int queue_needed; uint32_t nxt_todel; struct mbuf *op_err; struct sctp_stream_in *strm; char msg[SCTP_DIAG_INFO_LEN]; strm = &asoc->strmin[control->sinfo_stream]; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) { sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INTO_STRD); } if (SCTP_MID_GT((asoc->idata_supported), strm->last_mid_delivered, control->mid)) { /* The incoming sseq is behind where we last delivered? */ SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ: %u delivered: %u from peer, Abort association\n", strm->last_mid_delivered, control->mid); /* * throw it in the stream so it gets cleaned up in * association destruction */ TAILQ_INSERT_HEAD(&strm->inqueue, control, next_instrm); if (asoc->idata_supported) { snprintf(msg, sizeof(msg), "Delivered MID=%8.8x, got TSN=%8.8x, SID=%4.4x, MID=%8.8x", strm->last_mid_delivered, control->sinfo_tsn, control->sinfo_stream, control->mid); } else { snprintf(msg, sizeof(msg), "Delivered SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x", (uint16_t)strm->last_mid_delivered, control->sinfo_tsn, control->sinfo_stream, (uint16_t)control->mid); } op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2; sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return; } queue_needed = 1; asoc->size_on_all_streams += control->length; sctp_ucount_incr(asoc->cnt_on_all_streams); nxt_todel = strm->last_mid_delivered + 1; if (SCTP_MID_EQ(asoc->idata_supported, nxt_todel, control->mid)) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { SCTP_SOCKET_UNLOCK(so, 1); return; } #endif /* can be delivered right away?
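	 * Worked example (illustration only): if last_mid_delivered
	 * is 6, nxt_todel is 7, so an arriving in-order MID 7 takes
	 * this branch and goes straight to the socket buffer via
	 * sctp_add_to_readq() instead of being queued.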
*/ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) { sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_IMMED_DEL); } /* EY it wont be queued if it could be delivered directly */ queue_needed = 0; if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); strm->last_mid_delivered++; sctp_mark_non_revokable(asoc, control->sinfo_tsn); sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_LOCKED); TAILQ_FOREACH_SAFE(control, &strm->inqueue, next_instrm, at) { /* all delivered */ nxt_todel = strm->last_mid_delivered + 1; if (SCTP_MID_EQ(asoc->idata_supported, nxt_todel, control->mid) && (((control->sinfo_flags >> 8) & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG)) { if (control->on_strm_q == SCTP_ON_ORDERED) { TAILQ_REMOVE(&strm->inqueue, control, next_instrm); if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); #ifdef INVARIANTS } else { panic("Huh control: %p is on_strm_q: %d", control, control->on_strm_q); #endif } control->on_strm_q = 0; strm->last_mid_delivered++; /* * We ignore the return of deliver_data here * since we always can hold the chunk on the * d-queue. And we have a finite number that * can be delivered from the strq. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) { sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_IMMED_DEL); } sctp_mark_non_revokable(asoc, control->sinfo_tsn); sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_LOCKED); continue; } else if (SCTP_MID_EQ(asoc->idata_supported, nxt_todel, control->mid)) { *need_reasm = 1; } break; } #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } if (queue_needed) { /* * Ok, we did not deliver this guy, find the correct place * to put it on the queue. */ if (sctp_place_control_in_stream(strm, asoc, control)) { snprintf(msg, sizeof(msg), "Queue to str MID: %u duplicate", control->mid); sctp_clean_up_control(stcb, control); op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3; sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_flag = 1; } } } static void sctp_setup_tail_pointer(struct sctp_queued_to_read *control) { struct mbuf *m, *prev = NULL; struct sctp_tcb *stcb; stcb = control->stcb; control->held_length = 0; control->length = 0; m = control->data; while (m) { if (SCTP_BUF_LEN(m) == 0) { /* Skip mbufs with NO length */ if (prev == NULL) { /* First one */ control->data = sctp_m_free(m); m = control->data; } else { SCTP_BUF_NEXT(prev) = sctp_m_free(m); m = SCTP_BUF_NEXT(prev); } if (m == NULL) { control->tail_mbuf = prev; } continue; } prev = m; atomic_add_int(&control->length, SCTP_BUF_LEN(m)); if (control->on_read_q) { /* * On read queue so we must increment the SB stuff, * we assume caller has done any locks of SB. 
*/ sctp_sballoc(stcb, &stcb->sctp_socket->so_rcv, m); } m = SCTP_BUF_NEXT(m); } if (prev) { control->tail_mbuf = prev; } } static void sctp_add_to_tail_pointer(struct sctp_queued_to_read *control, struct mbuf *m, uint32_t *added) { struct mbuf *prev = NULL; struct sctp_tcb *stcb; stcb = control->stcb; if (stcb == NULL) { #ifdef INVARIANTS panic("Control broken"); #else return; #endif } if (control->tail_mbuf == NULL) { /* TSNH */ control->data = m; sctp_setup_tail_pointer(control); return; } control->tail_mbuf->m_next = m; while (m) { if (SCTP_BUF_LEN(m) == 0) { /* Skip mbufs with NO length */ if (prev == NULL) { /* First one */ control->tail_mbuf->m_next = sctp_m_free(m); m = control->tail_mbuf->m_next; } else { SCTP_BUF_NEXT(prev) = sctp_m_free(m); m = SCTP_BUF_NEXT(prev); } if (m == NULL) { control->tail_mbuf = prev; } continue; } prev = m; if (control->on_read_q) { /* * On read queue so we must increment the SB stuff, * we assume caller has done any locks of SB. */ sctp_sballoc(stcb, &stcb->sctp_socket->so_rcv, m); } *added += SCTP_BUF_LEN(m); atomic_add_int(&control->length, SCTP_BUF_LEN(m)); m = SCTP_BUF_NEXT(m); } if (prev) { control->tail_mbuf = prev; } } static void sctp_build_readq_entry_from_ctl(struct sctp_queued_to_read *nc, struct sctp_queued_to_read *control) { memset(nc, 0, sizeof(struct sctp_queued_to_read)); nc->sinfo_stream = control->sinfo_stream; nc->mid = control->mid; TAILQ_INIT(&nc->reasm); nc->top_fsn = control->top_fsn; nc->sinfo_flags = control->sinfo_flags; nc->sinfo_ppid = control->sinfo_ppid; nc->sinfo_context = control->sinfo_context; nc->fsn_included = 0xffffffff; nc->sinfo_tsn = control->sinfo_tsn; nc->sinfo_cumtsn = control->sinfo_cumtsn; nc->sinfo_assoc_id = control->sinfo_assoc_id; nc->whoFrom = control->whoFrom; atomic_add_int(&nc->whoFrom->ref_count, 1); nc->stcb = control->stcb; nc->port_from = control->port_from; } static void sctp_reset_a_control(struct sctp_queued_to_read *control, struct sctp_inpcb *inp, uint32_t tsn) { control->fsn_included = tsn; if (control->on_read_q) { /* * We have to purge it from there, hopefully this will work * :-) */ TAILQ_REMOVE(&inp->read_queue, control, next); control->on_read_q = 0; } } static int sctp_handle_old_unordered_data(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_in *strm, struct sctp_queued_to_read *control, uint32_t pd_point, int inp_read_lock_held) { /* * Special handling for the old un-ordered data chunk. All the * chunks/TSN's go to mid 0. So we have to do the old style watching * to see if we have it all. If you return one, no other control * entries on the un-ordered queue will be looked at. In theory * there should be no other entries in reality, unless the guy is * sending both unordered NDATA and unordered DATA... */ struct sctp_tmit_chunk *chk, *lchk, *tchk; uint32_t fsn; struct sctp_queued_to_read *nc; int cnt_added; if (control->first_frag_seen == 0) { /* Nothing we can do, we have not seen the first piece yet */ return (1); } /* Collapse any we can */ cnt_added = 0; restart: fsn = control->fsn_included + 1; /* Now what can we add?
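 * Any chunk on the reasm list whose FSN continues the run at
 * fsn_included + 1. The loop below merges each such chunk into the
 * control, advances fsn, restarts the run whenever a completed
 * message spawns a new control, and stops at the first hole.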
*/ TAILQ_FOREACH_SAFE(chk, &control->reasm, sctp_next, lchk) { if (chk->rec.data.fsn == fsn) { /* Ok lets add it */ sctp_alloc_a_readq(stcb, nc); if (nc == NULL) { break; } memset(nc, 0, sizeof(struct sctp_queued_to_read)); TAILQ_REMOVE(&control->reasm, chk, sctp_next); sctp_add_chk_to_control(control, strm, stcb, asoc, chk, SCTP_READ_LOCK_NOT_HELD); fsn++; cnt_added++; chk = NULL; if (control->end_added) { /* We are done */ if (!TAILQ_EMPTY(&control->reasm)) { /* * Ok we have to move anything left * on the control queue to a new * control. */ sctp_build_readq_entry_from_ctl(nc, control); tchk = TAILQ_FIRST(&control->reasm); if (tchk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) { TAILQ_REMOVE(&control->reasm, tchk, sctp_next); if (asoc->size_on_reasm_queue >= tchk->send_size) { asoc->size_on_reasm_queue -= tchk->send_size; } else { #ifdef INVARIANTS panic("size_on_reasm_queue = %u smaller than chunk length %u", asoc->size_on_reasm_queue, tchk->send_size); #else asoc->size_on_reasm_queue = 0; #endif } sctp_ucount_decr(asoc->cnt_on_reasm_queue); nc->first_frag_seen = 1; nc->fsn_included = tchk->rec.data.fsn; nc->data = tchk->data; nc->sinfo_ppid = tchk->rec.data.ppid; nc->sinfo_tsn = tchk->rec.data.tsn; sctp_mark_non_revokable(asoc, tchk->rec.data.tsn); tchk->data = NULL; sctp_free_a_chunk(stcb, tchk, SCTP_SO_NOT_LOCKED); sctp_setup_tail_pointer(nc); tchk = TAILQ_FIRST(&control->reasm); } /* Spin the rest onto the queue */ while (tchk) { TAILQ_REMOVE(&control->reasm, tchk, sctp_next); TAILQ_INSERT_TAIL(&nc->reasm, tchk, sctp_next); tchk = TAILQ_FIRST(&control->reasm); } /* * Now lets add it to the queue * after removing control */ TAILQ_INSERT_TAIL(&strm->uno_inqueue, nc, next_instrm); nc->on_strm_q = SCTP_ON_UNORDERED; if (control->on_strm_q) { TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm); control->on_strm_q = 0; } } if (control->pdapi_started) { strm->pd_api_started = 0; control->pdapi_started = 0; } if (control->on_strm_q) { TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm); control->on_strm_q = 0; SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs); } if (control->on_read_q == 0) { sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, control->end_added, inp_read_lock_held, SCTP_SO_NOT_LOCKED); } sctp_wakeup_the_read_socket(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED); if ((nc->first_frag_seen) && !TAILQ_EMPTY(&nc->reasm)) { /* * Switch to the new guy and * continue */ control = nc; goto restart; } else { if (nc->on_strm_q == 0) { sctp_free_a_readq(stcb, nc); } } return (1); } else { sctp_free_a_readq(stcb, nc); } } else { /* Can't add more */ break; } } if ((control->length > pd_point) && (strm->pd_api_started == 0)) { strm->pd_api_started = 1; control->pdapi_started = 1; sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, control->end_added, inp_read_lock_held, SCTP_SO_NOT_LOCKED); sctp_wakeup_the_read_socket(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED); return (0); } else { return (1); } } static void sctp_inject_old_unordered_data(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_queued_to_read *control, struct sctp_tmit_chunk *chk, int *abort_flag) { struct sctp_tmit_chunk *at; int inserted; /* * Here we need to place the chunk into the control structure sorted * in the correct order. */ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) { /* Its the very first one. 
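 * For old un-ordered DATA the FSN is simply the TSN, so if no FIRST
 * has been seen yet this one just starts the control. Otherwise a
 * FIRST sorting at or below the current fsn_included means an older
 * interleaved message: the code below either swaps the new FIRST
 * into the control or aborts on an unrecoverable overlap.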
*/ SCTPDBG(SCTP_DEBUG_XXX, "chunk is a first fsn: %u becomes fsn_included\n", chk->rec.data.fsn); if (control->first_frag_seen) { /* * In old un-ordered we can reassemble multiple * messages on one control. As long as the next * FIRST is greater than the old first (TSN i.e. FSN * wise) */ struct mbuf *tdata; uint32_t tmp; if (SCTP_TSN_GT(chk->rec.data.fsn, control->fsn_included)) { /* * Easy case: the start of a new guy beyond * the lowest */ goto place_chunk; } if ((chk->rec.data.fsn == control->fsn_included) || (control->pdapi_started)) { /* * Ok this should not happen, if it does we * started the pd-api on the higher TSN * (since the equals part is a TSN failure * it must be that). * * We are completely hosed in that case since * I have no way to recover. This really * will only happen if we can get more TSN's * higher before the pd-api-point. */ sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_4); return; } /* * Ok we have two firsts and the one we just got is * smaller than the one we previously placed.. yuck! * We must swap them out. */ /* swap the mbufs */ tdata = control->data; control->data = chk->data; chk->data = tdata; /* Save the lengths */ chk->send_size = control->length; /* Recompute length of control and tail pointer */ sctp_setup_tail_pointer(control); /* Fix the FSN included */ tmp = control->fsn_included; control->fsn_included = chk->rec.data.fsn; chk->rec.data.fsn = tmp; /* Fix the TSN included */ tmp = control->sinfo_tsn; control->sinfo_tsn = chk->rec.data.tsn; chk->rec.data.tsn = tmp; /* Fix the PPID included */ tmp = control->sinfo_ppid; control->sinfo_ppid = chk->rec.data.ppid; chk->rec.data.ppid = tmp; /* Fix tail pointer */ goto place_chunk; } control->first_frag_seen = 1; control->fsn_included = chk->rec.data.fsn; control->top_fsn = chk->rec.data.fsn; control->sinfo_tsn = chk->rec.data.tsn; control->sinfo_ppid = chk->rec.data.ppid; control->data = chk->data; sctp_mark_non_revokable(asoc, chk->rec.data.tsn); chk->data = NULL; sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); sctp_setup_tail_pointer(control); return; } place_chunk: inserted = 0; TAILQ_FOREACH(at, &control->reasm, sctp_next) { if (SCTP_TSN_GT(at->rec.data.fsn, chk->rec.data.fsn)) { /* * This one in queue is bigger than the new one, * insert the new one before at. */ asoc->size_on_reasm_queue += chk->send_size; sctp_ucount_incr(asoc->cnt_on_reasm_queue); inserted = 1; TAILQ_INSERT_BEFORE(at, chk, sctp_next); break; } else if (at->rec.data.fsn == chk->rec.data.fsn) { /* * They sent a duplicate fsn number. This really * should not happen since the FSN is a TSN and it * should have been dropped earlier. */ sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_5); return; } } if (inserted == 0) { /* Its at the end */ asoc->size_on_reasm_queue += chk->send_size; sctp_ucount_incr(asoc->cnt_on_reasm_queue); control->top_fsn = chk->rec.data.fsn; TAILQ_INSERT_TAIL(&control->reasm, chk, sctp_next); } } static int sctp_deliver_reasm_check(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_in *strm, int inp_read_lock_held) { /* * Given a stream, strm, see if any of the SSN's on it that are * fragmented are ready to deliver. If so go ahead and place them on * the read queue. In so placing if we have hit the end, then we * need to remove them from the stream's queue.
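 *
 * pd_point, computed below, is the byte threshold at which a still
 * incomplete message is pushed to the reader anyway: the smaller of
 * the endpoint's partial_delivery_point and the socket receive
 * buffer limit shifted down by SCTP_PARTIAL_DELIVERY_SHIFT. (For
 * illustration only, hypothetical numbers: an 8 kB
 * partial_delivery_point against a large enough receive buffer
 * gives pd_point = 8192.)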
*/ struct sctp_queued_to_read *control, *nctl = NULL; uint32_t next_to_del; uint32_t pd_point; int ret = 0; if (stcb->sctp_socket) { pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket) >> SCTP_PARTIAL_DELIVERY_SHIFT, stcb->sctp_ep->partial_delivery_point); } else { pd_point = stcb->sctp_ep->partial_delivery_point; } control = TAILQ_FIRST(&strm->uno_inqueue); if ((control != NULL) && (asoc->idata_supported == 0)) { /* Special handling needed for "old" data format */ if (sctp_handle_old_unordered_data(stcb, asoc, strm, control, pd_point, inp_read_lock_held)) { goto done_un; } } if (strm->pd_api_started) { /* Can't add more */ return (0); } while (control) { SCTPDBG(SCTP_DEBUG_XXX, "Looking at control: %p e(%d) ssn: %u top_fsn: %u inc_fsn: %u -uo\n", control, control->end_added, control->mid, control->top_fsn, control->fsn_included); nctl = TAILQ_NEXT(control, next_instrm); if (control->end_added) { /* We just put the last bit on */ if (control->on_strm_q) { #ifdef INVARIANTS if (control->on_strm_q != SCTP_ON_UNORDERED) { panic("Huh control: %p on_q: %d -- not unordered?", control, control->on_strm_q); } #endif SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs); TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm); control->on_strm_q = 0; } if (control->on_read_q == 0) { sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, control->end_added, inp_read_lock_held, SCTP_SO_NOT_LOCKED); } } else { /* Can we do a PD-API for this un-ordered guy? */ if ((control->length >= pd_point) && (strm->pd_api_started == 0)) { strm->pd_api_started = 1; control->pdapi_started = 1; sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, control->end_added, inp_read_lock_held, SCTP_SO_NOT_LOCKED); break; } } control = nctl; } done_un: control = TAILQ_FIRST(&strm->inqueue); if (strm->pd_api_started) { /* Can't add more */ return (0); } if (control == NULL) { return (ret); } if (SCTP_MID_EQ(asoc->idata_supported, strm->last_mid_delivered, control->mid)) { /* * Ok the guy at the top was being partially delivered * completed, so we remove it. Note the pd_api flag was * taken off when the chunk was merged on in * sctp_queue_data_for_reasm below. */ nctl = TAILQ_NEXT(control, next_instrm); SCTPDBG(SCTP_DEBUG_XXX, "Looking at control: %p e(%d) ssn: %u top_fsn: %u inc_fsn: %u (lastdel: %u)- o\n", control, control->end_added, control->mid, control->top_fsn, control->fsn_included, strm->last_mid_delivered); if (control->end_added) { if (control->on_strm_q) { #ifdef INVARIANTS if (control->on_strm_q != SCTP_ON_ORDERED) { panic("Huh control: %p on_q: %d -- not ordered?", control, control->on_strm_q); } #endif SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs); TAILQ_REMOVE(&strm->inqueue, control, next_instrm); if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); control->on_strm_q = 0; } if (strm->pd_api_started && control->pdapi_started) { control->pdapi_started = 0; strm->pd_api_started = 0; } if (control->on_read_q == 0) { sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, control->end_added, inp_read_lock_held, SCTP_SO_NOT_LOCKED); } control = nctl; } } if (strm->pd_api_started) { /* * Can't add more must have gotten an un-ordered above being * partially delivered. 
*/ return (0); } deliver_more: next_to_del = strm->last_mid_delivered + 1; if (control) { SCTPDBG(SCTP_DEBUG_XXX, "Looking at control: %p e(%d) ssn: %u top_fsn: %u inc_fsn: %u (nxtdel: %u)- o\n", control, control->end_added, control->mid, control->top_fsn, control->fsn_included, next_to_del); nctl = TAILQ_NEXT(control, next_instrm); if (SCTP_MID_EQ(asoc->idata_supported, control->mid, next_to_del) && (control->first_frag_seen)) { int done; /* Ok we can deliver it onto the stream. */ if (control->end_added) { /* We are done with it afterwards */ if (control->on_strm_q) { #ifdef INVARIANTS if (control->on_strm_q != SCTP_ON_ORDERED) { panic("Huh control: %p on_q: %d -- not ordered?", control, control->on_strm_q); } #endif SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs); TAILQ_REMOVE(&strm->inqueue, control, next_instrm); if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); control->on_strm_q = 0; } ret++; } if (((control->sinfo_flags >> 8) & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) { /* * A singleton now slipping through - mark * it non-revokable too */ sctp_mark_non_revokable(asoc, control->sinfo_tsn); } else if (control->end_added == 0) { /* * Check if we can defer adding until its * all there */ if ((control->length < pd_point) || (strm->pd_api_started)) { /* * Don't need it or cannot add more * (one being delivered that way) */ goto out; } } done = (control->end_added) && (control->last_frag_seen); if (control->on_read_q == 0) { sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, control->end_added, inp_read_lock_held, SCTP_SO_NOT_LOCKED); } strm->last_mid_delivered = next_to_del; if (done) { control = nctl; goto deliver_more; } else { /* We are now doing PD API */ strm->pd_api_started = 1; control->pdapi_started = 1; } } } out: return (ret); } uint32_t sctp_add_chk_to_control(struct sctp_queued_to_read *control, struct sctp_stream_in *strm, struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_tmit_chunk *chk, int hold_rlock) { /* * Given a control and a chunk, merge the data from the chk onto the * control and free up the chunk resources. */ uint32_t added = 0; int i_locked = 0; if (control->on_read_q && (hold_rlock == 0)) { /* * Its being pd-api'd so we must do some locks. 
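 * The INP read lock is taken here only when the control is already
 * visible on the read queue and the caller did not hold the lock
 * (hold_rlock == 0); it is dropped again once the merge is done, so
 * a concurrent reader never sees a half-updated entry.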
*/ SCTP_INP_READ_LOCK(stcb->sctp_ep); i_locked = 1; } if (control->data == NULL) { control->data = chk->data; sctp_setup_tail_pointer(control); } else { sctp_add_to_tail_pointer(control, chk->data, &added); } control->fsn_included = chk->rec.data.fsn; asoc->size_on_reasm_queue -= chk->send_size; sctp_ucount_decr(asoc->cnt_on_reasm_queue); sctp_mark_non_revokable(asoc, chk->rec.data.tsn); chk->data = NULL; if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) { control->first_frag_seen = 1; control->sinfo_tsn = chk->rec.data.tsn; control->sinfo_ppid = chk->rec.data.ppid; } if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) { /* Its complete */ if ((control->on_strm_q) && (control->on_read_q)) { if (control->pdapi_started) { control->pdapi_started = 0; strm->pd_api_started = 0; } if (control->on_strm_q == SCTP_ON_UNORDERED) { /* Unordered */ TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm); control->on_strm_q = 0; } else if (control->on_strm_q == SCTP_ON_ORDERED) { /* Ordered */ TAILQ_REMOVE(&strm->inqueue, control, next_instrm); if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); control->on_strm_q = 0; #ifdef INVARIANTS } else if (control->on_strm_q) { panic("Unknown state on ctrl: %p on_strm_q: %d", control, control->on_strm_q); #endif } } control->end_added = 1; control->last_frag_seen = 1; } if (i_locked) { SCTP_INP_READ_UNLOCK(stcb->sctp_ep); } sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return (added); } /* * Dump onto the re-assembly queue, in its proper place. After dumping on the * queue, see if anything can be delivered. If so pull it off (or as much as * we can). If we run out of space then we must dump what we can and set the * appropriate flag to say we queued what we could. */ static void sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_queued_to_read *control, struct sctp_tmit_chunk *chk, int created_control, int *abort_flag, uint32_t tsn) { uint32_t next_fsn; struct sctp_tmit_chunk *at, *nat; struct sctp_stream_in *strm; int do_wakeup, unordered; uint32_t lenadded; strm = &asoc->strmin[control->sinfo_stream]; /* * For old un-ordered data chunks. */ if ((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) { unordered = 1; } else { unordered = 0; } /* Must be added to the stream-in queue */ if (created_control) { if (unordered == 0) { sctp_ucount_incr(asoc->cnt_on_all_streams); } if (sctp_place_control_in_stream(strm, asoc, control)) { /* Duplicate SSN? */ sctp_clean_up_control(stcb, control); sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_6); return; } if ((tsn == (asoc->cumulative_tsn + 1) && (asoc->idata_supported == 0))) { /* * Ok we created this control and now lets validate * that its legal i.e. there is a B bit set, if not * and we have up to the cum-ack then its invalid. */ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) { sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_7); return; } } } if ((asoc->idata_supported == 0) && (unordered == 1)) { sctp_inject_old_unordered_data(stcb, asoc, control, chk, abort_flag); return; } /* * Ok we must queue the chunk into the reassembly portion: o if its * the first it goes to the control mbuf.
o if its not first but the * next in sequence it goes to the control, and each succeeding one * in order also goes. o if its not in order we place it on the list * in its place. */ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) { /* Its the very first one. */ SCTPDBG(SCTP_DEBUG_XXX, "chunk is a first fsn: %u becomes fsn_included\n", chk->rec.data.fsn); if (control->first_frag_seen) { /* * Error on senders part, they either sent us two * data chunks with FIRST, or they sent two * un-ordered chunks that were fragmented at the * same time in the same stream. */ sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_8); return; } control->first_frag_seen = 1; control->sinfo_ppid = chk->rec.data.ppid; control->sinfo_tsn = chk->rec.data.tsn; control->fsn_included = chk->rec.data.fsn; control->data = chk->data; sctp_mark_non_revokable(asoc, chk->rec.data.tsn); chk->data = NULL; sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); sctp_setup_tail_pointer(control); asoc->size_on_all_streams += control->length; } else { /* Place the chunk in our list */ int inserted = 0; if (control->last_frag_seen == 0) { /* Still willing to raise highest FSN seen */ if (SCTP_TSN_GT(chk->rec.data.fsn, control->top_fsn)) { SCTPDBG(SCTP_DEBUG_XXX, "We have a new top_fsn: %u\n", chk->rec.data.fsn); control->top_fsn = chk->rec.data.fsn; } if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) { SCTPDBG(SCTP_DEBUG_XXX, "The last fsn is now in place fsn: %u\n", chk->rec.data.fsn); control->last_frag_seen = 1; } if (asoc->idata_supported || control->first_frag_seen) { /* * For IDATA we always check since we know * that the first fragment is 0. For old * DATA we have to receive the first before * we know the first FSN (which is the TSN). */ if (SCTP_TSN_GE(control->fsn_included, chk->rec.data.fsn)) { /* * We have already delivered up to * this so its a dup */ sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_9); return; } } } else { if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) { /* Second last? huh? */ SCTPDBG(SCTP_DEBUG_XXX, "Duplicate last fsn: %u (top: %u) -- abort\n", chk->rec.data.fsn, control->top_fsn); sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_10); return; } if (asoc->idata_supported || control->first_frag_seen) { /* * For IDATA we always check since we know * that the first fragment is 0. For old * DATA we have to receive the first before * we know the first FSN (which is the TSN). */ if (SCTP_TSN_GE(control->fsn_included, chk->rec.data.fsn)) { /* * We have already delivered up to * this so its a dup */ SCTPDBG(SCTP_DEBUG_XXX, "New fsn: %u is already seen in included_fsn: %u -- abort\n", chk->rec.data.fsn, control->fsn_included); sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_11); return; } } /* * validate not beyond top FSN if we have seen last * one */ if (SCTP_TSN_GT(chk->rec.data.fsn, control->top_fsn)) { SCTPDBG(SCTP_DEBUG_XXX, "New fsn: %u is beyond or at top_fsn: %u -- abort\n", chk->rec.data.fsn, control->top_fsn); sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_12); return; } } /* * If we reach here, we need to place the new chunk in the * reassembly for this control. 
*/ SCTPDBG(SCTP_DEBUG_XXX, "chunk is a not first fsn: %u needs to be inserted\n", chk->rec.data.fsn); TAILQ_FOREACH(at, &control->reasm, sctp_next) { if (SCTP_TSN_GT(at->rec.data.fsn, chk->rec.data.fsn)) { /* * This one in queue is bigger than the new * one, insert the new one before at. */ SCTPDBG(SCTP_DEBUG_XXX, "Insert it before fsn: %u\n", at->rec.data.fsn); asoc->size_on_reasm_queue += chk->send_size; sctp_ucount_incr(asoc->cnt_on_reasm_queue); TAILQ_INSERT_BEFORE(at, chk, sctp_next); inserted = 1; break; } else if (at->rec.data.fsn == chk->rec.data.fsn) { /* * Gak, He sent me a duplicate str seq * number */ /* * foo bar, I guess I will just free this * new guy, should we abort too? FIX ME * MAYBE? Or it COULD be that the SSN's have * wrapped. Maybe I should compare to TSN * somehow... sigh for now just blow away * the chunk! */ SCTPDBG(SCTP_DEBUG_XXX, "Duplicate to fsn: %u -- abort\n", at->rec.data.fsn); sctp_abort_in_reasm(stcb, control, chk, abort_flag, SCTP_FROM_SCTP_INDATA + SCTP_LOC_13); return; } } if (inserted == 0) { /* Goes on the end */ SCTPDBG(SCTP_DEBUG_XXX, "Inserting at tail of list fsn: %u\n", chk->rec.data.fsn); asoc->size_on_reasm_queue += chk->send_size; sctp_ucount_incr(asoc->cnt_on_reasm_queue); TAILQ_INSERT_TAIL(&control->reasm, chk, sctp_next); } } /* * Ok lets see if we can suck any up into the control structure that * are in seq if it makes sense. */ do_wakeup = 0; /* * If the first fragment has not been seen there is no sense in * looking. */ if (control->first_frag_seen) { next_fsn = control->fsn_included + 1; TAILQ_FOREACH_SAFE(at, &control->reasm, sctp_next, nat) { if (at->rec.data.fsn == next_fsn) { /* We can add this one now to the control */ SCTPDBG(SCTP_DEBUG_XXX, "Adding more to control: %p at: %p fsn: %u next_fsn: %u included: %u\n", control, at, at->rec.data.fsn, next_fsn, control->fsn_included); TAILQ_REMOVE(&control->reasm, at, sctp_next); lenadded = sctp_add_chk_to_control(control, strm, stcb, asoc, at, SCTP_READ_LOCK_NOT_HELD); asoc->size_on_all_streams += lenadded; if (control->on_read_q) { do_wakeup = 1; } next_fsn++; if (control->end_added && control->pdapi_started) { if (strm->pd_api_started) { strm->pd_api_started = 0; control->pdapi_started = 0; } if (control->on_read_q == 0) { sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, control->end_added, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); do_wakeup = 1; } break; } } else { break; } } } if (do_wakeup) { /* Need to wakeup the reader */ sctp_wakeup_the_read_socket(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED); } } static struct sctp_queued_to_read * sctp_find_reasm_entry(struct sctp_stream_in *strm, uint32_t mid, int ordered, int idata_supported) { struct sctp_queued_to_read *control; if (ordered) { TAILQ_FOREACH(control, &strm->inqueue, next_instrm) { if (SCTP_MID_EQ(idata_supported, control->mid, mid)) { break; } } } else { if (idata_supported) { TAILQ_FOREACH(control, &strm->uno_inqueue, next_instrm) { if (SCTP_MID_EQ(idata_supported, control->mid, mid)) { break; } } } else { control = TAILQ_FIRST(&strm->uno_inqueue); } } return (control); } static int sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, struct mbuf **m, int offset, int chk_length, struct sctp_nets *net, uint32_t *high_tsn, int *abort_flag, int *break_flag, int last_chunk, uint8_t chk_type) { /* Process a data chunk */ /* struct sctp_tmit_chunk *chk; */ struct sctp_tmit_chunk *chk; uint32_t tsn, fsn, gap, mid; struct mbuf *dmbuf; int the_len; int need_reasm_check = 0; 
uint16_t sid; struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; struct sctp_queued_to_read *control, *ncontrol; uint32_t ppid; uint8_t chk_flags; struct sctp_stream_reset_list *liste; int ordered; size_t clen; int created_control = 0; if (chk_type == SCTP_IDATA) { struct sctp_idata_chunk *chunk, chunk_buf; chunk = (struct sctp_idata_chunk *)sctp_m_getptr(*m, offset, sizeof(struct sctp_idata_chunk), (uint8_t *)&chunk_buf); chk_flags = chunk->ch.chunk_flags; clen = sizeof(struct sctp_idata_chunk); tsn = ntohl(chunk->dp.tsn); sid = ntohs(chunk->dp.sid); mid = ntohl(chunk->dp.mid); if (chk_flags & SCTP_DATA_FIRST_FRAG) { fsn = 0; ppid = chunk->dp.ppid_fsn.ppid; } else { fsn = ntohl(chunk->dp.ppid_fsn.fsn); ppid = 0xffffffff; /* Use as an invalid value. */ } } else { struct sctp_data_chunk *chunk, chunk_buf; chunk = (struct sctp_data_chunk *)sctp_m_getptr(*m, offset, sizeof(struct sctp_data_chunk), (uint8_t *)&chunk_buf); chk_flags = chunk->ch.chunk_flags; clen = sizeof(struct sctp_data_chunk); tsn = ntohl(chunk->dp.tsn); sid = ntohs(chunk->dp.sid); mid = (uint32_t)(ntohs(chunk->dp.ssn)); fsn = tsn; ppid = chunk->dp.ppid; } if ((size_t)chk_length == clen) { /* * Need to send an abort since we had an empty data chunk. */ op_err = sctp_generate_no_user_data_cause(tsn); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14; sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } if ((chk_flags & SCTP_DATA_SACK_IMMEDIATELY) == SCTP_DATA_SACK_IMMEDIATELY) { asoc->send_sack = 1; } ordered = ((chk_flags & SCTP_DATA_UNORDERED) == 0); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, SCTP_MAP_TSN_ENTERS); } if (stcb == NULL) { return (0); } SCTP_LTRACE_CHK(stcb->sctp_ep, stcb, chk_type, tsn); if (SCTP_TSN_GE(asoc->cumulative_tsn, tsn)) { /* It is a duplicate */ SCTP_STAT_INCR(sctps_recvdupdata); if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) { /* Record a dup for the next outbound sack */ asoc->dup_tsns[asoc->numduptsns] = tsn; asoc->numduptsns++; } asoc->send_sack = 1; return (0); } /* Calculate the number of TSN's between the base and this TSN */ SCTP_CALC_TSN_TO_GAP(gap, tsn, asoc->mapping_array_base_tsn); if (gap >= (SCTP_MAPPING_ARRAY << 3)) { /* Can't hold the bit in the mapping at max array, toss it */ return (0); } if (gap >= (uint32_t)(asoc->mapping_array_size << 3)) { SCTP_TCB_LOCK_ASSERT(stcb); if (sctp_expand_mapping_array(asoc, gap)) { /* Can't expand, drop it */ return (0); } } if (SCTP_TSN_GT(tsn, *high_tsn)) { *high_tsn = tsn; } /* See if we have received this one already */ if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap) || SCTP_IS_TSN_PRESENT(asoc->nr_mapping_array, gap)) { SCTP_STAT_INCR(sctps_recvdupdata); if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) { /* Record a dup for the next outbound sack */ asoc->dup_tsns[asoc->numduptsns] = tsn; asoc->numduptsns++; } asoc->send_sack = 1; return (0); } /* * Check to see about the GONE flag, duplicates would cause a sack * to be sent up above */ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET))) { /* * wait a minute, this guy is gone, there is no longer a * receiver. Send peer an ABORT!
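 * We answer with an out-of-resources cause below rather than
 * silently eating data that no application can ever read.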
*/ op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, ""); sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } /* * Now before going further we see if there is room. If NOT then we * MAY let one through only IF this TSN is the one we are waiting * for on a partial delivery API. */ /* Is the stream valid? */ if (sid >= asoc->streamincnt) { struct sctp_error_invalid_stream *cause; op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_invalid_stream), 0, M_NOWAIT, 1, MT_DATA); if (op_err != NULL) { /* add some space up front so prepend will work well */ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr)); cause = mtod(op_err, struct sctp_error_invalid_stream *); /* * Error causes are just param's and this one has * two back to back phdr, one with the error type * and size, the other with the streamid and a rsvd */ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_invalid_stream); cause->cause.code = htons(SCTP_CAUSE_INVALID_STREAM); cause->cause.length = htons(sizeof(struct sctp_error_invalid_stream)); cause->stream_id = htons(sid); cause->reserved = htons(0); sctp_queue_op_err(stcb, op_err); } SCTP_STAT_INCR(sctps_badsid); SCTP_TCB_LOCK_ASSERT(stcb); SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } if (tsn == (asoc->cumulative_tsn + 1)) { /* Update cum-ack */ asoc->cumulative_tsn = tsn; } return (0); } /* * If its a fragmented message, lets see if we can find the control * on the reassembly queues. */ if ((chk_type == SCTP_IDATA) && ((chk_flags & SCTP_DATA_FIRST_FRAG) == 0) && (fsn == 0)) { /* * The first *must* be fsn 0, and other (middle/end) pieces * can *not* be fsn 0. XXX: This can happen in case of a * wrap around. Ignore is for now. */ snprintf(msg, sizeof(msg), "FSN zero for MID=%8.8x, but flags=%2.2x", mid, chk_flags); goto err_out; } control = sctp_find_reasm_entry(&asoc->strmin[sid], mid, ordered, asoc->idata_supported); SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags:0x%x look for control on queues %p\n", chk_flags, control); if ((chk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) { /* See if we can find the re-assembly entity */ if (control != NULL) { /* We found something, does it belong? */ if (ordered && (mid != control->mid)) { snprintf(msg, sizeof(msg), "Reassembly problem (MID=%8.8x)", mid); err_out: op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15; sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } if (ordered && ((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED)) { /* * We can't have a switched order with an * unordered chunk */ snprintf(msg, sizeof(msg), "All fragments of a user message must be ordered or unordered (TSN=%8.8x)", tsn); goto err_out; } if (!ordered && (((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) == 0)) { /* * We can't have a switched unordered with a * ordered chunk */ snprintf(msg, sizeof(msg), "All fragments of a user message must be ordered or unordered (TSN=%8.8x)", tsn); goto err_out; } } } else { /* * Its a complete segment. Lets validate we don't have a * re-assembly going on with the same Stream/Seq (for * ordered) or in the same Stream for unordered. 
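 * For ordered or I-DATA traffic such a chunk is simply a duplicate
 * and we abort; for old un-ordered DATA it may legitimately coexist
 * with a pending reassembly, unless its TSN is the very next
 * fragment TSN of the unfinished message, which would mean the peer
 * never sent the missing end.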
*/ if (control != NULL) { if (ordered || asoc->idata_supported) { SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags: 0x%x dup detected on MID: %u\n", chk_flags, mid); snprintf(msg, sizeof(msg), "Duplicate MID=%8.8x detected.", mid); goto err_out; } else { if ((tsn == control->fsn_included + 1) && (control->end_added == 0)) { snprintf(msg, sizeof(msg), "Illegal message sequence, missing end for MID: %8.8x", control->fsn_included); goto err_out; } else { control = NULL; } } } } /* now do the tests */ if (((asoc->cnt_on_all_streams + asoc->cnt_on_reasm_queue + asoc->cnt_msg_on_sb) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue)) || (((int)asoc->my_rwnd) <= 0)) { /* * When we have NO room in the rwnd we check to make sure * the reader is doing its job... */ if (stcb->sctp_socket->so_rcv.sb_cc) { /* some to read, wake-up */ #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { /* assoc was freed while we were unlocked */ SCTP_SOCKET_UNLOCK(so, 1); return (0); } #endif sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } /* now is it in the mapping array of what we have accepted? */ if (chk_type == SCTP_DATA) { if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_map) && SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { /* Nope not in the valid range dump it */ dump_packet: sctp_set_rwnd(stcb, asoc); if ((asoc->cnt_on_all_streams + asoc->cnt_on_reasm_queue + asoc->cnt_msg_on_sb) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue)) { SCTP_STAT_INCR(sctps_datadropchklmt); } else { SCTP_STAT_INCR(sctps_datadroprwnd); } *break_flag = 1; return (0); } } else { if (control == NULL) { goto dump_packet; } if (SCTP_TSN_GT(fsn, control->top_fsn)) { goto dump_packet; } } } #ifdef SCTP_ASOCLOG_OF_TSNS SCTP_TCB_LOCK_ASSERT(stcb); if (asoc->tsn_in_at >= SCTP_TSN_LOG_SIZE) { asoc->tsn_in_at = 0; asoc->tsn_in_wrapped = 1; } asoc->in_tsnlog[asoc->tsn_in_at].tsn = tsn; asoc->in_tsnlog[asoc->tsn_in_at].strm = sid; asoc->in_tsnlog[asoc->tsn_in_at].seq = mid; asoc->in_tsnlog[asoc->tsn_in_at].sz = chk_length; asoc->in_tsnlog[asoc->tsn_in_at].flgs = chunk_flags; asoc->in_tsnlog[asoc->tsn_in_at].stcb = (void *)stcb; asoc->in_tsnlog[asoc->tsn_in_at].in_pos = asoc->tsn_in_at; asoc->in_tsnlog[asoc->tsn_in_at].in_out = 1; asoc->tsn_in_at++; #endif /* * Before we continue lets validate that we are not being fooled by * an evil attacker. We can only have Nk chunks based on our TSN * spread allowed by the mapping array N * 8 bits, so there is no * way our stream sequence numbers could have wrapped. We of course * only validate the FIRST fragment so the bit must be set. */ if ((chk_flags & SCTP_DATA_FIRST_FRAG) && (TAILQ_EMPTY(&asoc->resetHead)) && (chk_flags & SCTP_DATA_UNORDERED) == 0 && SCTP_MID_GE(asoc->idata_supported, asoc->strmin[sid].last_mid_delivered, mid)) { /* The incoming sseq is behind where we last delivered? 
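 * SCTP_MID_GE compares in serial-number arithmetic (32-bit MIDs
 * with I-DATA, 16-bit SSNs otherwise); since the mapping array
 * bounds how many TSNs can be in flight, a FIRST fragment at or
 * behind last_mid_delivered cannot be a legitimate wrap, so it is
 * treated as hostile or broken and aborted below.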
*/ SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ: %u delivered: %u from peer, Abort!\n", mid, asoc->strmin[sid].last_mid_delivered); if (asoc->idata_supported) { snprintf(msg, sizeof(msg), "Delivered MID=%8.8x, got TSN=%8.8x, SID=%4.4x, MID=%8.8x", asoc->strmin[sid].last_mid_delivered, tsn, sid, mid); } else { snprintf(msg, sizeof(msg), "Delivered SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x", (uint16_t)asoc->strmin[sid].last_mid_delivered, tsn, sid, (uint16_t)mid); } op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16; sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_flag = 1; return (0); } if (chk_type == SCTP_IDATA) { the_len = (chk_length - sizeof(struct sctp_idata_chunk)); } else { the_len = (chk_length - sizeof(struct sctp_data_chunk)); } if (last_chunk == 0) { if (chk_type == SCTP_IDATA) { dmbuf = SCTP_M_COPYM(*m, (offset + sizeof(struct sctp_idata_chunk)), the_len, M_NOWAIT); } else { dmbuf = SCTP_M_COPYM(*m, (offset + sizeof(struct sctp_data_chunk)), the_len, M_NOWAIT); } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(dmbuf, SCTP_MBUF_ICOPY); } #endif } else { /* We can steal the last chunk */ int l_len; dmbuf = *m; /* lop off the top part */ if (chk_type == SCTP_IDATA) { m_adj(dmbuf, (offset + sizeof(struct sctp_idata_chunk))); } else { m_adj(dmbuf, (offset + sizeof(struct sctp_data_chunk))); } if (SCTP_BUF_NEXT(dmbuf) == NULL) { l_len = SCTP_BUF_LEN(dmbuf); } else { /* * need to count up the size hopefully does not hit * this too often :-0 */ struct mbuf *lat; l_len = 0; for (lat = dmbuf; lat; lat = SCTP_BUF_NEXT(lat)) { l_len += SCTP_BUF_LEN(lat); } } if (l_len > the_len) { /* Trim the end round bytes off too */ m_adj(dmbuf, -(l_len - the_len)); } } if (dmbuf == NULL) { SCTP_STAT_INCR(sctps_nomem); return (0); } /* * Now no matter what, we need a control, get one if we don't have * one (we may have gotten it above when we found the message was * fragmented) */ if (control == NULL) { sctp_alloc_a_readq(stcb, control); sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn, ppid, sid, chk_flags, NULL, fsn, mid); if (control == NULL) { SCTP_STAT_INCR(sctps_nomem); return (0); } if ((chk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) { struct mbuf *mm; control->data = dmbuf; for (mm = control->data; mm; mm = mm->m_next) { control->length += SCTP_BUF_LEN(mm); } control->tail_mbuf = NULL; control->end_added = 1; control->last_frag_seen = 1; control->first_frag_seen = 1; control->fsn_included = fsn; control->top_fsn = fsn; } created_control = 1; } SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags: 0x%x ordered: %d MID: %u control: %p\n", chk_flags, ordered, mid, control); if ((chk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG && TAILQ_EMPTY(&asoc->resetHead) && ((ordered == 0) || (SCTP_MID_EQ(asoc->idata_supported, asoc->strmin[sid].last_mid_delivered + 1, mid) && TAILQ_EMPTY(&asoc->strmin[sid].inqueue)))) { /* Candidate for express delivery */ /* * Its not fragmented, No PD-API is up, Nothing in the * delivery queue, Its un-ordered OR ordered and the next to * deliver AND nothing else is stuck on the stream queue, * And there is room for it in the socket buffer. Lets just * stuff it up the buffer....
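 *
 * Express delivery bypasses the stream queue and the reassembly
 * machinery entirely; the TSN is marked in the nr-mapping array
 * right below, making it non-renegable once the reader owns it.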
*/ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } SCTPDBG(SCTP_DEBUG_XXX, "Injecting control: %p to be read (MID: %u)\n", control, mid); sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); if ((chk_flags & SCTP_DATA_UNORDERED) == 0) { /* for ordered, bump what we delivered */ asoc->strmin[sid].last_mid_delivered++; } SCTP_STAT_INCR(sctps_recvexpress); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) { sctp_log_strm_del_alt(stcb, tsn, mid, sid, SCTP_STR_LOG_FROM_EXPRS_DEL); } control = NULL; goto finish_express_del; } /* Now will we need a chunk too? */ if ((chk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) { sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* No memory so we drop the chunk */ SCTP_STAT_INCR(sctps_nomem); if (last_chunk == 0) { /* we copied it, free the copy */ sctp_m_freem(dmbuf); } return (0); } chk->rec.data.tsn = tsn; chk->no_fr_allowed = 0; chk->rec.data.fsn = fsn; chk->rec.data.mid = mid; chk->rec.data.sid = sid; chk->rec.data.ppid = ppid; chk->rec.data.context = stcb->asoc.context; chk->rec.data.doing_fast_retransmit = 0; chk->rec.data.rcv_flags = chk_flags; chk->asoc = asoc; chk->send_size = the_len; chk->whoTo = net; SCTPDBG(SCTP_DEBUG_XXX, "Building ck: %p for control: %p to be read (MID: %u)\n", chk, control, mid); atomic_add_int(&net->ref_count, 1); chk->data = dmbuf; } /* Set the appropriate TSN mark */ if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) { SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap); if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = tsn; } } else { SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap); if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_map)) { asoc->highest_tsn_inside_map = tsn; } } /* Now is it complete (i.e. not fragmented)? */ if ((chk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) { /* * Special check for when streams are resetting. We could be * more smart about this and check the actual stream to see * if it is not being reset.. that way we would not create a * HOLB when amongst streams being reset and those not being * reset. * */ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) && SCTP_TSN_GT(tsn, liste->tsn)) { /* * yep its past where we need to reset... go ahead * and queue it. */ if (TAILQ_EMPTY(&asoc->pending_reply_queue)) { /* first one on */ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next); } else { struct sctp_queued_to_read *lcontrol, *nlcontrol; unsigned char inserted = 0; TAILQ_FOREACH_SAFE(lcontrol, &asoc->pending_reply_queue, next, nlcontrol) { if (SCTP_TSN_GT(control->sinfo_tsn, lcontrol->sinfo_tsn)) { continue; } else { /* found it */ TAILQ_INSERT_BEFORE(lcontrol, control, next); inserted = 1; break; } } if (inserted == 0) { /* * must be put at end, use prevP * (all setup from loop) to setup * nextP. 
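 * pending_reply_queue is kept sorted by sinfo_tsn: the loop above
 * stops at the first entry with a larger TSN and inserts in front
 * of it, so falling out of the loop means this control belongs at
 * the tail.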
*/ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next); } } goto finish_express_del; } if (chk_flags & SCTP_DATA_UNORDERED) { /* queue directly into socket buffer */ SCTPDBG(SCTP_DEBUG_XXX, "Unordered data to be read control: %p MID: %u\n", control, mid); sctp_mark_non_revokable(asoc, control->sinfo_tsn); sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED); } else { SCTPDBG(SCTP_DEBUG_XXX, "Queue control: %p for reordering MID: %u\n", control, mid); sctp_queue_data_to_stream(stcb, asoc, control, abort_flag, &need_reasm_check); if (*abort_flag) { if (last_chunk) { *m = NULL; } return (0); } } goto finish_express_del; } /* If we reach here its a reassembly */ need_reasm_check = 1; SCTPDBG(SCTP_DEBUG_XXX, "Queue data to stream for reasm control: %p MID: %u\n", control, mid); sctp_queue_data_for_reasm(stcb, asoc, control, chk, created_control, abort_flag, tsn); if (*abort_flag) { /* * the assoc is now gone and chk was put onto the reasm * queue, which has all been freed. */ if (last_chunk) { *m = NULL; } return (0); } finish_express_del: /* Here we tidy up things */ if (tsn == (asoc->cumulative_tsn + 1)) { /* Update cum-ack */ asoc->cumulative_tsn = tsn; } if (last_chunk) { *m = NULL; } if (ordered) { SCTP_STAT_INCR_COUNTER64(sctps_inorderchunks); } else { SCTP_STAT_INCR_COUNTER64(sctps_inunorderchunks); } SCTP_STAT_INCR(sctps_recvdata); /* Set it present please */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) { sctp_log_strm_del_alt(stcb, tsn, mid, sid, SCTP_STR_LOG_FROM_MARK_TSN); } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(asoc->mapping_array_base_tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, SCTP_MAP_PREPARE_SLIDE); } if (need_reasm_check) { (void)sctp_deliver_reasm_check(stcb, asoc, &asoc->strmin[sid], SCTP_READ_LOCK_NOT_HELD); need_reasm_check = 0; } /* check the special flag for stream resets */ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) && SCTP_TSN_GE(asoc->cumulative_tsn, liste->tsn)) { /* * we have finished working through the backlogged TSN's now * time to reset streams. 1: call reset function. 2: free * pending_reply space 3: distribute any chunks in * pending_reply_queue. 
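 * After a reset is performed the backlog on pending_reply_queue is
 * drained below: all of it if no further reset is pending, otherwise
 * only the controls whose TSN does not exceed the next pending reset
 * point.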
*/ sctp_reset_in_stream(stcb, liste->number_entries, liste->list_of_streams); TAILQ_REMOVE(&asoc->resetHead, liste, next_resp); sctp_send_deferred_reset_response(stcb, liste, SCTP_STREAM_RESET_RESULT_PERFORMED); SCTP_FREE(liste, SCTP_M_STRESET); /* sa_ignore FREED_MEMORY */ liste = TAILQ_FIRST(&asoc->resetHead); if (TAILQ_EMPTY(&asoc->resetHead)) { /* All can be removed */ TAILQ_FOREACH_SAFE(control, &asoc->pending_reply_queue, next, ncontrol) { TAILQ_REMOVE(&asoc->pending_reply_queue, control, next); sctp_queue_data_to_stream(stcb, asoc, control, abort_flag, &need_reasm_check); if (*abort_flag) { return (0); } if (need_reasm_check) { (void)sctp_deliver_reasm_check(stcb, asoc, &asoc->strmin[control->sinfo_stream], SCTP_READ_LOCK_NOT_HELD); need_reasm_check = 0; } } } else { TAILQ_FOREACH_SAFE(control, &asoc->pending_reply_queue, next, ncontrol) { if (SCTP_TSN_GT(control->sinfo_tsn, liste->tsn)) { break; } /* * if control->sinfo_tsn is <= liste->tsn we * can process it which is the NOT of * control->sinfo_tsn > liste->tsn */ TAILQ_REMOVE(&asoc->pending_reply_queue, control, next); sctp_queue_data_to_stream(stcb, asoc, control, abort_flag, &need_reasm_check); if (*abort_flag) { return (0); } if (need_reasm_check) { (void)sctp_deliver_reasm_check(stcb, asoc, &asoc->strmin[control->sinfo_stream], SCTP_READ_LOCK_NOT_HELD); need_reasm_check = 0; } } } } return (1); } static const int8_t sctp_map_lookup_tab[256] = { 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 8 }; void sctp_slide_mapping_arrays(struct sctp_tcb *stcb) { /* * Now we also need to check the mapping array in a couple of ways. * 1) Did we move the cum-ack point? * * When you first glance at this you might think that all entries * that make up the position of the cum-ack would be in the * nr-mapping array only.. i.e. things up to the cum-ack are always * deliverable. Thats true with one exception, when its a fragmented * message we may not deliver the data until some threshold (or all * of it) is in place. So we must OR the nr_mapping_array and * mapping_array to get a true picture of the cum-ack. */ struct sctp_association *asoc; int at; uint8_t val; int slide_from, slide_end, lgap, distance; uint32_t old_cumack, old_base, old_highest, highest_tsn; asoc = &stcb->asoc; old_cumack = asoc->cumulative_tsn; old_base = asoc->mapping_array_base_tsn; old_highest = asoc->highest_tsn_inside_map; /* * We could probably improve this a small bit by calculating the * offset of the current cum-ack as the starting point. 
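 *
 * The scan below ORs the two maps byte-wise; a 0xff byte means all
 * eight TSNs it covers are present. For a partial byte,
 * sctp_map_lookup_tab[val] gives the number of consecutive set bits
 * starting at bit 0: e.g. val = 0x17 (00010111) yields 3, so the
 * cum-ack advances three TSNs and the scan stops at that first
 * hole.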
*/ at = 0; for (slide_from = 0; slide_from < stcb->asoc.mapping_array_size; slide_from++) { val = asoc->nr_mapping_array[slide_from] | asoc->mapping_array[slide_from]; if (val == 0xff) { at += 8; } else { /* there is a 0 bit */ at += sctp_map_lookup_tab[val]; break; } } asoc->cumulative_tsn = asoc->mapping_array_base_tsn + (at - 1); if (SCTP_TSN_GT(asoc->cumulative_tsn, asoc->highest_tsn_inside_map) && SCTP_TSN_GT(asoc->cumulative_tsn, asoc->highest_tsn_inside_nr_map)) { #ifdef INVARIANTS panic("huh, cumack 0x%x greater than high-tsn 0x%x in map", asoc->cumulative_tsn, asoc->highest_tsn_inside_map); #else SCTP_PRINTF("huh, cumack 0x%x greater than high-tsn 0x%x in map - should panic?\n", asoc->cumulative_tsn, asoc->highest_tsn_inside_map); sctp_print_mapping_array(asoc); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(0, 6, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); } asoc->highest_tsn_inside_map = asoc->cumulative_tsn; asoc->highest_tsn_inside_nr_map = asoc->cumulative_tsn; #endif } if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->highest_tsn_inside_map)) { highest_tsn = asoc->highest_tsn_inside_nr_map; } else { highest_tsn = asoc->highest_tsn_inside_map; } if ((asoc->cumulative_tsn == highest_tsn) && (at >= 8)) { /* The complete array was completed by a single FR */ /* highest becomes the cum-ack */ int clr; #ifdef INVARIANTS unsigned int i; #endif /* clear the array */ clr = ((at + 7) >> 3); if (clr > asoc->mapping_array_size) { clr = asoc->mapping_array_size; } memset(asoc->mapping_array, 0, clr); memset(asoc->nr_mapping_array, 0, clr); #ifdef INVARIANTS for (i = 0; i < asoc->mapping_array_size; i++) { if ((asoc->mapping_array[i]) || (asoc->nr_mapping_array[i])) { SCTP_PRINTF("Error Mapping array's not clean at clear\n"); sctp_print_mapping_array(asoc); } } #endif asoc->mapping_array_base_tsn = asoc->cumulative_tsn + 1; asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map = asoc->cumulative_tsn; } else if (at >= 8) { /* we can slide the mapping array down */ /* slide_from holds where we hit the first NON 0xff byte */ /* * now calculate the ceiling of the move using our highest * TSN value */ SCTP_CALC_TSN_TO_GAP(lgap, highest_tsn, asoc->mapping_array_base_tsn); slide_end = (lgap >> 3); if (slide_end < slide_from) { sctp_print_mapping_array(asoc); #ifdef INVARIANTS panic("impossible slide"); #else SCTP_PRINTF("impossible slide lgap: %x slide_end: %x slide_from: %x? at: %d\n", lgap, slide_end, slide_from, at); return; #endif } if (slide_end > asoc->mapping_array_size) { #ifdef INVARIANTS panic("would overrun buffer"); #else SCTP_PRINTF("Gak, would have overrun map end: %d slide_end: %d\n", asoc->mapping_array_size, slide_end); slide_end = asoc->mapping_array_size; #endif } distance = (slide_end - slide_from) + 1; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(old_base, old_cumack, old_highest, SCTP_MAP_PREPARE_SLIDE); sctp_log_map((uint32_t)slide_from, (uint32_t)slide_end, (uint32_t)lgap, SCTP_MAP_SLIDE_FROM); } if (distance + slide_from > asoc->mapping_array_size || distance < 0) { /* * Here we do NOT slide forward the array so that * hopefully when more data comes in to fill it up * we will be able to slide it forward. 
Really I * don't think this should happen :-0 */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map((uint32_t)distance, (uint32_t)slide_from, (uint32_t)asoc->mapping_array_size, SCTP_MAP_SLIDE_NONE); } } else { int ii; for (ii = 0; ii < distance; ii++) { asoc->mapping_array[ii] = asoc->mapping_array[slide_from + ii]; asoc->nr_mapping_array[ii] = asoc->nr_mapping_array[slide_from + ii]; } for (ii = distance; ii < asoc->mapping_array_size; ii++) { asoc->mapping_array[ii] = 0; asoc->nr_mapping_array[ii] = 0; } if (asoc->highest_tsn_inside_map + 1 == asoc->mapping_array_base_tsn) { asoc->highest_tsn_inside_map += (slide_from << 3); } if (asoc->highest_tsn_inside_nr_map + 1 == asoc->mapping_array_base_tsn) { asoc->highest_tsn_inside_nr_map += (slide_from << 3); } asoc->mapping_array_base_tsn += (slide_from << 3); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(asoc->mapping_array_base_tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); } } } } void sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap) { struct sctp_association *asoc; uint32_t highest_tsn; int is_a_gap; sctp_slide_mapping_arrays(stcb); asoc = &stcb->asoc; if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->highest_tsn_inside_map)) { highest_tsn = asoc->highest_tsn_inside_nr_map; } else { highest_tsn = asoc->highest_tsn_inside_map; } /* Is there a gap now? */ is_a_gap = SCTP_TSN_GT(highest_tsn, stcb->asoc.cumulative_tsn); /* * Now we need to see if we need to queue a sack or just start the * timer (if allowed). */ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) { /* * Ok special case, in SHUTDOWN-SENT case. Here we make sure * the SACK timer is off and instead send a SHUTDOWN and a * SACK */ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INDATA + SCTP_LOC_17); } sctp_send_shutdown(stcb, ((stcb->asoc.alternate) ? stcb->asoc.alternate : stcb->asoc.primary_destination)); if (is_a_gap) { sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); } } else { /* * CMT DAC algorithm: increase number of packets received * since last ack */ stcb->asoc.cmt_dac_pkts_rcvd++; if ((stcb->asoc.send_sack == 1) || /* We need to send a * SACK */ ((was_a_gap) && (is_a_gap == 0)) || /* was a gap, but no * longer is one */ (stcb->asoc.numduptsns) || /* we have dup's */ (is_a_gap) || /* is still a gap */ (stcb->asoc.delayed_ack == 0) || /* Delayed sack disabled */ (stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) /* hit limit of pkts */ ) { if ((stcb->asoc.sctp_cmt_on_off > 0) && (SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) && (stcb->asoc.send_sack == 0) && (stcb->asoc.numduptsns == 0) && (stcb->asoc.delayed_ack) && (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer))) { /* * CMT DAC algorithm: With CMT, delay acks * even in the face of reordering. * Therefore, acks that do not have to be * sent for the above reasons will be * delayed. That is, acks that would have * been sent due to gap reports will be * delayed with DAC. Start the delayed ack * timer. */ sctp_timer_start(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL); } else { /* * Ok we must build a SACK since the timer * is pending, we got our first packet OR * there are gaps or duplicates.
*/ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); } } else { if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL); } } } } int sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t *high_tsn) { struct sctp_chunkhdr *ch, chunk_buf; struct sctp_association *asoc; int num_chunks = 0; /* number of control chunks processed */ int stop_proc = 0; int chk_length, break_flag, last_chunk; int abort_flag = 0, was_a_gap; struct mbuf *m; uint32_t highest_tsn; /* set the rwnd */ sctp_set_rwnd(stcb, &stcb->asoc); m = *mm; SCTP_TCB_LOCK_ASSERT(stcb); asoc = &stcb->asoc; if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->highest_tsn_inside_map)) { highest_tsn = asoc->highest_tsn_inside_nr_map; } else { highest_tsn = asoc->highest_tsn_inside_map; } was_a_gap = SCTP_TSN_GT(highest_tsn, stcb->asoc.cumulative_tsn); /* * setup where we got the last DATA packet from for any SACK that * may need to go out. Don't bump the net. This is done ONLY when a * chunk is assigned. */ asoc->last_data_chunk_from = net; /*- * Now before we proceed we must figure out if this is a wasted * cluster... i.e. it is a small packet sent in and yet the driver * underneath allocated a full cluster for it. If so we must copy it * to a smaller mbuf and free up the cluster mbuf. This will help * with cluster starvation. Note for __Panda__ we don't do this * since it has clusters all the way down to 64 bytes. */ if (SCTP_BUF_LEN(m) < (long)MLEN && SCTP_BUF_NEXT(m) == NULL) { /* we only handle mbufs that are singletons.. not chains */ m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_NOWAIT, 1, MT_DATA); if (m) { /* ok lets see if we can copy the data up */ caddr_t *from, *to; /* get the pointers and copy */ to = mtod(m, caddr_t *); from = mtod((*mm), caddr_t *); memcpy(to, from, SCTP_BUF_LEN((*mm))); /* copy the length and free up the old */ SCTP_BUF_LEN(m) = SCTP_BUF_LEN((*mm)); sctp_m_freem(*mm); /* success, back copy */ *mm = m; } else { /* We are in trouble in the mbuf world .. yikes */ m = *mm; } } /* get pointer to the first chunk header */ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset, sizeof(struct sctp_chunkhdr), (uint8_t *)&chunk_buf); if (ch == NULL) { return (1); } /* * process all DATA chunks... 
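 * Each pass validates the chunk length against what remains in the
 * packet, hands DATA/I-DATA to sctp_process_a_data_chunk(), and
 * applies the standard bit rules to anything else: 0x40 set queues
 * an "unrecognized chunk" report, 0x80 clear stops processing the
 * rest of the packet, 0x80 set skips just the offending chunk.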
*/ *high_tsn = asoc->cumulative_tsn; break_flag = 0; asoc->data_pkts_seen++; while (stop_proc == 0) { /* validate chunk length */ chk_length = ntohs(ch->chunk_length); if (length - *offset < chk_length) { /* all done, mutilated chunk */ stop_proc = 1; continue; } if ((asoc->idata_supported == 1) && (ch->chunk_type == SCTP_DATA)) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; snprintf(msg, sizeof(msg), "%s", "I-DATA chunk received when DATA was negotiated"); op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_18; sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED); return (2); } if ((asoc->idata_supported == 0) && (ch->chunk_type == SCTP_IDATA)) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; snprintf(msg, sizeof(msg), "%s", "DATA chunk received when I-DATA was negotiated"); op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19; sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED); return (2); } if ((ch->chunk_type == SCTP_DATA) || (ch->chunk_type == SCTP_IDATA)) { int clen; if (ch->chunk_type == SCTP_DATA) { clen = sizeof(struct sctp_data_chunk); } else { clen = sizeof(struct sctp_idata_chunk); } if (chk_length < clen) { /* * Need to send an abort since we had an * invalid data chunk. */ struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; snprintf(msg, sizeof(msg), "DATA chunk of length %d", chk_length); op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_20; sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED); return (2); } #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xB1, 0); #endif if (SCTP_SIZE32(chk_length) == (length - *offset)) { last_chunk = 1; } else { last_chunk = 0; } if (sctp_process_a_data_chunk(stcb, asoc, mm, *offset, chk_length, net, high_tsn, &abort_flag, &break_flag, last_chunk, ch->chunk_type)) { num_chunks++; } if (abort_flag) return (2); if (break_flag) { /* * Set because of out of rwnd space and no * drop rep space left. */ stop_proc = 1; continue; } } else { /* not a data chunk in the data region */ switch (ch->chunk_type) { case SCTP_INITIATION: case SCTP_INITIATION_ACK: case SCTP_SELECTIVE_ACK: case SCTP_NR_SELECTIVE_ACK: case SCTP_HEARTBEAT_REQUEST: case SCTP_HEARTBEAT_ACK: case SCTP_ABORT_ASSOCIATION: case SCTP_SHUTDOWN: case SCTP_SHUTDOWN_ACK: case SCTP_OPERATION_ERROR: case SCTP_COOKIE_ECHO: case SCTP_COOKIE_ACK: case SCTP_ECN_ECHO: case SCTP_ECN_CWR: case SCTP_SHUTDOWN_COMPLETE: case SCTP_AUTHENTICATION: case SCTP_ASCONF_ACK: case SCTP_PACKET_DROPPED: case SCTP_STREAM_RESET: case SCTP_FORWARD_CUM_TSN: case SCTP_ASCONF: { /* * Now, what do we do with KNOWN * chunks that are NOT in the right * place? * * For now, I do nothing but ignore * them. We may later want to add * sysctl stuff to switch out and do * either an ABORT() or possibly * process them.
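 * (Editorial note: the wording above is stale; the code that follows
 * builds a PROTOCOL_VIOLATION error cause and aborts the
 * association, so such misplaced chunks are no longer ignored.)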
*/ struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; snprintf(msg, sizeof(msg), "DATA chunk followed by chunk of type %2.2x", ch->chunk_type); op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED); return (2); } default: /* unknown chunk type, use bit rules */ if (ch->chunk_type & 0x40) { /* Add an error report to the queue */ struct mbuf *op_err; struct sctp_gen_error_cause *cause; op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause), 0, M_NOWAIT, 1, MT_DATA); if (op_err != NULL) { cause = mtod(op_err, struct sctp_gen_error_cause *); cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK); cause->length = htons((uint16_t)(chk_length + sizeof(struct sctp_gen_error_cause))); SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause); SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, chk_length, M_NOWAIT); if (SCTP_BUF_NEXT(op_err) != NULL) { sctp_queue_op_err(stcb, op_err); } else { sctp_m_freem(op_err); } } } if ((ch->chunk_type & 0x80) == 0) { /* discard the rest of this packet */ stop_proc = 1; } /* else skip this bad chunk and * continue... */ break; } /* switch of chunk type */ } *offset += SCTP_SIZE32(chk_length); if ((*offset >= length) || stop_proc) { /* no more data left in the mbuf chain */ stop_proc = 1; continue; } ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset, sizeof(struct sctp_chunkhdr), (uint8_t *)&chunk_buf); if (ch == NULL) { *offset = length; stop_proc = 1; continue; } } if (break_flag) { /* * we need to report rwnd overrun drops. */ sctp_send_packet_dropped(stcb, net, *mm, length, iphlen, 0); } if (num_chunks) { /* * Did we get data? If so, update the time for auto-close and * give peer credit for being alive. */ SCTP_STAT_INCR(sctps_recvpktwithdata); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INDATA, __LINE__); } stcb->asoc.overall_error_count = 0; (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_last_rcvd); } /* now service all of the reassm queue if needed */ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) { /* Assure that we ack right away */ stcb->asoc.send_sack = 1; } /* Start a sack timer or QUEUE a SACK for sending */ sctp_sack_check(stcb, was_a_gap); return (0); } static int sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1, uint32_t last_tsn, uint16_t frag_strt, uint16_t frag_end, int nr_sacking, int *num_frs, uint32_t *biggest_newly_acked_tsn, uint32_t *this_sack_lowest_newack, int *rto_ok) { struct sctp_tmit_chunk *tp1; unsigned int theTSN; int j, wake_him = 0, circled = 0; /* Recover the tp1 we last saw */ tp1 = *p_tp1; if (tp1 == NULL) { tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue); } for (j = frag_strt; j <= frag_end; j++) { theTSN = j + last_tsn; while (tp1) { if (tp1->rec.data.doing_fast_retransmit) (*num_frs) += 1; /*- * CMT: CUCv2 algorithm. For each TSN being * processed from the sent queue, track the * next expected pseudo-cumack, or * rtx_pseudo_cumack, if required. Separate * cumack trackers for first transmissions, * and retransmissions.
*/ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (tp1->whoTo->find_pseudo_cumack == 1) && (tp1->snd_count == 1)) { tp1->whoTo->pseudo_cumack = tp1->rec.data.tsn; tp1->whoTo->find_pseudo_cumack = 0; } if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (tp1->whoTo->find_rtx_pseudo_cumack == 1) && (tp1->snd_count > 1)) { tp1->whoTo->rtx_pseudo_cumack = tp1->rec.data.tsn; tp1->whoTo->find_rtx_pseudo_cumack = 0; } if (tp1->rec.data.tsn == theTSN) { if (tp1->sent != SCTP_DATAGRAM_UNSENT) { /*- * must be held until * cum-ack passes */ if (tp1->sent < SCTP_DATAGRAM_RESEND) { /*- * If it is less than RESEND, it is * now no longer in flight. * Higher values may already be set * via previous Gap Ack Blocks... * i.e. ACKED or RESEND. */ if (SCTP_TSN_GT(tp1->rec.data.tsn, *biggest_newly_acked_tsn)) { *biggest_newly_acked_tsn = tp1->rec.data.tsn; } /*- * CMT: SFR algo (and HTNA) - set * saw_newack to 1 for dest being * newly acked. update * this_sack_highest_newack if * appropriate. */ if (tp1->rec.data.chunk_was_revoked == 0) tp1->whoTo->saw_newack = 1; if (SCTP_TSN_GT(tp1->rec.data.tsn, tp1->whoTo->this_sack_highest_newack)) { tp1->whoTo->this_sack_highest_newack = tp1->rec.data.tsn; } /*- * CMT DAC algo: also update * this_sack_lowest_newack */ if (*this_sack_lowest_newack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(*this_sack_lowest_newack, last_tsn, tp1->rec.data.tsn, 0, 0, SCTP_LOG_TSN_ACKED); } *this_sack_lowest_newack = tp1->rec.data.tsn; } /*- * CMT: CUCv2 algorithm. If (rtx-)pseudo-cumack for corresp * dest is being acked, then we have a new (rtx-)pseudo-cumack. Set * new_(rtx_)pseudo_cumack to TRUE so that the cwnd for this dest can be * updated. Also trigger search for the next expected (rtx-)pseudo-cumack. * Separate pseudo_cumack trackers for first transmissions and * retransmissions. */ if (tp1->rec.data.tsn == tp1->whoTo->pseudo_cumack) { if (tp1->rec.data.chunk_was_revoked == 0) { tp1->whoTo->new_pseudo_cumack = 1; } tp1->whoTo->find_pseudo_cumack = 1; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.tsn, SCTP_CWND_LOG_FROM_SACK); } if (tp1->rec.data.tsn == tp1->whoTo->rtx_pseudo_cumack) { if (tp1->rec.data.chunk_was_revoked == 0) { tp1->whoTo->new_pseudo_cumack = 1; } tp1->whoTo->find_rtx_pseudo_cumack = 1; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(*biggest_newly_acked_tsn, last_tsn, tp1->rec.data.tsn, frag_strt, frag_end, SCTP_LOG_TSN_ACKED); } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_GAP, tp1->whoTo->flight_size, tp1->book_size, (uint32_t)(uintptr_t)tp1->whoTo, tp1->rec.data.tsn); } sctp_flight_size_decrease(tp1); if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, tp1); } sctp_total_flight_decrease(stcb, tp1); tp1->whoTo->net_ack += tp1->send_size; if (tp1->snd_count < 2) { /*- * True non-retransmitted chunk */ tp1->whoTo->net_ack2 += tp1->send_size; /*- * update RTO too?
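 * (Illustrative aside, not in the original source: *rto_ok is
 * cleared after the first sctp_calculate_rto() call, so at most one
 * RTT sample is taken per SACK, and only from a chunk with
 * snd_count < 2, i.e. one that was never retransmitted, in the
 * spirit of Karn's algorithm.)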
*/ if (tp1->do_rtt) { if (*rto_ok) { tp1->whoTo->RTO = sctp_calculate_rto(stcb, &stcb->asoc, tp1->whoTo, &tp1->sent_rcv_time, sctp_align_safe_nocopy, SCTP_RTT_FROM_DATA); *rto_ok = 0; } if (tp1->whoTo->rto_needed == 0) { tp1->whoTo->rto_needed = 1; } tp1->do_rtt = 0; } } } if (tp1->sent <= SCTP_DATAGRAM_RESEND) { if (SCTP_TSN_GT(tp1->rec.data.tsn, stcb->asoc.this_sack_highest_gap)) { stcb->asoc.this_sack_highest_gap = tp1->rec.data.tsn; } if (tp1->sent == SCTP_DATAGRAM_RESEND) { sctp_ucount_decr(stcb->asoc.sent_queue_retran_cnt); #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xB2, (stcb->asoc.sent_queue_retran_cnt & 0x000000ff)); #endif } } /*- * All chunks NOT UNSENT fall through here and are marked * (leave PR-SCTP ones that are to skip alone though) */ if ((tp1->sent != SCTP_FORWARD_TSN_SKIP) && (tp1->sent != SCTP_DATAGRAM_NR_ACKED)) { tp1->sent = SCTP_DATAGRAM_MARKED; } if (tp1->rec.data.chunk_was_revoked) { /* deflate the cwnd */ tp1->whoTo->cwnd -= tp1->book_size; tp1->rec.data.chunk_was_revoked = 0; } /* NR Sack code here */ if (nr_sacking && (tp1->sent != SCTP_DATAGRAM_NR_ACKED)) { if (stcb->asoc.strmout[tp1->rec.data.sid].chunks_on_queues > 0) { stcb->asoc.strmout[tp1->rec.data.sid].chunks_on_queues--; #ifdef INVARIANTS } else { panic("No chunks on the queues for sid %u.", tp1->rec.data.sid); #endif } if ((stcb->asoc.strmout[tp1->rec.data.sid].chunks_on_queues == 0) && (stcb->asoc.strmout[tp1->rec.data.sid].state == SCTP_STREAM_RESET_PENDING) && TAILQ_EMPTY(&stcb->asoc.strmout[tp1->rec.data.sid].outqueue)) { stcb->asoc.trigger_reset = 1; } tp1->sent = SCTP_DATAGRAM_NR_ACKED; if (tp1->data) { /* * sa_ignore * NO_NULL_CHK */ sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1); sctp_m_freem(tp1->data); tp1->data = NULL; } wake_him++; } } break; } /* if (tp1->tsn == theTSN) */ if (SCTP_TSN_GT(tp1->rec.data.tsn, theTSN)) { break; } tp1 = TAILQ_NEXT(tp1, sctp_next); if ((tp1 == NULL) && (circled == 0)) { circled++; tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue); } } /* end while (tp1) */ if (tp1 == NULL) { circled = 0; tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue); } /* In case the fragments were not in order we must reset */ } /* end for (j = fragStart */ *p_tp1 = tp1; return (wake_him); /* Return value only used for nr-sack */ } static int sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct sctp_association *asoc, uint32_t last_tsn, uint32_t *biggest_tsn_acked, uint32_t *biggest_newly_acked_tsn, uint32_t *this_sack_lowest_newack, int num_seg, int num_nr_seg, int *rto_ok) { struct sctp_gap_ack_block *frag, block; struct sctp_tmit_chunk *tp1; int i; int num_frs = 0; int chunk_freed; int non_revocable; uint16_t frag_strt, frag_end, prev_frag_end; tp1 = TAILQ_FIRST(&asoc->sent_queue); prev_frag_end = 0; chunk_freed = 0; for (i = 0; i < (num_seg + num_nr_seg); i++) { if (i == num_seg) { prev_frag_end = 0; tp1 = TAILQ_FIRST(&asoc->sent_queue); } frag = (struct sctp_gap_ack_block *)sctp_m_getptr(m, *offset, sizeof(struct sctp_gap_ack_block), (uint8_t *)&block); *offset += sizeof(block); if (frag == NULL) { return (chunk_freed); } frag_strt = ntohs(frag->start); frag_end = ntohs(frag->end); if (frag_strt > frag_end) { /* This gap report is malformed, skip it. */ continue; } if (frag_strt <= prev_frag_end) { /* This gap report is not in order, so restart. 
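 * (Illustrative example, not in the original source: gap block
 * bounds are offsets from the cumulative TSN, so with cum-ack 1000 a
 * block [2,4] covers TSNs 1002..1004; a block starting at or below
 * prev_frag_end overlaps ground already walked, hence the restart
 * from the head of the sent queue.)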
*/ tp1 = TAILQ_FIRST(&asoc->sent_queue); } if (SCTP_TSN_GT((last_tsn + frag_end), *biggest_tsn_acked)) { *biggest_tsn_acked = last_tsn + frag_end; } if (i < num_seg) { non_revocable = 0; } else { non_revocable = 1; } if (sctp_process_segment_range(stcb, &tp1, last_tsn, frag_strt, frag_end, non_revocable, &num_frs, biggest_newly_acked_tsn, this_sack_lowest_newack, rto_ok)) { chunk_freed = 1; } prev_frag_end = frag_end; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { if (num_frs) sctp_log_fr(*biggest_tsn_acked, *biggest_newly_acked_tsn, last_tsn, SCTP_FR_LOG_BIGGEST_TSNS); } return (chunk_freed); } static void sctp_check_for_revoked(struct sctp_tcb *stcb, struct sctp_association *asoc, uint32_t cumack, uint32_t biggest_tsn_acked) { struct sctp_tmit_chunk *tp1; TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (SCTP_TSN_GT(tp1->rec.data.tsn, cumack)) { /* * ok this guy is either ACK or MARKED. If it is * ACKED it has been previously acked but not this * time i.e. revoked. If it is MARKED it was ACK'ed * again. */ if (SCTP_TSN_GT(tp1->rec.data.tsn, biggest_tsn_acked)) { break; } if (tp1->sent == SCTP_DATAGRAM_ACKED) { /* it has been revoked */ tp1->sent = SCTP_DATAGRAM_SENT; tp1->rec.data.chunk_was_revoked = 1; /* * We must add this stuff back in to assure * timers and such get started. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE, tp1->whoTo->flight_size, tp1->book_size, (uint32_t)(uintptr_t)tp1->whoTo, tp1->rec.data.tsn); } sctp_flight_size_increase(tp1); sctp_total_flight_increase(stcb, tp1); /* * We inflate the cwnd to compensate for our * artificial inflation of the flight_size. */ tp1->whoTo->cwnd += tp1->book_size; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(asoc->last_acked_seq, cumack, tp1->rec.data.tsn, 0, 0, SCTP_LOG_TSN_REVOKED); } } else if (tp1->sent == SCTP_DATAGRAM_MARKED) { /* it has been re-acked in this SACK */ tp1->sent = SCTP_DATAGRAM_ACKED; } } if (tp1->sent == SCTP_DATAGRAM_UNSENT) break; } } static void sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, uint32_t biggest_tsn_acked, uint32_t biggest_tsn_newly_acked, uint32_t this_sack_lowest_newack, int accum_moved) { struct sctp_tmit_chunk *tp1; int strike_flag = 0; struct timeval now; int tot_retrans = 0; uint32_t sending_seq; struct sctp_nets *net; int num_dests_sacked = 0; /* * select the sending_seq, this is either the next thing ready to be * sent but not transmitted, OR, the next seq we assign. 
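 * (Illustrative aside, not in the original source: if the send
 * queue holds unsent chunks, the lowest queued TSN is used;
 * otherwise asoc->sending_seq, the next TSN to be assigned, stands
 * in. The chosen value is later recorded as fast_retran_tsn on any
 * chunk marked for fast retransmit.)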
*/ tp1 = TAILQ_FIRST(&stcb->asoc.send_queue); if (tp1 == NULL) { sending_seq = asoc->sending_seq; } else { sending_seq = tp1->rec.data.tsn; } /* CMT DAC algo: finding out if SACK is a mixed SACK */ if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (net->saw_newack) num_dests_sacked++; } } if (stcb->asoc.prsctp_supported) { (void)SCTP_GETTIME_TIMEVAL(&now); } TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { strike_flag = 0; if (tp1->no_fr_allowed) { /* this one had a timeout or something */ continue; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { if (tp1->sent < SCTP_DATAGRAM_RESEND) sctp_log_fr(biggest_tsn_newly_acked, tp1->rec.data.tsn, tp1->sent, SCTP_FR_LOG_CHECK_STRIKE); } if (SCTP_TSN_GT(tp1->rec.data.tsn, biggest_tsn_acked) || tp1->sent == SCTP_DATAGRAM_UNSENT) { /* done */ break; } if (stcb->asoc.prsctp_supported) { if ((PR_SCTP_TTL_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) { /* Is it expired? */ if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) { /* Yes so drop it */ if (tp1->data != NULL) { (void)sctp_release_pr_sctp_chunk(stcb, tp1, 1, SCTP_SO_NOT_LOCKED); } continue; } } } if (SCTP_TSN_GT(tp1->rec.data.tsn, asoc->this_sack_highest_gap)) { /* we are beyond the tsn in the sack */ break; } if (tp1->sent >= SCTP_DATAGRAM_RESEND) { /* either a RESEND, ACKED, or MARKED */ /* skip */ if (tp1->sent == SCTP_FORWARD_TSN_SKIP) { /* Continue striking FWD-TSN chunks */ tp1->rec.data.fwd_tsn_cnt++; } continue; } /* * CMT: SFR algo (covers part of DAC and HTNA as well) */ if (tp1->whoTo && tp1->whoTo->saw_newack == 0) { /* * No new acks were received for data sent to this * dest. Therefore, according to the SFR algo for * CMT, no data sent to this dest can be marked for * FR using this SACK. */ continue; } else if (tp1->whoTo && SCTP_TSN_GT(tp1->rec.data.tsn, tp1->whoTo->this_sack_highest_newack)) { /* * CMT: New acks were received for data sent to * this dest. But no new acks were seen for data * sent after tp1. Therefore, according to the SFR * algo for CMT, tp1 cannot be marked for FR using * this SACK. This step covers part of the DAC algo * and the HTNA algo as well. */ continue; } /* * Here we check to see if we have already done an FR * and if so we see if the biggest TSN we saw in the sack is * smaller than the recovery point. If so we don't strike * the tsn... otherwise we CAN strike the TSN. */ /* * @@@ JRI: Check for CMT if (accum_moved && * asoc->fast_retran_loss_recovery && (sctp_cmt_on_off == * 0)) { */ if (accum_moved && asoc->fast_retran_loss_recovery) { /* * Strike the TSN if in fast-recovery and cum-ack * moved. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(biggest_tsn_newly_acked, tp1->rec.data.tsn, tp1->sent, SCTP_FR_LOG_STRIKE_CHUNK); } if (tp1->sent < SCTP_DATAGRAM_RESEND) { tp1->sent++; } if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { /* * CMT DAC algorithm: If SACK flag is set to * 0, then lowest_newack test will not pass * because it would have been set to the * cumack earlier. If not already to be * rtx'd, if not a mixed sack and if tp1 is * not between two sacked TSNs, then mark by * one more. NOTE that we are marking by one * additional time since the SACK DAC flag * indicates that two packets have been * received after this missing TSN.
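 * (Illustrative aside, not in the original source: when the DAC
 * conditions below hold, tp1->sent is bumped a second time by the
 * same SACK -- once by the regular strike above and once here --
 * reflecting that the peer saw two packets arrive after the missing
 * TSN, so the chunk reaches SCTP_DATAGRAM_RESEND sooner.)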
*/ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) && SCTP_TSN_GT(this_sack_lowest_newack, tp1->rec.data.tsn)) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(16 + num_dests_sacked, tp1->rec.data.tsn, tp1->sent, SCTP_FR_LOG_STRIKE_CHUNK); } tp1->sent++; } } } else if ((tp1->rec.data.doing_fast_retransmit) && (asoc->sctp_cmt_on_off == 0)) { /* * For those that have done a FR we must take * special consideration if we strike. I.e the * biggest_newly_acked must be higher than the * sending_seq at the time we did the FR. */ if ( #ifdef SCTP_FR_TO_ALTERNATE /* * If FR's go to new networks, then we must only do * this for singly homed asoc's. However if the FR's * go to the same network (Armando's work) then its * ok to FR multiple times. */ (asoc->numnets < 2) #else (1) #endif ) { if (SCTP_TSN_GE(biggest_tsn_newly_acked, tp1->rec.data.fast_retran_tsn)) { /* * Strike the TSN, since this ack is * beyond where things were when we * did a FR. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(biggest_tsn_newly_acked, tp1->rec.data.tsn, tp1->sent, SCTP_FR_LOG_STRIKE_CHUNK); } if (tp1->sent < SCTP_DATAGRAM_RESEND) { tp1->sent++; } strike_flag = 1; if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { /* * CMT DAC algorithm: If * SACK flag is set to 0, * then lowest_newack test * will not pass because it * would have been set to * the cumack earlier. If * not already to be rtx'd, * If not a mixed sack and * if tp1 is not between two * sacked TSNs, then mark by * one more. NOTE that we * are marking by one * additional time since the * SACK DAC flag indicates * that two packets have * been received after this * missing TSN. */ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) && SCTP_TSN_GT(this_sack_lowest_newack, tp1->rec.data.tsn)) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(32 + num_dests_sacked, tp1->rec.data.tsn, tp1->sent, SCTP_FR_LOG_STRIKE_CHUNK); } if (tp1->sent < SCTP_DATAGRAM_RESEND) { tp1->sent++; } } } } } /* * JRI: TODO: remove code for HTNA algo. CMT's SFR * algo covers HTNA. */ } else if (SCTP_TSN_GT(tp1->rec.data.tsn, biggest_tsn_newly_acked)) { /* * We don't strike these: This is the HTNA * algorithm i.e. we don't strike If our TSN is * larger than the Highest TSN Newly Acked. */ ; } else { /* Strike the TSN */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(biggest_tsn_newly_acked, tp1->rec.data.tsn, tp1->sent, SCTP_FR_LOG_STRIKE_CHUNK); } if (tp1->sent < SCTP_DATAGRAM_RESEND) { tp1->sent++; } if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { /* * CMT DAC algorithm: If SACK flag is set to * 0, then lowest_newack test will not pass * because it would have been set to the * cumack earlier. If not already to be * rtx'd, If not a mixed sack and if tp1 is * not between two sacked TSNs, then mark by * one more. NOTE that we are marking by one * additional time since the SACK DAC flag * indicates that two packets have been * received after this missing TSN. 
*/ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) && SCTP_TSN_GT(this_sack_lowest_newack, tp1->rec.data.tsn)) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(48 + num_dests_sacked, tp1->rec.data.tsn, tp1->sent, SCTP_FR_LOG_STRIKE_CHUNK); } tp1->sent++; } } } if (tp1->sent == SCTP_DATAGRAM_RESEND) { struct sctp_nets *alt; /* fix counts and things */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND, (tp1->whoTo ? (tp1->whoTo->flight_size) : 0), tp1->book_size, (uint32_t)(uintptr_t)tp1->whoTo, tp1->rec.data.tsn); } if (tp1->whoTo) { tp1->whoTo->net_ack++; sctp_flight_size_decrease(tp1); if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, tp1); } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) { sctp_log_rwnd(SCTP_INCREASE_PEER_RWND, asoc->peers_rwnd, tp1->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)); } /* add back to the rwnd */ asoc->peers_rwnd += (tp1->send_size + SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)); /* remove from the total flight */ sctp_total_flight_decrease(stcb, tp1); if ((stcb->asoc.prsctp_supported) && (PR_SCTP_RTX_ENABLED(tp1->flags))) { /* * Has it been retransmitted tv_sec times? - * we store the retran count there. */ if (tp1->snd_count > tp1->rec.data.timetodrop.tv_sec) { /* Yes, so drop it */ if (tp1->data != NULL) { (void)sctp_release_pr_sctp_chunk(stcb, tp1, 1, SCTP_SO_NOT_LOCKED); } /* Make sure to flag we had a FR */ tp1->whoTo->net_ack++; continue; } } /* * SCTP_PRINTF("OK, we are now ready to FR this * guy\n"); */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { sctp_log_fr(tp1->rec.data.tsn, tp1->snd_count, 0, SCTP_FR_MARKED); } if (strike_flag) { /* This is a subsequent FR */ SCTP_STAT_INCR(sctps_sendmultfastretrans); } sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); if (asoc->sctp_cmt_on_off > 0) { /* * CMT: Using RTX_SSTHRESH policy for CMT. * If CMT is being used, then pick dest with * largest ssthresh for any retransmission. */ tp1->no_fr_allowed = 1; alt = tp1->whoTo; /* sa_ignore NO_NULL_CHK */ if (asoc->sctp_cmt_pf > 0) { /* * JRS 5/18/07 - If CMT PF is on, * use the PF version of * find_alt_net() */ alt = sctp_find_alternate_net(stcb, alt, 2); } else { /* * JRS 5/18/07 - If only CMT is on, * use the CMT version of * find_alt_net() */ /* sa_ignore NO_NULL_CHK */ alt = sctp_find_alternate_net(stcb, alt, 1); } if (alt == NULL) { alt = tp1->whoTo; } /* * CUCv2: If a different dest is picked for * the retransmission, then new * (rtx-)pseudo_cumack needs to be tracked * for orig dest. Let CUCv2 track new (rtx-) * pseudo-cumack always. */ if (tp1->whoTo) { tp1->whoTo->find_pseudo_cumack = 1; tp1->whoTo->find_rtx_pseudo_cumack = 1; } } else {/* CMT is OFF */ #ifdef SCTP_FR_TO_ALTERNATE /* Can we find an alternate? */ alt = sctp_find_alternate_net(stcb, tp1->whoTo, 0); #else /* * default behavior is to NOT retransmit * FR's to an alternate. Armando Caro's * paper details why. */ alt = tp1->whoTo; #endif } tp1->rec.data.doing_fast_retransmit = 1; tot_retrans++; /* mark the sending seq for possible subsequent FR's */ /* * SCTP_PRINTF("Marking TSN for FR new value %x\n", * (uint32_t)tpi->rec.data.tsn); */ if (TAILQ_EMPTY(&asoc->send_queue)) { /* * If the queue of send is empty then its * the next sequence number that will be * assigned so we subtract one from this to * get the one we last sent. 
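 * (Editorial note: the assignment below stores sending_seq itself;
 * no explicit subtraction happens in the code, the wording above
 * describing the intent rather than the arithmetic.)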
*/ tp1->rec.data.fast_retran_tsn = sending_seq; } else { /* * If there are chunks on the send queue * (unsent data that has made it from the * stream queues but not out the door), we * take the first one (which will have the * lowest TSN) and subtract one to get the * one we last sent. */ struct sctp_tmit_chunk *ttt; ttt = TAILQ_FIRST(&asoc->send_queue); tp1->rec.data.fast_retran_tsn = ttt->rec.data.tsn; } if (tp1->do_rtt) { /* * this guy had an RTO calculation pending on * it, cancel it */ if ((tp1->whoTo != NULL) && (tp1->whoTo->rto_needed == 0)) { tp1->whoTo->rto_needed = 1; } tp1->do_rtt = 0; } if (alt != tp1->whoTo) { /* yes, there is an alternate. */ sctp_free_remote_addr(tp1->whoTo); /* sa_ignore FREED_MEMORY */ tp1->whoTo = alt; atomic_add_int(&alt->ref_count, 1); } } } } struct sctp_tmit_chunk * sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_tmit_chunk *tp1, *tp2, *a_adv = NULL; struct timeval now; int now_filled = 0; if (asoc->prsctp_supported == 0) { return (NULL); } TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) { if (tp1->sent != SCTP_FORWARD_TSN_SKIP && tp1->sent != SCTP_DATAGRAM_RESEND && tp1->sent != SCTP_DATAGRAM_NR_ACKED) { /* no chance to advance, out of here */ break; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) { if ((tp1->sent == SCTP_FORWARD_TSN_SKIP) || (tp1->sent == SCTP_DATAGRAM_NR_ACKED)) { sctp_misc_ints(SCTP_FWD_TSN_CHECK, asoc->advanced_peer_ack_point, tp1->rec.data.tsn, 0, 0); } } if (!PR_SCTP_ENABLED(tp1->flags)) { /* * We can't fwd-tsn past any that are reliable aka * retransmitted until the asoc fails. */ break; } if (!now_filled) { (void)SCTP_GETTIME_TIMEVAL(&now); now_filled = 1; } /* * now we got a chunk which is marked for another * retransmission to a PR-stream but has run out its chances * already maybe OR has been marked to skip now. Can we skip * it if it's a resend? */ if (tp1->sent == SCTP_DATAGRAM_RESEND && (PR_SCTP_TTL_ENABLED(tp1->flags))) { /* * Now is this one marked for resend and its time is * now up? */ if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) { /* Yes so drop it */ if (tp1->data) { (void)sctp_release_pr_sctp_chunk(stcb, tp1, 1, SCTP_SO_NOT_LOCKED); } } else { /* * No, we are done when we hit one for resend * whose time has not expired. */ break; } } /* * Ok now if this chunk is marked to drop it we can clean up * the chunk, advance our peer ack point and we can check * the next chunk.
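 * (Illustrative example, not in the original source: with cum-ack
 * 100 and abandoned TSNs 101..103 sitting in FORWARD_TSN_SKIP or
 * NR_ACKED state, advanced_peer_ack_point moves to 103 below, and a
 * later FORWARD-TSN chunk tells the peer to treat 103 as
 * cumulatively acked.)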
*/ if ((tp1->sent == SCTP_FORWARD_TSN_SKIP) || (tp1->sent == SCTP_DATAGRAM_NR_ACKED)) { /* advance PeerAckPoint goes forward */ if (SCTP_TSN_GT(tp1->rec.data.tsn, asoc->advanced_peer_ack_point)) { asoc->advanced_peer_ack_point = tp1->rec.data.tsn; a_adv = tp1; } else if (tp1->rec.data.tsn == asoc->advanced_peer_ack_point) { /* No update but we do save the chk */ a_adv = tp1; } } else { /* * If it is still in RESEND we can advance no * further */ break; } } return (a_adv); } static int sctp_fs_audit(struct sctp_association *asoc) { struct sctp_tmit_chunk *chk; int inflight = 0, resend = 0, inbetween = 0, acked = 0, above = 0; int ret; #ifndef INVARIANTS int entry_flight, entry_cnt; #endif ret = 0; #ifndef INVARIANTS entry_flight = asoc->total_flight; entry_cnt = asoc->total_flight_count; #endif if (asoc->pr_sctp_cnt >= asoc->sent_queue_cnt) return (0); TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) { if (chk->sent < SCTP_DATAGRAM_RESEND) { SCTP_PRINTF("Chk TSN: %u size: %d inflight cnt: %d\n", chk->rec.data.tsn, chk->send_size, chk->snd_count); inflight++; } else if (chk->sent == SCTP_DATAGRAM_RESEND) { resend++; } else if (chk->sent < SCTP_DATAGRAM_ACKED) { inbetween++; } else if (chk->sent > SCTP_DATAGRAM_ACKED) { above++; } else { acked++; } } if ((inflight > 0) || (inbetween > 0)) { #ifdef INVARIANTS panic("Flight size-express incorrect? \n"); #else SCTP_PRINTF("asoc->total_flight: %d cnt: %d\n", entry_flight, entry_cnt); SCTP_PRINTF("Flight size-express incorrect F: %d I: %d R: %d Ab: %d ACK: %d\n", inflight, inbetween, resend, above, acked); ret = 1; #endif } return (ret); } static void sctp_window_probe_recovery(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_tmit_chunk *tp1) { tp1->window_probe = 0; if ((tp1->sent >= SCTP_DATAGRAM_ACKED) || (tp1->data == NULL)) { /* TSN's skipped we do NOT move back. */ sctp_misc_ints(SCTP_FLIGHT_LOG_DWN_WP_FWD, tp1->whoTo ? 
tp1->whoTo->flight_size : 0, tp1->book_size, (uint32_t)(uintptr_t)tp1->whoTo, tp1->rec.data.tsn); return; } /* First setup this by shrinking flight */ if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, tp1); } sctp_flight_size_decrease(tp1); sctp_total_flight_decrease(stcb, tp1); /* Now mark for resend */ tp1->sent = SCTP_DATAGRAM_RESEND; sctp_ucount_incr(asoc->sent_queue_retran_cnt); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_WP, tp1->whoTo->flight_size, tp1->book_size, (uint32_t)(uintptr_t)tp1->whoTo, tp1->rec.data.tsn); } } void sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, uint32_t rwnd, int *abort_now, int ecne_seen) { struct sctp_nets *net; struct sctp_association *asoc; struct sctp_tmit_chunk *tp1, *tp2; uint32_t old_rwnd; int win_probe_recovery = 0; int win_probe_recovered = 0; int j, done_once = 0; int rto_ok = 1; uint32_t send_s; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_SACK_ARRIVALS_ENABLE) { sctp_misc_ints(SCTP_SACK_LOG_EXPRESS, cumack, rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd); } SCTP_TCB_LOCK_ASSERT(stcb); #ifdef SCTP_ASOCLOG_OF_TSNS stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cumack; stcb->asoc.cumack_log_at++; if (stcb->asoc.cumack_log_at > SCTP_TSN_LOG_SIZE) { stcb->asoc.cumack_log_at = 0; } #endif asoc = &stcb->asoc; old_rwnd = asoc->peers_rwnd; if (SCTP_TSN_GT(asoc->last_acked_seq, cumack)) { /* old ack */ return; } else if (asoc->last_acked_seq == cumack) { /* Window update sack */ asoc->peers_rwnd = sctp_sbspace_sub(rwnd, (uint32_t)(asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)))); if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) { /* SWS sender side engages */ asoc->peers_rwnd = 0; } if (asoc->peers_rwnd > old_rwnd) { goto again; } return; } /* First setup for CC stuff */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (SCTP_TSN_GT(cumack, net->cwr_window_tsn)) { /* Drag along the window_tsn for cwr's */ net->cwr_window_tsn = cumack; } net->prev_cwnd = net->cwnd; net->net_ack = 0; net->net_ack2 = 0; /* * CMT: Reset CUC and Fast recovery algo variables before * SACK processing */ net->new_pseudo_cumack = 0; net->will_exit_fast_recovery = 0; if (stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) { (*stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) (stcb, net); } } if (!TAILQ_EMPTY(&asoc->sent_queue)) { tp1 = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead); send_s = tp1->rec.data.tsn + 1; } else { send_s = asoc->sending_seq; } if (SCTP_TSN_GE(cumack, send_s)) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; *abort_now = 1; /* XXX */ snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x", cumack, send_s); op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_21; sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); return; } asoc->this_sack_highest_gap = cumack; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INDATA, __LINE__); } stcb->asoc.overall_error_count = 0; if (SCTP_TSN_GT(cumack, asoc->last_acked_seq)) { /* process the new consecutive TSN first */ TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) { if (SCTP_TSN_GE(cumack, tp1->rec.data.tsn)) { if (tp1->sent == 
SCTP_DATAGRAM_UNSENT) { SCTP_PRINTF("Warning, an unsent is now acked?\n"); } if (tp1->sent < SCTP_DATAGRAM_ACKED) { /* * If it is less than ACKED, it is * now no longer in flight. Higher * values may occur during marking */ if (tp1->sent < SCTP_DATAGRAM_RESEND) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA, tp1->whoTo->flight_size, tp1->book_size, (uint32_t)(uintptr_t)tp1->whoTo, tp1->rec.data.tsn); } sctp_flight_size_decrease(tp1); if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, tp1); } /* sa_ignore NO_NULL_CHK */ sctp_total_flight_decrease(stcb, tp1); } tp1->whoTo->net_ack += tp1->send_size; if (tp1->snd_count < 2) { /* * True non-retransmitted * chunk */ tp1->whoTo->net_ack2 += tp1->send_size; /* update RTO too? */ if (tp1->do_rtt) { if (rto_ok) { tp1->whoTo->RTO = /* * sa_ignore * NO_NULL_CHK */ sctp_calculate_rto(stcb, asoc, tp1->whoTo, &tp1->sent_rcv_time, sctp_align_safe_nocopy, SCTP_RTT_FROM_DATA); rto_ok = 0; } if (tp1->whoTo->rto_needed == 0) { tp1->whoTo->rto_needed = 1; } tp1->do_rtt = 0; } } /* * CMT: CUCv2 algorithm. From the * cumack'd TSNs, for each TSN being * acked for the first time, set the * following variables for the * corresp destination. * new_pseudo_cumack will trigger a * cwnd update. * find_(rtx_)pseudo_cumack will * trigger search for the next * expected (rtx-)pseudo-cumack. */ tp1->whoTo->new_pseudo_cumack = 1; tp1->whoTo->find_pseudo_cumack = 1; tp1->whoTo->find_rtx_pseudo_cumack = 1; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { /* sa_ignore NO_NULL_CHK */ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.tsn, SCTP_CWND_LOG_FROM_SACK); } } if (tp1->sent == SCTP_DATAGRAM_RESEND) { sctp_ucount_decr(asoc->sent_queue_retran_cnt); } if (tp1->rec.data.chunk_was_revoked) { /* deflate the cwnd */ tp1->whoTo->cwnd -= tp1->book_size; tp1->rec.data.chunk_was_revoked = 0; } if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) { if (asoc->strmout[tp1->rec.data.sid].chunks_on_queues > 0) { asoc->strmout[tp1->rec.data.sid].chunks_on_queues--; #ifdef INVARIANTS } else { panic("No chunks on the queues for sid %u.", tp1->rec.data.sid); #endif } } if ((asoc->strmout[tp1->rec.data.sid].chunks_on_queues == 0) && (asoc->strmout[tp1->rec.data.sid].state == SCTP_STREAM_RESET_PENDING) && TAILQ_EMPTY(&asoc->strmout[tp1->rec.data.sid].outqueue)) { asoc->trigger_reset = 1; } TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next); if (tp1->data) { /* sa_ignore NO_NULL_CHK */ sctp_free_bufspace(stcb, asoc, tp1, 1); sctp_m_freem(tp1->data); tp1->data = NULL; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(asoc->last_acked_seq, cumack, tp1->rec.data.tsn, 0, 0, SCTP_LOG_FREE_SENT); } asoc->sent_queue_cnt--; sctp_free_a_chunk(stcb, tp1, SCTP_SO_NOT_LOCKED); } else { break; } } } /* sa_ignore NO_NULL_CHK */ if (stcb->sctp_socket) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif SOCKBUF_LOCK(&stcb->sctp_socket->so_snd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) { /* sa_ignore NO_NULL_CHK */ sctp_wakeup_log(stcb, 1, SCTP_WAKESND_FROM_SACK); } #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { /* assoc
was freed while we were unlocked */ SCTP_SOCKET_UNLOCK(so, 1); return; } #endif sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) { sctp_wakeup_log(stcb, 1, SCTP_NOWAKE_FROM_SACK); } } /* JRS - Use the congestion control given in the CC module */ if ((asoc->last_acked_seq != cumack) && (ecne_seen == 0)) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (net->net_ack2 > 0) { /* * Karn's rule applies to clearing error * count, this is optional. */ net->error_count = 0; if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { /* addr came good */ net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED); } if (net == stcb->asoc.primary_destination) { if (stcb->asoc.alternate) { /* * release the alternate, * primary is good */ sctp_free_remote_addr(stcb->asoc.alternate); stcb->asoc.alternate = NULL; } } if (net->dest_state & SCTP_ADDR_PF) { net->dest_state &= ~SCTP_ADDR_PF; sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_22); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net); /* Done with this net */ net->net_ack = 0; } /* restore any doubled timers */ net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv; if (net->RTO < stcb->asoc.minrto) { net->RTO = stcb->asoc.minrto; } if (net->RTO > stcb->asoc.maxrto) { net->RTO = stcb->asoc.maxrto; } } } asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, 1, 0, 0); } asoc->last_acked_seq = cumack; if (TAILQ_EMPTY(&asoc->sent_queue)) { /* nothing left in-flight */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { net->flight_size = 0; net->partial_bytes_acked = 0; } asoc->total_flight = 0; asoc->total_flight_count = 0; } /* RWND update */ asoc->peers_rwnd = sctp_sbspace_sub(rwnd, (uint32_t)(asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)))); if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) { /* SWS sender side engages */ asoc->peers_rwnd = 0; } if (asoc->peers_rwnd > old_rwnd) { win_probe_recovery = 1; } /* Now assure a timer where data is queued at */ again: j = 0; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { int to_ticks; if (win_probe_recovery && (net->window_probe)) { win_probe_recovered = 1; /* * Find first chunk that was used with window probe * and clear the sent */ /* sa_ignore FREED_MEMORY */ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (tp1->window_probe) { /* move back to data send queue */ sctp_window_probe_recovery(stcb, asoc, tp1); break; } } } if (net->RTO == 0) { to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto); } else { to_ticks = MSEC_TO_TICKS(net->RTO); } if (net->flight_size) { j++; (void)SCTP_OS_TIMER_START(&net->rxt_timer.timer, to_ticks, sctp_timeout_handler, &net->rxt_timer); if (net->window_probe) { net->window_probe = 0; } } else { if (net->window_probe) { /* * In window probes we must assure a timer * is still running there */ net->window_probe = 0; if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { SCTP_OS_TIMER_START(&net->rxt_timer.timer, to_ticks, sctp_timeout_handler, &net->rxt_timer); } } else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_23); } } } if ((j == 0) && (!TAILQ_EMPTY(&asoc->sent_queue)) 
&& (asoc->sent_queue_retran_cnt == 0) && (win_probe_recovered == 0) && (done_once == 0)) { /* * huh, this should not happen unless all packets are * PR-SCTP and marked to skip of course. */ if (sctp_fs_audit(asoc)) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { net->flight_size = 0; } asoc->total_flight = 0; asoc->total_flight_count = 0; asoc->sent_queue_retran_cnt = 0; TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (tp1->sent < SCTP_DATAGRAM_RESEND) { sctp_flight_size_increase(tp1); sctp_total_flight_increase(stcb, tp1); } else if (tp1->sent == SCTP_DATAGRAM_RESEND) { sctp_ucount_incr(asoc->sent_queue_retran_cnt); } } } done_once = 1; goto again; } /**********************************/ /* Now what about shutdown issues */ /**********************************/ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue)) { /* nothing left on sendqueue.. consider done */ /* clean up */ if ((asoc->stream_queue_cnt == 1) && ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) || (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) && ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc))) { asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT; } + if (((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) || + (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) && + (asoc->stream_queue_cnt == 1) && + (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) { + struct mbuf *op_err; + + *abort_now = 1; + /* XXX */ + op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); + stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24; + sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); + return; + } if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) && (asoc->stream_queue_cnt == 0)) { - if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) { - /* Need to abort here */ - struct mbuf *op_err; + struct sctp_nets *netp; - abort_out_now: - *abort_now = 1; - /* XXX */ - op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); - stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24; - sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); - return; + if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || + (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { + SCTP_STAT_DECR_GAUGE32(sctps_currestab); + } + SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); + SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); + sctp_stop_timers_for_shutdown(stcb); + if (asoc->alternate) { + netp = asoc->alternate; } else { - struct sctp_nets *netp; - - if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || - (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { - SCTP_STAT_DECR_GAUGE32(sctps_currestab); - } - SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); - SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); - sctp_stop_timers_for_shutdown(stcb); - if (asoc->alternate) { - netp = asoc->alternate; - } else { - netp = asoc->primary_destination; - } - sctp_send_shutdown(stcb, netp); - sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, - stcb->sctp_ep, stcb, netp); - sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, - stcb->sctp_ep, stcb, netp); + netp = asoc->primary_destination; } + sctp_send_shutdown(stcb, netp); + sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, + stcb->sctp_ep, stcb, netp); + sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, + stcb->sctp_ep, stcb, netp); } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) && (asoc->stream_queue_cnt == 0)) { struct sctp_nets *netp; - if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) { - goto abort_out_now; - } 
SCTP_STAT_DECR_GAUGE32(sctps_currestab); SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); sctp_stop_timers_for_shutdown(stcb); if (asoc->alternate) { netp = asoc->alternate; } else { netp = asoc->primary_destination; } sctp_send_shutdown_ack(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, netp); } } /*********************************************/ /* Here we perform PR-SCTP procedures */ /* (section 4.2) */ /*********************************************/ /* C1. update advancedPeerAckPoint */ if (SCTP_TSN_GT(cumack, asoc->advanced_peer_ack_point)) { asoc->advanced_peer_ack_point = cumack; } /* PR-Sctp issues need to be addressed too */ if ((asoc->prsctp_supported) && (asoc->pr_sctp_cnt > 0)) { struct sctp_tmit_chunk *lchk; uint32_t old_adv_peer_ack_point; old_adv_peer_ack_point = asoc->advanced_peer_ack_point; lchk = sctp_try_advance_peer_ack_point(stcb, asoc); /* C3. See if we need to send a Fwd-TSN */ if (SCTP_TSN_GT(asoc->advanced_peer_ack_point, cumack)) { /* * ISSUE with ECN, see FWD-TSN processing. */ if (SCTP_TSN_GT(asoc->advanced_peer_ack_point, old_adv_peer_ack_point)) { send_forward_tsn(stcb, asoc); } else if (lchk) { /* try to FR fwd-tsn's that get lost too */ if (lchk->rec.data.fwd_tsn_cnt >= 3) { send_forward_tsn(stcb, asoc); } } } if (lchk) { /* Assure a timer is up */ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, lchk->whoTo); } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_RWND_LOGGING_ENABLE) { sctp_misc_ints(SCTP_SACK_RWND_UPDATE, rwnd, stcb->asoc.peers_rwnd, stcb->asoc.total_flight, stcb->asoc.total_output_queue_size); } } void sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, struct sctp_tcb *stcb, uint16_t num_seg, uint16_t num_nr_seg, uint16_t num_dup, int *abort_now, uint8_t flags, uint32_t cum_ack, uint32_t rwnd, int ecne_seen) { struct sctp_association *asoc; struct sctp_tmit_chunk *tp1, *tp2; uint32_t last_tsn, biggest_tsn_acked, biggest_tsn_newly_acked, this_sack_lowest_newack; uint16_t wake_him = 0; uint32_t send_s = 0; long j; int accum_moved = 0; int will_exit_fast_recovery = 0; uint32_t a_rwnd, old_rwnd; int win_probe_recovery = 0; int win_probe_recovered = 0; struct sctp_nets *net = NULL; int done_once; int rto_ok = 1; uint8_t reneged_all = 0; uint8_t cmt_dac_flag; /* * we take any chance we can to service our queues since we cannot * get awoken when the socket is read from :< */ /* * Now perform the actual SACK handling: 1) Verify that it is not an * old sack, if so discard. 2) If there is nothing left in the send * queue (cum-ack is equal to last acked) then you have a duplicate * too, update any rwnd change and verify no timers are running. * Then return. 3) Process any new consecutive data, i.e. the cum-ack * moved; process these first and note that it moved. 4) Process any * sack blocks. 5) Drop any acked from the queue. 6) Check for any * revoked blocks and mark. 7) Update the cwnd. 8) Nothing left, * sync up flightsizes and things, stop all timers and also check * for shutdown_pending state. If so then go ahead and send off the * shutdown. If in shutdown recv, send off the shutdown-ack and * start that timer, Ret. 9) Strike any non-acked things and do FR * procedure if needed being sure to set the FR flag. 10) Do pr-sctp * procedures. 11) Apply any FR penalties. 12) Assure we will SACK * if in shutdown_recv state.
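 * (Illustrative example, not in the original source: the peers_rwnd
 * updates in these SACK handlers compute a_rwnd - (total_flight +
 * total_flight_count * sctp_peer_chunk_oh); assuming the default
 * per-chunk overhead of 256, an advertised 64000 with 3000 bytes in
 * flight across 3 chunks gives 64000 - 3768 = 60232, clamped to 0
 * when it drops below the SWS sender threshold.)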
*/ SCTP_TCB_LOCK_ASSERT(stcb); /* CMT DAC algo */ this_sack_lowest_newack = 0; SCTP_STAT_INCR(sctps_slowpath_sack); last_tsn = cum_ack; cmt_dac_flag = flags & SCTP_SACK_CMT_DAC; #ifdef SCTP_ASOCLOG_OF_TSNS stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cum_ack; stcb->asoc.cumack_log_at++; if (stcb->asoc.cumack_log_at > SCTP_TSN_LOG_SIZE) { stcb->asoc.cumack_log_at = 0; } #endif a_rwnd = rwnd; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_SACK_ARRIVALS_ENABLE) { sctp_misc_ints(SCTP_SACK_LOG_NORMAL, cum_ack, rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd); } old_rwnd = stcb->asoc.peers_rwnd; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INDATA, __LINE__); } stcb->asoc.overall_error_count = 0; asoc = &stcb->asoc; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(asoc->last_acked_seq, cum_ack, 0, num_seg, num_dup, SCTP_LOG_NEW_SACK); } if ((num_dup) && (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE)) { uint16_t i; uint32_t *dupdata, dblock; for (i = 0; i < num_dup; i++) { dupdata = (uint32_t *)sctp_m_getptr(m, offset_dup + i * sizeof(uint32_t), sizeof(uint32_t), (uint8_t *)&dblock); if (dupdata == NULL) { break; } sctp_log_fr(*dupdata, 0, 0, SCTP_FR_DUPED); } } /* reality check */ if (!TAILQ_EMPTY(&asoc->sent_queue)) { tp1 = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead); send_s = tp1->rec.data.tsn + 1; } else { tp1 = NULL; send_s = asoc->sending_seq; } if (SCTP_TSN_GE(cum_ack, send_s)) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; /* * no way, we have not even sent this TSN out yet. Peer is * hopelessly messed up with us. */ SCTP_PRINTF("NEW cum_ack:%x send_s:%x is smaller or equal\n", cum_ack, send_s); if (tp1) { SCTP_PRINTF("Got send_s from tsn:%x + 1 of tp1: %p\n", tp1->rec.data.tsn, (void *)tp1); } hopeless_peer: *abort_now = 1; /* XXX */ snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x", cum_ack, send_s); op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25; sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); return; } /**********************/ /* 1) check the range */ /**********************/ if (SCTP_TSN_GT(asoc->last_acked_seq, last_tsn)) { /* acking something behind */ return; } /* update the Rwnd of the peer */ if (TAILQ_EMPTY(&asoc->sent_queue) && TAILQ_EMPTY(&asoc->send_queue) && (asoc->stream_queue_cnt == 0)) { /* nothing left on send/sent and strmq */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) { sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK, asoc->peers_rwnd, 0, 0, a_rwnd); } asoc->peers_rwnd = a_rwnd; if (asoc->sent_queue_retran_cnt) { asoc->sent_queue_retran_cnt = 0; } if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) { /* SWS sender side engages */ asoc->peers_rwnd = 0; } /* stop any timers */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_26); net->partial_bytes_acked = 0; net->flight_size = 0; } asoc->total_flight = 0; asoc->total_flight_count = 0; return; } /* * We init netAckSz and netAckSz2 to 0. These are used to track 2 * things. The total byte count acked is tracked in netAckSz AND * netAck2 is used to track the total bytes acked that are unambiguous and were never retransmitted.
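 * (Illustrative aside, not in the original source: net_ack2 is the
 * subset of net_ack contributed by chunks with snd_count < 2; it is
 * what clears a destination's error_count under Karn's rule and
 * gates the RTT sampling done during cum-ack processing.)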
We track these on a per * destination address basis. */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (SCTP_TSN_GT(cum_ack, net->cwr_window_tsn)) { /* Drag along the window_tsn for cwr's */ net->cwr_window_tsn = cum_ack; } net->prev_cwnd = net->cwnd; net->net_ack = 0; net->net_ack2 = 0; /* * CMT: Reset CUC and Fast recovery algo variables before * SACK processing */ net->new_pseudo_cumack = 0; net->will_exit_fast_recovery = 0; if (stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) { (*stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) (stcb, net); } } /* process the new consecutive TSN first */ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (SCTP_TSN_GE(last_tsn, tp1->rec.data.tsn)) { if (tp1->sent != SCTP_DATAGRAM_UNSENT) { accum_moved = 1; if (tp1->sent < SCTP_DATAGRAM_ACKED) { /* * If it is less than ACKED, it is * now no longer in flight. Higher * values may occur during marking */ if ((tp1->whoTo->dest_state & SCTP_ADDR_UNCONFIRMED) && (tp1->snd_count < 2)) { /* * If there was no retran * and the address is * unconfirmed and we sent * there and are now * sacked... it's confirmed, * mark it so. */ tp1->whoTo->dest_state &= ~SCTP_ADDR_UNCONFIRMED; } if (tp1->sent < SCTP_DATAGRAM_RESEND) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA, tp1->whoTo->flight_size, tp1->book_size, (uint32_t)(uintptr_t)tp1->whoTo, tp1->rec.data.tsn); } sctp_flight_size_decrease(tp1); sctp_total_flight_decrease(stcb, tp1); if (stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) { (*stcb->asoc.cc_functions.sctp_cwnd_update_tsn_acknowledged) (tp1->whoTo, tp1); } } tp1->whoTo->net_ack += tp1->send_size; /* CMT SFR and DAC algos */ this_sack_lowest_newack = tp1->rec.data.tsn; tp1->whoTo->saw_newack = 1; if (tp1->snd_count < 2) { /* * True non-retransmitted * chunk */ tp1->whoTo->net_ack2 += tp1->send_size; /* update RTO too? */ if (tp1->do_rtt) { if (rto_ok) { tp1->whoTo->RTO = sctp_calculate_rto(stcb, asoc, tp1->whoTo, &tp1->sent_rcv_time, sctp_align_safe_nocopy, SCTP_RTT_FROM_DATA); rto_ok = 0; } if (tp1->whoTo->rto_needed == 0) { tp1->whoTo->rto_needed = 1; } tp1->do_rtt = 0; } } /* * CMT: CUCv2 algorithm. From the * cumack'd TSNs, for each TSN being * acked for the first time, set the * following variables for the * corresp destination. * new_pseudo_cumack will trigger a * cwnd update. * find_(rtx_)pseudo_cumack will * trigger search for the next * expected (rtx-)pseudo-cumack.
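 * (Illustrative aside, not in the original source: per-destination
 * trackers are kept separately for first transmissions
 * (pseudo_cumack) and retransmissions (rtx_pseudo_cumack); when
 * either one is newly acked, new_pseudo_cumack lets the congestion
 * control module treat it like a cum-ack advance for that path.)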
*/ tp1->whoTo->new_pseudo_cumack = 1; tp1->whoTo->find_pseudo_cumack = 1; tp1->whoTo->find_rtx_pseudo_cumack = 1; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(asoc->last_acked_seq, cum_ack, tp1->rec.data.tsn, 0, 0, SCTP_LOG_TSN_ACKED); } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.tsn, SCTP_CWND_LOG_FROM_SACK); } } if (tp1->sent == SCTP_DATAGRAM_RESEND) { sctp_ucount_decr(asoc->sent_queue_retran_cnt); #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xB3, (asoc->sent_queue_retran_cnt & 0x000000ff)); #endif } if (tp1->rec.data.chunk_was_revoked) { /* deflate the cwnd */ tp1->whoTo->cwnd -= tp1->book_size; tp1->rec.data.chunk_was_revoked = 0; } if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) { tp1->sent = SCTP_DATAGRAM_ACKED; } } } else { break; } } biggest_tsn_newly_acked = biggest_tsn_acked = last_tsn; /* always set this up to cum-ack */ asoc->this_sack_highest_gap = last_tsn; if ((num_seg > 0) || (num_nr_seg > 0)) { /* * CMT: SFR algo (and HTNA) - this_sack_highest_newack has * to be greater than the cumack. Also reset saw_newack to 0 * for all dests. */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { net->saw_newack = 0; net->this_sack_highest_newack = last_tsn; } /* * thisSackHighestGap will increase while handling NEW * segments this_sack_highest_newack will increase while * handling NEWLY ACKED chunks. this_sack_lowest_newack is * used for CMT DAC algo. saw_newack will also change. */ if (sctp_handle_segments(m, &offset_seg, stcb, asoc, last_tsn, &biggest_tsn_acked, &biggest_tsn_newly_acked, &this_sack_lowest_newack, num_seg, num_nr_seg, &rto_ok)) { wake_him++; } /* * validate the biggest_tsn_acked in the gap acks if strict * adherence is wanted. */ if (SCTP_TSN_GE(biggest_tsn_acked, send_s)) { /* * peer is either confused or we are under attack. * We must abort. */ SCTP_PRINTF("Hopeless peer! 
biggest_tsn_acked:%x largest seq:%x\n", biggest_tsn_acked, send_s); goto hopeless_peer; } } /*******************************************/ /* cancel ALL T3-send timer if accum moved */ /*******************************************/ if (asoc->sctp_cmt_on_off > 0) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (net->new_pseudo_cumack) sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_27); } } else { if (accum_moved) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_28); } } } /********************************************/ /* drop the acked chunks from the sentqueue */ /********************************************/ asoc->last_acked_seq = cum_ack; TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) { if (SCTP_TSN_GT(tp1->rec.data.tsn, cum_ack)) { break; } if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) { if (asoc->strmout[tp1->rec.data.sid].chunks_on_queues > 0) { asoc->strmout[tp1->rec.data.sid].chunks_on_queues--; #ifdef INVARIANTS } else { panic("No chunks on the queues for sid %u.", tp1->rec.data.sid); #endif } } if ((asoc->strmout[tp1->rec.data.sid].chunks_on_queues == 0) && (asoc->strmout[tp1->rec.data.sid].state == SCTP_STREAM_RESET_PENDING) && TAILQ_EMPTY(&asoc->strmout[tp1->rec.data.sid].outqueue)) { asoc->trigger_reset = 1; } TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next); if (PR_SCTP_ENABLED(tp1->flags)) { if (asoc->pr_sctp_cnt != 0) asoc->pr_sctp_cnt--; } asoc->sent_queue_cnt--; if (tp1->data) { /* sa_ignore NO_NULL_CHK */ sctp_free_bufspace(stcb, asoc, tp1, 1); sctp_m_freem(tp1->data); tp1->data = NULL; if (asoc->prsctp_supported && PR_SCTP_BUF_ENABLED(tp1->flags)) { asoc->sent_queue_cnt_removeable--; } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) { sctp_log_sack(asoc->last_acked_seq, cum_ack, tp1->rec.data.tsn, 0, 0, SCTP_LOG_FREE_SENT); } sctp_free_a_chunk(stcb, tp1, SCTP_SO_NOT_LOCKED); wake_him++; } if (TAILQ_EMPTY(&asoc->sent_queue) && (asoc->total_flight > 0)) { #ifdef INVARIANTS panic("Warning flight size is positive and should be 0"); #else SCTP_PRINTF("Warning flight size incorrect should be 0 is %d\n", asoc->total_flight); #endif asoc->total_flight = 0; } /* sa_ignore NO_NULL_CHK */ if ((wake_him) && (stcb->sctp_socket)) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif SOCKBUF_LOCK(&stcb->sctp_socket->so_snd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) { sctp_wakeup_log(stcb, wake_him, SCTP_WAKESND_FROM_SACK); } #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { /* assoc was freed while we were unlocked */ SCTP_SOCKET_UNLOCK(so, 1); return; } #endif sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) { sctp_wakeup_log(stcb, wake_him, SCTP_NOWAKE_FROM_SACK); } } if (asoc->fast_retran_loss_recovery && accum_moved) { if (SCTP_TSN_GE(asoc->last_acked_seq, asoc->fast_recovery_tsn)) { /* Setup so we will exit RFC2582 fast recovery */ will_exit_fast_recovery = 1; } } /* * Check for revoked fragments: * * if Previous sack - Had no frags then we 
can't have any revoked if * Previous sack - Had frag's then - If we now have frags aka * num_seg > 0 call sctp_check_for_revoked() to tell if peer revoked * some of them. else - The peer revoked all ACKED fragments, since * we had some before and now we have NONE. */ if (num_seg) { sctp_check_for_revoked(stcb, asoc, cum_ack, biggest_tsn_acked); asoc->saw_sack_with_frags = 1; } else if (asoc->saw_sack_with_frags) { int cnt_revoked = 0; /* Peer revoked all dg's marked or acked */ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (tp1->sent == SCTP_DATAGRAM_ACKED) { tp1->sent = SCTP_DATAGRAM_SENT; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE, tp1->whoTo->flight_size, tp1->book_size, (uint32_t)(uintptr_t)tp1->whoTo, tp1->rec.data.tsn); } sctp_flight_size_increase(tp1); sctp_total_flight_increase(stcb, tp1); tp1->rec.data.chunk_was_revoked = 1; /* * To ensure that this increase in * flightsize, which is artificial, does not * throttle the sender, we also increase the * cwnd artificially. */ tp1->whoTo->cwnd += tp1->book_size; cnt_revoked++; } } if (cnt_revoked) { reneged_all = 1; } asoc->saw_sack_with_frags = 0; } if (num_nr_seg > 0) asoc->saw_sack_with_nr_frags = 1; else asoc->saw_sack_with_nr_frags = 0; /* JRS - Use the congestion control given in the CC module */ if (ecne_seen == 0) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (net->net_ack2 > 0) { /* * Karn's rule applies to clearing error * count, this is optional. */ net->error_count = 0; if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { /* addr came good */ net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED); } if (net == stcb->asoc.primary_destination) { if (stcb->asoc.alternate) { /* * release the alternate, * primary is good */ sctp_free_remote_addr(stcb->asoc.alternate); stcb->asoc.alternate = NULL; } } if (net->dest_state & SCTP_ADDR_PF) { net->dest_state &= ~SCTP_ADDR_PF; sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_29); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net); /* Done with this net */ net->net_ack = 0; } /* restore any doubled timers */ net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv; if (net->RTO < stcb->asoc.minrto) { net->RTO = stcb->asoc.minrto; } if (net->RTO > stcb->asoc.maxrto) { net->RTO = stcb->asoc.maxrto; } } } asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery); } if (TAILQ_EMPTY(&asoc->sent_queue)) { /* nothing left in-flight */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { /* stop all timers */ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30); net->flight_size = 0; net->partial_bytes_acked = 0; } asoc->total_flight = 0; asoc->total_flight_count = 0; } /**********************************/ /* Now what about shutdown issues */ /**********************************/ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue)) { /* nothing left on sendqueue.. 
consider done */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) { sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK, asoc->peers_rwnd, 0, 0, a_rwnd); } asoc->peers_rwnd = a_rwnd; if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) { /* SWS sender side engages */ asoc->peers_rwnd = 0; } /* clean up */ if ((asoc->stream_queue_cnt == 1) && ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) || (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) && ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc))) { asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT; } + if (((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) || + (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) && + (asoc->stream_queue_cnt == 1) && + (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) { + struct mbuf *op_err; + + *abort_now = 1; + /* XXX */ + op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); + stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24; + sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); + return; + } if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) && (asoc->stream_queue_cnt == 0)) { - if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) { - /* Need to abort here */ - struct mbuf *op_err; + struct sctp_nets *netp; - abort_out_now: - *abort_now = 1; - /* XXX */ - op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); - stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_31; - sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); - return; + if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || + (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { + SCTP_STAT_DECR_GAUGE32(sctps_currestab); + } + SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); + SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); + sctp_stop_timers_for_shutdown(stcb); + if (asoc->alternate) { + netp = asoc->alternate; } else { - struct sctp_nets *netp; - - if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || - (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { - SCTP_STAT_DECR_GAUGE32(sctps_currestab); - } - SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT); - SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); - sctp_stop_timers_for_shutdown(stcb); - if (asoc->alternate) { - netp = asoc->alternate; - } else { - netp = asoc->primary_destination; - } - sctp_send_shutdown(stcb, netp); - sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, - stcb->sctp_ep, stcb, netp); - sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, - stcb->sctp_ep, stcb, netp); + netp = asoc->primary_destination; } + sctp_send_shutdown(stcb, netp); + sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, + stcb->sctp_ep, stcb, netp); + sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, + stcb->sctp_ep, stcb, netp); return; } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) && (asoc->stream_queue_cnt == 0)) { struct sctp_nets *netp; - if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) { - goto abort_out_now; - } SCTP_STAT_DECR_GAUGE32(sctps_currestab); SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); sctp_stop_timers_for_shutdown(stcb); if (asoc->alternate) { netp = asoc->alternate; } else { netp = asoc->primary_destination; } sctp_send_shutdown_ack(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, netp); return; } } /* * Now here we are going to recycle net_ack for a different use... * HEADS UP. 
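
A side note on the restructured shutdown branch above: stripped of the kernel types, it is a three-way decision. The sketch below is a standalone model, not the committed code; the flattened struct and the helper name are invented for illustration.

#include <stdio.h>

enum action { ACT_NONE, ACT_ABORT, ACT_SEND_SHUTDOWN, ACT_SEND_SHUTDOWN_ACK };

struct assoc_view {		/* invented flattened view of the assoc */
	int shutdown_pending;	/* SCTP_STATE_SHUTDOWN_PENDING set? */
	int shutdown_received;	/* state == SCTP_STATE_SHUTDOWN_RECEIVED? */
	int partial_msg_left;	/* SCTP_STATE_PARTIAL_MSG_LEFT set? */
	unsigned stream_queue_cnt;
};

static enum action
shutdown_decision(const struct assoc_view *a)
{
	/* A queued-but-incomplete user message can never finish once we
	 * are shutting down, so the association has to be aborted. */
	if ((a->shutdown_pending || a->shutdown_received) &&
	    a->stream_queue_cnt == 1 && a->partial_msg_left)
		return (ACT_ABORT);
	if (a->shutdown_pending && a->stream_queue_cnt == 0)
		return (ACT_SEND_SHUTDOWN);
	if (a->shutdown_received && a->stream_queue_cnt == 0)
		return (ACT_SEND_SHUTDOWN_ACK);
	return (ACT_NONE);
}

int
main(void)
{
	struct assoc_view a = { 1, 0, 1, 1 };

	printf("action=%d\n", shutdown_decision(&a));	/* prints action=1 */
	return (0);
}

The point of the reordering in the hunk is that the abort case (a lone stream holding a partially sent user message) is now checked once, up front, instead of through the old abort_out_now goto reached from both branches.
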
*/ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { net->net_ack = 0; } /* * CMT DAC algorithm: If SACK DAC flag was 0, then no extra marking * to be done. Setting this_sack_lowest_newack to the cum_ack will * automatically ensure that. */ if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac) && (cmt_dac_flag == 0)) { this_sack_lowest_newack = cum_ack; } if ((num_seg > 0) || (num_nr_seg > 0)) { sctp_strike_gap_ack_chunks(stcb, asoc, biggest_tsn_acked, biggest_tsn_newly_acked, this_sack_lowest_newack, accum_moved); } /* JRS - Use the congestion control given in the CC module */ asoc->cc_functions.sctp_cwnd_update_after_fr(stcb, asoc); /* Now are we exiting loss recovery ? */ if (will_exit_fast_recovery) { /* Ok, we must exit fast recovery */ asoc->fast_retran_loss_recovery = 0; } if ((asoc->sat_t3_loss_recovery) && SCTP_TSN_GE(asoc->last_acked_seq, asoc->sat_t3_recovery_tsn)) { /* end satellite t3 loss recovery */ asoc->sat_t3_loss_recovery = 0; } /* * CMT Fast recovery */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (net->will_exit_fast_recovery) { /* Ok, we must exit fast recovery */ net->fast_retran_loss_recovery = 0; } } /* Adjust and set the new rwnd value */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) { sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK, asoc->peers_rwnd, asoc->total_flight, (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)), a_rwnd); } asoc->peers_rwnd = sctp_sbspace_sub(a_rwnd, (uint32_t)(asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)))); if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) { /* SWS sender side engages */ asoc->peers_rwnd = 0; } if (asoc->peers_rwnd > old_rwnd) { win_probe_recovery = 1; } /* * Now we must setup so we have a timer up for anyone with * outstanding data. */ done_once = 0; again: j = 0; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (win_probe_recovery && (net->window_probe)) { win_probe_recovered = 1; /*- * Find first chunk that was used with * window probe and clear the event. Put * it back into the send queue as if has * not been sent. */ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (tp1->window_probe) { sctp_window_probe_recovery(stcb, asoc, tp1); break; } } } if (net->flight_size) { j++; if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net); } if (net->window_probe) { net->window_probe = 0; } } else { if (net->window_probe) { /* * In window probes we must assure a timer * is still running there */ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net); } } else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); } } } if ((j == 0) && (!TAILQ_EMPTY(&asoc->sent_queue)) && (asoc->sent_queue_retran_cnt == 0) && (win_probe_recovered == 0) && (done_once == 0)) { /* * huh, this should not happen unless all packets are * PR-SCTP and marked to skip of course. 
*/ if (sctp_fs_audit(asoc)) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { net->flight_size = 0; } asoc->total_flight = 0; asoc->total_flight_count = 0; asoc->sent_queue_retran_cnt = 0; TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) { if (tp1->sent < SCTP_DATAGRAM_RESEND) { sctp_flight_size_increase(tp1); sctp_total_flight_increase(stcb, tp1); } else if (tp1->sent == SCTP_DATAGRAM_RESEND) { sctp_ucount_incr(asoc->sent_queue_retran_cnt); } } } done_once = 1; goto again; } /*********************************************/ /* Here we perform PR-SCTP procedures */ /* (section 4.2) */ /*********************************************/ /* C1. update advancedPeerAckPoint */ if (SCTP_TSN_GT(cum_ack, asoc->advanced_peer_ack_point)) { asoc->advanced_peer_ack_point = cum_ack; } /* C2. try to further move advancedPeerAckPoint ahead */ if ((asoc->prsctp_supported) && (asoc->pr_sctp_cnt > 0)) { struct sctp_tmit_chunk *lchk; uint32_t old_adv_peer_ack_point; old_adv_peer_ack_point = asoc->advanced_peer_ack_point; lchk = sctp_try_advance_peer_ack_point(stcb, asoc); /* C3. See if we need to send a Fwd-TSN */ if (SCTP_TSN_GT(asoc->advanced_peer_ack_point, cum_ack)) { /* * ISSUE with ECN, see FWD-TSN processing. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) { sctp_misc_ints(SCTP_FWD_TSN_CHECK, 0xee, cum_ack, asoc->advanced_peer_ack_point, old_adv_peer_ack_point); } if (SCTP_TSN_GT(asoc->advanced_peer_ack_point, old_adv_peer_ack_point)) { send_forward_tsn(stcb, asoc); } else if (lchk) { /* try to FR fwd-tsn's that get lost too */ if (lchk->rec.data.fwd_tsn_cnt >= 3) { send_forward_tsn(stcb, asoc); } } } if (lchk) { /* Assure a timer is up */ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, lchk->whoTo); } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_RWND_LOGGING_ENABLE) { sctp_misc_ints(SCTP_SACK_RWND_UPDATE, a_rwnd, stcb->asoc.peers_rwnd, stcb->asoc.total_flight, stcb->asoc.total_output_queue_size); } } void sctp_update_acked(struct sctp_tcb *stcb, struct sctp_shutdown_chunk *cp, int *abort_flag) { /* Copy cum-ack */ uint32_t cum_ack, a_rwnd; cum_ack = ntohl(cp->cumulative_tsn_ack); /* Arrange so a_rwnd does NOT change */ a_rwnd = stcb->asoc.peers_rwnd + stcb->asoc.total_flight; /* Now call the express sack handling */ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, abort_flag, 0); } static void sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb, struct sctp_stream_in *strmin) { struct sctp_queued_to_read *control, *ncontrol; struct sctp_association *asoc; uint32_t mid; int need_reasm_check = 0; asoc = &stcb->asoc; mid = strmin->last_mid_delivered; /* * First deliver anything prior to and including the stream no that * came in. 
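
Comparisons such as SCTP_TSN_GT(cum_ack, asoc->advanced_peer_ack_point) throughout this code are serial-number comparisons, not plain integer ones, so they stay correct when the 32-bit TSN space wraps. A minimal sketch of that comparison style follows; the macro bodies are illustrative, not copied from the SCTP headers.

#include <assert.h>
#include <stdint.h>

#define TSN_GT(a, b) (((a) != (b)) && ((uint32_t)((a) - (b)) < (1U << 31)))
#define TSN_GE(a, b) (((a) == (b)) || TSN_GT(a, b))

int
main(void)
{
	/* The ordering survives the 2^32 wrap of the TSN space. */
	assert(TSN_GT(0x00000001, 0xfffffff0));	/* 1 comes "after" 2^32-16 */
	assert(TSN_GT(1000, 999));
	assert(!TSN_GT(5, 5) && TSN_GE(5, 5));
	return (0);
}
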
*/ TAILQ_FOREACH_SAFE(control, &strmin->inqueue, next_instrm, ncontrol) { if (SCTP_MID_GE(asoc->idata_supported, mid, control->mid)) { /* this is deliverable now */ if (((control->sinfo_flags >> 8) & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) { if (control->on_strm_q) { if (control->on_strm_q == SCTP_ON_ORDERED) { TAILQ_REMOVE(&strmin->inqueue, control, next_instrm); } else if (control->on_strm_q == SCTP_ON_UNORDERED) { TAILQ_REMOVE(&strmin->uno_inqueue, control, next_instrm); #ifdef INVARIANTS } else { panic("strmin: %p ctl: %p unknown %d", strmin, control, control->on_strm_q); #endif } control->on_strm_q = 0; } /* subtract pending on streams */ if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); /* deliver it to at least the delivery-q */ if (stcb->sctp_socket) { sctp_mark_non_revokable(asoc, control->sinfo_tsn); sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED); } } else { /* Its a fragmented message */ if (control->first_frag_seen) { /* * Make it so this is next to * deliver, we restore later */ strmin->last_mid_delivered = control->mid - 1; need_reasm_check = 1; break; } } } else { /* no more delivery now. */ break; } } if (need_reasm_check) { int ret; ret = sctp_deliver_reasm_check(stcb, &stcb->asoc, strmin, SCTP_READ_LOCK_HELD); if (SCTP_MID_GT(asoc->idata_supported, mid, strmin->last_mid_delivered)) { /* Restore the next to deliver unless we are ahead */ strmin->last_mid_delivered = mid; } if (ret == 0) { /* Left the front Partial one on */ return; } need_reasm_check = 0; } /* * now we must deliver things in queue the normal way if any are * now ready. 
*/ mid = strmin->last_mid_delivered + 1; TAILQ_FOREACH_SAFE(control, &strmin->inqueue, next_instrm, ncontrol) { if (SCTP_MID_EQ(asoc->idata_supported, mid, control->mid)) { if (((control->sinfo_flags >> 8) & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) { /* this is deliverable now */ if (control->on_strm_q) { if (control->on_strm_q == SCTP_ON_ORDERED) { TAILQ_REMOVE(&strmin->inqueue, control, next_instrm); } else if (control->on_strm_q == SCTP_ON_UNORDERED) { TAILQ_REMOVE(&strmin->uno_inqueue, control, next_instrm); #ifdef INVARIANTS } else { panic("strmin: %p ctl: %p unknown %d", strmin, control, control->on_strm_q); #endif } control->on_strm_q = 0; } /* subtract pending on streams */ if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); /* deliver it to at least the delivery-q */ strmin->last_mid_delivered = control->mid; if (stcb->sctp_socket) { sctp_mark_non_revokable(asoc, control->sinfo_tsn); sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED); } mid = strmin->last_mid_delivered + 1; } else { /* Its a fragmented message */ if (control->first_frag_seen) { /* * Make it so this is next to * deliver */ strmin->last_mid_delivered = control->mid - 1; need_reasm_check = 1; break; } } } else { break; } } if (need_reasm_check) { (void)sctp_deliver_reasm_check(stcb, &stcb->asoc, strmin, SCTP_READ_LOCK_HELD); } } static void sctp_flush_reassm_for_str_seq(struct sctp_tcb *stcb, struct sctp_association *asoc, uint16_t stream, uint32_t mid, int ordered, uint32_t cumtsn) { struct sctp_queued_to_read *control; struct sctp_stream_in *strm; struct sctp_tmit_chunk *chk, *nchk; int cnt_removed = 0; /* * For now large messages held on the stream reasm that are complete * will be tossed too. We could in theory do more work to spin * through and stop after dumping one msg aka seeing the start of a * new msg at the head, and call the delivery function... to see if * it can be delivered... But for now we just dump everything on the * queue. 
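
The loop that follows is the usual in-order delivery pattern: hand messages up while their IDs are consecutive and complete, and stall on the first gap so reassembly can catch up. A toy model with an invented queue layout:

#include <stdio.h>
#include <stdint.h>

struct msg {
	uint32_t mid;	/* per-stream message id assigned by the sender */
	int complete;	/* all fragments present? */
};

/* Deliver consecutive, complete messages starting at *next_mid and
 * return how many were handed up; stop at the first gap or partial. */
static int
deliver_in_order(const struct msg *q, int n, uint32_t *next_mid)
{
	int i, delivered = 0;

	for (i = 0; i < n; i++) {
		if (q[i].mid != *next_mid || !q[i].complete)
			break;
		printf("deliver mid %u\n", (unsigned)q[i].mid);
		(*next_mid)++;
		delivered++;
	}
	return (delivered);
}

int
main(void)
{
	struct msg q[] = { { 10, 1 }, { 11, 1 }, { 13, 1 } };
	uint32_t next = 10;

	/* Delivers 10 and 11, then stalls because mid 12 is missing. */
	printf("delivered %d\n", deliver_in_order(q, 3, &next));
	return (0);
}
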
*/ strm = &asoc->strmin[stream]; control = sctp_find_reasm_entry(strm, mid, ordered, asoc->idata_supported); if (control == NULL) { /* Not found */ return; } if (!asoc->idata_supported && !ordered && SCTP_TSN_GT(control->fsn_included, cumtsn)) { return; } TAILQ_FOREACH_SAFE(chk, &control->reasm, sctp_next, nchk) { /* Purge hanging chunks */ if (!asoc->idata_supported && (ordered == 0)) { if (SCTP_TSN_GT(chk->rec.data.tsn, cumtsn)) { break; } } cnt_removed++; TAILQ_REMOVE(&control->reasm, chk, sctp_next); if (asoc->size_on_reasm_queue >= chk->send_size) { asoc->size_on_reasm_queue -= chk->send_size; } else { #ifdef INVARIANTS panic("size_on_reasm_queue = %u smaller than chunk length %u", asoc->size_on_reasm_queue, chk->send_size); #else asoc->size_on_reasm_queue = 0; #endif } sctp_ucount_decr(asoc->cnt_on_reasm_queue); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } if (!TAILQ_EMPTY(&control->reasm)) { /* This has to be old data, unordered */ if (control->data) { sctp_m_freem(control->data); control->data = NULL; } sctp_reset_a_control(control, stcb->sctp_ep, cumtsn); chk = TAILQ_FIRST(&control->reasm); if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) { TAILQ_REMOVE(&control->reasm, chk, sctp_next); sctp_add_chk_to_control(control, strm, stcb, asoc, chk, SCTP_READ_LOCK_HELD); } sctp_deliver_reasm_check(stcb, asoc, strm, SCTP_READ_LOCK_HELD); return; } if (control->on_strm_q == SCTP_ON_ORDERED) { TAILQ_REMOVE(&strm->inqueue, control, next_instrm); if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); control->on_strm_q = 0; } else if (control->on_strm_q == SCTP_ON_UNORDERED) { TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm); control->on_strm_q = 0; #ifdef INVARIANTS } else if (control->on_strm_q) { panic("strm: %p ctl: %p unknown %d", strm, control, control->on_strm_q); #endif } control->on_strm_q = 0; if (control->on_read_q == 0) { sctp_free_remote_addr(control->whoFrom); if (control->data) { sctp_m_freem(control->data); control->data = NULL; } sctp_free_a_readq(stcb, control); } } void sctp_handle_forward_tsn(struct sctp_tcb *stcb, struct sctp_forward_tsn_chunk *fwd, int *abort_flag, struct mbuf *m, int offset) { /* The pr-sctp fwd tsn */ /* * here we will perform all the data receiver side steps for * processing FwdTSN, as required in by pr-sctp draft: * * Assume we get FwdTSN(x): * * 1) update local cumTSN to x 2) try to further advance cumTSN to x * + others we have 3) examine and update re-ordering queue on * pr-in-streams 4) clean up re-assembly queue 5) Send a sack to * report where we are. */ struct sctp_association *asoc; uint32_t new_cum_tsn, gap; unsigned int i, fwd_sz, m_size; uint32_t str_seq; struct sctp_stream_in *strm; struct sctp_queued_to_read *control, *sv; asoc = &stcb->asoc; if ((fwd_sz = ntohs(fwd->ch.chunk_length)) < sizeof(struct sctp_forward_tsn_chunk)) { SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size too small/big fwd-tsn\n"); return; } m_size = (stcb->asoc.mapping_array_size << 3); /*************************************************************/ /* 1. 
Here we update local cumTSN and shift the bitmap array */ /*************************************************************/ new_cum_tsn = ntohl(fwd->new_cumulative_tsn); if (SCTP_TSN_GE(asoc->cumulative_tsn, new_cum_tsn)) { /* Already got there ... */ return; } /* * now we know the new TSN is more advanced, let's find the actual * gap */ SCTP_CALC_TSN_TO_GAP(gap, new_cum_tsn, asoc->mapping_array_base_tsn); asoc->cumulative_tsn = new_cum_tsn; if (gap >= m_size) { if ((long)gap > sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv)) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; /* * out of range (of single byte chunks in the rwnd I * give out). This must be an attacker. */ *abort_flag = 1; snprintf(msg, sizeof(msg), "New cum ack %8.8x too high, highest TSN %8.8x", new_cum_tsn, asoc->highest_tsn_inside_map); op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33; sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); return; } SCTP_STAT_INCR(sctps_fwdtsn_map_over); memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size); asoc->mapping_array_base_tsn = new_cum_tsn + 1; asoc->highest_tsn_inside_map = new_cum_tsn; memset(stcb->asoc.nr_mapping_array, 0, stcb->asoc.mapping_array_size); asoc->highest_tsn_inside_nr_map = new_cum_tsn; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(0, 3, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); } } else { SCTP_TCB_LOCK_ASSERT(stcb); for (i = 0; i <= gap; i++) { if (!SCTP_IS_TSN_PRESENT(asoc->mapping_array, i) && !SCTP_IS_TSN_PRESENT(asoc->nr_mapping_array, i)) { SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, i); if (SCTP_TSN_GT(asoc->mapping_array_base_tsn + i, asoc->highest_tsn_inside_nr_map)) { asoc->highest_tsn_inside_nr_map = asoc->mapping_array_base_tsn + i; } } } } /*************************************************************/ /* 2. Clear up re-assembly queue */ /*************************************************************/ /* This is now done as part of clearing up the stream/seq */ if (asoc->idata_supported == 0) { uint16_t sid; /* Flush all the un-ordered data based on cum-tsn */ SCTP_INP_READ_LOCK(stcb->sctp_ep); for (sid = 0; sid < asoc->streamincnt; sid++) { sctp_flush_reassm_for_str_seq(stcb, asoc, sid, 0, 0, new_cum_tsn); } SCTP_INP_READ_UNLOCK(stcb->sctp_ep); } /*******************************************************/ /* 3. Update the PR-stream re-ordering queues and fix */ /* delivery issues as needed. */ /*******************************************************/ fwd_sz -= sizeof(*fwd); if (m && fwd_sz) { /* New method. 
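
The gap computed by SCTP_CALC_TSN_TO_GAP() above is, in effect, the modular distance from the mapping array's base TSN to the new cum-TSN; a self-contained check of that arithmetic (helper name invented):

#include <assert.h>
#include <stdint.h>

static uint32_t
tsn_to_gap(uint32_t tsn, uint32_t base_tsn)
{
	return (tsn - base_tsn);	/* well defined mod 2^32 in C */
}

int
main(void)
{
	/* Ordinary case: TSN 105 against base 100 lands at bit 5. */
	assert(tsn_to_gap(105, 100) == 5);
	/* Wrap case: base just below 2^32, new cum-TSN just past zero. */
	assert(tsn_to_gap(3, 0xfffffffeU) == 5);
	/* A gap of mapping_array_size * 8 or more falls outside what the
	 * bitmap can describe, which is the overflow branch taken above. */
	return (0);
}
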
*/ unsigned int num_str; uint32_t mid, cur_mid; uint16_t sid; uint16_t ordered, flags; struct sctp_strseq *stseq, strseqbuf; struct sctp_strseq_mid *stseq_m, strseqbuf_m; offset += sizeof(*fwd); SCTP_INP_READ_LOCK(stcb->sctp_ep); if (asoc->idata_supported) { num_str = fwd_sz / sizeof(struct sctp_strseq_mid); } else { num_str = fwd_sz / sizeof(struct sctp_strseq); } for (i = 0; i < num_str; i++) { if (asoc->idata_supported) { stseq_m = (struct sctp_strseq_mid *)sctp_m_getptr(m, offset, sizeof(struct sctp_strseq_mid), (uint8_t *)&strseqbuf_m); offset += sizeof(struct sctp_strseq_mid); if (stseq_m == NULL) { break; } sid = ntohs(stseq_m->sid); mid = ntohl(stseq_m->mid); flags = ntohs(stseq_m->flags); if (flags & PR_SCTP_UNORDERED_FLAG) { ordered = 0; } else { ordered = 1; } } else { stseq = (struct sctp_strseq *)sctp_m_getptr(m, offset, sizeof(struct sctp_strseq), (uint8_t *)&strseqbuf); offset += sizeof(struct sctp_strseq); if (stseq == NULL) { break; } sid = ntohs(stseq->sid); mid = (uint32_t)ntohs(stseq->ssn); ordered = 1; } /* Convert */ /* now process */ /* * Ok we now look for the stream/seq on the read * queue where its not all delivered. If we find it * we transmute the read entry into a PDI_ABORTED. */ if (sid >= asoc->streamincnt) { /* screwed up streams, stop! */ break; } if ((asoc->str_of_pdapi == sid) && (asoc->ssn_of_pdapi == mid)) { /* * If this is the one we were partially * delivering now then we no longer are. * Note this will change with the reassembly * re-write. */ asoc->fragmented_delivery_inprogress = 0; } strm = &asoc->strmin[sid]; for (cur_mid = strm->last_mid_delivered; SCTP_MID_GE(asoc->idata_supported, mid, cur_mid); cur_mid++) { sctp_flush_reassm_for_str_seq(stcb, asoc, sid, cur_mid, ordered, new_cum_tsn); } TAILQ_FOREACH(control, &stcb->sctp_ep->read_queue, next) { if ((control->sinfo_stream == sid) && (SCTP_MID_EQ(asoc->idata_supported, control->mid, mid))) { str_seq = (sid << 16) | (0x0000ffff & mid); control->pdapi_aborted = 1; sv = stcb->asoc.control_pdapi; control->end_added = 1; if (control->on_strm_q == SCTP_ON_ORDERED) { TAILQ_REMOVE(&strm->inqueue, control, next_instrm); if (asoc->size_on_all_streams >= control->length) { asoc->size_on_all_streams -= control->length; } else { #ifdef INVARIANTS panic("size_on_all_streams = %u smaller than control length %u", asoc->size_on_all_streams, control->length); #else asoc->size_on_all_streams = 0; #endif } sctp_ucount_decr(asoc->cnt_on_all_streams); } else if (control->on_strm_q == SCTP_ON_UNORDERED) { TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm); #ifdef INVARIANTS } else if (control->on_strm_q) { panic("strm: %p ctl: %p unknown %d", strm, control, control->on_strm_q); #endif } control->on_strm_q = 0; stcb->asoc.control_pdapi = control; sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION, stcb, SCTP_PARTIAL_DELIVERY_ABORTED, (void *)&str_seq, SCTP_SO_NOT_LOCKED); stcb->asoc.control_pdapi = sv; break; } else if ((control->sinfo_stream == sid) && SCTP_MID_GT(asoc->idata_supported, control->mid, mid)) { /* We are past our victim SSN */ break; } } if (SCTP_MID_GT(asoc->idata_supported, mid, strm->last_mid_delivered)) { /* Update the sequence number */ strm->last_mid_delivered = mid; } /* now kick the stream the new way */ /* sa_ignore NO_NULL_CHK */ sctp_kick_prsctp_reorder_queue(stcb, strm); } SCTP_INP_READ_UNLOCK(stcb->sctp_ep); } /* * Now slide thing forward. 
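
For the per-stream entries processed above, the non-I-DATA wire format is a simple array of big-endian (sid, ssn) pairs. Here is an illustrative user-space parse over a flat buffer; the kernel walks an mbuf chain via sctp_m_getptr() instead, and the I-DATA variant carries 32-bit mids plus flags.

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

struct strseq {		/* wire format: two 16-bit big-endian fields */
	uint16_t sid;
	uint16_t ssn;
};

static void
parse_strseqs(const uint8_t *body, size_t len)
{
	struct strseq e;
	size_t off;

	for (off = 0; off + sizeof(e) <= len; off += sizeof(e)) {
		memcpy(&e, body + off, sizeof(e));	/* alignment-safe */
		printf("stream %u: skip through ssn %u\n",
		    (unsigned)ntohs(e.sid), (unsigned)ntohs(e.ssn));
	}
}

int
main(void)
{
	/* Two entries: (sid 1, ssn 7) and (sid 4, ssn 2). */
	uint8_t body[] = { 0, 1, 0, 7, 0, 4, 0, 2 };

	parse_strseqs(body, sizeof(body));
	return (0);
}
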
*/ sctp_slide_mapping_arrays(stcb); } Index: projects/clang500-import/sys/netinet/sctp_os_bsd.h =================================================================== --- projects/clang500-import/sys/netinet/sctp_os_bsd.h (revision 321306) +++ projects/clang500-import/sys/netinet/sctp_os_bsd.h (revision 321307) @@ -1,488 +1,488 @@ /*- * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #ifndef _NETINET_SCTP_OS_BSD_H_ #define _NETINET_SCTP_OS_BSD_H_ /* * includes */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_inet.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #include #include #include #endif /* INET6 */ #include #include #include #ifndef in6pcb #define in6pcb inpcb #endif /* Declare all the malloc names for all the various mallocs */ MALLOC_DECLARE(SCTP_M_MAP); MALLOC_DECLARE(SCTP_M_STRMI); MALLOC_DECLARE(SCTP_M_STRMO); MALLOC_DECLARE(SCTP_M_ASC_ADDR); MALLOC_DECLARE(SCTP_M_ASC_IT); MALLOC_DECLARE(SCTP_M_AUTH_CL); MALLOC_DECLARE(SCTP_M_AUTH_KY); MALLOC_DECLARE(SCTP_M_AUTH_HL); MALLOC_DECLARE(SCTP_M_AUTH_IF); MALLOC_DECLARE(SCTP_M_STRESET); MALLOC_DECLARE(SCTP_M_CMSG); MALLOC_DECLARE(SCTP_M_COPYAL); MALLOC_DECLARE(SCTP_M_VRF); MALLOC_DECLARE(SCTP_M_IFA); MALLOC_DECLARE(SCTP_M_IFN); MALLOC_DECLARE(SCTP_M_TIMW); MALLOC_DECLARE(SCTP_M_MVRF); MALLOC_DECLARE(SCTP_M_ITER); MALLOC_DECLARE(SCTP_M_SOCKOPT); MALLOC_DECLARE(SCTP_M_MCORE); #if defined(SCTP_LOCAL_TRACE_BUF) #define SCTP_GET_CYCLECOUNT get_cyclecount() #define SCTP_CTR6 sctp_log_trace #else #define SCTP_CTR6 CTR6 #endif /* * Macros to expand out globals defined by various modules * to either a real global or a virtualized instance of one, * depending on whether VIMAGE is defined. */ /* then define the macro(s) that hook into the vimage macros */ #define MODULE_GLOBAL(__SYMBOL) V_##__SYMBOL #define V_system_base_info VNET(system_base_info) #define SCTP_BASE_INFO(__m) V_system_base_info.sctppcbinfo.__m #define SCTP_BASE_STATS V_system_base_info.sctpstat #define SCTP_BASE_STAT(__m) V_system_base_info.sctpstat.__m #define SCTP_BASE_SYSCTL(__m) V_system_base_info.sctpsysctl.__m #define SCTP_BASE_VAR(__m) V_system_base_info.__m #define SCTP_PRINTF(params...) printf(params) #if defined(SCTP_DEBUG) #define SCTPDBG(level, params...) \ { \ do { \ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \ SCTP_PRINTF(params); \ } \ } while (0); \ } #define SCTPDBG_ADDR(level, addr) \ { \ do { \ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \ sctp_print_address(addr); \ } \ } while (0); \ } #else #define SCTPDBG(level, params...) 
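
The do { ... } while (0) wrapper in SCTPDBG() above is the standard trick for making a multi-statement macro behave as a single statement so it nests safely under if/else. A compilable illustration with an invented DBG() macro:

#include <stdio.h>

static unsigned debug_mask = 0x1;

#define DBG(level, ...)					\
	do {						\
		if (debug_mask & (level))		\
			printf(__VA_ARGS__);		\
	} while (0)

int
main(void)
{
	int verbose = 0;

	/* Thanks to do/while(0), DBG() is safe as a bare if/else arm;
	 * a plain { ... } block here would break on the trailing ';'. */
	if (verbose)
		DBG(0x1, "verbose path\n");
	else
		DBG(0x1, "quiet path, mask=%#x\n", debug_mask);
	return (0);
}
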
#define SCTPDBG_ADDR(level, addr) #endif #ifdef SCTP_LTRACE_CHUNKS #define SCTP_LTRACE_CHK(a, b, c, d) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_CHUNK_ENABLE) SCTP_CTR6(KTR_SUBSYS, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_CHUNK_PROC, 0, a, b, c, d) #else #define SCTP_LTRACE_CHK(a, b, c, d) #endif #ifdef SCTP_LTRACE_ERRORS #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) \ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \ SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \ m, inp, stcb, net, file, __LINE__, err); #define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) \ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \ SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \ inp, stcb, net, file, __LINE__, err); #else #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) #define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) #endif /* * Local address and interface list handling */ #define SCTP_MAX_VRF_ID 0 #define SCTP_SIZE_OF_VRF_HASH 3 #define SCTP_IFNAMSIZ IFNAMSIZ #define SCTP_DEFAULT_VRFID 0 #define SCTP_VRF_ADDR_HASH_SIZE 16 #define SCTP_VRF_IFN_HASH_SIZE 3 #define SCTP_INIT_VRF_TABLEID(vrf) #define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP) #define SCTP_ROUTE_IS_REAL_LOOP(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifa && (ro)->ro_rt->rt_ifa->ifa_ifp && (ro)->ro_rt->rt_ifa->ifa_ifp->if_type == IFT_LOOP) /* * Access to IFN's to help with src-addr-selection */ /* This could return VOID if the index works but for BSD we provide both. */ #define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp #define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_rt->rt_ifp->if_index #define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifp) /* * general memory allocation */ #define SCTP_MALLOC(var, type, size, name) \ do { \ var = (type)malloc(size, name, M_NOWAIT); \ } while (0) #define SCTP_FREE(var, type) free(var, type) #define SCTP_MALLOC_SONAME(var, type, size) \ do { \ var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \ } while (0) #define SCTP_FREE_SONAME(var) free(var, M_SONAME) #define SCTP_PROCESS_STRUCT struct proc * /* * zone allocation functions */ #include /* SCTP_ZONE_INIT: initialize the zone */ typedef struct uma_zone *sctp_zone_t; #define SCTP_ZONE_INIT(zone, name, size, number) { \ zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,\ 0); \ uma_zone_set_max(zone, number); \ } #define SCTP_ZONE_DESTROY(zone) uma_zdestroy(zone) /* SCTP_ZONE_GET: allocate element from the zone */ #define SCTP_ZONE_GET(zone, type) \ (type *)uma_zalloc(zone, M_NOWAIT); /* SCTP_ZONE_FREE: free element from the zone */ #define SCTP_ZONE_FREE(zone, element) \ uma_zfree(zone, element); #define SCTP_HASH_INIT(size, hashmark) hashinit_flags(size, M_PCB, hashmark, HASH_NOWAIT) #define SCTP_HASH_FREE(table, hashmark) hashdestroy(table, M_PCB, hashmark) #define SCTP_M_COPYM m_copym /* * timers */ #include typedef struct callout sctp_os_timer_t; #define SCTP_OS_TIMER_INIT(tmr) callout_init(tmr, 1) #define SCTP_OS_TIMER_START callout_reset #define SCTP_OS_TIMER_STOP callout_stop #define SCTP_OS_TIMER_STOP_DRAIN callout_drain #define SCTP_OS_TIMER_PENDING callout_pending #define SCTP_OS_TIMER_ACTIVE callout_active #define SCTP_OS_TIMER_DEACTIVATE callout_deactivate #define sctp_get_tick_count() (ticks) #define SCTP_UNUSED __attribute__((unused)) /* * Functions */ /* Mbuf manipulation and access macros */ #define SCTP_BUF_LEN(m) (m->m_len) #define SCTP_BUF_NEXT(m) 
(m->m_next) #define SCTP_BUF_NEXT_PKT(m) (m->m_nextpkt) #define SCTP_BUF_RESV_UF(m, size) m->m_data += size #define SCTP_BUF_AT(m, size) m->m_data + size #define SCTP_BUF_IS_EXTENDED(m) (m->m_flags & M_EXT) #define SCTP_BUF_SIZE M_SIZE #define SCTP_BUF_TYPE(m) (m->m_type) #define SCTP_BUF_RECVIF(m) (m->m_pkthdr.rcvif) #define SCTP_BUF_PREPEND M_PREPEND #define SCTP_ALIGN_TO_END(m, len) M_ALIGN(m, len) /* We make it so if you have up to 4 threads * writing based on the default size of * the packet log 65 k, that would be * 4 16k packets before we would hit * a problem. */ #define SCTP_PKTLOG_WRITERS_NEED_LOCK 3 /*************************/ /* MTU */ /*************************/ #define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu #define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((uint32_t)((rt != NULL) ? rt->rt_mtu : 0)) #define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0) #define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \ if (rt != NULL) \ rt->rt_mtu = mtu; \ } while(0) /* (de-)register interface event notifications */ #define SCTP_REGISTER_INTERFACE(ifhandle, af) #define SCTP_DEREGISTER_INTERFACE(ifhandle, af) /*************************/ /* These are for logging */ /*************************/ /* return the base ext data pointer */ #define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf) /* return the refcnt of the data pointer */ #define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ext_cnt) /* return any buffer related flags, this is * used beyond logging for apple only. */ #define SCTP_BUF_GET_FLAGS(m) (m->m_flags) /* For BSD this just accesses the M_PKTHDR length * so it operates on an mbuf with hdr flag. Other * O/S's may have separate packet header and mbuf * chain pointers.. thus the macro. */ #define SCTP_HEADER_TO_CHAIN(m) (m) #define SCTP_DETACH_HEADER_FROM_CHAIN(m) #define SCTP_HEADER_LEN(m) ((m)->m_pkthdr.len) #define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0 #define SCTP_RELEASE_HEADER(m) #define SCTP_RELEASE_PKT(m) sctp_m_freem(m) #define SCTP_ENABLE_UDP_CSUM(m) do { \ m->m_pkthdr.csum_flags = CSUM_UDP; \ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); \ } while (0) #define SCTP_GET_PKT_VRFID(m, vrf_id) ((vrf_id = SCTP_DEFAULT_VRFID) != SCTP_DEFAULT_VRFID) /* Attach the chain of data into the sendable packet. */ #define SCTP_ATTACH_CHAIN(pak, m, packet_length) do { \ pak = m; \ pak->m_pkthdr.len = packet_length; \ } while(0) /* Other m_pkthdr type things */ #define SCTP_IS_IT_BROADCAST(dst, m) ((m->m_flags & M_PKTHDR) ? in_broadcast(dst, m->m_pkthdr.rcvif) : 0) #define SCTP_IS_IT_LOOPBACK(m) ((m->m_flags & M_PKTHDR) && ((m->m_pkthdr.rcvif == NULL) || (m->m_pkthdr.rcvif->if_type == IFT_LOOP))) /* This converts any input packet header * into the chain of data holders, for BSD * its a NOP. */ /* get the v6 hop limit */ #define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL))); /* is the endpoint v6only? */ #define SCTP_IPV6_V6ONLY(inp) (((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY) /* is the socket non-blocking? */ #define SCTP_SO_IS_NBIO(so) ((so)->so_state & SS_NBIO) #define SCTP_SET_SO_NBIO(so) ((so)->so_state |= SS_NBIO) #define SCTP_CLEAR_SO_NBIO(so) ((so)->so_state &= ~SS_NBIO) /* get the socket type */ #define SCTP_SO_TYPE(so) ((so)->so_type) /* Use a macro for renaming sb_cc to sb_acc. * Initially sb_ccc was used, but this broke select() when used * with SCTP sockets. 
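
The SCTP_BUF_* macros in this header are thin wrappers over struct mbuf fields, which keeps the protocol code portable across platforms with different buffer types. A stand-in sketch of the same accessor-macro pattern (struct buf is invented; the real fields live in sys/mbuf.h):

#include <stdio.h>
#include <stddef.h>

struct buf {			/* stand-in for struct mbuf */
	struct buf *next;	/* m_next: next buffer in this chain */
	int len;		/* m_len: valid bytes in this buffer */
};

#define BUF_LEN(m)	((m)->len)
#define BUF_NEXT(m)	((m)->next)

/* Total payload bytes in a chain, the way SCTP code walks mbufs. */
static int
chain_len(const struct buf *m)
{
	int total = 0;

	for (; m != NULL; m = BUF_NEXT(m))
		total += BUF_LEN(m);
	return (total);
}

int
main(void)
{
	struct buf c = { NULL, 100 }, b = { &c, 60 }, a = { &b, 40 };

	printf("chain length: %d\n", chain_len(&a));	/* 200 */
	return (0);
}
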
*/ #define sb_cc sb_acc /* reserve sb space for a socket */ #define SCTP_SORESERVE(so, send, recv) soreserve(so, send, recv) /* wakeup a socket */ #define SCTP_SOWAKEUP(so) wakeup(&(so)->so_timeo) /* clear the socket buffer state */ #define SCTP_SB_CLEAR(sb) \ (sb).sb_cc = 0; \ (sb).sb_mb = NULL; \ (sb).sb_mbcnt = 0; -#define SCTP_SB_LIMIT_RCV(so) so->so_rcv.sb_hiwat -#define SCTP_SB_LIMIT_SND(so) so->so_snd.sb_hiwat +#define SCTP_SB_LIMIT_RCV(so) (SOLISTENING(so) ? so->sol_sbrcv_hiwat : so->so_rcv.sb_hiwat) +#define SCTP_SB_LIMIT_SND(so) (SOLISTENING(so) ? so->sol_sbsnd_hiwat : so->so_snd.sb_hiwat) /* * routes, output, etc. */ typedef struct route sctp_route_t; typedef struct rtentry sctp_rtentry_t; #define SCTP_RTALLOC(ro, vrf_id, fibnum) \ rtalloc_ign_fib((struct route *)ro, 0UL, fibnum) /* Future zero copy wakeup/send function */ #define SCTP_ZERO_COPY_EVENT(inp, so) /* This is re-pulse ourselves for sendbuf */ #define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so) /* * SCTP protocol specific mbuf flags. */ #define M_NOTIFICATION M_PROTO1 /* SCTP notification */ /* * IP output routines */ #define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \ { \ int o_flgs = IP_RAWOUTPUT; \ struct sctp_tcb *local_stcb = stcb; \ if (local_stcb && \ local_stcb->sctp_ep && \ local_stcb->sctp_ep->sctp_socket) \ o_flgs |= local_stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE; \ m_clrprotoflags(o_pak); \ result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \ } #define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \ { \ struct sctp_tcb *local_stcb = stcb; \ m_clrprotoflags(o_pak); \ if (local_stcb && local_stcb->sctp_ep) \ result = ip6_output(o_pak, \ ((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \ (ro), 0, 0, ifp, NULL); \ else \ result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \ } struct mbuf * sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header, int how, int allonebuf, int type); /* * SCTP AUTH */ #define SCTP_READ_RANDOM(buf, len) read_random(buf, len) /* map standard crypto API names */ #define SCTP_SHA1_CTX SHA1_CTX #define SCTP_SHA1_INIT SHA1Init #define SCTP_SHA1_UPDATE SHA1Update #define SCTP_SHA1_FINAL(x,y) SHA1Final((caddr_t)x, y) #define SCTP_SHA256_CTX SHA256_CTX #define SCTP_SHA256_INIT SHA256_Init #define SCTP_SHA256_UPDATE SHA256_Update #define SCTP_SHA256_FINAL(x,y) SHA256_Final((caddr_t)x, y) #define SCTP_DECREMENT_AND_CHECK_REFCOUNT(addr) (atomic_fetchadd_int(addr, -1) == 1) #if defined(INVARIANTS) #define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ { \ int32_t oldval; \ oldval = atomic_fetchadd_int(addr, -val); \ if (oldval < val) { \ panic("Counter goes negative"); \ } \ } #else #define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ { \ int32_t oldval; \ oldval = atomic_fetchadd_int(addr, -val); \ if (oldval < val) { \ *addr = 0; \ } \ } #endif #define SCTP_IS_LISTENING(inp) ((inp->sctp_flags & SCTP_PCB_FLAGS_ACCEPTING) != 0) #endif Index: projects/clang500-import/sys/netinet/sctp_ss_functions.c =================================================================== --- projects/clang500-import/sys/netinet/sctp_ss_functions.c (revision 321306) +++ projects/clang500-import/sys/netinet/sctp_ss_functions.c (revision 321307) @@ -1,983 +1,997 @@ /*- * Copyright (c) 2010-2012, by Michael Tuexen. All rights reserved. * Copyright (c) 2010-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2010-2012, by Robin Seggelmann. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include /* * Default simple round-robin algorithm. * Just iterates the streams in the order they appear. */ static void sctp_ss_default_add(struct sctp_tcb *, struct sctp_association *, struct sctp_stream_out *, struct sctp_stream_queue_pending *, int); static void sctp_ss_default_remove(struct sctp_tcb *, struct sctp_association *, struct sctp_stream_out *, struct sctp_stream_queue_pending *, int); static void sctp_ss_default_init(struct sctp_tcb *stcb, struct sctp_association *asoc, int holds_lock) { uint16_t i; asoc->ss_data.locked_on_sending = NULL; asoc->ss_data.last_out_stream = NULL; TAILQ_INIT(&asoc->ss_data.out.wheel); /* * If there is data in the stream queues already, the scheduler of * an existing association has been changed. We need to add all * stream queues to the wheel. 
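
The scheduler state these functions maintain is a "wheel": a tail queue of the streams that currently have queued data, rotated to pick the next sender. A minimal user-space model using the same sys/queue.h machinery (all names invented):

#include <stdio.h>
#include <sys/queue.h>

struct stream {
	int sid;
	TAILQ_ENTRY(stream) spoke;	/* cf. ss_params.rr.next_spoke */
};

TAILQ_HEAD(wheel, stream);

/* Pick the stream after "last", wrapping to the front of the wheel. */
static struct stream *
next_spoke(struct wheel *w, struct stream *last)
{
	struct stream *s;

	if (last == NULL || (s = TAILQ_NEXT(last, spoke)) == NULL)
		s = TAILQ_FIRST(w);
	return (s);
}

int
main(void)
{
	struct wheel w = TAILQ_HEAD_INITIALIZER(w);
	struct stream a = { 0 }, b = { 1 }, c = { 2 };
	struct stream *cur = NULL;
	int i;

	TAILQ_INSERT_TAIL(&w, &a, spoke);
	TAILQ_INSERT_TAIL(&w, &b, spoke);
	TAILQ_INSERT_TAIL(&w, &c, spoke);
	for (i = 0; i < 5; i++) {
		cur = next_spoke(&w, cur);
		printf("%d ", cur->sid);	/* prints: 0 1 2 0 1 */
	}
	printf("\n");
	return (0);
}
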
*/ for (i = 0; i < stcb->asoc.streamoutcnt; i++) { stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, &stcb->asoc, &stcb->asoc.strmout[i], NULL, holds_lock); } return; } static void sctp_ss_default_clear(struct sctp_tcb *stcb, struct sctp_association *asoc, int clear_values SCTP_UNUSED, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } while (!TAILQ_EMPTY(&asoc->ss_data.out.wheel)) { struct sctp_stream_out *strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); TAILQ_REMOVE(&asoc->ss_data.out.wheel, TAILQ_FIRST(&asoc->ss_data.out.wheel), ss_params.rr.next_spoke); strq->ss_params.rr.next_spoke.tqe_next = NULL; strq->ss_params.rr.next_spoke.tqe_prev = NULL; } asoc->ss_data.last_out_stream = NULL; if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static void sctp_ss_default_init_stream(struct sctp_tcb *stcb, struct sctp_stream_out *strq, struct sctp_stream_out *with_strq) { if (with_strq != NULL) { if (stcb->asoc.ss_data.locked_on_sending == with_strq) { stcb->asoc.ss_data.locked_on_sending = strq; } if (stcb->asoc.ss_data.last_out_stream == with_strq) { stcb->asoc.ss_data.last_out_stream = strq; } } strq->ss_params.rr.next_spoke.tqe_next = NULL; strq->ss_params.rr.next_spoke.tqe_prev = NULL; return; } static void sctp_ss_default_add(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp SCTP_UNUSED, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } /* Add to wheel if not already on it and stream queue not empty */ if (!TAILQ_EMPTY(&strq->outqueue) && (strq->ss_params.rr.next_spoke.tqe_next == NULL) && (strq->ss_params.rr.next_spoke.tqe_prev == NULL)) { TAILQ_INSERT_TAIL(&asoc->ss_data.out.wheel, strq, ss_params.rr.next_spoke); } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static int sctp_ss_default_is_empty(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_association *asoc) { if (TAILQ_EMPTY(&asoc->ss_data.out.wheel)) { return (1); } else { return (0); } } static void sctp_ss_default_remove(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp SCTP_UNUSED, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } /* * Remove from wheel if stream queue is empty and actually is on the * wheel */ if (TAILQ_EMPTY(&strq->outqueue) && (strq->ss_params.rr.next_spoke.tqe_next != NULL || strq->ss_params.rr.next_spoke.tqe_prev != NULL)) { if (asoc->ss_data.last_out_stream == strq) { asoc->ss_data.last_out_stream = TAILQ_PREV(asoc->ss_data.last_out_stream, sctpwheel_listhead, ss_params.rr.next_spoke); if (asoc->ss_data.last_out_stream == NULL) { asoc->ss_data.last_out_stream = TAILQ_LAST(&asoc->ss_data.out.wheel, sctpwheel_listhead); } if (asoc->ss_data.last_out_stream == strq) { asoc->ss_data.last_out_stream = NULL; } } TAILQ_REMOVE(&asoc->ss_data.out.wheel, strq, ss_params.rr.next_spoke); strq->ss_params.rr.next_spoke.tqe_next = NULL; strq->ss_params.rr.next_spoke.tqe_prev = NULL; } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static struct sctp_stream_out * sctp_ss_default_select(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_nets *net, struct sctp_association *asoc) { struct sctp_stream_out *strq, *strqt; if (asoc->ss_data.locked_on_sending) { return (asoc->ss_data.locked_on_sending); } strqt = asoc->ss_data.last_out_stream; default_again: /* Find the next stream to use */ if (strqt == NULL) { strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); } else { strq = TAILQ_NEXT(strqt, 
ss_params.rr.next_spoke); if (strq == NULL) { strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); } } /* * If CMT is off, we must validate that the stream in question has * its first item pointed towards the network destination requested * by the caller. Note that if we turn out to be locked to a stream * (assigning TSNs), then we must stop, since we cannot look for * another stream with data to send to that destination. In CMT's * case, by skipping this check, we will send one data packet * towards the requested net. */ if (net != NULL && strq != NULL && SCTP_BASE_SYSCTL(sctp_cmt_on_off) == 0) { if (TAILQ_FIRST(&strq->outqueue) && TAILQ_FIRST(&strq->outqueue)->net != NULL && TAILQ_FIRST(&strq->outqueue)->net != net) { if (strq == asoc->ss_data.last_out_stream) { return (NULL); } else { strqt = strq; goto default_again; } } } return (strq); } static void sctp_ss_default_scheduled(struct sctp_tcb *stcb, struct sctp_nets *net SCTP_UNUSED, struct sctp_association *asoc, struct sctp_stream_out *strq, int moved_how_much SCTP_UNUSED) { struct sctp_stream_queue_pending *sp; asoc->ss_data.last_out_stream = strq; if (stcb->asoc.idata_supported == 0) { sp = TAILQ_FIRST(&strq->outqueue); if ((sp != NULL) && (sp->some_taken == 1)) { stcb->asoc.ss_data.locked_on_sending = strq; } else { stcb->asoc.ss_data.locked_on_sending = NULL; } } else { stcb->asoc.ss_data.locked_on_sending = NULL; } return; } static void sctp_ss_default_packet_done(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_nets *net SCTP_UNUSED, struct sctp_association *asoc SCTP_UNUSED) { /* Nothing to be done here */ return; } static int sctp_ss_default_get_value(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_association *asoc SCTP_UNUSED, struct sctp_stream_out *strq SCTP_UNUSED, uint16_t *value SCTP_UNUSED) { /* Nothing to be done here */ return (-1); } static int sctp_ss_default_set_value(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_association *asoc SCTP_UNUSED, struct sctp_stream_out *strq SCTP_UNUSED, uint16_t value SCTP_UNUSED) { /* Nothing to be done here */ return (-1); } static int -sctp_ss_default_is_user_msgs_incomplete(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_association *asoc SCTP_UNUSED) +sctp_ss_default_is_user_msgs_incomplete(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_association *asoc) { - return (0); + struct sctp_stream_out *strq; + struct sctp_stream_queue_pending *sp; + + if (asoc->stream_queue_cnt != 1) { + return (0); + } + strq = asoc->ss_data.locked_on_sending; + if (strq == NULL) { + return (0); + } + sp = TAILQ_FIRST(&strq->outqueue); + if (sp == NULL) { + return (0); + } + return (!sp->msg_is_complete); } /* * Real round-robin algorithm. * Always iterates the streams in ascending order. 
*/ static void sctp_ss_rr_add(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp SCTP_UNUSED, int holds_lock) { struct sctp_stream_out *strqt; if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } if (!TAILQ_EMPTY(&strq->outqueue) && (strq->ss_params.rr.next_spoke.tqe_next == NULL) && (strq->ss_params.rr.next_spoke.tqe_prev == NULL)) { if (TAILQ_EMPTY(&asoc->ss_data.out.wheel)) { TAILQ_INSERT_HEAD(&asoc->ss_data.out.wheel, strq, ss_params.rr.next_spoke); } else { strqt = TAILQ_FIRST(&asoc->ss_data.out.wheel); while (strqt != NULL && (strqt->sid < strq->sid)) { strqt = TAILQ_NEXT(strqt, ss_params.rr.next_spoke); } if (strqt != NULL) { TAILQ_INSERT_BEFORE(strqt, strq, ss_params.rr.next_spoke); } else { TAILQ_INSERT_TAIL(&asoc->ss_data.out.wheel, strq, ss_params.rr.next_spoke); } } } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } /* * Real round-robin per packet algorithm. * Always iterates the streams in ascending order and * only fills messages of the same stream in a packet. */ static struct sctp_stream_out * sctp_ss_rrp_select(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_nets *net SCTP_UNUSED, struct sctp_association *asoc) { return (asoc->ss_data.last_out_stream); } static void sctp_ss_rrp_packet_done(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_nets *net, struct sctp_association *asoc) { struct sctp_stream_out *strq, *strqt; strqt = asoc->ss_data.last_out_stream; rrp_again: /* Find the next stream to use */ if (strqt == NULL) { strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); } else { strq = TAILQ_NEXT(strqt, ss_params.rr.next_spoke); if (strq == NULL) { strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); } } /* * If CMT is off, we must validate that the stream in question has * its first item pointed towards the network destination requested * by the caller. Note that if we turn out to be locked to a stream * (assigning TSNs), then we must stop, since we cannot look for * another stream with data to send to that destination. In CMT's * case, by skipping this check, we will send one data packet * towards the requested net. */ if (net != NULL && strq != NULL && SCTP_BASE_SYSCTL(sctp_cmt_on_off) == 0) { if (TAILQ_FIRST(&strq->outqueue) && TAILQ_FIRST(&strq->outqueue)->net != NULL && TAILQ_FIRST(&strq->outqueue)->net != net) { if (strq == asoc->ss_data.last_out_stream) { strq = NULL; } else { strqt = strq; goto rrp_again; } } } asoc->ss_data.last_out_stream = strq; return; } /* * Priority algorithm. * Always prefers streams based on their priority id. 
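
The is_user_msgs_incomplete hunk above replaces a stub that always returned 0; the new logic asks whether the single remaining stream is locked on a message whose last fragment has not been queued yet, which is what makes the partial-message abort path in sctp_indata.c reachable. Reduced to a pure function over invented, flattened types:

#include <stdio.h>

struct pending { int msg_is_complete; };
struct stream  { struct pending *first_sp; };	/* head of the outqueue */
struct assoc   {
	unsigned stream_queue_cnt;
	struct stream *locked_on_sending;
};

static int
user_msgs_incomplete(const struct assoc *a)
{
	if (a->stream_queue_cnt != 1 || a->locked_on_sending == NULL ||
	    a->locked_on_sending->first_sp == NULL)
		return (0);
	return (!a->locked_on_sending->first_sp->msg_is_complete);
}

int
main(void)
{
	struct pending sp = { 0 };		/* message still open */
	struct stream st = { &sp };
	struct assoc a = { 1, &st };

	printf("incomplete: %d\n", user_msgs_incomplete(&a));	/* 1 */
	return (0);
}
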
*/ static void sctp_ss_prio_clear(struct sctp_tcb *stcb, struct sctp_association *asoc, int clear_values, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } while (!TAILQ_EMPTY(&asoc->ss_data.out.wheel)) { struct sctp_stream_out *strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); if (clear_values) { strq->ss_params.prio.priority = 0; } TAILQ_REMOVE(&asoc->ss_data.out.wheel, TAILQ_FIRST(&asoc->ss_data.out.wheel), ss_params.prio.next_spoke); strq->ss_params.prio.next_spoke.tqe_next = NULL; strq->ss_params.prio.next_spoke.tqe_prev = NULL; } asoc->ss_data.last_out_stream = NULL; if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static void sctp_ss_prio_init_stream(struct sctp_tcb *stcb, struct sctp_stream_out *strq, struct sctp_stream_out *with_strq) { if (with_strq != NULL) { if (stcb->asoc.ss_data.locked_on_sending == with_strq) { stcb->asoc.ss_data.locked_on_sending = strq; } if (stcb->asoc.ss_data.last_out_stream == with_strq) { stcb->asoc.ss_data.last_out_stream = strq; } } strq->ss_params.prio.next_spoke.tqe_next = NULL; strq->ss_params.prio.next_spoke.tqe_prev = NULL; if (with_strq != NULL) { strq->ss_params.prio.priority = with_strq->ss_params.prio.priority; } else { strq->ss_params.prio.priority = 0; } return; } static void sctp_ss_prio_add(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp SCTP_UNUSED, int holds_lock) { struct sctp_stream_out *strqt; if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } /* Add to wheel if not already on it and stream queue not empty */ if (!TAILQ_EMPTY(&strq->outqueue) && (strq->ss_params.prio.next_spoke.tqe_next == NULL) && (strq->ss_params.prio.next_spoke.tqe_prev == NULL)) { if (TAILQ_EMPTY(&asoc->ss_data.out.wheel)) { TAILQ_INSERT_HEAD(&asoc->ss_data.out.wheel, strq, ss_params.prio.next_spoke); } else { strqt = TAILQ_FIRST(&asoc->ss_data.out.wheel); while (strqt != NULL && strqt->ss_params.prio.priority < strq->ss_params.prio.priority) { strqt = TAILQ_NEXT(strqt, ss_params.prio.next_spoke); } if (strqt != NULL) { TAILQ_INSERT_BEFORE(strqt, strq, ss_params.prio.next_spoke); } else { TAILQ_INSERT_TAIL(&asoc->ss_data.out.wheel, strq, ss_params.prio.next_spoke); } } } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static void sctp_ss_prio_remove(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp SCTP_UNUSED, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } /* * Remove from wheel if stream queue is empty and actually is on the * wheel */ if (TAILQ_EMPTY(&strq->outqueue) && (strq->ss_params.prio.next_spoke.tqe_next != NULL || strq->ss_params.prio.next_spoke.tqe_prev != NULL)) { if (asoc->ss_data.last_out_stream == strq) { asoc->ss_data.last_out_stream = TAILQ_PREV(asoc->ss_data.last_out_stream, sctpwheel_listhead, ss_params.prio.next_spoke); if (asoc->ss_data.last_out_stream == NULL) { asoc->ss_data.last_out_stream = TAILQ_LAST(&asoc->ss_data.out.wheel, sctpwheel_listhead); } if (asoc->ss_data.last_out_stream == strq) { asoc->ss_data.last_out_stream = NULL; } } TAILQ_REMOVE(&asoc->ss_data.out.wheel, strq, ss_params.prio.next_spoke); strq->ss_params.prio.next_spoke.tqe_next = NULL; strq->ss_params.prio.next_spoke.tqe_prev = NULL; } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static struct sctp_stream_out * sctp_ss_prio_select(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_nets *net, struct sctp_association *asoc) { struct sctp_stream_out 
*strq, *strqt, *strqn; strqt = asoc->ss_data.last_out_stream; prio_again: /* Find the next stream to use */ if (strqt == NULL) { strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); } else { strqn = TAILQ_NEXT(strqt, ss_params.prio.next_spoke); if (strqn != NULL && strqn->ss_params.prio.priority == strqt->ss_params.prio.priority) { strq = strqn; } else { strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); } } /* * If CMT is off, we must validate that the stream in question has * its first item pointed towards the network destination requested * by the caller. Note that if we turn out to be locked to a stream * (assigning TSNs), then we must stop, since we cannot look for * another stream with data to send to that destination. In CMT's * case, by skipping this check, we will send one data packet * towards the requested net. */ if (net != NULL && strq != NULL && SCTP_BASE_SYSCTL(sctp_cmt_on_off) == 0) { if (TAILQ_FIRST(&strq->outqueue) && TAILQ_FIRST(&strq->outqueue)->net != NULL && TAILQ_FIRST(&strq->outqueue)->net != net) { if (strq == asoc->ss_data.last_out_stream) { return (NULL); } else { strqt = strq; goto prio_again; } } } return (strq); } static int sctp_ss_prio_get_value(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_association *asoc SCTP_UNUSED, struct sctp_stream_out *strq, uint16_t *value) { if (strq == NULL) { return (-1); } *value = strq->ss_params.prio.priority; return (1); } static int sctp_ss_prio_set_value(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, uint16_t value) { if (strq == NULL) { return (-1); } strq->ss_params.prio.priority = value; sctp_ss_prio_remove(stcb, asoc, strq, NULL, 1); sctp_ss_prio_add(stcb, asoc, strq, NULL, 1); return (1); } /* * Fair bandwidth algorithm. * Maintains an equal throughput per stream. 
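
The fair-bandwidth scheduler keeps a per-stream byte count in ss_params.fb.rounds, selects the stream with the smallest count, and afterwards charges the winner's cost to every stream. The toy model below mimics that bookkeeping with plain arrays; the refill-from-queue step is simplified relative to sctp_ss_fb_scheduled(), and all names are invented.

#include <stdio.h>

#define NSTREAMS 3
#define QLEN 2

/* Per-stream queue of pending message sizes; -1 marks "no message". */
static int queue[NSTREAMS][QLEN] = {
	{ 120, 120 }, { 40, 40 }, { 300, -1 },
};
static int head[NSTREAMS];
static int rounds[NSTREAMS] = { 120, 40, 300 };	/* cf. ss_params.fb.rounds */

/* Pick the active stream with the smallest remaining "rounds". */
static int
fb_pick(void)
{
	int i, best = -1;

	for (i = 0; i < NSTREAMS; i++)
		if (rounds[i] >= 0 && (best < 0 || rounds[i] < rounds[best]))
			best = i;
	return (best);
}

static void
fb_charge(int winner)
{
	int i, cost = rounds[winner];

	/* Everyone pays the winner's cost, floored at zero... */
	for (i = 0; i < NSTREAMS; i++)
		if (rounds[i] >= 0 && (rounds[i] -= cost) < 0)
			rounds[i] = 0;
	/* ...and the winner is re-primed with its next message (or -1). */
	head[winner]++;
	rounds[winner] = (head[winner] < QLEN) ? queue[winner][head[winner]] : -1;
}

int
main(void)
{
	int i;

	for (i = 0; i < 4; i++) {
		int s = fb_pick();

		if (s < 0)
			break;
		printf("send message from stream %d\n", s);
		fb_charge(s);
	}
	return (0);
}
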
*/ static void sctp_ss_fb_clear(struct sctp_tcb *stcb, struct sctp_association *asoc, int clear_values, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } while (!TAILQ_EMPTY(&asoc->ss_data.out.wheel)) { struct sctp_stream_out *strq = TAILQ_FIRST(&asoc->ss_data.out.wheel); if (clear_values) { strq->ss_params.fb.rounds = -1; } TAILQ_REMOVE(&asoc->ss_data.out.wheel, TAILQ_FIRST(&asoc->ss_data.out.wheel), ss_params.fb.next_spoke); strq->ss_params.fb.next_spoke.tqe_next = NULL; strq->ss_params.fb.next_spoke.tqe_prev = NULL; } asoc->ss_data.last_out_stream = NULL; if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static void sctp_ss_fb_init_stream(struct sctp_tcb *stcb, struct sctp_stream_out *strq, struct sctp_stream_out *with_strq) { if (with_strq != NULL) { if (stcb->asoc.ss_data.locked_on_sending == with_strq) { stcb->asoc.ss_data.locked_on_sending = strq; } if (stcb->asoc.ss_data.last_out_stream == with_strq) { stcb->asoc.ss_data.last_out_stream = strq; } } strq->ss_params.fb.next_spoke.tqe_next = NULL; strq->ss_params.fb.next_spoke.tqe_prev = NULL; if (with_strq != NULL) { strq->ss_params.fb.rounds = with_strq->ss_params.fb.rounds; } else { strq->ss_params.fb.rounds = -1; } return; } static void sctp_ss_fb_add(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp SCTP_UNUSED, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } if (!TAILQ_EMPTY(&strq->outqueue) && (strq->ss_params.fb.next_spoke.tqe_next == NULL) && (strq->ss_params.fb.next_spoke.tqe_prev == NULL)) { if (strq->ss_params.fb.rounds < 0) strq->ss_params.fb.rounds = TAILQ_FIRST(&strq->outqueue)->length; TAILQ_INSERT_TAIL(&asoc->ss_data.out.wheel, strq, ss_params.fb.next_spoke); } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static void sctp_ss_fb_remove(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp SCTP_UNUSED, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } /* * Remove from wheel if stream queue is empty and actually is on the * wheel */ if (TAILQ_EMPTY(&strq->outqueue) && (strq->ss_params.fb.next_spoke.tqe_next != NULL || strq->ss_params.fb.next_spoke.tqe_prev != NULL)) { if (asoc->ss_data.last_out_stream == strq) { asoc->ss_data.last_out_stream = TAILQ_PREV(asoc->ss_data.last_out_stream, sctpwheel_listhead, ss_params.fb.next_spoke); if (asoc->ss_data.last_out_stream == NULL) { asoc->ss_data.last_out_stream = TAILQ_LAST(&asoc->ss_data.out.wheel, sctpwheel_listhead); } if (asoc->ss_data.last_out_stream == strq) { asoc->ss_data.last_out_stream = NULL; } } TAILQ_REMOVE(&asoc->ss_data.out.wheel, strq, ss_params.fb.next_spoke); strq->ss_params.fb.next_spoke.tqe_next = NULL; strq->ss_params.fb.next_spoke.tqe_prev = NULL; } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static struct sctp_stream_out * sctp_ss_fb_select(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_nets *net, struct sctp_association *asoc) { struct sctp_stream_out *strq = NULL, *strqt; if (asoc->ss_data.last_out_stream == NULL || TAILQ_FIRST(&asoc->ss_data.out.wheel) == TAILQ_LAST(&asoc->ss_data.out.wheel, sctpwheel_listhead)) { strqt = TAILQ_FIRST(&asoc->ss_data.out.wheel); } else { strqt = TAILQ_NEXT(asoc->ss_data.last_out_stream, ss_params.fb.next_spoke); } do { if ((strqt != NULL) && ((SCTP_BASE_SYSCTL(sctp_cmt_on_off) > 0) || (SCTP_BASE_SYSCTL(sctp_cmt_on_off) == 0 && (net == NULL || (TAILQ_FIRST(&strqt->outqueue) && 
TAILQ_FIRST(&strqt->outqueue)->net == NULL) || (net != NULL && TAILQ_FIRST(&strqt->outqueue) && TAILQ_FIRST(&strqt->outqueue)->net != NULL && TAILQ_FIRST(&strqt->outqueue)->net == net))))) { if ((strqt->ss_params.fb.rounds >= 0) && (strq == NULL || strqt->ss_params.fb.rounds < strq->ss_params.fb.rounds)) { strq = strqt; } } if (strqt != NULL) { strqt = TAILQ_NEXT(strqt, ss_params.fb.next_spoke); } else { strqt = TAILQ_FIRST(&asoc->ss_data.out.wheel); } } while (strqt != strq); return (strq); } static void sctp_ss_fb_scheduled(struct sctp_tcb *stcb, struct sctp_nets *net SCTP_UNUSED, struct sctp_association *asoc, struct sctp_stream_out *strq, int moved_how_much SCTP_UNUSED) { struct sctp_stream_queue_pending *sp; struct sctp_stream_out *strqt; int subtract; if (stcb->asoc.idata_supported == 0) { sp = TAILQ_FIRST(&strq->outqueue); if ((sp != NULL) && (sp->some_taken == 1)) { stcb->asoc.ss_data.locked_on_sending = strq; } else { stcb->asoc.ss_data.locked_on_sending = NULL; } } else { stcb->asoc.ss_data.locked_on_sending = NULL; } subtract = strq->ss_params.fb.rounds; TAILQ_FOREACH(strqt, &asoc->ss_data.out.wheel, ss_params.fb.next_spoke) { strqt->ss_params.fb.rounds -= subtract; if (strqt->ss_params.fb.rounds < 0) strqt->ss_params.fb.rounds = 0; } if (TAILQ_FIRST(&strq->outqueue)) { strq->ss_params.fb.rounds = TAILQ_FIRST(&strq->outqueue)->length; } else { strq->ss_params.fb.rounds = -1; } asoc->ss_data.last_out_stream = strq; return; }
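/*
 * Editor's illustrative sketch (not part of this commit): the fair
 * bandwidth bookkeeping above in miniature.  Each active stream's
 * "rounds" value is the byte length of its first queued message (-1
 * marks an inactive stream); sctp_ss_fb_select() picks the smallest
 * rounds value and sctp_ss_fb_scheduled() then charges that cost to
 * every stream, so streams queueing small messages get scheduled more
 * often and per-stream throughput evens out.
 */
#if 0	/* editor's sketch, not compiled into the kernel */
static int
fb_pick_and_charge(int rounds[], int nstreams)
{
	int cost, i, winner;

	winner = -1;
	for (i = 0; i < nstreams; i++) {
		if (rounds[i] >= 0 &&
		    (winner == -1 || rounds[i] < rounds[winner]))
			winner = i;
	}
	if (winner == -1)
		return (-1);	/* no active stream */
	cost = rounds[winner];
	for (i = 0; i < nstreams; i++) {
		if (rounds[i] < 0)
			continue;
		rounds[i] -= cost;
		if (rounds[i] < 0)
			rounds[i] = 0;
	}
	/* The caller resets rounds[winner] from its next queued message. */
	return (winner);
}
#endif

/* * First-come, first-serve algorithm. * Maintains the order provided by the application. */ static void sctp_ss_fcfs_add(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock); static void sctp_ss_fcfs_init(struct sctp_tcb *stcb, struct sctp_association *asoc, int holds_lock) { uint32_t x, n = 0, add_more = 1; struct sctp_stream_queue_pending *sp; uint16_t i; TAILQ_INIT(&asoc->ss_data.out.list); /* * If there is data in the stream queues already, the scheduler of * an existing association has been changed. We can only cycle * through the stream queues and add everything to the FCFS queue. */ while (add_more) { add_more = 0; for (i = 0; i < stcb->asoc.streamoutcnt; i++) { sp = TAILQ_FIRST(&stcb->asoc.strmout[i].outqueue); x = 0; /* Find the n-th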
message in the current stream queue */ while (sp != NULL && x < n) { sp = TAILQ_NEXT(sp, next); x++; } if (sp != NULL) { sctp_ss_fcfs_add(stcb, &stcb->asoc, &stcb->asoc.strmout[i], sp, holds_lock); add_more = 1; } } n++; } return; } static void sctp_ss_fcfs_clear(struct sctp_tcb *stcb, struct sctp_association *asoc, int clear_values, int holds_lock) { if (clear_values) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } while (!TAILQ_EMPTY(&asoc->ss_data.out.list)) { TAILQ_REMOVE(&asoc->ss_data.out.list, TAILQ_FIRST(&asoc->ss_data.out.list), ss_next); } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } } return; } static void sctp_ss_fcfs_init_stream(struct sctp_tcb *stcb, struct sctp_stream_out *strq, struct sctp_stream_out *with_strq) { if (with_strq != NULL) { if (stcb->asoc.ss_data.locked_on_sending == with_strq) { stcb->asoc.ss_data.locked_on_sending = strq; } if (stcb->asoc.ss_data.last_out_stream == with_strq) { stcb->asoc.ss_data.last_out_stream = strq; } } return; } static void sctp_ss_fcfs_add(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq SCTP_UNUSED, struct sctp_stream_queue_pending *sp, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } if (sp && (sp->ss_next.tqe_next == NULL) && (sp->ss_next.tqe_prev == NULL)) { TAILQ_INSERT_TAIL(&asoc->ss_data.out.list, sp, ss_next); } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static int sctp_ss_fcfs_is_empty(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_association *asoc) { if (TAILQ_EMPTY(&asoc->ss_data.out.list)) { return (1); } else { return (0); } } static void sctp_ss_fcfs_remove(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_stream_out *strq SCTP_UNUSED, struct sctp_stream_queue_pending *sp, int holds_lock) { if (holds_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } if (sp && ((sp->ss_next.tqe_next != NULL) || (sp->ss_next.tqe_prev != NULL))) { TAILQ_REMOVE(&asoc->ss_data.out.list, sp, ss_next); } if (holds_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } return; } static struct sctp_stream_out * sctp_ss_fcfs_select(struct sctp_tcb *stcb SCTP_UNUSED, struct sctp_nets *net, struct sctp_association *asoc) { struct sctp_stream_out *strq; struct sctp_stream_queue_pending *sp; sp = TAILQ_FIRST(&asoc->ss_data.out.list); default_again: if (sp != NULL) { strq = &asoc->strmout[sp->sid]; } else { strq = NULL; } /* * If CMT is off, we must validate that the stream in question has * its first item pointed towards the network destination requested * by the caller. Note that if we turn out to be locked to a stream * (assigning TSNs), then we must stop, since we cannot look for * another stream with data to send to that destination. In CMT's * case, by skipping this check, we will send one data packet * towards the requested net.
*/ if (net != NULL && strq != NULL && SCTP_BASE_SYSCTL(sctp_cmt_on_off) == 0) { if (TAILQ_FIRST(&strq->outqueue) && TAILQ_FIRST(&strq->outqueue)->net != NULL && TAILQ_FIRST(&strq->outqueue)->net != net) { sp = TAILQ_NEXT(sp, ss_next); goto default_again; } } return (strq); } const struct sctp_ss_functions sctp_ss_functions[] = { /* SCTP_SS_DEFAULT */ { .sctp_ss_init = sctp_ss_default_init, .sctp_ss_clear = sctp_ss_default_clear, .sctp_ss_init_stream = sctp_ss_default_init_stream, .sctp_ss_add_to_stream = sctp_ss_default_add, .sctp_ss_is_empty = sctp_ss_default_is_empty, .sctp_ss_remove_from_stream = sctp_ss_default_remove, .sctp_ss_select_stream = sctp_ss_default_select, .sctp_ss_scheduled = sctp_ss_default_scheduled, .sctp_ss_packet_done = sctp_ss_default_packet_done, .sctp_ss_get_value = sctp_ss_default_get_value, .sctp_ss_set_value = sctp_ss_default_set_value, .sctp_ss_is_user_msgs_incomplete = sctp_ss_default_is_user_msgs_incomplete }, /* SCTP_SS_ROUND_ROBIN */ { .sctp_ss_init = sctp_ss_default_init, .sctp_ss_clear = sctp_ss_default_clear, .sctp_ss_init_stream = sctp_ss_default_init_stream, .sctp_ss_add_to_stream = sctp_ss_rr_add, .sctp_ss_is_empty = sctp_ss_default_is_empty, .sctp_ss_remove_from_stream = sctp_ss_default_remove, .sctp_ss_select_stream = sctp_ss_default_select, .sctp_ss_scheduled = sctp_ss_default_scheduled, .sctp_ss_packet_done = sctp_ss_default_packet_done, .sctp_ss_get_value = sctp_ss_default_get_value, .sctp_ss_set_value = sctp_ss_default_set_value, .sctp_ss_is_user_msgs_incomplete = sctp_ss_default_is_user_msgs_incomplete }, /* SCTP_SS_ROUND_ROBIN_PACKET */ { .sctp_ss_init = sctp_ss_default_init, .sctp_ss_clear = sctp_ss_default_clear, .sctp_ss_init_stream = sctp_ss_default_init_stream, .sctp_ss_add_to_stream = sctp_ss_rr_add, .sctp_ss_is_empty = sctp_ss_default_is_empty, .sctp_ss_remove_from_stream = sctp_ss_default_remove, .sctp_ss_select_stream = sctp_ss_rrp_select, .sctp_ss_scheduled = sctp_ss_default_scheduled, .sctp_ss_packet_done = sctp_ss_rrp_packet_done, .sctp_ss_get_value = sctp_ss_default_get_value, .sctp_ss_set_value = sctp_ss_default_set_value, .sctp_ss_is_user_msgs_incomplete = sctp_ss_default_is_user_msgs_incomplete }, /* SCTP_SS_PRIORITY */ { .sctp_ss_init = sctp_ss_default_init, .sctp_ss_clear = sctp_ss_prio_clear, .sctp_ss_init_stream = sctp_ss_prio_init_stream, .sctp_ss_add_to_stream = sctp_ss_prio_add, .sctp_ss_is_empty = sctp_ss_default_is_empty, .sctp_ss_remove_from_stream = sctp_ss_prio_remove, .sctp_ss_select_stream = sctp_ss_prio_select, .sctp_ss_scheduled = sctp_ss_default_scheduled, .sctp_ss_packet_done = sctp_ss_default_packet_done, .sctp_ss_get_value = sctp_ss_prio_get_value, .sctp_ss_set_value = sctp_ss_prio_set_value, .sctp_ss_is_user_msgs_incomplete = sctp_ss_default_is_user_msgs_incomplete }, /* SCTP_SS_FAIR_BANDWITH */ { .sctp_ss_init = sctp_ss_default_init, .sctp_ss_clear = sctp_ss_fb_clear, .sctp_ss_init_stream = sctp_ss_fb_init_stream, .sctp_ss_add_to_stream = sctp_ss_fb_add, .sctp_ss_is_empty = sctp_ss_default_is_empty, .sctp_ss_remove_from_stream = sctp_ss_fb_remove, .sctp_ss_select_stream = sctp_ss_fb_select, .sctp_ss_scheduled = sctp_ss_fb_scheduled, .sctp_ss_packet_done = sctp_ss_default_packet_done, .sctp_ss_get_value = sctp_ss_default_get_value, .sctp_ss_set_value = sctp_ss_default_set_value, .sctp_ss_is_user_msgs_incomplete = sctp_ss_default_is_user_msgs_incomplete }, /* SCTP_SS_FIRST_COME */ { .sctp_ss_init = sctp_ss_fcfs_init, .sctp_ss_clear = sctp_ss_fcfs_clear, .sctp_ss_init_stream = sctp_ss_fcfs_init_stream, 
.sctp_ss_add_to_stream = sctp_ss_fcfs_add, .sctp_ss_is_empty = sctp_ss_fcfs_is_empty, .sctp_ss_remove_from_stream = sctp_ss_fcfs_remove, .sctp_ss_select_stream = sctp_ss_fcfs_select, .sctp_ss_scheduled = sctp_ss_default_scheduled, .sctp_ss_packet_done = sctp_ss_default_packet_done, .sctp_ss_get_value = sctp_ss_default_get_value, .sctp_ss_set_value = sctp_ss_default_set_value, .sctp_ss_is_user_msgs_incomplete = sctp_ss_default_is_user_msgs_incomplete } }; Index: projects/clang500-import/sys/sys/_pctrie.h =================================================================== --- projects/clang500-import/sys/sys/_pctrie.h (revision 321306) +++ projects/clang500-import/sys/sys/_pctrie.h (revision 321307) @@ -1,51 +1,41 @@ /* * Copyright (c) 2013 EMC Corp. * Copyright (c) 2011 Jeffrey Roberson * Copyright (c) 2008 Mayur Shardul * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SYS_PCTRIE_H_ #define __SYS_PCTRIE_H_ /* * Radix tree root. */ struct pctrie { uintptr_t pt_root; }; -#ifdef _KERNEL - -static __inline boolean_t -pctrie_is_empty(struct pctrie *ptree) -{ - - return (ptree->pt_root == 0); -} - -#endif /* _KERNEL */ #endif /* !__SYS_PCTRIE_H_ */ Index: projects/clang500-import/sys/sys/efi.h =================================================================== --- projects/clang500-import/sys/sys/efi.h (revision 321306) +++ projects/clang500-import/sys/sys/efi.h (revision 321307) @@ -1,170 +1,173 @@ /*- * Copyright (c) 2004 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_EFI_H_ #define _SYS_EFI_H_ #include #include #define EFI_PAGE_SHIFT 12 #define EFI_PAGE_SIZE (1 << EFI_PAGE_SHIFT) #define EFI_PAGE_MASK (EFI_PAGE_SIZE - 1) #define EFI_TABLE_ACPI20 \ {0x8868e871,0xe4f1,0x11d3,0xbc,0x22,{0x00,0x80,0xc7,0x3c,0x88,0x81}} #define EFI_TABLE_SAL \ {0xeb9d2d32,0x2d88,0x11d3,0x9a,0x16,{0x00,0x90,0x27,0x3f,0xc1,0x4d}} enum efi_reset { EFI_RESET_COLD, EFI_RESET_WARM }; typedef uint16_t efi_char; typedef unsigned long efi_status; struct efi_cfgtbl { struct uuid ct_uuid; uint64_t ct_data; }; struct efi_md { uint32_t md_type; #define EFI_MD_TYPE_NULL 0 #define EFI_MD_TYPE_CODE 1 /* Loader text. */ #define EFI_MD_TYPE_DATA 2 /* Loader data. */ #define EFI_MD_TYPE_BS_CODE 3 /* Boot services text. */ #define EFI_MD_TYPE_BS_DATA 4 /* Boot services data. */ #define EFI_MD_TYPE_RT_CODE 5 /* Runtime services text. */ #define EFI_MD_TYPE_RT_DATA 6 /* Runtime services data. */ #define EFI_MD_TYPE_FREE 7 /* Unused/free memory. */ #define EFI_MD_TYPE_BAD 8 /* Bad memory */ #define EFI_MD_TYPE_RECLAIM 9 /* ACPI reclaimable memory. */ #define EFI_MD_TYPE_FIRMWARE 10 /* ACPI NV memory */ #define EFI_MD_TYPE_IOMEM 11 /* Memory-mapped I/O. */ #define EFI_MD_TYPE_IOPORT 12 /* I/O port space. */ #define EFI_MD_TYPE_PALCODE 13 /* PAL */ #define EFI_MD_TYPE_PERSISTENT 14 /* Persistent memory. 
*/ uint32_t __pad; uint64_t md_phys; void *md_virt; uint64_t md_pages; uint64_t md_attr; #define EFI_MD_ATTR_UC 0x0000000000000001UL #define EFI_MD_ATTR_WC 0x0000000000000002UL #define EFI_MD_ATTR_WT 0x0000000000000004UL #define EFI_MD_ATTR_WB 0x0000000000000008UL #define EFI_MD_ATTR_UCE 0x0000000000000010UL #define EFI_MD_ATTR_WP 0x0000000000001000UL #define EFI_MD_ATTR_RP 0x0000000000002000UL #define EFI_MD_ATTR_XP 0x0000000000004000UL #define EFI_MD_ATTR_NV 0x0000000000008000UL #define EFI_MD_ATTR_MORE_RELIABLE \ 0x0000000000010000UL #define EFI_MD_ATTR_RO 0x0000000000020000UL #define EFI_MD_ATTR_RT 0x8000000000000000UL }; #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *)(ptr)) + (size))) struct efi_tm { uint16_t tm_year; /* 1998 - 20XX */ uint8_t tm_mon; /* 1 - 12 */ uint8_t tm_mday; /* 1 - 31 */ uint8_t tm_hour; /* 0 - 23 */ uint8_t tm_min; /* 0 - 59 */ uint8_t tm_sec; /* 0 - 59 */ uint8_t __pad1; uint32_t tm_nsec; /* 0 - 999,999,999 */ int16_t tm_tz; /* -1440 to 1440 or 2047 */ uint8_t tm_dst; uint8_t __pad2; }; struct efi_tmcap { uint32_t tc_res; /* 1e-6 parts per million */ uint32_t tc_prec; /* hertz */ uint8_t tc_stz; /* Set clears sub-second time */ }; struct efi_tblhdr { uint64_t th_sig; uint32_t th_rev; uint32_t th_hdrsz; uint32_t th_crc32; uint32_t __res; }; +#ifdef _KERNEL + +#ifdef EFIABI_ATTR struct efi_rt { struct efi_tblhdr rt_hdr; efi_status (*rt_gettime)(struct efi_tm *, struct efi_tmcap *) EFIABI_ATTR; efi_status (*rt_settime)(struct efi_tm *) EFIABI_ATTR; efi_status (*rt_getwaketime)(uint8_t *, uint8_t *, struct efi_tm *) EFIABI_ATTR; efi_status (*rt_setwaketime)(uint8_t, struct efi_tm *) EFIABI_ATTR; efi_status (*rt_setvirtual)(u_long, u_long, uint32_t, struct efi_md *) EFIABI_ATTR; efi_status (*rt_cvtptr)(u_long, void **) EFIABI_ATTR; efi_status (*rt_getvar)(efi_char *, struct uuid *, uint32_t *, u_long *, void *) EFIABI_ATTR; efi_status (*rt_scanvar)(u_long *, efi_char *, struct uuid *) EFIABI_ATTR; efi_status (*rt_setvar)(efi_char *, struct uuid *, uint32_t, u_long, void *) EFIABI_ATTR; efi_status (*rt_gethicnt)(uint32_t *) EFIABI_ATTR; efi_status (*rt_reset)(enum efi_reset, efi_status, u_long, efi_char *) EFIABI_ATTR; }; +#endif struct efi_systbl { struct efi_tblhdr st_hdr; #define EFI_SYSTBL_SIG 0x5453595320494249UL efi_char *st_fwvendor; uint32_t st_fwrev; uint32_t __pad; void *st_cin; void *st_cinif; void *st_cout; void *st_coutif; void *st_cerr; void *st_cerrif; uint64_t st_rt; void *st_bs; u_long st_entries; uint64_t st_cfgtbl; }; -#ifdef _KERNEL extern vm_paddr_t efi_systbl_phys; #endif /* _KERNEL */ #endif /* _SYS_EFI_H_ */ Index: projects/clang500-import/sys/sys/pctrie.h =================================================================== --- projects/clang500-import/sys/sys/pctrie.h (revision 321306) +++ projects/clang500-import/sys/sys/pctrie.h (revision 321307) @@ -1,123 +1,137 @@ /* * Copyright (c) 2013 EMC Corp. * Copyright (c) 2011 Jeffrey Roberson * Copyright (c) 2008 Mayur Shardul * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PCTRIE_H_ #define _SYS_PCTRIE_H_ #include <sys/_pctrie.h> #ifdef _KERNEL #define PCTRIE_DEFINE(name, type, field, allocfn, freefn) \ \ CTASSERT(sizeof(((struct type *)0)->field) == sizeof(uint64_t)); \ /* \ * XXX This assert protects flag bits, it does not enforce natural \ * alignment. 32bit architectures do not naturally align 64bit fields. \ */ \ CTASSERT((__offsetof(struct type, field) & (sizeof(uint32_t) - 1)) == 0); \ \ static __inline struct type * \ name##_PCTRIE_VAL2PTR(uint64_t *val) \ { \ \ if (val == NULL) \ return (NULL); \ return (struct type *) \ ((uintptr_t)val - __offsetof(struct type, field)); \ } \ \ static __inline uint64_t * \ name##_PCTRIE_PTR2VAL(struct type *ptr) \ { \ \ return &ptr->field; \ } \ \ static __inline int \ name##_PCTRIE_INSERT(struct pctrie *ptree, struct type *ptr) \ { \ \ return pctrie_insert(ptree, name##_PCTRIE_PTR2VAL(ptr), \ allocfn); \ } \ \ static __inline struct type * \ name##_PCTRIE_LOOKUP(struct pctrie *ptree, uint64_t key) \ { \ \ return name##_PCTRIE_VAL2PTR(pctrie_lookup(ptree, key)); \ } \ \ static __inline struct type * \ name##_PCTRIE_LOOKUP_LE(struct pctrie *ptree, uint64_t key) \ { \ \ return name##_PCTRIE_VAL2PTR(pctrie_lookup_le(ptree, key)); \ } \ \ static __inline __unused struct type * \ name##_PCTRIE_LOOKUP_GE(struct pctrie *ptree, uint64_t key) \ { \ \ return name##_PCTRIE_VAL2PTR(pctrie_lookup_ge(ptree, key)); \ } \ \ static __inline __unused void \ name##_PCTRIE_RECLAIM(struct pctrie *ptree) \ { \ \ pctrie_reclaim_allnodes(ptree, freefn); \ } \ \ static __inline void \ name##_PCTRIE_REMOVE(struct pctrie *ptree, uint64_t key) \ { \ \ pctrie_remove(ptree, key, freefn); \ } typedef void *(*pctrie_alloc_t)(struct pctrie *ptree); typedef void (*pctrie_free_t)(struct pctrie *ptree, void *node); int pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn); uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key); uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key); uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key); void pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn); void pctrie_remove(struct pctrie *ptree, uint64_t key, pctrie_free_t freefn); size_t pctrie_node_size(void); int pctrie_zone_init(void *mem, int size, int flags); +static __inline void +pctrie_init(struct pctrie *ptree) +{ + + ptree->pt_root = 0; +} + +static __inline boolean_t +pctrie_is_empty(struct pctrie *ptree) +{ + + return (ptree->pt_root == 0); +} + #endif /* _KERNEL */ #endif /* !_SYS_PCTRIE_H_ */
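/*
 * Editor's illustrative sketch (not part of this commit): how a consumer
 * instantiates PCTRIE_DEFINE().  The names (mynode, MYTRIE, the alloc/free
 * callbacks) are hypothetical; the keyed field must be a uint64_t, as the
 * CTASSERTs above enforce.
 */
#if 0	/* editor's sketch, not compiled */
struct mynode {
	uint64_t	mn_key;		/* index looked up in the trie */
	int		mn_payload;
};

static void *mytrie_alloc(struct pctrie *ptree);
static void mytrie_free(struct pctrie *ptree, void *node);

PCTRIE_DEFINE(MYTRIE, mynode, mn_key, mytrie_alloc, mytrie_free);

/*
 * The macro expands into MYTRIE_PCTRIE_INSERT(), MYTRIE_PCTRIE_LOOKUP(),
 * MYTRIE_PCTRIE_REMOVE() and friends, each converting between struct
 * mynode pointers and the embedded uint64_t key field.
 */
#endif

Index: projects/clang500-import/sys/vm/_vm_radix.h =================================================================== --- projects/clang500-import/sys/vm/_vm_radix.h (revision 321306) +++ projects/clang500-import/sys/vm/_vm_radix.h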
(revision 321307) @@ -1,51 +1,41 @@ /* * Copyright (c) 2013 EMC Corp. * Copyright (c) 2011 Jeffrey Roberson * Copyright (c) 2008 Mayur Shardul * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __VM_RADIX_H_ #define __VM_RADIX_H_ /* * Radix tree root. */ struct vm_radix { uintptr_t rt_root; }; -#ifdef _KERNEL - -static __inline boolean_t -vm_radix_is_empty(struct vm_radix *rtree) -{ - - return (rtree->rt_root == 0); -} - -#endif /* _KERNEL */ #endif /* !__VM_RADIX_H_ */ Index: projects/clang500-import/sys/vm/vm_object.c =================================================================== --- projects/clang500-import/sys/vm/vm_object.c (revision 321306) +++ projects/clang500-import/sys/vm/vm_object.c (revision 321307) @@ -1,2647 +1,2647 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory object module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include /* for curproc, pageproc */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int old_msync; SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, "Use old (insecure) msync behavior"); static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *clearobjflags, boolean_t *eio); static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags); static void vm_object_qcollapse(vm_object_t object); static void vm_object_vndeallocate(vm_object_t object); /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given * page of memory exists within exactly one object. * * An object is only deallocated when all "references" * are given up. Only one "reference" to a given * region of an object should be writeable. * * Associated with each object is a list of all resident * memory pages belonging to that object; this list is * maintained by the "vm_page" module, and locked by the object's * lock. * * Each object also records a "pager" routine which is * used to retrieve (and store) pages to the proper backing * storage. In addition, objects may be backed by other * objects from which they were virtual-copied. 
* * The only items within the object structure which are * modified after time of creation are: * reference count locked by object's lock * pager routine locked by object's lock * */ struct object_q vm_object_list; struct mtx vm_object_list_mtx; /* lock for object list and count */ struct vm_object kernel_object_store; struct vm_object kmem_object_store; static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats"); static long object_collapses; SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD, &object_collapses, 0, "VM object collapses"); static long object_bypasses; SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD, &object_bypasses, 0, "VM object bypasses"); static uma_zone_t obj_zone; static int vm_object_zinit(void *mem, int size, int flags); #ifdef INVARIANTS static void vm_object_zdtor(void *mem, int size, void *arg); static void vm_object_zdtor(void *mem, int size, void *arg) { vm_object_t object; object = (vm_object_t)mem; KASSERT(object->ref_count == 0, ("object %p ref_count = %d", object, object->ref_count)); KASSERT(TAILQ_EMPTY(&object->memq), ("object %p has resident pages in its memq", object)); KASSERT(vm_radix_is_empty(&object->rtree), ("object %p has resident pages in its trie", object)); #if VM_NRESERVLEVEL > 0 KASSERT(LIST_EMPTY(&object->rvq), ("object %p has reservations", object)); #endif KASSERT(object->paging_in_progress == 0, ("object %p paging_in_progress = %d", object, object->paging_in_progress)); KASSERT(object->resident_page_count == 0, ("object %p resident_page_count = %d", object, object->resident_page_count)); KASSERT(object->shadow_count == 0, ("object %p shadow_count = %d", object, object->shadow_count)); KASSERT(object->type == OBJT_DEAD, ("object %p has non-dead type %d", object, object->type)); } #endif static int vm_object_zinit(void *mem, int size, int flags) { vm_object_t object; object = (vm_object_t)mem; rw_init_flags(&object->lock, "vm object", RW_DUPOK | RW_NEW); /* These are true for any object that has been freed */ object->type = OBJT_DEAD; object->ref_count = 0; - object->rtree.rt_root = 0; + vm_radix_init(&object->rtree); object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); mtx_unlock(&vm_object_list_mtx); return (0); } static void _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) { TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); object->type = type; switch (type) { case OBJT_DEAD: panic("_vm_object_allocate: can't create OBJT_DEAD"); case OBJT_DEFAULT: case OBJT_SWAP: object->flags = OBJ_ONEMAPPING; break; case OBJT_DEVICE: case OBJT_SG: object->flags = OBJ_FICTITIOUS | OBJ_UNMANAGED; break; case OBJT_MGTDEVICE: object->flags = OBJ_FICTITIOUS; break; case OBJT_PHYS: object->flags = OBJ_UNMANAGED; break; case OBJT_VNODE: object->flags = 0; break; default: panic("_vm_object_allocate: type %d is undefined", type); } object->size = size; object->generation = 1; object->ref_count = 1; object->memattr = VM_MEMATTR_DEFAULT; object->cred = NULL; object->charge = 0; object->handle = NULL; object->backing_object = NULL; object->backing_object_offset = (vm_ooffset_t) 0; #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif umtx_shm_object_init(object); } /* * vm_object_init: * * Initialize the VM objects module. 
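 */

/*
 * Editor's note (illustrative, not part of this commit): vm_object_zinit()
 * above now calls vm_radix_init(&object->rtree) instead of zeroing rt_root
 * by hand.  By analogy with the pctrie_init() inline added to
 * sys/sys/pctrie.h earlier in this commit, the matching sys/vm/vm_radix.h
 * change (not shown in this hunk) presumably adds an inline like the
 * sketch below, while the old zone-setup function of that name is renamed
 * vm_radix_zinit(), as the hunk in vm_object_init() below shows.
 */
#if 0	/* editor's sketch, not compiled */
static __inline void
vm_radix_init(struct vm_radix *rtree)
{

	rtree->rt_root = 0;
}
#endif

/*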
*/ void vm_object_init(void) { TAILQ_INIT(&vm_object_list); mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF); rw_init(&kernel_object->lock, "kernel vm object"); _vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kernel_object); #if VM_NRESERVLEVEL > 0 kernel_object->flags |= OBJ_COLORED; kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); #endif rw_init(&kmem_object->lock, "kmem vm object"); _vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kmem_object); #if VM_NRESERVLEVEL > 0 kmem_object->flags |= OBJ_COLORED; kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); #endif /* * The lock portion of struct vm_object must be type stable due * to vm_pageout_fallback_object_lock locking a vm object * without holding any references to it. */ obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL, #ifdef INVARIANTS vm_object_zdtor, #else NULL, #endif vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); - vm_radix_init(); + vm_radix_zinit(); } void vm_object_clear_flag(vm_object_t object, u_short bits) { VM_OBJECT_ASSERT_WLOCKED(object); object->flags &= ~bits; } /* * Sets the default memory attribute for the specified object. Pages * that are allocated to this object are by default assigned this memory * attribute. * * Presently, this function must be called before any pages are allocated * to the object. In the future, this requirement may be relaxed for * "default" and "swap" objects. */ int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr) { VM_OBJECT_ASSERT_WLOCKED(object); switch (object->type) { case OBJT_DEFAULT: case OBJT_DEVICE: case OBJT_MGTDEVICE: case OBJT_PHYS: case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: if (!TAILQ_EMPTY(&object->memq)) return (KERN_FAILURE); break; case OBJT_DEAD: return (KERN_INVALID_ARGUMENT); default: panic("vm_object_set_memattr: object %p is of undefined type", object); } object->memattr = memattr; return (KERN_SUCCESS); } void vm_object_pip_add(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress += i; } void vm_object_pip_subtract(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress -= i; } void vm_object_pip_wakeup(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress--; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); } } void vm_object_pip_wakeupn(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); if (i) object->paging_in_progress -= i; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); } } void vm_object_pip_wait(vm_object_t object, char *waitid) { VM_OBJECT_ASSERT_WLOCKED(object); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; VM_OBJECT_SLEEP(object, object, PVM, waitid, 0); } } /* * vm_object_allocate: * * Returns a new object with the given size. */ vm_object_t vm_object_allocate(objtype_t type, vm_pindex_t size) { vm_object_t object; object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK); _vm_object_allocate(type, size, object); return (object); } /* * vm_object_reference: * * Gets another reference to the given object. Note: OBJ_DEAD * objects can be referenced during final cleaning. 
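 */

/*
 * Editor's illustrative sketch (not part of this commit): the usual
 * hold/use/release pairing seen by callers of vm_object_reference() and
 * vm_object_deallocate(); example_object_hold_use() is hypothetical.
 */
#if 0	/* editor's sketch, not compiled */
static void
example_object_hold_use(vm_object_t obj)
{

	vm_object_reference(obj);	/* take an extra reference */
	/* ... obj cannot be reclaimed while the reference is held ... */
	vm_object_deallocate(obj);	/* drop it; the last reference frees */
}
#endif

/*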
*/ void vm_object_reference(vm_object_t object) { if (object == NULL) return; VM_OBJECT_WLOCK(object); vm_object_reference_locked(object); VM_OBJECT_WUNLOCK(object); } /* * vm_object_reference_locked: * * Gets another reference to the given object. * * The object must be locked. */ void vm_object_reference_locked(vm_object_t object) { struct vnode *vp; VM_OBJECT_ASSERT_WLOCKED(object); object->ref_count++; if (object->type == OBJT_VNODE) { vp = object->handle; vref(vp); } } /* * Handle deallocating an object of type OBJT_VNODE. */ static void vm_object_vndeallocate(vm_object_t object) { struct vnode *vp = (struct vnode *) object->handle; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_VNODE, ("vm_object_vndeallocate: not a vnode object")); KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); #ifdef INVARIANTS if (object->ref_count == 0) { vn_printf(vp, "vm_object_vndeallocate "); panic("vm_object_vndeallocate: bad object reference count"); } #endif if (!umtx_shm_vnobj_persistent && object->ref_count == 1) umtx_shm_object_terminated(object); /* * The test for text of vp vnode does not need a bypass to * reach right VV_TEXT there, since it is obtained from * object->handle. */ if (object->ref_count > 1 || (vp->v_vflag & VV_TEXT) == 0) { object->ref_count--; VM_OBJECT_WUNLOCK(object); /* vrele may need the vnode lock. */ vrele(vp); } else { vhold(vp); VM_OBJECT_WUNLOCK(object); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vdrop(vp); VM_OBJECT_WLOCK(object); object->ref_count--; if (object->type == OBJT_DEAD) { VM_OBJECT_WUNLOCK(object); VOP_UNLOCK(vp, 0); } else { if (object->ref_count == 0) VOP_UNSET_TEXT(vp); VM_OBJECT_WUNLOCK(object); vput(vp); } } } /* * vm_object_deallocate: * * Release a reference to the specified object, * gained either through a vm_object_allocate * or a vm_object_reference call. When all references * are gone, storage associated with this object * may be relinquished. * * No object may be locked. */ void vm_object_deallocate(vm_object_t object) { vm_object_t temp; struct vnode *vp; while (object != NULL) { VM_OBJECT_WLOCK(object); if (object->type == OBJT_VNODE) { vm_object_vndeallocate(object); return; } KASSERT(object->ref_count != 0, ("vm_object_deallocate: object deallocated too many times: %d", object->type)); /* * If the reference count goes to 0 we start calling * vm_object_terminate() on the object chain. * A ref count of 1 may be a special case depending on the * shadow count being 0 or 1. 
*/ object->ref_count--; if (object->ref_count > 1) { VM_OBJECT_WUNLOCK(object); return; } else if (object->ref_count == 1) { if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0) { vp = object->un_pager.swp.swp_tmpfs; vhold(vp); VM_OBJECT_WUNLOCK(object); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEAD || object->ref_count != 1) { VM_OBJECT_WUNLOCK(object); VOP_UNLOCK(vp, 0); vdrop(vp); return; } if ((object->flags & OBJ_TMPFS) != 0) VOP_UNSET_TEXT(vp); VOP_UNLOCK(vp, 0); vdrop(vp); } if (object->shadow_count == 0 && object->handle == NULL && (object->type == OBJT_DEFAULT || (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS_NODE) == 0))) { vm_object_set_flag(object, OBJ_ONEMAPPING); } else if ((object->shadow_count == 1) && (object->handle == NULL) && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_t robject; robject = LIST_FIRST(&object->shadow_head); KASSERT(robject != NULL, ("vm_object_deallocate: ref_count: %d, shadow_count: %d", object->ref_count, object->shadow_count)); KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0, ("shadowed tmpfs v_object %p", object)); if (!VM_OBJECT_TRYWLOCK(robject)) { /* * Avoid a potential deadlock. */ object->ref_count++; VM_OBJECT_WUNLOCK(object); /* * More likely than not the thread * holding robject's lock has lower * priority than the current thread. * Let the lower priority thread run. */ pause("vmo_de", 1); continue; } /* * Collapse object into its shadow unless its * shadow is dead. In that case, object will * be deallocated by the thread that is * deallocating its shadow. */ if ((robject->flags & OBJ_DEAD) == 0 && (robject->handle == NULL) && (robject->type == OBJT_DEFAULT || robject->type == OBJT_SWAP)) { robject->ref_count++; retry: if (robject->paging_in_progress) { VM_OBJECT_WUNLOCK(object); vm_object_pip_wait(robject, "objde1"); temp = robject->backing_object; if (object == temp) { VM_OBJECT_WLOCK(object); goto retry; } } else if (object->paging_in_progress) { VM_OBJECT_WUNLOCK(robject); object->flags |= OBJ_PIPWNT; VM_OBJECT_SLEEP(object, object, PDROP | PVM, "objde2", 0); VM_OBJECT_WLOCK(robject); temp = robject->backing_object; if (object == temp) { VM_OBJECT_WLOCK(object); goto retry; } } else VM_OBJECT_WUNLOCK(object); if (robject->ref_count == 1) { robject->ref_count--; object = robject; goto doterm; } object = robject; vm_object_collapse(object); VM_OBJECT_WUNLOCK(object); continue; } VM_OBJECT_WUNLOCK(robject); } VM_OBJECT_WUNLOCK(object); return; } doterm: umtx_shm_object_terminated(object); temp = object->backing_object; if (temp != NULL) { KASSERT((object->flags & OBJ_TMPFS_NODE) == 0, ("shadowed tmpfs v_object 2 %p", object)); VM_OBJECT_WLOCK(temp); LIST_REMOVE(object, shadow_list); temp->shadow_count--; VM_OBJECT_WUNLOCK(temp); object->backing_object = NULL; } /* * Don't double-terminate, we could be in a termination * recursion due to the terminate having to sync data * to disk. */ if ((object->flags & OBJ_DEAD) == 0) vm_object_terminate(object); else VM_OBJECT_WUNLOCK(object); object = temp; } } /* * vm_object_destroy removes the object from the global object list * and frees the space for the object. */ void vm_object_destroy(vm_object_t object) { /* * Release the allocation charge. */ if (object->cred != NULL) { swap_release_by_cred(object->charge, object->cred); object->charge = 0; crfree(object->cred); object->cred = NULL; } /* * Free the space for the object. 
*/ uma_zfree(obj_zone, object); } /* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * * The object must be locked. * This routine may block. */ void vm_object_terminate(vm_object_t object) { vm_page_t p, p_next; VM_OBJECT_ASSERT_WLOCKED(object); /* * Make sure no one uses us. */ vm_object_set_flag(object, OBJ_DEAD); /* * wait for the pageout daemon to be done with the object */ vm_object_pip_wait(object, "objtrm"); KASSERT(!object->paging_in_progress, ("vm_object_terminate: pageout in progress")); /* * Clean and free the pages, as appropriate. All references to the * object are gone, so we don't need to lock it. */ if (object->type == OBJT_VNODE) { struct vnode *vp = (struct vnode *)object->handle; /* * Clean pages and flush buffers. */ vm_object_page_clean(object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(object); vinvalbuf(vp, V_SAVE, 0, 0); BO_LOCK(&vp->v_bufobj); vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); VM_OBJECT_WLOCK(object); } KASSERT(object->ref_count == 0, ("vm_object_terminate: object with references, ref_count=%d", object->ref_count)); /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them * from the object. Rather than incrementally removing each page from * the object, the page and object are reset to an empty state. */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); vm_page_lock(p); /* * Optimize the page's removal from the object by resetting * its "object" field. Specifically, if the page is not * wired, then the effect of this assignment is that * vm_page_free()'s call to vm_page_remove() will return * immediately without modifying the page or the object. */ p->object = NULL; if (p->wire_count == 0) { vm_page_free(p); VM_CNT_INC(v_pfree); } vm_page_unlock(p); } /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were * modified by the preceding loop. */ if (object->resident_page_count != 0) { vm_radix_reclaim_allnodes(&object->rtree); TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) vdrop(object->handle); } #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT || object->type == OBJT_SWAP, ("%s: non-swap obj %p has cred", __func__, object)); /* * Let the pager know object is dead. */ vm_pager_deallocate(object); VM_OBJECT_WUNLOCK(object); vm_object_destroy(object); } /* * Make the page read-only so that we can clear the object flags. However, if * this is a nosync mmap then the object is likely to stay dirty so do not * mess with the page and do not clear the object flags. Returns TRUE if the * page should be flushed, and FALSE otherwise. */ static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags) { /* * If we have been asked to skip nosync pages and this is a * nosync page, skip it. Note that the object flags were not * cleared in this case so we do not have to set them. */ if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) { *clearobjflags = FALSE; return (FALSE); } else { pmap_remove_write(p); return (p->dirty != 0); } } /* * vm_object_page_clean * * Clean all dirty pages in the specified range of object. Leaves page * on whatever queue it is currently on.
If NOSYNC is set then do not * write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC), * leaving the object dirty. * * When stuffing pages asynchronously, allow clustering. XXX we need a * synchronous clustering mode implementation. * * Odd semantics: if start == end, we clean everything. * * The object must be locked. * * Returns FALSE if some page from the range was not written, as * reported by the pager, and TRUE otherwise. */ boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, int flags) { vm_page_t np, p; vm_pindex_t pi, tend, tstart; int curgeneration, n, pagerflags; boolean_t clearobjflags, eio, res; VM_OBJECT_ASSERT_WLOCKED(object); /* * The OBJ_MIGHTBEDIRTY flag is only set for OBJT_VNODE * objects. The check below prevents the function from * operating on non-vnode objects. */ if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 || object->resident_page_count == 0) return (TRUE); pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK; pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0; tstart = OFF_TO_IDX(start); tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK); clearobjflags = tstart == 0 && tend >= object->size; res = TRUE; rescan: curgeneration = object->generation; for (p = vm_page_find_least(object, tstart); p != NULL; p = np) { pi = p->pindex; if (pi >= tend) break; np = TAILQ_NEXT(p, listq); if (p->valid == 0) continue; if (vm_page_sleep_if_busy(p, "vpcwai")) { if (object->generation != curgeneration) { if ((flags & OBJPC_SYNC) != 0) goto rescan; else clearobjflags = FALSE; } np = vm_page_find_least(object, pi); continue; } if (!vm_object_page_remove_write(p, flags, &clearobjflags)) continue; n = vm_object_page_collect_flush(object, p, pagerflags, flags, &clearobjflags, &eio); if (eio) { res = FALSE; clearobjflags = FALSE; } if (object->generation != curgeneration) { if ((flags & OBJPC_SYNC) != 0) goto rescan; else clearobjflags = FALSE; } /* * If the VOP_PUTPAGES() did a truncated write, so * that even the first page of the run is not fully * written, vm_pageout_flush() returns 0 as the run * length. Since the condition that caused truncated * write may be permanent, e.g. exhausted free space, * accepting n == 0 would cause an infinite loop. * * Forwarding the iterator leaves the unwritten page * behind, but there is not much we can do there if * filesystem refuses to write it. */ if (n == 0) { n = 1; clearobjflags = FALSE; } np = vm_page_find_least(object, pi + n); } #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? 
MNT_WAIT : 0); #endif if (clearobjflags) vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY); return (res); } static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *clearobjflags, boolean_t *eio) { vm_page_t ma[vm_pageout_page_count], p_first, tp; int count, i, mreq, runlen; vm_page_lock_assert(p, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(object); count = 1; mreq = 0; for (tp = p; count < vm_pageout_page_count; count++) { tp = vm_page_next(tp); if (tp == NULL || vm_page_busied(tp)) break; if (!vm_object_page_remove_write(tp, flags, clearobjflags)) break; } for (p_first = p; count < vm_pageout_page_count; count++) { tp = vm_page_prev(p_first); if (tp == NULL || vm_page_busied(tp)) break; if (!vm_object_page_remove_write(tp, flags, clearobjflags)) break; p_first = tp; mreq++; } for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++) ma[i] = tp; vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio); return (runlen); } /* * Note that there is absolutely no sense in writing out * anonymous objects, so we track down the vnode object * to write out. * We invalidate (remove) all pages from the address space * for semantic correctness. * * If the backing object is a device object with unmanaged pages, then any * mappings to the specified range of pages must be removed before this * function is called. * * Note: certain anonymous maps, such as MAP_NOSYNC maps, * may start out with a NULL object. */ boolean_t vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, boolean_t syncio, boolean_t invalidate) { vm_object_t backing_object; struct vnode *vp; struct mount *mp; int error, flags, fsync_after; boolean_t res; if (object == NULL) return (TRUE); res = TRUE; error = 0; VM_OBJECT_WLOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_WLOCK(backing_object); offset += object->backing_object_offset; VM_OBJECT_WUNLOCK(object); object = backing_object; if (object->size < OFF_TO_IDX(offset + size)) size = IDX_TO_OFF(object->size) - offset; } /* * Flush pages if writing is allowed, invalidate them * if invalidation requested. Pages undergoing I/O * will be ignored by vm_object_page_remove(). * * We cannot lock the vnode and then wait for paging * to complete without deadlocking against vm_fault. * Instead we simply call vm_object_page_remove() and * allow it to block internally on a page-by-page * basis when it encounters pages undergoing async * I/O. */ if (object->type == OBJT_VNODE && (object->flags & OBJ_MIGHTBEDIRTY) != 0) { vp = object->handle; VM_OBJECT_WUNLOCK(object); (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (syncio && !invalidate && offset == 0 && atop(size) == object->size) { /* * If syncing the whole mapping of the file, * it is faster to schedule all the writes in * async mode, also allowing the clustering, * and then wait for i/o to complete. */ flags = 0; fsync_after = TRUE; } else { flags = (syncio || invalidate) ? OBJPC_SYNC : 0; flags |= invalidate ? 
(OBJPC_SYNC | OBJPC_INVAL) : 0; fsync_after = FALSE; } VM_OBJECT_WLOCK(object); res = vm_object_page_clean(object, offset, offset + size, flags); VM_OBJECT_WUNLOCK(object); if (fsync_after) error = VOP_FSYNC(vp, MNT_WAIT, curthread); VOP_UNLOCK(vp, 0); vn_finished_write(mp); if (error != 0) res = FALSE; VM_OBJECT_WLOCK(object); } if ((object->type == OBJT_VNODE || object->type == OBJT_DEVICE) && invalidate) { if (object->type == OBJT_DEVICE) /* * The option OBJPR_NOTMAPPED must be passed here * because vm_object_page_remove() cannot remove * unmanaged mappings. */ flags = OBJPR_NOTMAPPED; else if (old_msync) flags = 0; else flags = OBJPR_CLEANONLY; vm_object_page_remove(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + size + PAGE_MASK), flags); } VM_OBJECT_WUNLOCK(object); return (res); } /* * Determine whether the given advice can be applied to the object. Advice is * not applied to unmanaged pages since they never belong to page queues, and * since MADV_FREE is destructive, it can apply only to anonymous pages that * have been mapped at most once. */ static bool vm_object_advice_applies(vm_object_t object, int advice) { if ((object->flags & OBJ_UNMANAGED) != 0) return (false); if (advice != MADV_FREE) return (true); return ((object->type == OBJT_DEFAULT || object->type == OBJT_SWAP) && (object->flags & OBJ_ONEMAPPING) != 0); } static void vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex, vm_size_t size) { if (advice == MADV_FREE && object->type == OBJT_SWAP) swap_pager_freespace(object, pindex, size); } /* * vm_object_madvise: * * Implements the madvise function at the object/page level. * * MADV_WILLNEED (any object) * * Activate the specified pages if they are resident. * * MADV_DONTNEED (any object) * * Deactivate the specified pages if they are resident. * * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects, * OBJ_ONEMAPPING only) * * Deactivate and clean the specified pages if they are * resident. This permits the process to reuse the pages * without faulting or the kernel to reclaim the pages * without I/O. */ void vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end, int advice) { vm_pindex_t tpindex; vm_object_t backing_object, tobject; vm_page_t m, tm; if (object == NULL) return; relookup: VM_OBJECT_WLOCK(object); if (!vm_object_advice_applies(object, advice)) { VM_OBJECT_WUNLOCK(object); return; } for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) { tobject = object; /* * If the next page isn't resident in the top-level object, we * need to search the shadow chain. When applying MADV_FREE, we * take care to release any swap space used to store * non-resident pages. */ if (m == NULL || pindex < m->pindex) { /* * Optimize a common case: if the top-level object has * no backing object, we can skip over the non-resident * range in constant time. */ if (object->backing_object == NULL) { tpindex = (m != NULL && m->pindex < end) ? m->pindex : end; vm_object_madvise_freespace(object, advice, pindex, tpindex - pindex); if ((pindex = tpindex) == end) break; goto next_page; } tpindex = pindex; do { vm_object_madvise_freespace(tobject, advice, tpindex, 1); /* * Prepare to search the next object in the * chain. 
*/ backing_object = tobject->backing_object; if (backing_object == NULL) goto next_pindex; VM_OBJECT_WLOCK(backing_object); tpindex += OFF_TO_IDX(tobject->backing_object_offset); if (tobject != object) VM_OBJECT_WUNLOCK(tobject); tobject = backing_object; if (!vm_object_advice_applies(tobject, advice)) goto next_pindex; } while ((tm = vm_page_lookup(tobject, tpindex)) == NULL); } else { next_page: tm = m; m = TAILQ_NEXT(m, listq); } /* * If the page is not in a normal state, skip it. */ if (tm->valid != VM_PAGE_BITS_ALL) goto next_pindex; vm_page_lock(tm); if (tm->hold_count != 0 || tm->wire_count != 0) { vm_page_unlock(tm); goto next_pindex; } KASSERT((tm->flags & PG_FICTITIOUS) == 0, ("vm_object_madvise: page %p is fictitious", tm)); KASSERT((tm->oflags & VPO_UNMANAGED) == 0, ("vm_object_madvise: page %p is not managed", tm)); if (vm_page_busied(tm)) { if (object != tobject) VM_OBJECT_WUNLOCK(tobject); VM_OBJECT_WUNLOCK(object); if (advice == MADV_WILLNEED) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(tm, PGA_REFERENCED); } vm_page_busy_sleep(tm, "madvpo", false); goto relookup; } vm_page_advise(tm, advice); vm_page_unlock(tm); vm_object_madvise_freespace(tobject, advice, tm->pindex, 1); next_pindex: if (tobject != object) VM_OBJECT_WUNLOCK(tobject); } VM_OBJECT_WUNLOCK(object); } /* * vm_object_shadow: * * Create a new object which is backed by the * specified existing object range. The source * object reference is deallocated. * * The new object and offset into that object * are returned in the source parameters. */ void vm_object_shadow( vm_object_t *object, /* IN/OUT */ vm_ooffset_t *offset, /* IN/OUT */ vm_size_t length) { vm_object_t source; vm_object_t result; source = *object; /* * Don't create the new object if the old object isn't shared. */ if (source != NULL) { VM_OBJECT_WLOCK(source); if (source->ref_count == 1 && source->handle == NULL && (source->type == OBJT_DEFAULT || source->type == OBJT_SWAP)) { VM_OBJECT_WUNLOCK(source); return; } VM_OBJECT_WUNLOCK(source); } /* * Allocate a new object with the given length. */ result = vm_object_allocate(OBJT_DEFAULT, atop(length)); /* * The new object shadows the source object, adding a reference to it. * Our caller changes his reference to point to the new object, * removing a reference to the source object. Net result: no change * of reference count. * * Try to optimize the result object's page color when shadowing * in order to maintain page coloring consistency in the combined * shadowed object. */ result->backing_object = source; /* * Store the offset into the source object, and fix up the offset into * the new object. */ result->backing_object_offset = *offset; if (source != NULL) { VM_OBJECT_WLOCK(source); LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list); source->shadow_count++; #if VM_NRESERVLEVEL > 0 result->flags |= source->flags & OBJ_COLORED; result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER - 1)) - 1); #endif VM_OBJECT_WUNLOCK(source); } /* * Return the new things */ *offset = 0; *object = result; } /* * vm_object_split: * * Split the pages in a map entry into a new object. This affords * easier removal of unused pages, and keeps object inheritance from * being a negative impact on memory usage. 
*/ void vm_object_split(vm_map_entry_t entry) { vm_page_t m, m_next; vm_object_t orig_object, new_object, source; vm_pindex_t idx, offidxstart; vm_size_t size; orig_object = entry->object.vm_object; if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) return; if (orig_object->ref_count <= 1) return; VM_OBJECT_WUNLOCK(orig_object); offidxstart = OFF_TO_IDX(entry->offset); size = atop(entry->end - entry->start); /* * If swap_pager_copy() is later called, it will convert new_object * into a swap object. */ new_object = vm_object_allocate(OBJT_DEFAULT, size); /* * At this point, the new object is still private, so the order in * which the original and new objects are locked does not matter. */ VM_OBJECT_WLOCK(new_object); VM_OBJECT_WLOCK(orig_object); source = orig_object->backing_object; if (source != NULL) { VM_OBJECT_WLOCK(source); if ((source->flags & OBJ_DEAD) != 0) { VM_OBJECT_WUNLOCK(source); VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); vm_object_deallocate(new_object); VM_OBJECT_WLOCK(orig_object); return; } LIST_INSERT_HEAD(&source->shadow_head, new_object, shadow_list); source->shadow_count++; vm_object_reference_locked(source); /* for new_object */ vm_object_clear_flag(source, OBJ_ONEMAPPING); VM_OBJECT_WUNLOCK(source); new_object->backing_object_offset = orig_object->backing_object_offset + entry->offset; new_object->backing_object = source; } if (orig_object->cred != NULL) { new_object->cred = orig_object->cred; crhold(orig_object->cred); new_object->charge = ptoa(size); KASSERT(orig_object->charge >= ptoa(size), ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } retry: m = vm_page_find_least(orig_object, offidxstart); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); /* * We must wait for pending I/O to complete before we can * rename the page. * * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. */ if (vm_page_busied(m)) { VM_OBJECT_WUNLOCK(new_object); vm_page_lock(m); VM_OBJECT_WUNLOCK(orig_object); vm_page_busy_sleep(m, "spltwt", false); VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } /* vm_page_rename() will dirty the page. */ if (vm_page_rename(m, new_object, idx)) { VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); VM_WAIT; VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } #if VM_NRESERVLEVEL > 0 /* * If some of the reservation's allocated pages remain with * the original object, then transferring the reservation to * the new object is neither particularly beneficial nor * particularly harmful as compared to leaving the reservation * with the original object. If, however, all of the * reservation's allocated pages are transferred to the new * object, then transferring the reservation is typically * beneficial. Determining which of these two cases applies * would be more costly than unconditionally renaming the * reservation. */ vm_reserv_rename(m, new_object, orig_object, offidxstart); #endif if (orig_object->type == OBJT_SWAP) vm_page_xbusy(m); } if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's * and new_object's locks are released and reacquired. 
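 * The exclusive busy taken on each moved page above exists for this
 * window: while the locks are dropped, no other thread may touch a
 * page whose swap metadata is still in transit, and the busy state
 * is cleared right after the copy finishes.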
*/ swap_pager_copy(orig_object, new_object, offidxstart, 0); TAILQ_FOREACH(m, &new_object->memq, listq) vm_page_xunbusy(m); } VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); entry->object.vm_object = new_object; entry->offset = 0LL; vm_object_deallocate(orig_object); VM_OBJECT_WLOCK(new_object); } #define OBSC_COLLAPSE_NOWAIT 0x0002 #define OBSC_COLLAPSE_WAIT 0x0004 static vm_page_t vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next, int op) { vm_object_t backing_object; VM_OBJECT_ASSERT_WLOCKED(object); backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(backing_object); KASSERT(p == NULL || vm_page_busied(p), ("unbusy page %p", p)); KASSERT(p == NULL || p->object == object || p->object == backing_object, ("invalid ownership %p %p %p", p, object, backing_object)); if ((op & OBSC_COLLAPSE_NOWAIT) != 0) return (next); if (p != NULL) vm_page_lock(p); VM_OBJECT_WUNLOCK(object); VM_OBJECT_WUNLOCK(backing_object); if (p == NULL) VM_WAIT; else vm_page_busy_sleep(p, "vmocol", false); VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); return (TAILQ_FIRST(&backing_object->memq)); } static bool vm_object_scan_all_shadowed(vm_object_t object) { vm_object_t backing_object; vm_page_t p, pp; vm_pindex_t backing_offset_index, new_pindex, pi, ps; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; if (backing_object->type != OBJT_DEFAULT && backing_object->type != OBJT_SWAP) return (false); pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset); p = vm_page_find_least(backing_object, pi); ps = swap_pager_find_least(backing_object, pi); /* * Only check pages inside the parent object's range and * inside the parent object's mapping of the backing object. */ for (;; pi++) { if (p != NULL && p->pindex < pi) p = TAILQ_NEXT(p, listq); if (ps < pi) ps = swap_pager_find_least(backing_object, pi); if (p == NULL && ps >= backing_object->size) break; else if (p == NULL) pi = ps; else pi = MIN(p->pindex, ps); new_pindex = pi - backing_offset_index; if (new_pindex >= object->size) break; /* * See if the parent has the page or if the parent's object * pager has the page. If the parent has the page but the page * is not valid, the parent's object pager must have the page. * * If this fails, the parent does not completely shadow the * object and we might as well give up now. 
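 * Concretely, for every index pi populated in the backing object
 * (resident page or swap block), the parent must have, with
 * new_pindex = pi - backing_offset_index, either a resident page
 * with nonzero valid bits or a pager-backed copy reported by
 * vm_pager_has_page().  A single miss proves incomplete shadowing,
 * and the scan gives up immediately.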
*/ pp = vm_page_lookup(object, new_pindex); if ((pp == NULL || pp->valid == 0) && !vm_pager_has_page(object, new_pindex, NULL, NULL)) return (false); } return (true); } static bool vm_object_collapse_scan(vm_object_t object, int op) { vm_object_t backing_object; vm_page_t next, p, pp; vm_pindex_t backing_offset_index, new_pindex; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); /* * Initial conditions */ if ((op & OBSC_COLLAPSE_WAIT) != 0) vm_object_set_flag(backing_object, OBJ_DEAD); /* * Our scan */ for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); new_pindex = p->pindex - backing_offset_index; /* * Check for busy page */ if (vm_page_busied(p)) { next = vm_object_collapse_scan_wait(object, p, next, op); continue; } KASSERT(p->object == backing_object, ("vm_object_collapse_scan: object mismatch")); if (p->pindex < backing_offset_index || new_pindex >= object->size) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); /* * Page is out of the parent object's range, we can * simply destroy it. */ vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); vm_page_unlock(p); continue; } pp = vm_page_lookup(object, new_pindex); if (pp != NULL && vm_page_busied(pp)) { /* * The page in the parent is busy and possibly not * (yet) valid. Until its state is finalized by the * busy bit owner, we can't tell whether it shadows the * original page. Therefore, we must either skip it * and the original (backing_object) page or wait for * its state to be finalized. * * This is due to a race with vm_fault() where we must * unbusy the original (backing_obj) page before we can * (re)lock the parent. Hence we can get here. */ next = vm_object_collapse_scan_wait(object, pp, next, op); continue; } KASSERT(pp == NULL || pp->valid != 0, ("unbusy invalid page %p", pp)); if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL, NULL)) { /* * The page already exists in the parent OR swap exists * for this location in the parent. Leave the parent's * page alone. Destroy the original page from the * backing object. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); vm_page_unlock(p); continue; } /* * Page does not exist in parent, rename the page from the * backing object to the main object. * * If the page was mapped to a process, it can remain mapped * through the rename. vm_page_rename() will dirty the page. */ if (vm_page_rename(p, object, new_pindex)) { next = vm_object_collapse_scan_wait(object, NULL, next, op); continue; } /* Use the old pindex to free the right page. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, new_pindex + backing_offset_index, 1); #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(p, object, backing_object, backing_offset_index); #endif } return (true); } /* * this version of collapse allows the operation to occur earlier and * when paging_in_progress is true for an object... This is not a complete * operation, but should plug 99.9% of the rest of the leaks. 
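 *
 * Because the scan below runs with OBSC_COLLAPSE_NOWAIT, busy pages
 * are skipped instead of slept on, which is what makes it safe to
 * run while paging is in progress; anything left behind is handled
 * by a later full vm_object_collapse().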
*/ static void vm_object_qcollapse(vm_object_t object) { vm_object_t backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(backing_object); if (backing_object->ref_count != 1) return; vm_object_collapse_scan(object, OBSC_COLLAPSE_NOWAIT); } /* * vm_object_collapse: * * Collapse an object with the object backing it. * Pages in the backing object are moved into the * parent, and the backing object is deallocated. */ void vm_object_collapse(vm_object_t object) { vm_object_t backing_object, new_backing_object; VM_OBJECT_ASSERT_WLOCKED(object); while (TRUE) { /* * Verify that the conditions are right for collapse: * * The object exists and the backing object exists. */ if ((backing_object = object->backing_object) == NULL) break; /* * we check the backing object first, because it is most likely * not collapsable. */ VM_OBJECT_WLOCK(backing_object); if (backing_object->handle != NULL || (backing_object->type != OBJT_DEFAULT && backing_object->type != OBJT_SWAP) || (backing_object->flags & OBJ_DEAD) || object->handle != NULL || (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP) || (object->flags & OBJ_DEAD)) { VM_OBJECT_WUNLOCK(backing_object); break; } if (object->paging_in_progress != 0 || backing_object->paging_in_progress != 0) { vm_object_qcollapse(object); VM_OBJECT_WUNLOCK(backing_object); break; } /* * We know that we can either collapse the backing object (if * the parent is the only reference to it) or (perhaps) have * the parent bypass the object if the parent happens to shadow * all the resident pages in the entire backing object. * * This is ignoring pager-backed pages such as swap pages. * vm_object_collapse_scan fails the shadowing test in this * case. */ if (backing_object->ref_count == 1) { vm_object_pip_add(object, 1); vm_object_pip_add(backing_object, 1); /* * If there is exactly one reference to the backing * object, we can collapse it into the parent. */ vm_object_collapse_scan(object, OBSC_COLLAPSE_WAIT); #if VM_NRESERVLEVEL > 0 /* * Break any reservations from backing_object. */ if (__predict_false(!LIST_EMPTY(&backing_object->rvq))) vm_reserv_break_all(backing_object); #endif /* * Move the pager from backing_object to object. */ if (backing_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case * the backing_object's and object's locks are * released and reacquired. * Since swap_pager_copy() is being asked to * destroy the source, it will change the * backing_object's type to OBJT_DEFAULT. */ swap_pager_copy( backing_object, object, OFF_TO_IDX(object->backing_object_offset), TRUE); } /* * Object now shadows whatever backing_object did. * Note that the reference to * backing_object->backing_object moves from within * backing_object to within object. */ LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; if (backing_object->backing_object) { VM_OBJECT_WLOCK(backing_object->backing_object); LIST_REMOVE(backing_object, shadow_list); LIST_INSERT_HEAD( &backing_object->backing_object->shadow_head, object, shadow_list); /* * The shadow_count has not changed. */ VM_OBJECT_WUNLOCK(backing_object->backing_object); } object->backing_object = backing_object->backing_object; object->backing_object_offset += backing_object->backing_object_offset; /* * Discard backing_object. * * Since the backing object has no pages, no pager left, * and no object references within it, all that is * necessary is to dispose of it. 
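 * The scan already marked backing_object OBJ_DEAD; switching its
 * type to OBJT_DEAD and zeroing ref_count here makes the corpse
 * recognizable to any late lookup before vm_object_destroy()
 * releases its bookkeeping.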
*/ KASSERT(backing_object->ref_count == 1, ( "backing_object %p was somehow re-referenced during collapse!", backing_object)); vm_object_pip_wakeup(backing_object); backing_object->type = OBJT_DEAD; backing_object->ref_count = 0; VM_OBJECT_WUNLOCK(backing_object); vm_object_destroy(backing_object); vm_object_pip_wakeup(object); object_collapses++; } else { /* * If we do not entirely shadow the backing object, * there is nothing we can do so we give up. */ if (object->resident_page_count != object->size && !vm_object_scan_all_shadowed(object)) { VM_OBJECT_WUNLOCK(backing_object); break; } /* * Make the parent shadow the next object in the * chain. Deallocating backing_object will not remove * it, since its reference count is at least 2. */ LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; new_backing_object = backing_object->backing_object; if ((object->backing_object = new_backing_object) != NULL) { VM_OBJECT_WLOCK(new_backing_object); LIST_INSERT_HEAD( &new_backing_object->shadow_head, object, shadow_list ); new_backing_object->shadow_count++; vm_object_reference_locked(new_backing_object); VM_OBJECT_WUNLOCK(new_backing_object); object->backing_object_offset += backing_object->backing_object_offset; } /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish. */ backing_object->ref_count--; VM_OBJECT_WUNLOCK(backing_object); object_bypasses++; } /* * Try again with this object's new backing object. */ } } /* * vm_object_page_remove: * * For the given object, either frees or invalidates each of the * specified pages. In general, a page is freed. However, if a page is * wired for any reason other than the existence of a managed, wired * mapping, then it may be invalidated but not removed from the object. * Pages are specified by the given range ["start", "end") and the option * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range * extends from "start" to the end of the object. If the option * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the * specified range are affected. If the option OBJPR_NOTMAPPED is * specified, then the pages within the specified range must have no * mappings. Otherwise, if this option is not specified, any mappings to * the specified pages are removed before the pages are freed or * invalidated. * * In general, this operation should only be performed on objects that * contain managed pages. There are, however, two exceptions. First, it * is performed on the kernel and kmem objects by vm_map_entry_delete(). * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device- * backed pages. In both of these cases, the option OBJPR_CLEANONLY must * not be specified and the option OBJPR_NOTMAPPED must be specified. * * The object must be locked. */ void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { vm_page_t p, next; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, ("vm_object_page_remove: illegal options for object %p", object)); if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); again: p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. 
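 * For example, with resident pages at pindex 3, 7 and 9, start = 4
 * yields the page at pindex 7 and start = 10 yields NULL; the loop
 * below then walks resident pages in pindex order until "end" (or
 * until the object ends, when "end" is zero).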
*/ for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* * If the page is wired for any reason besides the existence * of managed, wired mappings, then it cannot be freed. For * example, fictitious pages, which represent device memory, * are inherently wired and cannot be freed. They can, * however, be invalidated if the option OBJPR_CLEANONLY is * not specified. */ vm_page_lock(p); if (vm_page_xbusied(p)) { VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmopax", true); VM_OBJECT_WLOCK(object); goto again; } if (p->wire_count != 0) { if ((options & OBJPR_NOTMAPPED) == 0) pmap_remove_all(p); if ((options & OBJPR_CLEANONLY) == 0) { p->valid = 0; vm_page_undirty(p); } goto next; } if (vm_page_busied(p)) { VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmopar", false); VM_OBJECT_WLOCK(object); goto again; } KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { if ((options & OBJPR_NOTMAPPED) == 0) pmap_remove_write(p); if (p->dirty) goto next; } if ((options & OBJPR_NOTMAPPED) == 0) pmap_remove_all(p); vm_page_free(p); next: vm_page_unlock(p); } vm_object_pip_wakeup(object); } /* * vm_object_page_noreuse: * * For the given object, attempt to move the specified pages to * the head of the inactive queue. This bypasses regular LRU * operation and allows the pages to be reused quickly under memory * pressure. If a page is wired for any reason, then it will not * be queued. Pages are specified by the range ["start", "end"). * As a special case, if "end" is zero, then the range extends from * "start" to the end of the object. * * This operation should only be performed on objects that * contain non-fictitious, managed pages. * * The object must be locked. */ void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { struct mtx *mtx, *new_mtx; vm_page_t p, next; VM_OBJECT_ASSERT_LOCKED(object); KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0, ("vm_object_page_noreuse: illegal object %p", object)); if (object->resident_page_count == 0) return; p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ mtx = NULL; for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* * Avoid releasing and reacquiring the same page lock. */ new_mtx = vm_page_lockptr(p); if (mtx != new_mtx) { if (mtx != NULL) mtx_unlock(mtx); mtx = new_mtx; mtx_lock(mtx); } vm_page_deactivate_noreuse(p); } if (mtx != NULL) mtx_unlock(mtx); } /* * Populate the specified range of the object with valid pages. Returns * TRUE if the range is successfully populated and FALSE otherwise. * * Note: This function should be optimized to pass a larger array of * pages to vm_pager_get_pages() before it is applied to a non- * OBJT_DEVICE object. * * The object must be locked. */ boolean_t vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m; vm_pindex_t pindex; int rv; VM_OBJECT_ASSERT_WLOCKED(object); for (pindex = start; pindex < end; pindex++) { m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); break; } } /* * Keep "m" busy because a subsequent iteration may unlock * the object. 
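 * vm_page_grab() returned "m" exclusive busy and it stays that way
 * until the cleanup pass below; a pager read for a later index may
 * drop the object lock, and the busy state is what keeps an earlier
 * page from being freed or reused in the meantime.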
*/ } if (pindex > start) { m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { vm_page_xunbusy(m); m = TAILQ_NEXT(m, listq); } } return (pindex == end); } /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining * regions of memory into a single object. * * returns TRUE if objects were combined. * * NOTE: Only works at the moment if the second object is NULL - * if it's not, which object do we lock first? * * Parameters: * prev_object First object to coalesce * prev_offset Offset into prev_object * prev_size Size of reference to prev_object * next_size Size of reference to the second object * reserved Indicator that extension region has * swap accounted for * * Conditions: * The object must *not* be locked. */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) { vm_pindex_t next_pindex; if (prev_object == NULL) return (TRUE); VM_OBJECT_WLOCK(prev_object); if ((prev_object->type != OBJT_DEFAULT && prev_object->type != OBJT_SWAP) || (prev_object->flags & OBJ_TMPFS_NODE) != 0) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } /* * Try to collapse the object first */ vm_object_collapse(prev_object); /* * Can't coalesce if: . more than one reference . paged out . shadows * another object . has a copy elsewhere (any of which mean that the * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->backing_object != NULL) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; next_pindex = OFF_TO_IDX(prev_offset) + prev_size; if ((prev_object->ref_count > 1) && (prev_object->size != next_pindex)) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } /* * Account for the charge. */ if (prev_object->cred != NULL) { /* * If prev_object was charged, then this mapping, * although not charged now, may become writable * later. Non-NULL cred in the object would prevent * swap reservation during enabling of the write * access, so reserve swap now. Failed reservation * cause allocation of the separate object for the map * entry, and swap reservation for this entry is * managed in appropriate time. */ if (!reserved && !swap_reserve_by_cred(ptoa(next_size), prev_object->cred)) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_object->charge += ptoa(next_size); } /* * Remove any pages that may still be in the object from a previous * deallocation. */ if (next_pindex < prev_object->size) { vm_object_page_remove(prev_object, next_pindex, next_pindex + next_size, 0); if (prev_object->type == OBJT_SWAP) swap_pager_freespace(prev_object, next_pindex, next_size); #if 0 if (prev_object->cred != NULL) { KASSERT(prev_object->charge >= ptoa(prev_object->size - next_pindex), ("object %p overcharged 1 %jx %jx", prev_object, (uintmax_t)next_pindex, (uintmax_t)next_size)); prev_object->charge -= ptoa(prev_object->size - next_pindex); } #endif } /* * Extend the object if necessary. 
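 * E.g. with OFF_TO_IDX(prev_offset) = 10 and prev_size = 4 pages,
 * next_pindex is 14; a next_size of 4 pages then needs a size of 18,
 * so a 16-page prev_object grows to 18 pages, while an object that
 * already covers the range is left untouched.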
*/ if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; VM_OBJECT_WUNLOCK(prev_object); return (TRUE); } void vm_object_set_writeable_dirty(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_VNODE) { if ((object->flags & OBJ_TMPFS_NODE) != 0) { KASSERT(object->type == OBJT_SWAP, ("non-swap tmpfs")); vm_object_set_flag(object, OBJ_TMPFS_DIRTY); } return; } object->generation++; if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) return; vm_object_set_flag(object, OBJ_MIGHTBEDIRTY); } /* * vm_object_unwire: * * For each page offset within the specified range of the given object, * find the highest-level page in the shadow chain and unwire it. A page * must exist at every page offset, and the highest-level page must be * wired. */ void vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, uint8_t queue) { vm_object_t tobject; vm_page_t m, tm; vm_pindex_t end_pindex, pindex, tpindex; int depth, locked_depth; KASSERT((offset & PAGE_MASK) == 0, ("vm_object_unwire: offset is not page aligned")); KASSERT((length & PAGE_MASK) == 0, ("vm_object_unwire: length is not a multiple of PAGE_SIZE")); /* The wired count of a fictitious page never changes. */ if ((object->flags & OBJ_FICTITIOUS) != 0) return; pindex = OFF_TO_IDX(offset); end_pindex = pindex + atop(length); locked_depth = 1; VM_OBJECT_RLOCK(object); m = vm_page_find_least(object, pindex); while (pindex < end_pindex) { if (m == NULL || pindex < m->pindex) { /* * The first object in the shadow chain doesn't * contain a page at the current index. Therefore, * the page must exist in a backing object. */ tobject = object; tpindex = pindex; depth = 0; do { tpindex += OFF_TO_IDX(tobject->backing_object_offset); tobject = tobject->backing_object; KASSERT(tobject != NULL, ("vm_object_unwire: missing page")); if ((tobject->flags & OBJ_FICTITIOUS) != 0) goto next_page; depth++; if (depth == locked_depth) { locked_depth++; VM_OBJECT_RLOCK(tobject); } } while ((tm = vm_page_lookup(tobject, tpindex)) == NULL); } else { tm = m; m = TAILQ_NEXT(m, listq); } vm_page_lock(tm); vm_page_unwire(tm, queue); vm_page_unlock(tm); next_page: pindex++; } /* Release the accumulated object locks. */ for (depth = 0; depth < locked_depth; depth++) { tobject = object->backing_object; VM_OBJECT_RUNLOCK(object); object = tobject; } } struct vnode * vm_object_vnode(vm_object_t object) { VM_OBJECT_ASSERT_LOCKED(object); if (object->type == OBJT_VNODE) return (object->handle); if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0) return (object->un_pager.swp.swp_tmpfs); return (NULL); } static int sysctl_vm_object_list(SYSCTL_HANDLER_ARGS) { struct kinfo_vmobject kvo; char *fullpath, *freepath; struct vnode *vp; struct vattr va; vm_object_t obj; vm_page_t m; int count, error; if (req->oldptr == NULL) { /* * If an old buffer has not been provided, generate an * estimate of the space needed for a subsequent call. */ mtx_lock(&vm_object_list_mtx); count = 0; TAILQ_FOREACH(obj, &vm_object_list, object_list) { if (obj->type == OBJT_DEAD) continue; count++; } mtx_unlock(&vm_object_list_mtx); return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) * count * 11 / 10)); } error = 0; /* * VM objects are type stable and are never removed from the * list once added. This allows us to safely read obj->object_list * after reacquiring the VM object lock. 
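 * Dead objects remain on the list with type OBJT_DEAD, which is why
 * the loop below rechecks the type after acquiring the object lock;
 * the first, unlocked test is only an optimization.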
*/ mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(obj, &vm_object_list, object_list) { if (obj->type == OBJT_DEAD) continue; VM_OBJECT_RLOCK(obj); if (obj->type == OBJT_DEAD) { VM_OBJECT_RUNLOCK(obj); continue; } mtx_unlock(&vm_object_list_mtx); kvo.kvo_size = ptoa(obj->size); kvo.kvo_resident = obj->resident_page_count; kvo.kvo_ref_count = obj->ref_count; kvo.kvo_shadow_count = obj->shadow_count; kvo.kvo_memattr = obj->memattr; kvo.kvo_active = 0; kvo.kvo_inactive = 0; TAILQ_FOREACH(m, &obj->memq, listq) { /* * A page may belong to the object but be * dequeued and set to PQ_NONE while the * object lock is not held. This makes the * reads of m->queue below racy, and we do not * count pages set to PQ_NONE. However, this * sysctl is only meant to give an * approximation of the system anyway. */ if (vm_page_active(m)) kvo.kvo_active++; else if (vm_page_inactive(m)) kvo.kvo_inactive++; } kvo.kvo_vn_fileid = 0; kvo.kvo_vn_fsid = 0; kvo.kvo_vn_fsid_freebsd11 = 0; freepath = NULL; fullpath = ""; vp = NULL; switch (obj->type) { case OBJT_DEFAULT: kvo.kvo_type = KVME_TYPE_DEFAULT; break; case OBJT_VNODE: kvo.kvo_type = KVME_TYPE_VNODE; vp = obj->handle; vref(vp); break; case OBJT_SWAP: kvo.kvo_type = KVME_TYPE_SWAP; break; case OBJT_DEVICE: kvo.kvo_type = KVME_TYPE_DEVICE; break; case OBJT_PHYS: kvo.kvo_type = KVME_TYPE_PHYS; break; case OBJT_DEAD: kvo.kvo_type = KVME_TYPE_DEAD; break; case OBJT_SG: kvo.kvo_type = KVME_TYPE_SG; break; case OBJT_MGTDEVICE: kvo.kvo_type = KVME_TYPE_MGTDEVICE; break; default: kvo.kvo_type = KVME_TYPE_UNKNOWN; break; } VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(curthread, vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) { kvo.kvo_vn_fileid = va.va_fileid; kvo.kvo_vn_fsid = va.va_fsid; kvo.kvo_vn_fsid_freebsd11 = va.va_fsid; /* truncate */ } vput(vp); } strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path)); if (freepath != NULL) free(freepath, M_TEMP); /* Pack record size down */ kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) + strlen(kvo.kvo_path) + 1; kvo.kvo_structsize = roundup(kvo.kvo_structsize, sizeof(uint64_t)); error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize); mtx_lock(&vm_object_list_mtx); if (error) break; } mtx_unlock(&vm_object_list_mtx); return (error); } SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject", "List of VM objects"); #include "opt_ddb.h" #ifdef DDB #include #include #include static int _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry) { vm_map_t tmpm; vm_map_entry_t tmpe; vm_object_t obj; int entcount; if (map == 0) return 0; if (entry == 0) { tmpe = map->header.next; entcount = map->nentries; while (entcount-- && (tmpe != &map->header)) { if (_vm_object_in_map(map, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { tmpm = entry->object.sub_map; tmpe = tmpm->header.next; entcount = tmpm->nentries; while (entcount-- && tmpe != &tmpm->header) { if (_vm_object_in_map(tmpm, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if ((obj = entry->object.vm_object) != NULL) { for (; obj; obj = obj->backing_object) if (obj == object) { return 1; } } return 0; } static int vm_object_in_map(vm_object_t object) { struct proc *p; /* sx_slock(&allproc_lock); */ FOREACH_PROC_IN_SYSTEM(p) { if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) continue; if 
(_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { /* sx_sunlock(&allproc_lock); */ return 1; } } /* sx_sunlock(&allproc_lock); */ if (_vm_object_in_map(kernel_map, object, 0)) return 1; return 0; } DB_SHOW_COMMAND(vmochk, vm_object_check) { vm_object_t object; /* * make sure that internal objs are in a map somewhere * and none have zero ref counts. */ TAILQ_FOREACH(object, &vm_object_list, object_list) { if (object->handle == NULL && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { if (object->ref_count == 0) { db_printf("vmochk: internal obj has zero ref count: %ld\n", (long)object->size); } if (!vm_object_in_map(object)) { db_printf( "vmochk: internal obj is not in a map: " "ref: %d, size: %lu: 0x%lx, backing_object: %p\n", object->ref_count, (u_long)object->size, (u_long)object->size, (void *)object->backing_object); } } } } /* * vm_object_print: [ debug ] */ DB_SHOW_COMMAND(object, vm_object_print_static) { /* XXX convert args. */ vm_object_t object = (vm_object_t)addr; boolean_t full = have_addr; vm_page_t p; /* XXX count is an (unused) arg. Avoid shadowing it. */ #define count was_count int count; if (object == NULL) return; db_iprintf( "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n", object, (int)object->type, (uintmax_t)object->size, object->resident_page_count, object->ref_count, object->flags, object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge); db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n", object->shadow_count, object->backing_object ? object->backing_object->ref_count : 0, object->backing_object, (uintmax_t)object->backing_object_offset); if (!full) return; db_indent += 2; count = 0; TAILQ_FOREACH(p, &object->memq, listq) { if (count == 0) db_iprintf("memory:="); else if (count == 6) { db_printf("\n"); db_iprintf(" ..."); count = 0; } else db_printf(","); count++; db_printf("(off=0x%jx,page=0x%jx)", (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p)); } if (count != 0) db_printf("\n"); db_indent -= 2; } /* XXX. */ #undef count /* XXX need this non-static entry for calling from vm_map_print. 
*/ void vm_object_print( /* db_expr_t */ long addr, boolean_t have_addr, /* db_expr_t */ long count, char *modif) { vm_object_print_static(addr, have_addr, count, modif); } DB_SHOW_COMMAND(vmopag, vm_object_print_pages) { vm_object_t object; vm_pindex_t fidx; vm_paddr_t pa; vm_page_t m, prev_m; int rcount, nl, c; nl = 0; TAILQ_FOREACH(object, &vm_object_list, object_list) { db_printf("new object: %p\n", (void *)object); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; fidx = 0; pa = -1; TAILQ_FOREACH(m, &object->memq, listq) { if (m->pindex > 128) break; if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && prev_m->pindex + 1 != m->pindex) { if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; } } if (rcount && (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { ++rcount; continue; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } fidx = m->pindex; pa = VM_PAGE_TO_PHYS(m); rcount = 1; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } } } #endif /* DDB */ Index: projects/clang500-import/sys/vm/vm_radix.c =================================================================== --- projects/clang500-import/sys/vm/vm_radix.c (revision 321306) +++ projects/clang500-import/sys/vm/vm_radix.c (revision 321307) @@ -1,801 +1,801 @@ /* * Copyright (c) 2013 EMC Corp. * Copyright (c) 2011 Jeffrey Roberson * Copyright (c) 2008 Mayur Shardul * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Path-compressed radix trie implementation. * The following code is not generalized into a general purpose library * because there are way too many parameters embedded that should really * be decided by the library consumers. At the same time, consumers * of this code must achieve highest possible performance. * * The implementation takes into account the following rationale: * - Size of the nodes should be as small as possible but still big enough * to avoid a large maximum depth for the trie. 
This is a balance * between the necessity to not wire too much physical memory for the nodes * and the necessity to avoid too much cache pollution during the trie * operations. * - There is not a huge bias toward the number of lookup operations over * the number of insert and remove operations. This basically implies * that optimizations supposedly helping one operation but hurting the * other might be carefully evaluated. * - On average not many nodes are expected to be fully populated, hence * level compression may just complicate things. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* * These widths should allow the pointers to a node's children to fit within * a single cache line. The extra levels from a narrow width should not be * a problem thanks to path compression. */ #ifdef __LP64__ #define VM_RADIX_WIDTH 4 #else #define VM_RADIX_WIDTH 3 #endif #define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) #define VM_RADIX_MASK (VM_RADIX_COUNT - 1) #define VM_RADIX_LIMIT \ (howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1) /* Flag bits stored in node pointers. */ #define VM_RADIX_ISLEAF 0x1 #define VM_RADIX_FLAGS 0x1 #define VM_RADIX_PAD VM_RADIX_FLAGS /* Returns one unit associated with specified level. */ #define VM_RADIX_UNITLEVEL(lev) \ ((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH)) struct vm_radix_node { vm_pindex_t rn_owner; /* Owner of record. */ uint16_t rn_count; /* Valid children. */ uint16_t rn_clev; /* Current level. */ void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ }; static uma_zone_t vm_radix_node_zone; /* * Allocate a radix node. */ static __inline struct vm_radix_node * vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel) { struct vm_radix_node *rnode; rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO); if (rnode == NULL) return (NULL); rnode->rn_owner = owner; rnode->rn_count = count; rnode->rn_clev = clevel; return (rnode); } /* * Free radix node. */ static __inline void vm_radix_node_put(struct vm_radix_node *rnode) { uma_zfree(vm_radix_node_zone, rnode); } /* * Return the position in the array for a given level. */ static __inline int vm_radix_slot(vm_pindex_t index, uint16_t level) { return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); } /* Trims the key after the specified level. */ static __inline vm_pindex_t vm_radix_trimkey(vm_pindex_t index, uint16_t level) { vm_pindex_t ret; ret = index; if (level > 0) { ret >>= level * VM_RADIX_WIDTH; ret <<= level * VM_RADIX_WIDTH; } return (ret); } /* * Get the root node for a radix tree. */ static __inline struct vm_radix_node * vm_radix_getroot(struct vm_radix *rtree) { return ((struct vm_radix_node *)rtree->rt_root); } /* * Set the root node for a radix tree. */ static __inline void vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode) { rtree->rt_root = (uintptr_t)rnode; } /* * Returns TRUE if the specified radix node is a leaf and FALSE otherwise. */ static __inline boolean_t vm_radix_isleaf(struct vm_radix_node *rnode) { return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0); } /* * Returns the associated page extracted from rnode. */ static __inline vm_page_t vm_radix_topage(struct vm_radix_node *rnode) { return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS)); } /* * Adds the page as a child of the provided node. 
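 * Leaves are told apart from internal nodes by tagging the low bit
 * of the child pointer, so a slot holds either
 *
 *	(void *)((uintptr_t)page | VM_RADIX_ISLEAF)	- a page, or
 *	a plain struct vm_radix_node pointer		- a subtree.
 *
 * Both pages and nodes are at least word aligned, leaving bit zero
 * free for the flag that vm_radix_isleaf() tests.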
*/ static __inline void vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev, vm_page_t page) { int slot; slot = vm_radix_slot(index, clev); rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF); } /* * Returns the slot where two keys differ. * It cannot accept 2 equal keys. */ static __inline uint16_t vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2) { uint16_t clev; KASSERT(index1 != index2, ("%s: passing the same key value %jx", __func__, (uintmax_t)index1)); index1 ^= index2; for (clev = VM_RADIX_LIMIT;; clev--) if (vm_radix_slot(index1, clev) != 0) return (clev); } /* * Returns TRUE if it can be determined that key does not belong to the * specified rnode. Otherwise, returns FALSE. */ static __inline boolean_t vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx) { if (rnode->rn_clev < VM_RADIX_LIMIT) { idx = vm_radix_trimkey(idx, rnode->rn_clev + 1); return (idx != rnode->rn_owner); } return (FALSE); } /* * Internal helper for vm_radix_reclaim_allnodes(). * This function is recursive. */ static void vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode) { int slot; KASSERT(rnode->rn_count <= VM_RADIX_COUNT, ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode)); for (slot = 0; rnode->rn_count != 0; slot++) { if (rnode->rn_child[slot] == NULL) continue; if (!vm_radix_isleaf(rnode->rn_child[slot])) vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]); rnode->rn_child[slot] = NULL; rnode->rn_count--; } vm_radix_node_put(rnode); } #ifdef INVARIANTS /* * Radix node zone destructor. */ static void vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused) { struct vm_radix_node *rnode; int slot; rnode = mem; KASSERT(rnode->rn_count == 0, ("vm_radix_node_put: rnode %p has %d children", rnode, rnode->rn_count)); for (slot = 0; slot < VM_RADIX_COUNT; slot++) KASSERT(rnode->rn_child[slot] == NULL, ("vm_radix_node_put: rnode %p has a child", rnode)); } #endif #ifndef UMA_MD_SMALL_ALLOC /* * Reserve the KVA necessary to satisfy the node allocation. * This is mandatory in architectures not supporting direct * mapping as they will need otherwise to carve into the kernel maps for * every node allocation, resulting into deadlocks for consumers already * working with kernel maps. */ static void vm_radix_reserve_kva(void *arg __unused) { /* * Calculate the number of reserved nodes, discounting the pages that * are needed to store them. */ if (!uma_zone_reserve_kva(vm_radix_node_zone, ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE + sizeof(struct vm_radix_node)))) panic("%s: unable to reserve KVA", __func__); } SYSINIT(vm_radix_reserve_kva, SI_SUB_KMEM, SI_ORDER_THIRD, vm_radix_reserve_kva, NULL); #endif /* * Initialize the UMA slab zone. */ void -vm_radix_init(void) +vm_radix_zinit(void) { vm_radix_node_zone = uma_zcreate("RADIX NODE", sizeof(struct vm_radix_node), NULL, #ifdef INVARIANTS vm_radix_node_zone_dtor, #else NULL, #endif NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM); } /* * Inserts the key-value pair into the trie. * Panics if the key already exists. */ int vm_radix_insert(struct vm_radix *rtree, vm_page_t page) { vm_pindex_t index, newind; void **parentp; struct vm_radix_node *rnode, *tmp; vm_page_t m; int slot; uint16_t clev; index = page->pindex; /* * The owner of record for root is not really important because it * will never be used. 
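 * The walk below then has three outcomes: an empty tree stores the
 * page directly in rt_root as a tagged leaf; a collision with an
 * existing leaf, or a key barrier, splits the path with one new
 * intermediate node carrying two children; and a vacant slot in a
 * matching node simply takes the new leaf.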
*/ rnode = vm_radix_getroot(rtree); if (rnode == NULL) { rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF; return (0); } parentp = (void **)&rtree->rt_root; for (;;) { if (vm_radix_isleaf(rnode)) { m = vm_radix_topage(rnode); if (m->pindex == index) panic("%s: key %jx is already present", __func__, (uintmax_t)index); clev = vm_radix_keydiff(m->pindex, index); tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev); if (tmp == NULL) return (ENOMEM); *parentp = tmp; vm_radix_addpage(tmp, index, clev, page); vm_radix_addpage(tmp, m->pindex, clev, m); return (0); } else if (vm_radix_keybarr(rnode, index)) break; slot = vm_radix_slot(index, rnode->rn_clev); if (rnode->rn_child[slot] == NULL) { rnode->rn_count++; vm_radix_addpage(rnode, index, rnode->rn_clev, page); return (0); } parentp = &rnode->rn_child[slot]; rnode = rnode->rn_child[slot]; } /* * A new node is needed because the right insertion level is reached. * Setup the new intermediate node and add the 2 children: the * new object and the older edge. */ newind = rnode->rn_owner; clev = vm_radix_keydiff(newind, index); tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev); if (tmp == NULL) return (ENOMEM); *parentp = tmp; vm_radix_addpage(tmp, index, clev, page); slot = vm_radix_slot(newind, clev); tmp->rn_child[slot] = rnode; return (0); } /* * Returns TRUE if the specified radix tree contains a single leaf and FALSE * otherwise. */ boolean_t vm_radix_is_singleton(struct vm_radix *rtree) { struct vm_radix_node *rnode; rnode = vm_radix_getroot(rtree); if (rnode == NULL) return (FALSE); return (vm_radix_isleaf(rnode)); } /* * Returns the value stored at the index. If the index is not present, * NULL is returned. */ vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) { struct vm_radix_node *rnode; vm_page_t m; int slot; rnode = vm_radix_getroot(rtree); while (rnode != NULL) { if (vm_radix_isleaf(rnode)) { m = vm_radix_topage(rnode); if (m->pindex == index) return (m); else break; } else if (vm_radix_keybarr(rnode, index)) break; slot = vm_radix_slot(index, rnode->rn_clev); rnode = rnode->rn_child[slot]; } return (NULL); } /* * Look up the nearest entry at a position bigger than or equal to index. */ vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index) { struct vm_radix_node *stack[VM_RADIX_LIMIT]; vm_pindex_t inc; vm_page_t m; struct vm_radix_node *child, *rnode; #ifdef INVARIANTS int loops = 0; #endif int slot, tos; rnode = vm_radix_getroot(rtree); if (rnode == NULL) return (NULL); else if (vm_radix_isleaf(rnode)) { m = vm_radix_topage(rnode); if (m->pindex >= index) return (m); else return (NULL); } tos = 0; for (;;) { /* * If the keys differ before the current bisection node, * then the search key might rollback to the earliest * available bisection node or to the smallest key * in the current node (if the owner is bigger than the * search key). */ if (vm_radix_keybarr(rnode, index)) { if (index > rnode->rn_owner) { ascend: KASSERT(++loops < 1000, ("vm_radix_lookup_ge: too many loops")); /* * Pop nodes from the stack until either the * stack is empty or a node that could have a * matching descendant is found. */ do { if (tos == 0) return (NULL); rnode = stack[--tos]; } while (vm_radix_slot(index, rnode->rn_clev) == (VM_RADIX_COUNT - 1)); /* * The following computation cannot overflow * because index's slot at the current level * is less than VM_RADIX_COUNT - 1. 
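 * Concretely, the pops above guarantee the slot at rn_clev is at
 * most VM_RADIX_COUNT - 2, so adding one VM_RADIX_UNITLEVEL unit
 * only advances that slot and never carries into the higher-order
 * bits that vm_radix_trimkey() preserved.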
*/ index = vm_radix_trimkey(index, rnode->rn_clev); index += VM_RADIX_UNITLEVEL(rnode->rn_clev); } else index = rnode->rn_owner; KASSERT(!vm_radix_keybarr(rnode, index), ("vm_radix_lookup_ge: keybarr failed")); } slot = vm_radix_slot(index, rnode->rn_clev); child = rnode->rn_child[slot]; if (vm_radix_isleaf(child)) { m = vm_radix_topage(child); if (m->pindex >= index) return (m); } else if (child != NULL) goto descend; /* * Look for an available edge or page within the current * bisection node. */ if (slot < (VM_RADIX_COUNT - 1)) { inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); index = vm_radix_trimkey(index, rnode->rn_clev); do { index += inc; slot++; child = rnode->rn_child[slot]; if (vm_radix_isleaf(child)) { m = vm_radix_topage(child); if (m->pindex >= index) return (m); } else if (child != NULL) goto descend; } while (slot < (VM_RADIX_COUNT - 1)); } KASSERT(child == NULL || vm_radix_isleaf(child), ("vm_radix_lookup_ge: child is radix node")); /* * If a page or edge bigger than the search slot is not found * in the current node, ascend to the next higher-level node. */ goto ascend; descend: KASSERT(rnode->rn_clev > 0, ("vm_radix_lookup_ge: pushing leaf's parent")); KASSERT(tos < VM_RADIX_LIMIT, ("vm_radix_lookup_ge: stack overflow")); stack[tos++] = rnode; rnode = child; } } /* * Look up the nearest entry at a position less than or equal to index. */ vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index) { struct vm_radix_node *stack[VM_RADIX_LIMIT]; vm_pindex_t inc; vm_page_t m; struct vm_radix_node *child, *rnode; #ifdef INVARIANTS int loops = 0; #endif int slot, tos; rnode = vm_radix_getroot(rtree); if (rnode == NULL) return (NULL); else if (vm_radix_isleaf(rnode)) { m = vm_radix_topage(rnode); if (m->pindex <= index) return (m); else return (NULL); } tos = 0; for (;;) { /* * If the keys differ before the current bisection node, * then the search key might rollback to the earliest * available bisection node or to the largest key * in the current node (if the owner is smaller than the * search key). */ if (vm_radix_keybarr(rnode, index)) { if (index > rnode->rn_owner) { index = rnode->rn_owner + VM_RADIX_COUNT * VM_RADIX_UNITLEVEL(rnode->rn_clev); } else { ascend: KASSERT(++loops < 1000, ("vm_radix_lookup_le: too many loops")); /* * Pop nodes from the stack until either the * stack is empty or a node that could have a * matching descendant is found. */ do { if (tos == 0) return (NULL); rnode = stack[--tos]; } while (vm_radix_slot(index, rnode->rn_clev) == 0); /* * The following computation cannot overflow * because index's slot at the current level * is greater than 0. */ index = vm_radix_trimkey(index, rnode->rn_clev); } index--; KASSERT(!vm_radix_keybarr(rnode, index), ("vm_radix_lookup_le: keybarr failed")); } slot = vm_radix_slot(index, rnode->rn_clev); child = rnode->rn_child[slot]; if (vm_radix_isleaf(child)) { m = vm_radix_topage(child); if (m->pindex <= index) return (m); } else if (child != NULL) goto descend; /* * Look for an available edge or page within the current * bisection node. 
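 * This mirrors the ascending scan in vm_radix_lookup_ge(): index is
 * first saturated to the top of its current slot (index |= inc - 1)
 * so that every key under a smaller sibling compares as <= index,
 * and the slots are then walked downward.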
*/ if (slot > 0) { inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); index |= inc - 1; do { index -= inc; slot--; child = rnode->rn_child[slot]; if (vm_radix_isleaf(child)) { m = vm_radix_topage(child); if (m->pindex <= index) return (m); } else if (child != NULL) goto descend; } while (slot > 0); } KASSERT(child == NULL || vm_radix_isleaf(child), ("vm_radix_lookup_le: child is radix node")); /* * If a page or edge smaller than the search slot is not found * in the current node, ascend to the next higher-level node. */ goto ascend; descend: KASSERT(rnode->rn_clev > 0, ("vm_radix_lookup_le: pushing leaf's parent")); KASSERT(tos < VM_RADIX_LIMIT, ("vm_radix_lookup_le: stack overflow")); stack[tos++] = rnode; rnode = child; } } /* * Remove the specified index from the trie, and return the value stored at * that index. If the index is not present, return NULL. */ vm_page_t vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) { struct vm_radix_node *rnode, *parent; vm_page_t m; int i, slot; rnode = vm_radix_getroot(rtree); if (vm_radix_isleaf(rnode)) { m = vm_radix_topage(rnode); if (m->pindex != index) return (NULL); vm_radix_setroot(rtree, NULL); return (m); } parent = NULL; for (;;) { if (rnode == NULL) return (NULL); slot = vm_radix_slot(index, rnode->rn_clev); if (vm_radix_isleaf(rnode->rn_child[slot])) { m = vm_radix_topage(rnode->rn_child[slot]); if (m->pindex != index) return (NULL); rnode->rn_child[slot] = NULL; rnode->rn_count--; if (rnode->rn_count > 1) return (m); for (i = 0; i < VM_RADIX_COUNT; i++) if (rnode->rn_child[i] != NULL) break; KASSERT(i != VM_RADIX_COUNT, ("%s: invalid node configuration", __func__)); if (parent == NULL) vm_radix_setroot(rtree, rnode->rn_child[i]); else { slot = vm_radix_slot(index, parent->rn_clev); KASSERT(parent->rn_child[slot] == rnode, ("%s: invalid child value", __func__)); parent->rn_child[slot] = rnode->rn_child[i]; } rnode->rn_count--; rnode->rn_child[i] = NULL; vm_radix_node_put(rnode); return (m); } parent = rnode; rnode = rnode->rn_child[slot]; } } /* * Remove and free all the nodes from the radix tree. * This function is recursive but there is a tight control on it as the * maximum depth of the tree is fixed. */ void vm_radix_reclaim_allnodes(struct vm_radix *rtree) { struct vm_radix_node *root; root = vm_radix_getroot(rtree); if (root == NULL) return; vm_radix_setroot(rtree, NULL); if (!vm_radix_isleaf(root)) vm_radix_reclaim_allnodes_int(root); } /* * Replace an existing page in the trie with another one. * Panics if there is not an old page in the trie at the new page's index. 
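 *
 * A usage sketch (hypothetical names; the object lock is held and
 * newpage->pindex already equals the old page's index):
 *
 *	mold = vm_radix_replace(&object->rtree, newpage);
 *	KASSERT(mold == oldpage, ("wrong page replaced"));
 *
 * No node is allocated or freed, so unlike vm_radix_insert() this
 * operation cannot fail with ENOMEM.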
*/ vm_page_t vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage) { struct vm_radix_node *rnode; vm_page_t m; vm_pindex_t index; int slot; index = newpage->pindex; rnode = vm_radix_getroot(rtree); if (rnode == NULL) panic("%s: replacing page on an empty trie", __func__); if (vm_radix_isleaf(rnode)) { m = vm_radix_topage(rnode); if (m->pindex != index) panic("%s: original replacing root key not found", __func__); rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF; return (m); } for (;;) { slot = vm_radix_slot(index, rnode->rn_clev); if (vm_radix_isleaf(rnode->rn_child[slot])) { m = vm_radix_topage(rnode->rn_child[slot]); if (m->pindex == index) { rnode->rn_child[slot] = (void *)((uintptr_t)newpage | VM_RADIX_ISLEAF); return (m); } else break; } else if (rnode->rn_child[slot] == NULL || vm_radix_keybarr(rnode->rn_child[slot], index)) break; rnode = rnode->rn_child[slot]; } panic("%s: original replacing page not found", __func__); } #ifdef DDB /* * Show details about the given radix node. */ DB_SHOW_COMMAND(radixnode, db_show_radixnode) { struct vm_radix_node *rnode; int i; if (!have_addr) return; rnode = (struct vm_radix_node *)addr; db_printf("radixnode %p, owner %jx, children count %u, level %u:\n", (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count, rnode->rn_clev); for (i = 0; i < VM_RADIX_COUNT; i++) if (rnode->rn_child[i] != NULL) db_printf("slot: %d, val: %p, page: %p, clev: %d\n", i, (void *)rnode->rn_child[i], vm_radix_isleaf(rnode->rn_child[i]) ? vm_radix_topage(rnode->rn_child[i]) : NULL, rnode->rn_clev); } #endif /* DDB */ Index: projects/clang500-import/sys/vm/vm_radix.h =================================================================== --- projects/clang500-import/sys/vm/vm_radix.h (revision 321306) +++ projects/clang500-import/sys/vm/vm_radix.h (revision 321307) @@ -1,49 +1,63 @@ /* * Copyright (c) 2013 EMC Corp. * Copyright (c) 2011 Jeffrey Roberson * Copyright (c) 2008 Mayur Shardul * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _VM_RADIX_H_ #define _VM_RADIX_H_ #include #ifdef _KERNEL -void vm_radix_init(void); int vm_radix_insert(struct vm_radix *rtree, vm_page_t page); boolean_t vm_radix_is_singleton(struct vm_radix *rtree); vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index); vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index); vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index); void vm_radix_reclaim_allnodes(struct vm_radix *rtree); vm_page_t vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index); vm_page_t vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage); +void vm_radix_zinit(void); + +static __inline void +vm_radix_init(struct vm_radix *rtree) +{ + + rtree->rt_root = 0; +} + +static __inline boolean_t +vm_radix_is_empty(struct vm_radix *rtree) +{ + + return (rtree->rt_root == 0); +} #endif /* _KERNEL */ #endif /* !_VM_RADIX_H_ */ Index: projects/clang500-import/usr.bin/procstat/procstat.c =================================================================== --- projects/clang500-import/usr.bin/procstat/procstat.c (revision 321306) +++ projects/clang500-import/usr.bin/procstat/procstat.c (revision 321307) @@ -1,374 +1,385 @@ /*- * Copyright (c) 2007, 2011 Robert N. M. Watson * Copyright (c) 2015 Allan Jude * Copyright (c) 2017 Dell EMC * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include "procstat.h" static int aflag, bflag, cflag, eflag, fflag, iflag, jflag, kflag; static int lflag, Lflag, rflag, sflag, tflag, vflag, xflag, Sflag; int hflag, nflag, Cflag, Hflag; static void usage(void) { - xo_error("usage: procstat [--libxo] [-CHhn] [-M core] " + xo_error( + "usage: procstat [--libxo] [-Hhn] [-M core] " "[-N system] [-w interval]\n" - " [-b | -c | -e | -f | -i | -j | -k | " - "-l | -L | -r | -s | \n" - " -S | -t | -v | -x]\n" - " [-a | pid | core ...]\n"); + " [-S | -b | -c | -e | -i | -j | -k | -kk | " + "-l | -r | -s | \n" + " -t | -v | -x]\n" + " [-a | pid ... | core ...]\n" + " procstat [--libxo] -Cf [-hn] [-M core] " + "[-N system] [-a | pid ... 
| core ...]\n" + " [-S | -b | -c | -e | -i | -j | -k | -kk | " + "-l | -r | -s | \n" + " procstat [--libxo] -L [-hn] [-M core] " + "[-N system] [-w interval]\n" + " [-S | -b | -c | -e | -i | -j | -k | -kk | " + "-l | -r | -s | \n" + " -t | -v | -x]\n" + " [core ...]\n"); xo_finish(); exit(EX_USAGE); } static void procstat(struct procstat *prstat, struct kinfo_proc *kipp) { char *pidstr = NULL; asprintf(&pidstr, "%d", kipp->ki_pid); if (pidstr == NULL) xo_errc(1, ENOMEM, "Failed to allocate memory in procstat()"); xo_open_container(pidstr); if (bflag) procstat_bin(prstat, kipp); else if (cflag) procstat_args(prstat, kipp); else if (eflag) procstat_env(prstat, kipp); else if (fflag) procstat_files(prstat, kipp); else if (iflag) procstat_sigs(prstat, kipp); else if (jflag) procstat_threads_sigs(prstat, kipp); else if (kflag) procstat_kstack(prstat, kipp, kflag); else if (lflag) procstat_rlimit(prstat, kipp); else if (Lflag) procstat_ptlwpinfo(prstat); else if (rflag) procstat_rusage(prstat, kipp); else if (sflag) procstat_cred(prstat, kipp); else if (tflag) procstat_threads(prstat, kipp); else if (vflag) procstat_vm(prstat, kipp); else if (xflag) procstat_auxv(prstat, kipp); else if (Sflag) procstat_cs(prstat, kipp); else procstat_basic(kipp); xo_close_container(pidstr); free(pidstr); } /* * Sort processes first by pid and then tid. */ static int kinfo_proc_compare(const void *a, const void *b) { int i; i = ((const struct kinfo_proc *)a)->ki_pid - ((const struct kinfo_proc *)b)->ki_pid; if (i != 0) return (i); i = ((const struct kinfo_proc *)a)->ki_tid - ((const struct kinfo_proc *)b)->ki_tid; return (i); } void kinfo_proc_sort(struct kinfo_proc *kipp, int count) { qsort(kipp, count, sizeof(*kipp), kinfo_proc_compare); } const char * kinfo_proc_thread_name(const struct kinfo_proc *kipp) { static char name[MAXCOMLEN+1]; strlcpy(name, kipp->ki_tdname, sizeof(name)); strlcat(name, kipp->ki_moretdname, sizeof(name)); if (name[0] == '\0' || strcmp(kipp->ki_comm, name) == 0) { name[0] = '-'; name[1] = '\0'; } return (name); } int main(int argc, char *argv[]) { int ch, interval, tmp; int i; struct kinfo_proc *p; struct procstat *prstat, *cprstat; long l; pid_t pid; char *dummy; char *nlistf, *memf; const char *xocontainer; int cnt; interval = 0; memf = nlistf = NULL; argc = xo_parse_args(argc, argv); xocontainer = "basic"; while ((ch = getopt(argc, argv, "abCcefHhijkLlM:N:nrSstvw:x")) != -1) { switch (ch) { case 'C': Cflag++; break; case 'H': Hflag++; break; case 'M': memf = optarg; break; case 'N': nlistf = optarg; break; case 'S': Sflag++; xocontainer = "cs"; break; case 'a': aflag++; break; case 'b': bflag++; xocontainer = "binary"; break; case 'c': cflag++; xocontainer = "arguments"; break; case 'e': eflag++; xocontainer = "environment"; break; case 'f': fflag++; xocontainer = "files"; break; case 'i': iflag++; xocontainer = "signals"; break; case 'j': jflag++; xocontainer = "thread_signals"; break; case 'k': kflag++; xocontainer = "kstack"; break; case 'l': lflag++; xocontainer = "rlimit"; break; case 'L': Lflag++; xocontainer = "ptlwpinfo"; break; case 'n': nflag++; break; case 'h': hflag++; break; case 'r': rflag++; xocontainer = "rusage"; break; case 's': sflag++; xocontainer = "credentials"; break; case 't': tflag++; xocontainer = "threads"; break; case 'v': vflag++; xocontainer = "vm"; break; case 'w': l = strtol(optarg, &dummy, 10); if (*dummy != '\0') usage(); if (l < 1 || l > INT_MAX) usage(); interval = l; break; case 'x': xflag++; xocontainer = "auxv"; break; case '?': default: 
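/* Any option not handled above falls through to usage(), which prints the synopsis and exits. */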
usage(); } } argc -= optind; argv += optind; /* We require that either 0 or 1 mode flags be set. */ tmp = bflag + cflag + eflag + fflag + iflag + jflag + (kflag ? 1 : 0) + lflag + rflag + sflag + tflag + vflag + xflag + Sflag; if (!(tmp == 0 || tmp == 1)) usage(); /* We allow -k to be specified up to twice, but not more. */ if (kflag > 2) usage(); /* Must specify either the -a flag or a list of pids. */ if (!(aflag == 1 && argc == 0) && !(aflag == 0 && argc > 0)) usage(); /* Only allow -C with -f. */ if (Cflag && !fflag) usage(); if (memf != NULL) prstat = procstat_open_kvm(nlistf, memf); else prstat = procstat_open_sysctl(); if (prstat == NULL) xo_errx(1, "procstat_open()"); do { xo_set_version(PROCSTAT_XO_VERSION); xo_open_container("procstat"); xo_open_container(xocontainer); if (aflag) { p = procstat_getprocs(prstat, KERN_PROC_PROC, 0, &cnt); if (p == NULL) xo_errx(1, "procstat_getprocs()"); kinfo_proc_sort(p, cnt); for (i = 0; i < cnt; i++) { procstat(prstat, &p[i]); /* Suppress header after first process. */ hflag = 1; xo_flush(); } procstat_freeprocs(prstat, p); } for (i = 0; i < argc; i++) { l = strtol(argv[i], &dummy, 10); if (*dummy == '\0') { if (l < 0) usage(); pid = l; p = procstat_getprocs(prstat, KERN_PROC_PID, pid, &cnt); if (p == NULL) xo_errx(1, "procstat_getprocs()"); if (cnt != 0) procstat(prstat, p); procstat_freeprocs(prstat, p); } else { cprstat = procstat_open_core(argv[i]); if (cprstat == NULL) { warnx("procstat_open()"); continue; } p = procstat_getprocs(cprstat, KERN_PROC_PID, -1, &cnt); if (p == NULL) xo_errx(1, "procstat_getprocs()"); if (cnt != 0) procstat(cprstat, p); procstat_freeprocs(cprstat, p); procstat_close(cprstat); } /* Suppress header after first process. */ hflag = 1; } xo_close_container(xocontainer); xo_close_container("procstat"); xo_finish(); if (interval) sleep(interval); } while (interval); procstat_close(prstat); exit(0); } Index: projects/clang500-import/usr.bin/truss/syscalls.c =================================================================== --- projects/clang500-import/usr.bin/truss/syscalls.c (revision 321306) +++ projects/clang500-import/usr.bin/truss/syscalls.c (revision 321307) @@ -1,2424 +1,2427 @@ /* * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * This file has routines used to print out system calls and their * arguments. */ #include #include #include #include #include #include #include #include #define _WANT_FREEBSD11_STAT #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "truss.h" #include "extern.h" #include "syscall.h" /* * This should probably be in its own file, sorted alphabetically. */ static struct syscall decoded_syscalls[] = { /* Native ABI */ { .name = "__acl_aclcheck_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_delete_fd", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_file", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_get_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__cap_rights_get", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { CapRights | OUT, 2 } } }, { .name = "__getcwd", .ret_type = 1, .nargs = 2, .args = { { Name | OUT, 0 }, { Int, 1 } } }, { .name = "_umtx_op", .ret_type = 1, .nargs = 5, .args = { { Ptr, 0 }, { Umtxop, 1 }, { LongHex, 2 }, { Ptr, 3 }, { Ptr, 4 } } }, { .name = "accept", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "access", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "bind", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "bindat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "break", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "cap_fcntls_get", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights | OUT, 1 } } }, { .name = "cap_fcntls_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights, 1 } } }, { .name = "cap_getmode", .ret_type = 1, .nargs = 1, .args = { 
{ PUInt | OUT, 0 } } }, { .name = "cap_rights_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapRights, 1 } } }, { .name = "chdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "chflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "chflagsat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { FileFlags, 2 }, { Atflags, 3 } } }, { .name = "chmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "chown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "chroot", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "clock_gettime", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "compat11.fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat11 | OUT, 1 } } }, + { .name = "compat11.fstatat", .ret_type = 1, .nargs = 4, + .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat11 | OUT, 2 }, + { Atflags, 3 } } }, { .name = "compat11.lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat11 | OUT, 1 } } }, { .name = "compat11.stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat11 | OUT, 1 } } }, { .name = "connect", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "connectat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "dup", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "dup2", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "eaccess", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "exit", .ret_type = 0, .nargs = 1, .args = { { Hex, 0 } } }, { .name = "extattr_delete_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_get_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_list_fd", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_file", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_link", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_set_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { 
Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattrctl", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Hex, 1 }, { Name, 2 }, { Extattrnamespace, 3 }, { Name, 4 } } }, { .name = "faccessat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Accessmode, 2 }, { Atflags, 3 } } }, { .name = "fchflags", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { FileFlags, 1 } } }, { .name = "fchmod", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Octal, 1 } } }, { .name = "fchmodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Atflags, 3 } } }, { .name = "fchown", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "fchownat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Int, 2 }, { Int, 3 }, { Atflags, 4 } } }, { .name = "fcntl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Fcntl, 1 }, { Fcntlflag, 2 } } }, { .name = "flock", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Flockop, 1 } } }, { .name = "fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat | OUT, 1 } } }, { .name = "fstatat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat | OUT, 2 }, { Atflags, 3 } } }, { .name = "fstatfs", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { StatFs | OUT, 1 } } }, { .name = "ftruncate", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { QuadHex | IN, 1 } } }, { .name = "futimens", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec2 | IN, 1 } } }, { .name = "futimes", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timeval2 | IN, 1 } } }, { .name = "futimesat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timeval2 | IN, 2 } } }, { .name = "getdirentries", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { PQuadHex | OUT, 3 } } }, { .name = "getfsstat", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Long, 1 }, { Getfsstatmode, 2 } } }, { .name = "getitimer", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Itimerval | OUT, 2 } } }, { .name = "getpeername", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getpgid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getpriority", .ret_type = 1, .nargs = 2, .args = { { Priowhich, 0 }, { Int, 1 } } }, { .name = "getrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | OUT, 1 } } }, { .name = "getrusage", .ret_type = 1, .nargs = 2, .args = { { RusageWho, 0 }, { Rusage | OUT, 1 } } }, { .name = "getsid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getsockname", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | OUT, 3 }, { Ptr | OUT, 4 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } }, { .name = "kevent", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { Kevent, 1 }, { Int, 2 }, { Kevent | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "kill", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { Signal | IN, 1 
} } }, { .name = "kldfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldfirstmod", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldload", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldnext", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr, 1 } } }, { .name = "kldsym", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Kldsymcmd, 1 }, { Ptr, 2 } } }, { .name = "kldunload", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldunloadf", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Kldunloadflags, 1 } } }, { .name = "kse_release", .ret_type = 0, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "lchflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "lchmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "lchown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "linkat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 }, { Atflags, 4 } } }, { .name = "listen", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { QuadHex, 1 }, { Whence, 2 } } }, { .name = "lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "lutimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "madvise", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Madvice, 2 } } }, { .name = "minherit", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Minherit, 2 } } }, { .name = "mkdir", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkdirat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mkfifo", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkfifoat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mknod", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Octal, 1 }, { Int, 2 } } }, { .name = "mknodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Int, 3 } } }, { .name = "mlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "mlockall", .ret_type = 1, .nargs = 1, .args = { { Mlockall, 0 } } }, { .name = "mmap", .ret_type = 1, .nargs = 6, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 }, { Mmapflags, 3 }, { Int, 4 }, { QuadHex, 5 } } }, { .name = "modfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "mount", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Name, 1 }, { Mountflags, 2 }, { Ptr, 3 } } }, { .name = "mprotect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 } } }, { .name = "msync", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Msync, 2 } } }, { .name = "munlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "munmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "nanosleep", .ret_type = 1, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "nmount", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { UInt, 1 }, { Mountflags, 2 } } }, { .name = "open", 
.ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Open, 1 }, { Octal, 2 } } }, { .name = "openat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Open, 2 }, { Octal, 3 } } }, { .name = "pathconf", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Pathconf, 1 } } }, { .name = "pipe", .ret_type = 1, .nargs = 1, .args = { { PipeFds | OUT, 0 } } }, { .name = "pipe2", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Pipe2, 1 } } }, { .name = "poll", .ret_type = 1, .nargs = 3, .args = { { Pollfd, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "posix_fadvise", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { QuadHex, 1 }, { QuadHex, 2 }, { Fadvice, 3 } } }, { .name = "posix_openpt", .ret_type = 1, .nargs = 1, .args = { { Open, 0 } } }, { .name = "pread", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "procctl", .ret_type = 1, .nargs = 4, .args = { { Idtype, 0 }, { Quad, 1 }, { Procctl, 2 }, { Ptr, 3 } } }, { .name = "ptrace", .ret_type = 1, .nargs = 4, .args = { { Ptraceop, 0 }, { Int, 1 }, { Ptr, 2 }, { Int, 3 } } }, { .name = "pwrite", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "quotactl", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Quotactlcmd, 1 }, { Int, 2 }, { Ptr, 3 } } }, { .name = "read", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Readlinkres | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlinkat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Readlinkres | OUT, 2 }, { Sizet, 3 } } }, { .name = "reboot", .ret_type = 1, .nargs = 1, .args = { { Reboothowto, 0 } } }, { .name = "recvfrom", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | OUT, 4 }, { Ptr | OUT, 5 } } }, { .name = "recvmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ptr, 1 }, { Msgflags, 2 } } }, { .name = "rename", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "renameat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, { .name = "rmdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "rtprio", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "rtprio_thread", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "sched_get_priority_max", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_get_priority_min", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_getparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam | OUT, 1 } } }, { .name = "sched_getscheduler", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "sched_rr_get_interval", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "sched_setparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam, 1 } } }, { .name = "sched_setscheduler", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Schedpolicy, 1 }, { Schedparam, 2 } } }, { .name = "sctp_generic_recvmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Ptr | IN, 1 }, { Int, 2 }, { Sockaddr | OUT, 3 }, { Ptr | OUT, 4 }, { Ptr | OUT, 5 }, { 
Ptr | OUT, 6 } } }, { .name = "sctp_generic_sendmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { Sockaddr | IN, 3 }, { Socklent, 4 }, { Ptr | IN, 5 }, { Msgflags, 6 } } }, { .name = "select", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Fd_set, 1 }, { Fd_set, 2 }, { Fd_set, 3 }, { Timeval, 4 } } }, { .name = "sendmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ptr, 1 }, { Msgflags, 2 } } }, { .name = "sendto", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | IN, 4 }, { Socklent | IN, 5 } } }, { .name = "setitimer", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Itimerval, 1 }, { Itimerval | OUT, 2 } } }, { .name = "setpriority", .ret_type = 1, .nargs = 3, .args = { { Priowhich, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "setrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | IN, 1 } } }, { .name = "setsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | IN, 3 }, { Socklent, 4 } } }, { .name = "shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Shutdown, 1 } } }, { .name = "sigaction", .ret_type = 1, .nargs = 3, .args = { { Signal, 0 }, { Sigaction | IN, 1 }, { Sigaction | OUT, 2 } } }, { .name = "sigpending", .ret_type = 1, .nargs = 1, .args = { { Sigset | OUT, 0 } } }, { .name = "sigprocmask", .ret_type = 1, .nargs = 3, .args = { { Sigprocmask, 0 }, { Sigset, 1 }, { Sigset | OUT, 2 } } }, { .name = "sigqueue", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Signal, 1 }, { LongHex, 2 } } }, { .name = "sigreturn", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "sigsuspend", .ret_type = 1, .nargs = 1, .args = { { Sigset | IN, 0 } } }, { .name = "sigtimedwait", .ret_type = 1, .nargs = 3, .args = { { Sigset | IN, 0 }, { Ptr, 1 }, { Timespec | IN, 2 } } }, { .name = "sigwait", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { Ptr, 1 } } }, { .name = "sigwaitinfo", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { Ptr, 1 } } }, { .name = "socket", .ret_type = 1, .nargs = 3, .args = { { Sockdomain, 0 }, { Socktype, 1 }, { Sockprotocol, 2 } } }, { .name = "stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "statfs", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { StatFs | OUT, 1 } } }, { .name = "symlink", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "symlinkat", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Atfd, 1 }, { Name, 2 } } }, { .name = "sysarch", .ret_type = 1, .nargs = 2, .args = { { Sysarch, 0 }, { Ptr, 1 } } }, { .name = "thr_kill", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Signal, 1 } } }, { .name = "thr_self", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "truncate", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { QuadHex | IN, 1 } } }, #if 0 /* Does not exist */ { .name = "umount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Int, 2 } } }, #endif { .name = "unlink", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "unlinkat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Atflags, 2 } } }, { .name = "unmount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Mountflags, 1 } } }, { .name = "utimensat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timespec2 | IN, 2 }, { Atflags, 3 } } }, { .name = "utimes", .ret_type = 1, .nargs = 2, .args = { 
{ Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "utrace", .ret_type = 1, .nargs = 1, .args = { { Utrace, 0 } } }, { .name = "wait4", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { ExitStatus | OUT, 1 }, { Waitoptions, 2 }, { Rusage | OUT, 3 } } }, { .name = "wait6", .ret_type = 1, .nargs = 6, .args = { { Idtype, 0 }, { Quad, 1 }, { ExitStatus | OUT, 2 }, { Waitoptions, 3 }, { Rusage | OUT, 4 }, { Ptr, 5 } } }, { .name = "write", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 } } }, /* Linux ABI */ { .name = "linux_access", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Accessmode, 1 } } }, { .name = "linux_execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "linux_lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Whence, 2 } } }, { .name = "linux_mkdir", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Int, 1 } } }, { .name = "linux_newfstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_newstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_open", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Hex, 1 }, { Octal, 2 } } }, { .name = "linux_readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Name | OUT, 1 }, { Sizet, 2 } } }, { .name = "linux_socketcall", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { LinuxSockArgs, 1 } } }, { .name = "linux_stat64", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, /* CloudABI system calls. */ { .name = "cloudabi_sys_clock_res_get", .ret_type = 1, .nargs = 1, .args = { { CloudABIClockID, 0 } } }, { .name = "cloudabi_sys_clock_time_get", .ret_type = 1, .nargs = 2, .args = { { CloudABIClockID, 0 }, { CloudABITimestamp, 1 } } }, { .name = "cloudabi_sys_condvar_signal", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 }, { UInt, 2 } } }, { .name = "cloudabi_sys_fd_close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_create1", .ret_type = 1, .nargs = 1, .args = { { CloudABIFileType, 0 } } }, { .name = "cloudabi_sys_fd_create2", .ret_type = 1, .nargs = 2, .args = { { CloudABIFileType, 0 }, { PipeFds | OUT, 0 } } }, { .name = "cloudabi_sys_fd_datasync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_dup", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_replace", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_fd_seek", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { CloudABIWhence, 2 } } }, { .name = "cloudabi_sys_fd_stat_get", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABIFDStat | OUT, 1 } } }, { .name = "cloudabi_sys_fd_stat_put", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { CloudABIFDStat | IN, 1 }, { ClouduABIFDSFlags, 2 } } }, { .name = "cloudabi_sys_fd_sync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_file_advise", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 }, { CloudABIAdvice, 3 } } }, { .name = "cloudabi_sys_file_allocate", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "cloudabi_sys_file_create", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { CloudABIFileType, 3 } } }, { .name = "cloudabi_sys_file_link", .ret_type = 1, .nargs = 4, .args = { { 
CloudABILookup, 0 }, { BinString | IN, 1 }, { Int, 3 }, { BinString | IN, 4 } } }, { .name = "cloudabi_sys_file_open", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { CloudABIOFlags, 3 }, { CloudABIFDStat | IN, 4 } } }, { .name = "cloudabi_sys_file_readdir", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { Int, 3 } } }, { .name = "cloudabi_sys_file_readlink", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { BinString | OUT, 3 }, { Int, 4 } } }, { .name = "cloudabi_sys_file_rename", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 3 }, { BinString | IN, 4 } } }, { .name = "cloudabi_sys_file_stat_fget", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABIFileStat | OUT, 1 } } }, { .name = "cloudabi_sys_file_stat_fput", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { CloudABIFileStat | IN, 1 }, { CloudABIFSFlags, 2 } } }, { .name = "cloudabi_sys_file_stat_get", .ret_type = 1, .nargs = 3, .args = { { CloudABILookup, 0 }, { BinString | IN, 1 }, { CloudABIFileStat | OUT, 3 } } }, { .name = "cloudabi_sys_file_stat_put", .ret_type = 1, .nargs = 4, .args = { { CloudABILookup, 0 }, { BinString | IN, 1 }, { CloudABIFileStat | IN, 3 }, { CloudABIFSFlags, 4 } } }, { .name = "cloudabi_sys_file_symlink", .ret_type = 1, .nargs = 3, .args = { { BinString | IN, 0 }, { Int, 2 }, { BinString | IN, 3 } } }, { .name = "cloudabi_sys_file_unlink", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { CloudABIULFlags, 3 } } }, { .name = "cloudabi_sys_lock_unlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 } } }, { .name = "cloudabi_sys_mem_advise", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIAdvice, 2 } } }, { .name = "cloudabi_sys_mem_lock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_mem_map", .ret_type = 1, .nargs = 6, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMProt, 2 }, { CloudABIMFlags, 3 }, { Int, 4 }, { Int, 5 } } }, { .name = "cloudabi_sys_mem_protect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMProt, 2 } } }, { .name = "cloudabi_sys_mem_sync", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMSFlags, 2 } } }, { .name = "cloudabi_sys_mem_unlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_mem_unmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_proc_exec", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { IntArray, 3 }, { Int, 4 } } }, { .name = "cloudabi_sys_proc_exit", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_proc_fork", .ret_type = 1, .nargs = 0 }, { .name = "cloudabi_sys_proc_raise", .ret_type = 1, .nargs = 1, .args = { { CloudABISignal, 0 } } }, { .name = "cloudabi_sys_random_get", .ret_type = 1, .nargs = 2, .args = { { BinString | OUT, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_sock_accept", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABISockStat | OUT, 1 } } }, { .name = "cloudabi_sys_sock_bind", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { BinString | IN, 2 } } }, { .name = "cloudabi_sys_sock_connect", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { BinString | IN, 2 } } }, { .name = "cloudabi_sys_sock_listen", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = 
"cloudabi_sys_sock_shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABISDFlags, 1 } } }, { .name = "cloudabi_sys_sock_stat_get", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { CloudABISockStat | OUT, 1 }, { CloudABISSFlags, 2 } } }, { .name = "cloudabi_sys_thread_exit", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 } } }, { .name = "cloudabi_sys_thread_yield", .ret_type = 1, .nargs = 0 }, { .name = 0 }, }; static STAILQ_HEAD(, syscall) syscalls; /* Xlat idea taken from strace */ struct xlat { int val; const char *str; }; #define X(a) { a, #a }, #define XEND { 0, NULL } static struct xlat kevent_filters[] = { X(EVFILT_READ) X(EVFILT_WRITE) X(EVFILT_AIO) X(EVFILT_VNODE) X(EVFILT_PROC) X(EVFILT_SIGNAL) X(EVFILT_TIMER) X(EVFILT_PROCDESC) X(EVFILT_FS) X(EVFILT_LIO) X(EVFILT_USER) X(EVFILT_SENDFILE) XEND }; static struct xlat kevent_flags[] = { X(EV_ADD) X(EV_DELETE) X(EV_ENABLE) X(EV_DISABLE) X(EV_ONESHOT) X(EV_CLEAR) X(EV_RECEIPT) X(EV_DISPATCH) X(EV_FORCEONESHOT) X(EV_DROP) X(EV_FLAG1) X(EV_ERROR) X(EV_EOF) XEND }; static struct xlat kevent_user_ffctrl[] = { X(NOTE_FFNOP) X(NOTE_FFAND) X(NOTE_FFOR) X(NOTE_FFCOPY) XEND }; static struct xlat kevent_rdwr_fflags[] = { X(NOTE_LOWAT) X(NOTE_FILE_POLL) XEND }; static struct xlat kevent_vnode_fflags[] = { X(NOTE_DELETE) X(NOTE_WRITE) X(NOTE_EXTEND) X(NOTE_ATTRIB) X(NOTE_LINK) X(NOTE_RENAME) X(NOTE_REVOKE) XEND }; static struct xlat kevent_proc_fflags[] = { X(NOTE_EXIT) X(NOTE_FORK) X(NOTE_EXEC) X(NOTE_TRACK) X(NOTE_TRACKERR) X(NOTE_CHILD) XEND }; static struct xlat kevent_timer_fflags[] = { X(NOTE_SECONDS) X(NOTE_MSECONDS) X(NOTE_USECONDS) X(NOTE_NSECONDS) XEND }; static struct xlat poll_flags[] = { X(POLLSTANDARD) X(POLLIN) X(POLLPRI) X(POLLOUT) X(POLLERR) X(POLLHUP) X(POLLNVAL) X(POLLRDNORM) X(POLLRDBAND) X(POLLWRBAND) X(POLLINIGNEOF) XEND }; static struct xlat sigaction_flags[] = { X(SA_ONSTACK) X(SA_RESTART) X(SA_RESETHAND) X(SA_NOCLDSTOP) X(SA_NODEFER) X(SA_NOCLDWAIT) X(SA_SIGINFO) XEND }; static struct xlat pathconf_arg[] = { X(_PC_LINK_MAX) X(_PC_MAX_CANON) X(_PC_MAX_INPUT) X(_PC_NAME_MAX) X(_PC_PATH_MAX) X(_PC_PIPE_BUF) X(_PC_CHOWN_RESTRICTED) X(_PC_NO_TRUNC) X(_PC_VDISABLE) X(_PC_ASYNC_IO) X(_PC_PRIO_IO) X(_PC_SYNC_IO) X(_PC_ALLOC_SIZE_MIN) X(_PC_FILESIZEBITS) X(_PC_REC_INCR_XFER_SIZE) X(_PC_REC_MAX_XFER_SIZE) X(_PC_REC_MIN_XFER_SIZE) X(_PC_REC_XFER_ALIGN) X(_PC_SYMLINK_MAX) X(_PC_ACL_EXTENDED) X(_PC_ACL_PATH_MAX) X(_PC_CAP_PRESENT) X(_PC_INF_PRESENT) X(_PC_MAC_PRESENT) X(_PC_ACL_NFS4) X(_PC_MIN_HOLE_SIZE) XEND }; static struct xlat at_flags[] = { X(AT_EACCESS) X(AT_SYMLINK_NOFOLLOW) X(AT_SYMLINK_FOLLOW) X(AT_REMOVEDIR) XEND }; static struct xlat sysarch_ops[] = { #if defined(__i386__) || defined(__amd64__) X(I386_GET_LDT) X(I386_SET_LDT) X(I386_GET_IOPERM) X(I386_SET_IOPERM) X(I386_VM86) X(I386_GET_FSBASE) X(I386_SET_FSBASE) X(I386_GET_GSBASE) X(I386_SET_GSBASE) X(I386_GET_XFPUSTATE) X(AMD64_GET_FSBASE) X(AMD64_SET_FSBASE) X(AMD64_GET_GSBASE) X(AMD64_SET_GSBASE) X(AMD64_GET_XFPUSTATE) #endif XEND }; static struct xlat linux_socketcall_ops[] = { X(LINUX_SOCKET) X(LINUX_BIND) X(LINUX_CONNECT) X(LINUX_LISTEN) X(LINUX_ACCEPT) X(LINUX_GETSOCKNAME) X(LINUX_GETPEERNAME) X(LINUX_SOCKETPAIR) X(LINUX_SEND) X(LINUX_RECV) X(LINUX_SENDTO) X(LINUX_RECVFROM) X(LINUX_SHUTDOWN) X(LINUX_SETSOCKOPT) X(LINUX_GETSOCKOPT) X(LINUX_SENDMSG) X(LINUX_RECVMSG) XEND }; #undef X #define X(a) { CLOUDABI_##a, #a }, static struct xlat cloudabi_advice[] = { X(ADVICE_DONTNEED) X(ADVICE_NOREUSE) X(ADVICE_NORMAL) X(ADVICE_RANDOM) 
X(ADVICE_SEQUENTIAL) X(ADVICE_WILLNEED) XEND }; static struct xlat cloudabi_clockid[] = { X(CLOCK_MONOTONIC) X(CLOCK_PROCESS_CPUTIME_ID) X(CLOCK_REALTIME) X(CLOCK_THREAD_CPUTIME_ID) XEND }; static struct xlat cloudabi_errno[] = { X(E2BIG) X(EACCES) X(EADDRINUSE) X(EADDRNOTAVAIL) X(EAFNOSUPPORT) X(EAGAIN) X(EALREADY) X(EBADF) X(EBADMSG) X(EBUSY) X(ECANCELED) X(ECHILD) X(ECONNABORTED) X(ECONNREFUSED) X(ECONNRESET) X(EDEADLK) X(EDESTADDRREQ) X(EDOM) X(EDQUOT) X(EEXIST) X(EFAULT) X(EFBIG) X(EHOSTUNREACH) X(EIDRM) X(EILSEQ) X(EINPROGRESS) X(EINTR) X(EINVAL) X(EIO) X(EISCONN) X(EISDIR) X(ELOOP) X(EMFILE) X(EMLINK) X(EMSGSIZE) X(EMULTIHOP) X(ENAMETOOLONG) X(ENETDOWN) X(ENETRESET) X(ENETUNREACH) X(ENFILE) X(ENOBUFS) X(ENODEV) X(ENOENT) X(ENOEXEC) X(ENOLCK) X(ENOLINK) X(ENOMEM) X(ENOMSG) X(ENOPROTOOPT) X(ENOSPC) X(ENOSYS) X(ENOTCONN) X(ENOTDIR) X(ENOTEMPTY) X(ENOTRECOVERABLE) X(ENOTSOCK) X(ENOTSUP) X(ENOTTY) X(ENXIO) X(EOVERFLOW) X(EOWNERDEAD) X(EPERM) X(EPIPE) X(EPROTO) X(EPROTONOSUPPORT) X(EPROTOTYPE) X(ERANGE) X(EROFS) X(ESPIPE) X(ESRCH) X(ESTALE) X(ETIMEDOUT) X(ETXTBSY) X(EXDEV) X(ENOTCAPABLE) XEND }; static struct xlat cloudabi_fdflags[] = { X(FDFLAG_APPEND) X(FDFLAG_DSYNC) X(FDFLAG_NONBLOCK) X(FDFLAG_RSYNC) X(FDFLAG_SYNC) XEND }; static struct xlat cloudabi_fdsflags[] = { X(FDSTAT_FLAGS) X(FDSTAT_RIGHTS) XEND }; static struct xlat cloudabi_filetype[] = { X(FILETYPE_UNKNOWN) X(FILETYPE_BLOCK_DEVICE) X(FILETYPE_CHARACTER_DEVICE) X(FILETYPE_DIRECTORY) X(FILETYPE_FIFO) X(FILETYPE_POLL) X(FILETYPE_PROCESS) X(FILETYPE_REGULAR_FILE) X(FILETYPE_SHARED_MEMORY) X(FILETYPE_SOCKET_DGRAM) X(FILETYPE_SOCKET_SEQPACKET) X(FILETYPE_SOCKET_STREAM) X(FILETYPE_SYMBOLIC_LINK) XEND }; static struct xlat cloudabi_fsflags[] = { X(FILESTAT_ATIM) X(FILESTAT_ATIM_NOW) X(FILESTAT_MTIM) X(FILESTAT_MTIM_NOW) X(FILESTAT_SIZE) XEND }; static struct xlat cloudabi_mflags[] = { X(MAP_ANON) X(MAP_FIXED) X(MAP_PRIVATE) X(MAP_SHARED) XEND }; static struct xlat cloudabi_mprot[] = { X(PROT_EXEC) X(PROT_WRITE) X(PROT_READ) XEND }; static struct xlat cloudabi_msflags[] = { X(MS_ASYNC) X(MS_INVALIDATE) X(MS_SYNC) XEND }; static struct xlat cloudabi_oflags[] = { X(O_CREAT) X(O_DIRECTORY) X(O_EXCL) X(O_TRUNC) XEND }; static struct xlat cloudabi_sa_family[] = { X(AF_UNSPEC) X(AF_INET) X(AF_INET6) X(AF_UNIX) XEND }; static struct xlat cloudabi_sdflags[] = { X(SHUT_RD) X(SHUT_WR) XEND }; static struct xlat cloudabi_signal[] = { X(SIGABRT) X(SIGALRM) X(SIGBUS) X(SIGCHLD) X(SIGCONT) X(SIGFPE) X(SIGHUP) X(SIGILL) X(SIGINT) X(SIGKILL) X(SIGPIPE) X(SIGQUIT) X(SIGSEGV) X(SIGSTOP) X(SIGSYS) X(SIGTERM) X(SIGTRAP) X(SIGTSTP) X(SIGTTIN) X(SIGTTOU) X(SIGURG) X(SIGUSR1) X(SIGUSR2) X(SIGVTALRM) X(SIGXCPU) X(SIGXFSZ) XEND }; static struct xlat cloudabi_ssflags[] = { X(SOCKSTAT_CLEAR_ERROR) XEND }; static struct xlat cloudabi_ssstate[] = { X(SOCKSTATE_ACCEPTCONN) XEND }; static struct xlat cloudabi_ulflags[] = { X(UNLINK_REMOVEDIR) XEND }; static struct xlat cloudabi_whence[] = { X(WHENCE_CUR) X(WHENCE_END) X(WHENCE_SET) XEND }; #undef X #undef XEND /* * Searches an xlat array for a value, and returns it if found. Otherwise * return a string representation. 
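* For example, xlookup(kevent_filters, EVFILT_READ) yields "EVFILT_READ", while a value absent from the table is formatted in the requested base (e.g. "0x99" for base 16); the fallback string lives in a static buffer that is overwritten by the next call.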
*/ static const char * lookup(struct xlat *xlat, int val, int base) { static char tmp[16]; for (; xlat->str != NULL; xlat++) if (xlat->val == val) return (xlat->str); switch (base) { case 8: sprintf(tmp, "0%o", val); break; case 16: sprintf(tmp, "0x%x", val); break; case 10: sprintf(tmp, "%u", val); break; default: errx(1, "Unknown lookup base"); break; } return (tmp); } static const char * xlookup(struct xlat *xlat, int val) { return (lookup(xlat, val, 16)); } /* * Searches an xlat array containing bitfield values. Remaining bits * set after removing the known ones are printed at the end: * IN|0x400. */ static char * xlookup_bits(struct xlat *xlat, int val) { int len, rem; static char str[512]; len = 0; rem = val; for (; xlat->str != NULL; xlat++) { if ((xlat->val & rem) == xlat->val) { /* * Don't print the "all-bits-zero" string unless all * bits are really zero. */ if (xlat->val == 0 && val != 0) continue; len += sprintf(str + len, "%s|", xlat->str); rem &= ~(xlat->val); } } /* * If we have leftover bits or didn't match anything, print * the remainder. */ if (rem || len == 0) len += sprintf(str + len, "0x%x", rem); if (len && str[len - 1] == '|') len--; str[len] = 0; return (str); } static void print_integer_arg(const char *(*decoder)(int), FILE *fp, int value) { const char *str; str = decoder(value); if (str != NULL) fputs(str, fp); else fprintf(fp, "%d", value); } static void print_mask_arg(bool (*decoder)(FILE *, int, int *), FILE *fp, int value) { int rem; if (!decoder(fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } static void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), FILE *fp, uint32_t value) { uint32_t rem; if (!decoder(fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } #ifndef __LP64__ /* * Add argument padding to subsequent system calls after a Quad * syscall argument as needed. This used to be done by hand in the * decoded_syscalls table which was ugly and error prone. It is * simpler to do the fixup of offsets at initialization time than when * decoding arguments. */ static void quad_fixup(struct syscall *sc) { int offset, prev; u_int i; offset = 0; prev = -1; for (i = 0; i < sc->nargs; i++) { /* This arg type is a dummy that doesn't use offset. */ if ((sc->args[i].type & ARG_MASK) == PipeFds) continue; assert(prev < sc->args[i].offset); prev = sc->args[i].offset; sc->args[i].offset += offset; switch (sc->args[i].type & ARG_MASK) { case Quad: case QuadHex: #ifdef __powerpc__ /* * 64-bit arguments on 32-bit powerpc must be * 64-bit aligned. If the current offset is * not aligned, the calling convention inserts * a 32-bit pad argument that should be skipped.
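* For example, ftruncate() above declares its QuadHex length at offset 1; on 32-bit powerpc the ABI inserts a pad word there, so this fixup moves the length to slots 2 and 3 and shifts any later arguments accordingly.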
*/ if (sc->args[i].offset % 2 == 1) { sc->args[i].offset++; offset++; } #endif offset++; default: break; } } } #endif void init_syscalls(void) { struct syscall *sc; STAILQ_INIT(&syscalls); for (sc = decoded_syscalls; sc->name != NULL; sc++) { #ifndef __LP64__ quad_fixup(sc); #endif STAILQ_INSERT_HEAD(&syscalls, sc, entries); } } static struct syscall * find_syscall(struct procabi *abi, u_int number) { struct extra_syscall *es; if (number < nitems(abi->syscalls)) return (abi->syscalls[number]); STAILQ_FOREACH(es, &abi->extra_syscalls, entries) { if (es->number == number) return (es->sc); } return (NULL); } static void add_syscall(struct procabi *abi, u_int number, struct syscall *sc) { struct extra_syscall *es; if (number < nitems(abi->syscalls)) { assert(abi->syscalls[number] == NULL); abi->syscalls[number] = sc; } else { es = malloc(sizeof(*es)); es->sc = sc; es->number = number; STAILQ_INSERT_TAIL(&abi->extra_syscalls, es, entries); } } /* * If/when the list gets big, it might be desirable to do it * as a hash table or binary search. */ struct syscall * get_syscall(struct threadinfo *t, u_int number, u_int nargs) { struct syscall *sc; const char *name; char *new_name; u_int i; sc = find_syscall(t->proc->abi, number); if (sc != NULL) return (sc); name = sysdecode_syscallname(t->proc->abi->abi, number); if (name == NULL) { asprintf(&new_name, "#%d", number); name = new_name; } else new_name = NULL; STAILQ_FOREACH(sc, &syscalls, entries) { if (strcmp(name, sc->name) == 0) { add_syscall(t->proc->abi, number, sc); free(new_name); return (sc); } } /* It is unknown. Add it into the list. */ #if DEBUG fprintf(stderr, "unknown syscall %s -- setting args to %d\n", name, nargs); #endif sc = calloc(1, sizeof(struct syscall)); sc->name = name; if (new_name != NULL) sc->unknown = true; sc->ret_type = 1; sc->nargs = nargs; for (i = 0; i < nargs; i++) { sc->args[i].offset = i; /* Treat all unknown arguments as LongHex. */ sc->args[i].type = LongHex; } STAILQ_INSERT_HEAD(&syscalls, sc, entries); add_syscall(t->proc->abi, number, sc); return (sc); } /* * Copy a fixed amount of bytes from the process. */ static int get_struct(pid_t pid, void *offset, void *buf, int len) { struct ptrace_io_desc iorequest; iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = offset; iorequest.piod_addr = buf; iorequest.piod_len = len; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) return (-1); return (0); } #define MAXSIZE 4096 /* * Copy a string from the process. Note that it is * expected to be a C string, but if max is set, it will * only get that much. */ static char * get_string(pid_t pid, void *addr, int max) { struct ptrace_io_desc iorequest; char *buf, *nbuf; size_t offset, size, totalsize; offset = 0; if (max) size = max + 1; else { /* Read up to the end of the current page. 
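* This avoids faulting on an unmapped page just past a short string; the loop below then grows the buffer a page at a time, up to MAXSIZE bytes, until a terminating NUL is seen.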
*/ size = PAGE_SIZE - ((uintptr_t)addr % PAGE_SIZE); if (size > MAXSIZE) size = MAXSIZE; } totalsize = size; buf = malloc(totalsize); if (buf == NULL) return (NULL); for (;;) { iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = (char *)addr + offset; iorequest.piod_addr = buf + offset; iorequest.piod_len = size; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) { free(buf); return (NULL); } if (memchr(buf + offset, '\0', size) != NULL) return (buf); offset += size; if (totalsize < MAXSIZE && max == 0) { size = MAXSIZE - totalsize; if (size > PAGE_SIZE) size = PAGE_SIZE; nbuf = realloc(buf, totalsize + size); if (nbuf == NULL) { buf[totalsize - 1] = '\0'; return (buf); } buf = nbuf; totalsize += size; } else { buf[totalsize - 1] = '\0'; return (buf); } } } static const char * strsig2(int sig) { static char tmp[32]; const char *signame; signame = sysdecode_signal(sig); if (signame == NULL) { snprintf(tmp, sizeof(tmp), "%d", sig); signame = tmp; } return (signame); } static void print_kevent(FILE *fp, struct kevent *ke, int input) { switch (ke->filter) { case EVFILT_READ: case EVFILT_WRITE: case EVFILT_VNODE: case EVFILT_PROC: case EVFILT_TIMER: case EVFILT_PROCDESC: fprintf(fp, "%ju", (uintmax_t)ke->ident); break; case EVFILT_SIGNAL: fputs(strsig2(ke->ident), fp); break; default: fprintf(fp, "%p", (void *)ke->ident); } fprintf(fp, ",%s,%s,", xlookup(kevent_filters, ke->filter), xlookup_bits(kevent_flags, ke->flags)); switch (ke->filter) { case EVFILT_READ: case EVFILT_WRITE: fputs(xlookup_bits(kevent_rdwr_fflags, ke->fflags), fp); break; case EVFILT_VNODE: fputs(xlookup_bits(kevent_vnode_fflags, ke->fflags), fp); break; case EVFILT_PROC: case EVFILT_PROCDESC: fputs(xlookup_bits(kevent_proc_fflags, ke->fflags), fp); break; case EVFILT_TIMER: fputs(xlookup_bits(kevent_timer_fflags, ke->fflags), fp); break; case EVFILT_USER: { int ctrl, data; ctrl = ke->fflags & NOTE_FFCTRLMASK; data = ke->fflags & NOTE_FFLAGSMASK; if (input) { fputs(xlookup(kevent_user_ffctrl, ctrl), fp); if (ke->fflags & NOTE_TRIGGER) fputs("|NOTE_TRIGGER", fp); if (data != 0) fprintf(fp, "|%#x", data); } else { fprintf(fp, "%#x", data); } break; } default: fprintf(fp, "%#x", ke->fflags); } fprintf(fp, ",%#jx,%p", (uintmax_t)ke->data, ke->udata); } static void print_utrace(FILE *fp, void *utrace_addr, size_t len) { unsigned char *utrace_buffer; fprintf(fp, "{ "); if (sysdecode_utrace(fp, utrace_addr, len)) { fprintf(fp, " }"); return; } utrace_buffer = utrace_addr; fprintf(fp, "%zu:", len); while (len--) fprintf(fp, " %02x", *utrace_buffer++); fprintf(fp, " }"); } /* * Converts a syscall argument into a string. Said string is * allocated via malloc(), so needs to be free()'d. sc is * a pointer to the syscall description (see above); args is * an array of all of the system call arguments. 
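* For example, a Name argument is copied out of the traced process with get_string() and printed quoted, while most struct-valued arguments fall back to printing the raw pointer in hex when get_struct() cannot read them. */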
*/ char * print_arg(struct syscall_args *sc, unsigned long *args, long *retval, struct trussinfo *trussinfo) { FILE *fp; char *tmp; size_t tmplen; pid_t pid; fp = open_memstream(&tmp, &tmplen); pid = trussinfo->curthread->proc->pid; switch (sc->type & ARG_MASK) { case Hex: fprintf(fp, "0x%x", (int)args[sc->offset]); break; case Octal: fprintf(fp, "0%o", (int)args[sc->offset]); break; case Int: fprintf(fp, "%d", (int)args[sc->offset]); break; case UInt: fprintf(fp, "%u", (unsigned int)args[sc->offset]); break; case PUInt: { unsigned int val; if (get_struct(pid, (void *)args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ %u }", val); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case LongHex: fprintf(fp, "0x%lx", args[sc->offset]); break; case Long: fprintf(fp, "%ld", args[sc->offset]); break; case Sizet: fprintf(fp, "%zu", (size_t)args[sc->offset]); break; case Name: { /* NULL-terminated string. */ char *tmp2; tmp2 = get_string(pid, (void*)args[sc->offset], 0); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case BinString: { /* * Binary block of data that might have printable characters. * XXX If type|OUT, assume that the length is the syscall's * return value. Otherwise, assume that the length of the block * is in the next syscall argument. */ int max_string = trussinfo->strsize; char tmp2[max_string + 1], *tmp3; int len; int truncated = 0; if (sc->type & OUT) len = retval[0]; else len = args[sc->offset + 1]; /* * Don't print more than max_string characters, to avoid word * wrap. If we have to truncate put some ... after the string. */ if (len > max_string) { len = max_string; truncated = 1; } if (len && get_struct(pid, (void*)args[sc->offset], &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= max_string) break; len--; truncated = 1; } fprintf(fp, "\"%s\"%s", tmp3, truncated ? "..." : ""); free(tmp3); } else { fprintf(fp, "0x%lx", args[sc->offset]); } break; } case ExecArgs: case ExecEnv: case StringArray: { uintptr_t addr; union { char *strarray[0]; char buf[PAGE_SIZE]; } u; char *string; size_t len; u_int first, i; /* * Only parse argv[] and environment arrays from exec calls * if requested. */ if (((sc->type & ARG_MASK) == ExecArgs && (trussinfo->flags & EXECVEARGS) == 0) || ((sc->type & ARG_MASK) == ExecEnv && (trussinfo->flags & EXECVEENVS) == 0)) { fprintf(fp, "0x%lx", args[sc->offset]); break; } /* * Read a page of pointers at a time. Punt if the top-level * pointer is not aligned. Note that the first read is of * a partial page. */ addr = args[sc->offset]; if (addr % sizeof(char *) != 0) { fprintf(fp, "0x%lx", args[sc->offset]); break; } len = PAGE_SIZE - (addr & PAGE_MASK); if (get_struct(pid, (void *)addr, u.buf, len) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } fputc('[', fp); first = 1; i = 0; while (u.strarray[i] != NULL) { string = get_string(pid, u.strarray[i], 0); fprintf(fp, "%s \"%s\"", first ? 
"" : ",", string); free(string); first = 0; i++; if (i == len / sizeof(char *)) { addr += len; len = PAGE_SIZE; if (get_struct(pid, (void *)addr, u.buf, len) == -1) { fprintf(fp, ", "); break; } i = 0; } } fputs(" ]", fp); break; } #ifdef __LP64__ case Quad: fprintf(fp, "%ld", args[sc->offset]); break; case QuadHex: fprintf(fp, "0x%lx", args[sc->offset]); break; #else case Quad: case QuadHex: { unsigned long long ll; #if _BYTE_ORDER == _LITTLE_ENDIAN ll = (unsigned long long)args[sc->offset + 1] << 32 | args[sc->offset]; #else ll = (unsigned long long)args[sc->offset] << 32 | args[sc->offset + 1]; #endif if ((sc->type & ARG_MASK) == Quad) fprintf(fp, "%lld", ll); else fprintf(fp, "0x%llx", ll); break; } #endif case PQuadHex: { uint64_t val; if (get_struct(pid, (void *)args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ 0x%jx }", (uintmax_t)val); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Ptr: fprintf(fp, "0x%lx", args[sc->offset]); break; case Readlinkres: { char *tmp2; if (retval[0] == -1) break; tmp2 = get_string(pid, (void*)args[sc->offset], retval[0]); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case Ioctl: { const char *temp; unsigned long cmd; cmd = args[sc->offset]; temp = sysdecode_ioctlname(cmd); if (temp) fputs(temp, fp); else { fprintf(fp, "0x%lx { IO%s%s 0x%lx('%c'), %lu, %lu }", cmd, cmd & IOC_OUT ? "R" : "", cmd & IOC_IN ? "W" : "", IOCGROUP(cmd), isprint(IOCGROUP(cmd)) ? (char)IOCGROUP(cmd) : '?', cmd & 0xFF, IOCPARM_LEN(cmd)); } break; } case Timespec: { struct timespec ts; if (get_struct(pid, (void *)args[sc->offset], &ts, sizeof(ts)) != -1) fprintf(fp, "{ %jd.%09ld }", (intmax_t)ts.tv_sec, ts.tv_nsec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timespec2: { struct timespec ts[2]; const char *sep; unsigned int i; if (get_struct(pid, (void *)args[sc->offset], &ts, sizeof(ts)) != -1) { fputs("{ ", fp); sep = ""; for (i = 0; i < nitems(ts); i++) { fputs(sep, fp); sep = ", "; switch (ts[i].tv_nsec) { case UTIME_NOW: fprintf(fp, "UTIME_NOW"); break; case UTIME_OMIT: fprintf(fp, "UTIME_OMIT"); break; default: fprintf(fp, "%jd.%09ld", (intmax_t)ts[i].tv_sec, ts[i].tv_nsec); break; } } fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timeval: { struct timeval tv; if (get_struct(pid, (void *)args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld }", (intmax_t)tv.tv_sec, tv.tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timeval2: { struct timeval tv[2]; if (get_struct(pid, (void *)args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)tv[0].tv_sec, tv[0].tv_usec, (intmax_t)tv[1].tv_sec, tv[1].tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Itimerval: { struct itimerval itv; if (get_struct(pid, (void *)args[sc->offset], &itv, sizeof(itv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)itv.it_interval.tv_sec, itv.it_interval.tv_usec, (intmax_t)itv.it_value.tv_sec, itv.it_value.tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case LinuxSockArgs: { struct linux_socketcall_args largs; if (get_struct(pid, (void *)args[sc->offset], (void *)&largs, sizeof(largs)) != -1) fprintf(fp, "{ %s, 0x%lx }", lookup(linux_socketcall_ops, largs.what, 10), (long unsigned int)largs.args); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Pollfd: { /* * XXX: A Pollfd argument expects the /next/ syscall argument * to be the number of fds in the array. This matches the poll * syscall. 
*/ struct pollfd *pfd; int numfds = args[sc->offset + 1]; size_t bytes = sizeof(struct pollfd) * numfds; int i; if ((pfd = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for pollfd array", bytes); if (get_struct(pid, (void *)args[sc->offset], pfd, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { fprintf(fp, " %d/%s", pfd[i].fd, xlookup_bits(poll_flags, pfd[i].events)); } fputs(" }", fp); } else { fprintf(fp, "0x%lx", args[sc->offset]); } free(pfd); break; } case Fd_set: { /* * XXX: A Fd_set argument expects the /first/ syscall argument * to be the number of fds in the array. This matches the * select syscall. */ fd_set *fds; int numfds = args[0]; size_t bytes = _howmany(numfds, _NFDBITS) * _NFDBITS; int i; if ((fds = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for fd_set array", bytes); if (get_struct(pid, (void *)args[sc->offset], fds, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { if (FD_ISSET(i, fds)) fprintf(fp, " %d", i); } fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); free(fds); break; } case Signal: fputs(strsig2(args[sc->offset]), fp); break; case Sigset: { long sig; sigset_t ss; int i, first; sig = args[sc->offset]; if (get_struct(pid, (void *)args[sc->offset], (void *)&ss, sizeof(ss)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } fputs("{ ", fp); first = 1; for (i = 1; i < sys_nsig; i++) { if (sigismember(&ss, i)) { fprintf(fp, "%s%s", !first ? "|" : "", strsig2(i)); first = 0; } } if (!first) fputc(' ', fp); fputc('}', fp); break; } case Sigprocmask: print_integer_arg(sysdecode_sigprocmask_how, fp, args[sc->offset]); break; case Fcntlflag: /* XXX: Output depends on the value of the previous argument. */ if (sysdecode_fcntl_arg_p(args[sc->offset - 1])) sysdecode_fcntl_arg(fp, args[sc->offset - 1], args[sc->offset], 16); break; case Open: print_mask_arg(sysdecode_open_flags, fp, args[sc->offset]); break; case Fcntl: print_integer_arg(sysdecode_fcntl_cmd, fp, args[sc->offset]); break; case Mprot: print_mask_arg(sysdecode_mmap_prot, fp, args[sc->offset]); break; case Mmapflags: print_mask_arg(sysdecode_mmap_flags, fp, args[sc->offset]); break; case Whence: print_integer_arg(sysdecode_whence, fp, args[sc->offset]); break; case Sockdomain: print_integer_arg(sysdecode_socketdomain, fp, args[sc->offset]); break; case Socktype: print_mask_arg(sysdecode_socket_type, fp, args[sc->offset]); break; case Shutdown: print_integer_arg(sysdecode_shutdown_how, fp, args[sc->offset]); break; case Resource: print_integer_arg(sysdecode_rlimit, fp, args[sc->offset]); break; case RusageWho: print_integer_arg(sysdecode_getrusage_who, fp, args[sc->offset]); break; case Pathconf: fputs(xlookup(pathconf_arg, args[sc->offset]), fp); break; case Rforkflags: print_mask_arg(sysdecode_rfork_flags, fp, args[sc->offset]); break; case Sockaddr: { char addr[64]; struct sockaddr_in *lsin; struct sockaddr_in6 *lsin6; struct sockaddr_un *sun; struct sockaddr *sa; socklen_t len; u_char *q; if (args[sc->offset] == 0) { fputs("NULL", fp); break; } /* * Extract the address length from the next argument. If * this is an output sockaddr (OUT is set), then the * next argument is a pointer to a socklen_t. Otherwise * the next argument contains a socklen_t by value. */ if (sc->type & OUT) { if (get_struct(pid, (void *)args[sc->offset + 1], &len, sizeof(len)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } } else len = args[sc->offset + 1]; /* If the length is too small, just bail. 
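* (The buffer must be at least sizeof(struct sockaddr) so that sa_len and sa_family can be examined before any family-specific decoding.)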
*/ if (len < sizeof(*sa)) { fprintf(fp, "0x%lx", args[sc->offset]); break; } sa = calloc(1, len); if (get_struct(pid, (void *)args[sc->offset], sa, len) == -1) { free(sa); fprintf(fp, "0x%lx", args[sc->offset]); break; } switch (sa->sa_family) { case AF_INET: if (len < sizeof(*lsin)) goto sockaddr_short; lsin = (struct sockaddr_in *)(void *)sa; inet_ntop(AF_INET, &lsin->sin_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET %s:%d }", addr, htons(lsin->sin_port)); break; case AF_INET6: if (len < sizeof(*lsin6)) goto sockaddr_short; lsin6 = (struct sockaddr_in6 *)(void *)sa; inet_ntop(AF_INET6, &lsin6->sin6_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET6 [%s]:%d }", addr, htons(lsin6->sin6_port)); break; case AF_UNIX: sun = (struct sockaddr_un *)sa; fprintf(fp, "{ AF_UNIX \"%.*s\" }", (int)(len - offsetof(struct sockaddr_un, sun_path)), sun->sun_path); break; default: sockaddr_short: fprintf(fp, "{ sa_len = %d, sa_family = %d, sa_data = {", (int)sa->sa_len, (int)sa->sa_family); for (q = (u_char *)sa->sa_data; q < (u_char *)sa + len; q++) fprintf(fp, "%s 0x%02x", q == (u_char *)sa->sa_data ? "" : ",", *q); fputs(" } }", fp); } free(sa); break; } case Sigaction: { struct sigaction sa; if (get_struct(pid, (void *)args[sc->offset], &sa, sizeof(sa)) != -1) { fputs("{ ", fp); if (sa.sa_handler == SIG_DFL) fputs("SIG_DFL", fp); else if (sa.sa_handler == SIG_IGN) fputs("SIG_IGN", fp); else fprintf(fp, "%p", sa.sa_handler); fprintf(fp, " %s ss_t }", xlookup_bits(sigaction_flags, sa.sa_flags)); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Kevent: { /* * XXX XXX: The size of the array is determined by either the * next syscall argument, or by the syscall return value, * depending on which argument number we are. This matches the * kevent syscall, but luckily that's the only syscall that uses * them. 
*/ struct kevent *ke; int numevents = -1; size_t bytes; int i; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval[0] != -1) numevents = retval[0]; if (numevents >= 0) { bytes = sizeof(struct kevent) * numevents; if ((ke = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for kevent array", bytes); } else ke = NULL; if (numevents >= 0 && get_struct(pid, (void *)args[sc->offset], ke, bytes) != -1) { fputc('{', fp); for (i = 0; i < numevents; i++) { fputc(' ', fp); print_kevent(fp, &ke[i], sc->offset == 1); } fputs(" }", fp); } else { fprintf(fp, "0x%lx", args[sc->offset]); } free(ke); break; } case Stat: { struct stat st; if (get_struct(pid, (void *)args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { fprintf(fp, "0x%lx", args[sc->offset]); } break; } case Stat11: { struct freebsd11_stat st; if (get_struct(pid, (void *)args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { fprintf(fp, "0x%lx", args[sc->offset]); } break; } case StatFs: { unsigned int i; struct statfs buf; if (get_struct(pid, (void *)args[sc->offset], &buf, sizeof(buf)) != -1) { char fsid[17]; bzero(fsid, sizeof(fsid)); if (buf.f_fsid.val[0] != 0 || buf.f_fsid.val[1] != 0) { for (i = 0; i < sizeof(buf.f_fsid); i++) snprintf(&fsid[i*2], sizeof(fsid) - (i*2), "%02x", ((u_char *)&buf.f_fsid)[i]); } fprintf(fp, "{ fstypename=%s,mntonname=%s,mntfromname=%s," "fsid=%s }", buf.f_fstypename, buf.f_mntonname, buf.f_mntfromname, fsid); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Rusage: { struct rusage ru; if (get_struct(pid, (void *)args[sc->offset], &ru, sizeof(ru)) != -1) { fprintf(fp, "{ u=%jd.%06ld,s=%jd.%06ld,in=%ld,out=%ld }", (intmax_t)ru.ru_utime.tv_sec, ru.ru_utime.tv_usec, (intmax_t)ru.ru_stime.tv_sec, ru.ru_stime.tv_usec, ru.ru_inblock, ru.ru_oublock); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Rlimit: { struct rlimit rl; if (get_struct(pid, (void *)args[sc->offset], &rl, sizeof(rl)) != -1) { fprintf(fp, "{ cur=%ju,max=%ju }", rl.rlim_cur, rl.rlim_max); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case ExitStatus: { int status; if (get_struct(pid, (void *)args[sc->offset], &status, sizeof(status)) != -1) { fputs("{ ", fp); if (WIFCONTINUED(status)) fputs("CONTINUED", fp); else if (WIFEXITED(status)) fprintf(fp, "EXITED,val=%d", WEXITSTATUS(status)); else if (WIFSIGNALED(status)) fprintf(fp, "SIGNALED,sig=%s%s", strsig2(WTERMSIG(status)), WCOREDUMP(status) ? 
",cored" : ""); else fprintf(fp, "STOPPED,sig=%s", strsig2(WTERMSIG(status))); fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Waitoptions: print_mask_arg(sysdecode_wait6_options, fp, args[sc->offset]); break; case Idtype: print_integer_arg(sysdecode_idtype, fp, args[sc->offset]); break; case Procctl: print_integer_arg(sysdecode_procctl_cmd, fp, args[sc->offset]); break; case Umtxop: print_integer_arg(sysdecode_umtx_op, fp, args[sc->offset]); break; case Atfd: print_integer_arg(sysdecode_atfd, fp, args[sc->offset]); break; case Atflags: fputs(xlookup_bits(at_flags, args[sc->offset]), fp); break; case Accessmode: print_mask_arg(sysdecode_access_mode, fp, args[sc->offset]); break; case Sysarch: fputs(xlookup(sysarch_ops, args[sc->offset]), fp); break; case PipeFds: /* * The pipe() system call in the kernel returns its * two file descriptors via return values. However, * the interface exposed by libc is that pipe() * accepts a pointer to an array of descriptors. * Format the output to match the libc API by printing * the returned file descriptors as a fake argument. * * Overwrite the first retval to signal a successful * return as well. */ fprintf(fp, "{ %ld, %ld }", retval[0], retval[1]); retval[0] = 0; break; case Utrace: { size_t len; void *utrace_addr; len = args[sc->offset + 1]; utrace_addr = calloc(1, len); if (get_struct(pid, (void *)args[sc->offset], (void *)utrace_addr, len) != -1) print_utrace(fp, utrace_addr, len); else fprintf(fp, "0x%lx", args[sc->offset]); free(utrace_addr); break; } case IntArray: { int descriptors[16]; unsigned long i, ndescriptors; bool truncated; ndescriptors = args[sc->offset + 1]; truncated = false; if (ndescriptors > nitems(descriptors)) { ndescriptors = nitems(descriptors); truncated = true; } if (get_struct(pid, (void *)args[sc->offset], descriptors, ndescriptors * sizeof(descriptors[0])) != -1) { fprintf(fp, "{"); for (i = 0; i < ndescriptors; i++) fprintf(fp, i == 0 ? " %d" : ", %d", descriptors[i]); fprintf(fp, truncated ? ", ... 
}" : " }"); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Pipe2: print_mask_arg(sysdecode_pipe2_flags, fp, args[sc->offset]); break; case CapFcntlRights: { uint32_t rights; if (sc->type & OUT) { if (get_struct(pid, (void *)args[sc->offset], &rights, sizeof(rights)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } } else rights = args[sc->offset]; print_mask_arg32(sysdecode_cap_fcntlrights, fp, rights); break; } case Fadvice: print_integer_arg(sysdecode_fadvice, fp, args[sc->offset]); break; case FileFlags: { fflags_t rem; if (!sysdecode_fileflags(fp, args[sc->offset], &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); break; } case Flockop: print_mask_arg(sysdecode_flock_operation, fp, args[sc->offset]); break; case Getfsstatmode: print_integer_arg(sysdecode_getfsstat_mode, fp, args[sc->offset]); break; case Kldsymcmd: print_integer_arg(sysdecode_kldsym_cmd, fp, args[sc->offset]); break; case Kldunloadflags: print_integer_arg(sysdecode_kldunload_flags, fp, args[sc->offset]); break; case Madvice: print_integer_arg(sysdecode_madvice, fp, args[sc->offset]); break; case Socklent: fprintf(fp, "%u", (socklen_t)args[sc->offset]); break; case Sockprotocol: { const char *temp; int domain, protocol; domain = args[sc->offset - 2]; protocol = args[sc->offset]; if (protocol == 0) { fputs("0", fp); } else { temp = sysdecode_socket_protocol(domain, protocol); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", protocol); } } break; } case Sockoptlevel: print_integer_arg(sysdecode_sockopt_level, fp, args[sc->offset]); break; case Sockoptname: { const char *temp; int level, name; level = args[sc->offset - 1]; name = args[sc->offset]; temp = sysdecode_sockopt_name(level, name); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", name); } break; } case Msgflags: print_mask_arg(sysdecode_msg_flags, fp, args[sc->offset]); break; case CapRights: { cap_rights_t rights; if (get_struct(pid, (void *)args[sc->offset], &rights, sizeof(rights)) != -1) { fputs("{ ", fp); sysdecode_cap_rights(fp, &rights); fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Acltype: print_integer_arg(sysdecode_acltype, fp, args[sc->offset]); break; case Extattrnamespace: print_integer_arg(sysdecode_extattrnamespace, fp, args[sc->offset]); break; case Minherit: print_integer_arg(sysdecode_minherit_inherit, fp, args[sc->offset]); break; case Mlockall: print_mask_arg(sysdecode_mlockall_flags, fp, args[sc->offset]); break; case Mountflags: print_mask_arg(sysdecode_mount_flags, fp, args[sc->offset]); break; case Msync: print_mask_arg(sysdecode_msync_flags, fp, args[sc->offset]); break; case Priowhich: print_integer_arg(sysdecode_prio_which, fp, args[sc->offset]); break; case Ptraceop: print_integer_arg(sysdecode_ptrace_request, fp, args[sc->offset]); break; case Quotactlcmd: if (!sysdecode_quotactl_cmd(fp, args[sc->offset])) fprintf(fp, "%#x", (int)args[sc->offset]); break; case Reboothowto: print_mask_arg(sysdecode_reboot_howto, fp, args[sc->offset]); break; case Rtpriofunc: print_integer_arg(sysdecode_rtprio_function, fp, args[sc->offset]); break; case Schedpolicy: print_integer_arg(sysdecode_scheduler_policy, fp, args[sc->offset]); break; case Schedparam: { struct sched_param sp; if (get_struct(pid, (void *)args[sc->offset], &sp, sizeof(sp)) != -1) fprintf(fp, "{ %d }", sp.sched_priority); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABIAdvice: fputs(xlookup(cloudabi_advice, args[sc->offset]), fp); break; case CloudABIClockID: 
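/*
 * The CloudABI decoders that follow share one pattern: scalar argument
 * values are translated through the static xlookup()/xlookup_bits()
 * tables defined earlier in this file, falling back to the raw numeric
 * value when no table entry matches.
 */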
fputs(xlookup(cloudabi_clockid, args[sc->offset]), fp); break; case CloudABIFDSFlags: fputs(xlookup_bits(cloudabi_fdsflags, args[sc->offset]), fp); break; case CloudABIFDStat: { cloudabi_fdstat_t fds; if (get_struct(pid, (void *)args[sc->offset], &fds, sizeof(fds)) != -1) { fprintf(fp, "{ %s, ", xlookup(cloudabi_filetype, fds.fs_filetype)); fprintf(fp, "%s, ... }", xlookup_bits(cloudabi_fdflags, fds.fs_flags)); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABIFileStat: { cloudabi_filestat_t fsb; if (get_struct(pid, (void *)args[sc->offset], &fsb, sizeof(fsb)) != -1) fprintf(fp, "{ %s, %ju }", xlookup(cloudabi_filetype, fsb.st_filetype), (uintmax_t)fsb.st_size); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABIFileType: fputs(xlookup(cloudabi_filetype, args[sc->offset]), fp); break; case CloudABIFSFlags: fputs(xlookup_bits(cloudabi_fsflags, args[sc->offset]), fp); break; case CloudABILookup: if ((args[sc->offset] & CLOUDABI_LOOKUP_SYMLINK_FOLLOW) != 0) fprintf(fp, "%d|LOOKUP_SYMLINK_FOLLOW", (int)args[sc->offset]); else fprintf(fp, "%d", (int)args[sc->offset]); break; case CloudABIMFlags: fputs(xlookup_bits(cloudabi_mflags, args[sc->offset]), fp); break; case CloudABIMProt: fputs(xlookup_bits(cloudabi_mprot, args[sc->offset]), fp); break; case CloudABIMSFlags: fputs(xlookup_bits(cloudabi_msflags, args[sc->offset]), fp); break; case CloudABIOFlags: fputs(xlookup_bits(cloudabi_oflags, args[sc->offset]), fp); break; case CloudABISDFlags: fputs(xlookup_bits(cloudabi_sdflags, args[sc->offset]), fp); break; case CloudABISignal: fputs(xlookup(cloudabi_signal, args[sc->offset]), fp); break; case CloudABISockStat: { cloudabi_sockstat_t ss; if (get_struct(pid, (void *)args[sc->offset], &ss, sizeof(ss)) != -1) { fprintf(fp, "{ %s, ", xlookup( cloudabi_sa_family, ss.ss_sockname.sa_family)); fprintf(fp, "%s, ", xlookup( cloudabi_sa_family, ss.ss_peername.sa_family)); fprintf(fp, "%s, ", xlookup( cloudabi_errno, ss.ss_error)); fprintf(fp, "%s }", xlookup_bits( cloudabi_ssstate, ss.ss_state)); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABISSFlags: fputs(xlookup_bits(cloudabi_ssflags, args[sc->offset]), fp); break; case CloudABITimestamp: fprintf(fp, "%lu.%09lus", args[sc->offset] / 1000000000, args[sc->offset] % 1000000000); break; case CloudABIULFlags: fputs(xlookup_bits(cloudabi_ulflags, args[sc->offset]), fp); break; case CloudABIWhence: fputs(xlookup(cloudabi_whence, args[sc->offset]), fp); break; default: errx(1, "Invalid argument type %d\n", sc->type & ARG_MASK); } fclose(fp); return (tmp); } /* * Print (to outfile) the system call and its arguments. */ void print_syscall(struct trussinfo *trussinfo) { struct threadinfo *t; const char *name; char **s_args; int i, len, nargs; t = trussinfo->curthread; name = t->cs.sc->name; nargs = t->cs.nargs; s_args = t->cs.s_args; len = print_line_prefix(trussinfo); len += fprintf(trussinfo->outfile, "%s(", name); for (i = 0; i < nargs; i++) { if (s_args[i] != NULL) len += fprintf(trussinfo->outfile, "%s", s_args[i]); else len += fprintf(trussinfo->outfile, "<missing argument>"); len += fprintf(trussinfo->outfile, "%s", i < (nargs - 1) ?
"," : ""); } len += fprintf(trussinfo->outfile, ")"); for (i = 0; i < 6 - (len / 8); i++) fprintf(trussinfo->outfile, "\t"); } void print_syscall_ret(struct trussinfo *trussinfo, int errorp, long *retval) { struct timespec timediff; struct threadinfo *t; struct syscall *sc; int error; t = trussinfo->curthread; sc = t->cs.sc; if (trussinfo->flags & COUNTONLY) { timespecsubt(&t->after, &t->before, &timediff); timespecadd(&sc->time, &timediff, &sc->time); sc->ncalls++; if (errorp) sc->nerror++; return; } print_syscall(trussinfo); fflush(trussinfo->outfile); if (retval == NULL) { /* * This system call resulted in the current thread's exit, * so there is no return value or error to display. */ fprintf(trussinfo->outfile, "\n"); return; } if (errorp) { error = sysdecode_abi_to_freebsd_errno(t->proc->abi->abi, retval[0]); fprintf(trussinfo->outfile, " ERR#%ld '%s'\n", retval[0], error == INT_MAX ? "Unknown error" : strerror(error)); } #ifndef __LP64__ else if (sc->ret_type == 2) { off_t off; #if _BYTE_ORDER == _LITTLE_ENDIAN off = (off_t)retval[1] << 32 | retval[0]; #else off = (off_t)retval[0] << 32 | retval[1]; #endif fprintf(trussinfo->outfile, " = %jd (0x%jx)\n", (intmax_t)off, (intmax_t)off); } #endif else fprintf(trussinfo->outfile, " = %ld (0x%lx)\n", retval[0], retval[0]); } void print_summary(struct trussinfo *trussinfo) { struct timespec total = {0, 0}; struct syscall *sc; int ncall, nerror; fprintf(trussinfo->outfile, "%-20s%15s%8s%8s\n", "syscall", "seconds", "calls", "errors"); ncall = nerror = 0; STAILQ_FOREACH(sc, &syscalls, entries) if (sc->ncalls) { fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", sc->name, (intmax_t)sc->time.tv_sec, sc->time.tv_nsec, sc->ncalls, sc->nerror); timespecadd(&total, &sc->time, &total); ncall += sc->ncalls; nerror += sc->nerror; } fprintf(trussinfo->outfile, "%20s%15s%8s%8s\n", "", "-------------", "-------", "-------"); fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", "", (intmax_t)total.tv_sec, total.tv_nsec, ncall, nerror); } Index: projects/clang500-import/usr.sbin/acpi/acpidump/acpi.c =================================================================== --- projects/clang500-import/usr.sbin/acpi/acpidump/acpi.c (revision 321306) +++ projects/clang500-import/usr.sbin/acpi/acpidump/acpi.c (revision 321307) @@ -1,1675 +1,1886 @@ /*- * Copyright (c) 1998 Doug Rabson * Copyright (c) 2000 Mitsuru IWASAKI * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include <sys/param.h> #include <sys/endian.h> #include <sys/stat.h> #include <sys/wait.h> #include <assert.h> #include <ctype.h> #include <err.h> #include <fcntl.h> #include <paths.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <uuid.h> #include "acpidump.h" #define BEGIN_COMMENT "/*\n" #define END_COMMENT " */\n" static void acpi_print_string(char *s, size_t length); static void acpi_print_gas(ACPI_GENERIC_ADDRESS *gas); static int acpi_get_fadt_revision(ACPI_TABLE_FADT *fadt); static void acpi_handle_fadt(ACPI_TABLE_HEADER *fadt); static void acpi_print_cpu(u_char cpu_id); static void acpi_print_cpu_uid(uint32_t uid, char *uid_string); static void acpi_print_local_apic(uint32_t apic_id, uint32_t flags); static void acpi_print_io_apic(uint32_t apic_id, uint32_t int_base, uint64_t apic_addr); static void acpi_print_mps_flags(uint16_t flags); static void acpi_print_intr(uint32_t intr, uint16_t mps_flags); static void acpi_print_local_nmi(u_int lint, uint16_t mps_flags); static void acpi_print_madt(ACPI_SUBTABLE_HEADER *mp); static void acpi_handle_madt(ACPI_TABLE_HEADER *sdp); static void acpi_handle_ecdt(ACPI_TABLE_HEADER *sdp); static void acpi_handle_hpet(ACPI_TABLE_HEADER *sdp); static void acpi_handle_mcfg(ACPI_TABLE_HEADER *sdp); static void acpi_handle_slit(ACPI_TABLE_HEADER *sdp); static void acpi_print_srat_cpu(uint32_t apic_id, uint32_t proximity_domain, uint32_t flags); static void acpi_print_srat_memory(ACPI_SRAT_MEM_AFFINITY *mp); static void acpi_print_srat(ACPI_SUBTABLE_HEADER *srat); static void acpi_handle_srat(ACPI_TABLE_HEADER *sdp); static void acpi_handle_tcpa(ACPI_TABLE_HEADER *sdp); +static void acpi_print_nfit(ACPI_NFIT_HEADER *nfit); +static void acpi_handle_nfit(ACPI_TABLE_HEADER *sdp); static void acpi_print_sdt(ACPI_TABLE_HEADER *sdp); static void acpi_print_fadt(ACPI_TABLE_HEADER *sdp); static void acpi_print_facs(ACPI_TABLE_FACS *facs); static void acpi_print_dsdt(ACPI_TABLE_HEADER *dsdp); static ACPI_TABLE_HEADER *acpi_map_sdt(vm_offset_t pa); static void acpi_print_rsd_ptr(ACPI_TABLE_RSDP *rp); static void acpi_handle_rsdt(ACPI_TABLE_HEADER *rsdp); static void acpi_walk_subtables(ACPI_TABLE_HEADER *table, void *first, void (*action)(ACPI_SUBTABLE_HEADER *)); +static void acpi_walk_nfit(ACPI_TABLE_HEADER *table, void *first, + void (*action)(ACPI_NFIT_HEADER *)); /* Size of an address. 32-bit for ACPI 1.0, 64-bit for ACPI 2.0 and up.
*/ static int addr_size; /* Strings used in the TCPA table */ static const char *tcpa_event_type_strings[] = { "PREBOOT Certificate", "POST Code", "Unused", "No Action", "Separator", "Action", "Event Tag", "S-CRTM Contents", "S-CRTM Version", "CPU Microcode", "Platform Config Flags", "Table of Devices", "Compact Hash", "IPL", "IPL Partition Data", "Non-Host Code", "Non-Host Config", "Non-Host Info" }; static const char *TCPA_pcclient_strings[] = { "", "SMBIOS", "BIS Certificate", "POST BIOS ROM Strings", "ESCD", "CMOS", "NVRAM", "Option ROM Execute", "Option ROM Configuration", "", "Option ROM Microcode Update ", "S-CRTM Version String", "S-CRTM Contents", "POST Contents", "Table of Devices", }; #define PRINTFLAG_END() printflag_end() static char pf_sep = '{'; static void printflag_end(void) { if (pf_sep != '{') { printf("}"); pf_sep = '{'; } printf("\n"); } static void printflag(uint64_t var, uint64_t mask, const char *name) { if (var & mask) { printf("%c%s", pf_sep, name); pf_sep = ','; } } static void acpi_print_string(char *s, size_t length) { int c; /* Trim trailing spaces and NULLs */ while (length > 0 && (s[length - 1] == ' ' || s[length - 1] == '\0')) length--; while (length--) { c = *s++; putchar(c); } } static void acpi_print_gas(ACPI_GENERIC_ADDRESS *gas) { switch(gas->SpaceId) { case ACPI_GAS_MEMORY: if (gas->BitWidth <= 32) printf("0x%08x:%u[%u] (Memory)", (u_int)gas->Address, gas->BitOffset, gas->BitWidth); else printf("0x%016jx:%u[%u] (Memory)", (uintmax_t)gas->Address, gas->BitOffset, gas->BitWidth); break; case ACPI_GAS_IO: printf("0x%02x:%u[%u] (IO)", (u_int)gas->Address, gas->BitOffset, gas->BitWidth); break; case ACPI_GAS_PCI: printf("%x:%x+0x%x (PCI)", (uint16_t)(gas->Address >> 32), (uint16_t)((gas->Address >> 16) & 0xffff), (uint16_t)gas->Address); break; /* XXX How to handle these below? */ case ACPI_GAS_EMBEDDED: printf("0x%x:%u[%u] (EC)", (uint16_t)gas->Address, gas->BitOffset, gas->BitWidth); break; case ACPI_GAS_SMBUS: printf("0x%x:%u[%u] (SMBus)", (uint16_t)gas->Address, gas->BitOffset, gas->BitWidth); break; case ACPI_GAS_CMOS: case ACPI_GAS_PCIBAR: case ACPI_GAS_DATATABLE: case ACPI_GAS_FIXED: default: printf("0x%016jx (?)", (uintmax_t)gas->Address); break; } } /* The FADT revision indicates whether we use the DSDT or X_DSDT addresses. */ static int acpi_get_fadt_revision(ACPI_TABLE_FADT *fadt) { int fadt_revision; /* Set the FADT revision separately from the RSDP version. */ if (addr_size == 8) { fadt_revision = 2; /* * A few systems (e.g., IBM T23) have an RSDP that claims * revision 2 but the 64 bit addresses are invalid. If * revision 2 and the 32 bit address is non-zero but the * 32 and 64 bit versions don't match, prefer the 32 bit * version for all subsequent tables.
*/ if (fadt->Facs != 0 && (fadt->XFacs & 0xffffffff) != fadt->Facs) fadt_revision = 1; } else fadt_revision = 1; return (fadt_revision); } static void acpi_handle_fadt(ACPI_TABLE_HEADER *sdp) { ACPI_TABLE_HEADER *dsdp; ACPI_TABLE_FACS *facs; ACPI_TABLE_FADT *fadt; int fadt_revision; fadt = (ACPI_TABLE_FADT *)sdp; acpi_print_fadt(sdp); fadt_revision = acpi_get_fadt_revision(fadt); if (fadt_revision == 1) facs = (ACPI_TABLE_FACS *)acpi_map_sdt(fadt->Facs); else facs = (ACPI_TABLE_FACS *)acpi_map_sdt(fadt->XFacs); if (memcmp(facs->Signature, ACPI_SIG_FACS, 4) != 0 || facs->Length < 64) errx(1, "FACS is corrupt"); acpi_print_facs(facs); if (fadt_revision == 1) dsdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(fadt->Dsdt); else dsdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(fadt->XDsdt); if (acpi_checksum(dsdp, dsdp->Length)) errx(1, "DSDT is corrupt"); acpi_print_dsdt(dsdp); } static void acpi_walk_subtables(ACPI_TABLE_HEADER *table, void *first, void (*action)(ACPI_SUBTABLE_HEADER *)) { ACPI_SUBTABLE_HEADER *subtable; char *end; subtable = first; end = (char *)table + table->Length; while ((char *)subtable < end) { printf("\n"); if (subtable->Length < sizeof(ACPI_SUBTABLE_HEADER)) { warnx("invalid subtable length %u", subtable->Length); return; } action(subtable); subtable = (ACPI_SUBTABLE_HEADER *)((char *)subtable + subtable->Length); } } static void +acpi_walk_nfit(ACPI_TABLE_HEADER *table, void *first, + void (*action)(ACPI_NFIT_HEADER *)) +{ + ACPI_NFIT_HEADER *subtable; + char *end; + + subtable = first; + end = (char *)table + table->Length; + while ((char *)subtable < end) { + printf("\n"); + if (subtable->Length < sizeof(ACPI_NFIT_HEADER)) { + warnx("invalid subtable length %u", subtable->Length); + return; + } + action(subtable); + subtable = (ACPI_NFIT_HEADER *)((char *)subtable + + subtable->Length); + } +} + +static void acpi_print_cpu(u_char cpu_id) { printf("\tACPI CPU="); if (cpu_id == 0xff) printf("ALL\n"); else printf("%d\n", (u_int)cpu_id); } static void acpi_print_cpu_uid(uint32_t uid, char *uid_string) { printf("\tUID=%d", uid); if (uid_string != NULL) printf(" (%s)", uid_string); printf("\n"); } static void acpi_print_local_apic(uint32_t apic_id, uint32_t flags) { printf("\tFlags={"); if (flags & ACPI_MADT_ENABLED) printf("ENABLED"); else printf("DISABLED"); printf("}\n"); printf("\tAPIC ID=%d\n", apic_id); } static void acpi_print_io_apic(uint32_t apic_id, uint32_t int_base, uint64_t apic_addr) { printf("\tAPIC ID=%d\n", apic_id); printf("\tINT BASE=%d\n", int_base); printf("\tADDR=0x%016jx\n", (uintmax_t)apic_addr); } static void acpi_print_mps_flags(uint16_t flags) { printf("\tFlags={Polarity="); switch (flags & ACPI_MADT_POLARITY_MASK) { case ACPI_MADT_POLARITY_CONFORMS: printf("conforming"); break; case ACPI_MADT_POLARITY_ACTIVE_HIGH: printf("active-hi"); break; case ACPI_MADT_POLARITY_ACTIVE_LOW: printf("active-lo"); break; default: printf("0x%x", flags & ACPI_MADT_POLARITY_MASK); break; } printf(", Trigger="); switch (flags & ACPI_MADT_TRIGGER_MASK) { case ACPI_MADT_TRIGGER_CONFORMS: printf("conforming"); break; case ACPI_MADT_TRIGGER_EDGE: printf("edge"); break; case ACPI_MADT_TRIGGER_LEVEL: printf("level"); break; default: printf("0x%x", (flags & ACPI_MADT_TRIGGER_MASK) >> 2); } printf("}\n"); } static void acpi_print_gicc_flags(uint32_t flags) { printf("\tFlags={Performance intr="); if (flags & ACPI_MADT_PERFORMANCE_IRQ_MODE) printf("edge"); else printf("level"); printf(", VGIC intr="); if (flags & ACPI_MADT_VGIC_IRQ_MODE) printf("edge"); else printf("level"); printf("}\n"); 
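/*
 * Both GICC flag bits encode the trigger mode the same way: a set
 * bit means the interrupt is edge-triggered, a clear bit means it
 * is level-triggered.
 */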
} static void acpi_print_intr(uint32_t intr, uint16_t mps_flags) { printf("\tINTR=%d\n", intr); acpi_print_mps_flags(mps_flags); } static void acpi_print_local_nmi(u_int lint, uint16_t mps_flags) { printf("\tLINT Pin=%d\n", lint); acpi_print_mps_flags(mps_flags); } -static const char *apic_types[] = { "Local APIC", "IO APIC", "INT Override", - "NMI", "Local APIC NMI", - "Local APIC Override", "IO SAPIC", - "Local SAPIC", "Platform Interrupt", - "Local X2APIC", "Local X2APIC NMI", - "GIC CPU Interface Structure", - "GIC Distributor Structure", - "GICv2m MSI Frame", - "GIC Redistributor Structure", - "GIC ITS Structure" }; +static const char *apic_types[] = { + [ACPI_MADT_TYPE_LOCAL_APIC] = "Local APIC", + [ACPI_MADT_TYPE_IO_APIC] = "IO APIC", + [ACPI_MADT_TYPE_INTERRUPT_OVERRIDE] = "INT Override", + [ACPI_MADT_TYPE_NMI_SOURCE] = "NMI", + [ACPI_MADT_TYPE_LOCAL_APIC_NMI] = "Local APIC NMI", + [ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE] = "Local APIC Override", + [ACPI_MADT_TYPE_IO_SAPIC] = "IO SAPIC", + [ACPI_MADT_TYPE_LOCAL_SAPIC] = "Local SAPIC", + [ACPI_MADT_TYPE_INTERRUPT_SOURCE] = "Platform Interrupt", + [ACPI_MADT_TYPE_LOCAL_X2APIC] = "Local X2APIC", + [ACPI_MADT_TYPE_LOCAL_X2APIC_NMI] = "Local X2APIC NMI", + [ACPI_MADT_TYPE_GENERIC_INTERRUPT] = "GIC CPU Interface Structure", + [ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR] = "GIC Distributor Structure", + [ACPI_MADT_TYPE_GENERIC_MSI_FRAME] = "GICv2m MSI Frame", + [ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR] = "GIC Redistributor Structure", + [ACPI_MADT_TYPE_GENERIC_TRANSLATOR] = "GIC ITS Structure" +}; + static const char *platform_int_types[] = { "0 (unknown)", "PMI", "INIT", "Corrected Platform Error" }; static void acpi_print_madt(ACPI_SUBTABLE_HEADER *mp) { ACPI_MADT_LOCAL_APIC *lapic; ACPI_MADT_IO_APIC *ioapic; ACPI_MADT_INTERRUPT_OVERRIDE *over; ACPI_MADT_NMI_SOURCE *nmi; ACPI_MADT_LOCAL_APIC_NMI *lapic_nmi; ACPI_MADT_LOCAL_APIC_OVERRIDE *lapic_over; ACPI_MADT_IO_SAPIC *iosapic; ACPI_MADT_LOCAL_SAPIC *lsapic; ACPI_MADT_INTERRUPT_SOURCE *isrc; ACPI_MADT_LOCAL_X2APIC *x2apic; ACPI_MADT_LOCAL_X2APIC_NMI *x2apic_nmi; ACPI_MADT_GENERIC_INTERRUPT *gicc; ACPI_MADT_GENERIC_DISTRIBUTOR *gicd; ACPI_MADT_GENERIC_REDISTRIBUTOR *gicr; ACPI_MADT_GENERIC_TRANSLATOR *gict; if (mp->Type < nitems(apic_types)) printf("\tType=%s\n", apic_types[mp->Type]); else printf("\tType=%d (unknown)\n", mp->Type); switch (mp->Type) { case ACPI_MADT_TYPE_LOCAL_APIC: lapic = (ACPI_MADT_LOCAL_APIC *)mp; acpi_print_cpu(lapic->ProcessorId); acpi_print_local_apic(lapic->Id, lapic->LapicFlags); break; case ACPI_MADT_TYPE_IO_APIC: ioapic = (ACPI_MADT_IO_APIC *)mp; acpi_print_io_apic(ioapic->Id, ioapic->GlobalIrqBase, ioapic->Address); break; case ACPI_MADT_TYPE_INTERRUPT_OVERRIDE: over = (ACPI_MADT_INTERRUPT_OVERRIDE *)mp; printf("\tBUS=%d\n", (u_int)over->Bus); printf("\tIRQ=%d\n", (u_int)over->SourceIrq); acpi_print_intr(over->GlobalIrq, over->IntiFlags); break; case ACPI_MADT_TYPE_NMI_SOURCE: nmi = (ACPI_MADT_NMI_SOURCE *)mp; acpi_print_intr(nmi->GlobalIrq, nmi->IntiFlags); break; case ACPI_MADT_TYPE_LOCAL_APIC_NMI: lapic_nmi = (ACPI_MADT_LOCAL_APIC_NMI *)mp; acpi_print_cpu(lapic_nmi->ProcessorId); acpi_print_local_nmi(lapic_nmi->Lint, lapic_nmi->IntiFlags); break; case ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE: lapic_over = (ACPI_MADT_LOCAL_APIC_OVERRIDE *)mp; printf("\tLocal APIC ADDR=0x%016jx\n", (uintmax_t)lapic_over->Address); break; case ACPI_MADT_TYPE_IO_SAPIC: iosapic = (ACPI_MADT_IO_SAPIC *)mp; acpi_print_io_apic(iosapic->Id, iosapic->GlobalIrqBase, iosapic->Address); break; case 
ACPI_MADT_TYPE_LOCAL_SAPIC: lsapic = (ACPI_MADT_LOCAL_SAPIC *)mp; acpi_print_cpu(lsapic->ProcessorId); acpi_print_local_apic(lsapic->Id, lsapic->LapicFlags); printf("\tAPIC EID=%d\n", (u_int)lsapic->Eid); if (mp->Length > __offsetof(ACPI_MADT_LOCAL_SAPIC, Uid)) acpi_print_cpu_uid(lsapic->Uid, lsapic->UidString); break; case ACPI_MADT_TYPE_INTERRUPT_SOURCE: isrc = (ACPI_MADT_INTERRUPT_SOURCE *)mp; if (isrc->Type < nitems(platform_int_types)) printf("\tType=%s\n", platform_int_types[isrc->Type]); else printf("\tType=%d (unknown)\n", isrc->Type); printf("\tAPIC ID=%d\n", (u_int)isrc->Id); printf("\tAPIC EID=%d\n", (u_int)isrc->Eid); printf("\tSAPIC Vector=%d\n", (u_int)isrc->IoSapicVector); acpi_print_intr(isrc->GlobalIrq, isrc->IntiFlags); break; case ACPI_MADT_TYPE_LOCAL_X2APIC: x2apic = (ACPI_MADT_LOCAL_X2APIC *)mp; acpi_print_cpu_uid(x2apic->Uid, NULL); acpi_print_local_apic(x2apic->LocalApicId, x2apic->LapicFlags); break; case ACPI_MADT_TYPE_LOCAL_X2APIC_NMI: x2apic_nmi = (ACPI_MADT_LOCAL_X2APIC_NMI *)mp; acpi_print_cpu_uid(x2apic_nmi->Uid, NULL); acpi_print_local_nmi(x2apic_nmi->Lint, x2apic_nmi->IntiFlags); break; case ACPI_MADT_TYPE_GENERIC_INTERRUPT: gicc = (ACPI_MADT_GENERIC_INTERRUPT *)mp; acpi_print_cpu_uid(gicc->Uid, NULL); printf("\tCPU INTERFACE=%x\n", gicc->CpuInterfaceNumber); acpi_print_gicc_flags(gicc->Flags); printf("\tParking Protocol Version=%x\n", gicc->ParkingVersion); printf("\tPERF INTR=%d\n", gicc->PerformanceInterrupt); printf("\tParked ADDR=%016jx\n", (uintmax_t)gicc->ParkedAddress); printf("\tBase ADDR=%016jx\n", (uintmax_t)gicc->BaseAddress); printf("\tGICV=%016jx\n", (uintmax_t)gicc->GicvBaseAddress); printf("\tGICH=%016jx\n", (uintmax_t)gicc->GichBaseAddress); printf("\tVGIC INTR=%d\n", gicc->VgicInterrupt); printf("\tGICR ADDR=%016jx\n", (uintmax_t)gicc->GicrBaseAddress); printf("\tMPIDR=%jx\n", (uintmax_t)gicc->ArmMpidr); printf("\tEfficiency Class=%d\n", (u_int)gicc->EfficiencyClass); break; case ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR: gicd = (ACPI_MADT_GENERIC_DISTRIBUTOR *)mp; printf("\tGIC ID=%d\n", (u_int)gicd->GicId); printf("\tBase ADDR=%016jx\n", (uintmax_t)gicd->BaseAddress); printf("\tVector Base=%d\n", gicd->GlobalIrqBase); printf("\tGIC VERSION=%d\n", (u_int)gicd->Version); break; case ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR: gicr = (ACPI_MADT_GENERIC_REDISTRIBUTOR *)mp; printf("\tBase ADDR=%016jx\n", (uintmax_t)gicr->BaseAddress); printf("\tLength=%08x\n", gicr->Length); break; case ACPI_MADT_TYPE_GENERIC_TRANSLATOR: gict = (ACPI_MADT_GENERIC_TRANSLATOR *)mp; printf("\tGIC ITS ID=%d\n", gict->TranslationId); printf("\tBase ADDR=%016jx\n", (uintmax_t)gict->BaseAddress); break; } } static void acpi_handle_madt(ACPI_TABLE_HEADER *sdp) { ACPI_TABLE_MADT *madt; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); madt = (ACPI_TABLE_MADT *)sdp; printf("\tLocal APIC ADDR=0x%08x\n", madt->Address); printf("\tFlags={"); if (madt->Flags & ACPI_MADT_PCAT_COMPAT) printf("PC-AT"); printf("}\n"); acpi_walk_subtables(sdp, (madt + 1), acpi_print_madt); printf(END_COMMENT); } static void acpi_handle_hpet(ACPI_TABLE_HEADER *sdp) { ACPI_TABLE_HPET *hpet; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); hpet = (ACPI_TABLE_HPET *)sdp; printf("\tHPET Number=%d\n", hpet->Sequence); printf("\tADDR="); acpi_print_gas(&hpet->Address); printf("\tHW Rev=0x%x\n", hpet->Id & ACPI_HPET_ID_HARDWARE_REV_ID); printf("\tComparators=%d\n", (hpet->Id & ACPI_HPET_ID_COMPARATORS) >> 8); printf("\tCounter Size=%d\n", hpet->Id & ACPI_HPET_ID_COUNT_SIZE_CAP ?
1 : 0); printf("\tLegacy IRQ routing capable={"); if (hpet->Id & ACPI_HPET_ID_LEGACY_CAPABLE) printf("TRUE}\n"); else printf("FALSE}\n"); printf("\tPCI Vendor ID=0x%04x\n", hpet->Id >> 16); printf("\tMinimal Tick=%d\n", hpet->MinimumTick); printf("\tFlags=0x%02x\n", hpet->Flags); printf(END_COMMENT); } static void acpi_handle_ecdt(ACPI_TABLE_HEADER *sdp) { ACPI_TABLE_ECDT *ecdt; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); ecdt = (ACPI_TABLE_ECDT *)sdp; printf("\tEC_CONTROL="); acpi_print_gas(&ecdt->Control); printf("\n\tEC_DATA="); acpi_print_gas(&ecdt->Data); printf("\n\tUID=%#x, ", ecdt->Uid); printf("GPE_BIT=%#x\n", ecdt->Gpe); printf("\tEC_ID=%s\n", ecdt->Id); printf(END_COMMENT); } static void acpi_handle_mcfg(ACPI_TABLE_HEADER *sdp) { ACPI_TABLE_MCFG *mcfg; ACPI_MCFG_ALLOCATION *alloc; u_int i, entries; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); mcfg = (ACPI_TABLE_MCFG *)sdp; entries = (sdp->Length - sizeof(ACPI_TABLE_MCFG)) / sizeof(ACPI_MCFG_ALLOCATION); alloc = (ACPI_MCFG_ALLOCATION *)(mcfg + 1); for (i = 0; i < entries; i++, alloc++) { printf("\n"); printf("\tBase Address=0x%016jx\n", (uintmax_t)alloc->Address); printf("\tSegment Group=0x%04x\n", alloc->PciSegment); printf("\tStart Bus=%d\n", alloc->StartBusNumber); printf("\tEnd Bus=%d\n", alloc->EndBusNumber); } printf(END_COMMENT); } static void acpi_handle_slit(ACPI_TABLE_HEADER *sdp) { ACPI_TABLE_SLIT *slit; UINT64 i, j; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); slit = (ACPI_TABLE_SLIT *)sdp; printf("\tLocality Count=%ju\n", (uintmax_t)slit->LocalityCount); printf("\n\t "); for (i = 0; i < slit->LocalityCount; i++) printf(" %3ju", (uintmax_t)i); printf("\n\t +"); for (i = 0; i < slit->LocalityCount; i++) printf("----"); printf("\n"); for (i = 0; i < slit->LocalityCount; i++) { printf("\t %3ju |", (uintmax_t)i); for (j = 0; j < slit->LocalityCount; j++) printf(" %3d", slit->Entry[i * slit->LocalityCount + j]); printf("\n"); } printf(END_COMMENT); } static void acpi_print_srat_cpu(uint32_t apic_id, uint32_t proximity_domain, uint32_t flags) { printf("\tFlags={"); if (flags & ACPI_SRAT_CPU_ENABLED) printf("ENABLED"); else printf("DISABLED"); printf("}\n"); printf("\tAPIC ID=%d\n", apic_id); printf("\tProximity Domain=%d\n", proximity_domain); } static char * acpi_tcpa_evname(struct TCPAevent *event) { struct TCPApc_event *pc_event; char *eventname = NULL; pc_event = (struct TCPApc_event *)(event + 1); switch(event->event_type) { case PREBOOT: case POST_CODE: case UNUSED: case NO_ACTION: case SEPARATOR: case SCRTM_CONTENTS: case SCRTM_VERSION: case CPU_MICROCODE: case PLATFORM_CONFIG_FLAGS: case TABLE_OF_DEVICES: case COMPACT_HASH: case IPL: case IPL_PARTITION_DATA: case NONHOST_CODE: case NONHOST_CONFIG: case NONHOST_INFO: asprintf(&eventname, "%s", tcpa_event_type_strings[event->event_type]); break; case ACTION: eventname = calloc(event->event_size + 1, sizeof(char)); memcpy(eventname, pc_event, event->event_size); break; case EVENT_TAG: switch (pc_event->event_id) { case SMBIOS: case BIS_CERT: case CMOS: case NVRAM: case OPTION_ROM_EXEC: case OPTION_ROM_CONFIG: case S_CRTM_VERSION: case POST_BIOS_ROM: case ESCD: case OPTION_ROM_MICROCODE: case S_CRTM_CONTENTS: case POST_CONTENTS: asprintf(&eventname, "%s", TCPA_pcclient_strings[pc_event->event_id]); break; default: asprintf(&eventname, "<unknown tag 0x%x>", pc_event->event_id); break; } break; default: asprintf(&eventname, "<unknown event 0x%x>", event->event_type); break; } return eventname; } static void acpi_print_tcpa(struct TCPAevent *event) { int i; char *eventname; eventname = acpi_tcpa_evname(event);
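/*
 * Layout sketch of the TCG 1.2-style event record dumped below (field
 * order as consumed in this file; the authoritative definition lives
 * in acpidump.h): a 32-bit PCR index, a 32-bit event type used to
 * index tcpa_event_type_strings[], a 20-byte SHA-1 digest printed as
 * 40 hex digits, and a 32-bit event_size counting the raw event-data
 * bytes that follow the fixed header.
 */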
printf("\t%d", event->pcr_index); printf(" 0x"); for (i = 0; i < 20; i++) printf("%02x", event->pcr_value[i]); printf(" [%s]\n", eventname ? eventname : "<unknown>"); free(eventname); } static void acpi_handle_tcpa(ACPI_TABLE_HEADER *sdp) { struct TCPAbody *tcpa; struct TCPAevent *event; uintmax_t len, paddr; unsigned char *vaddr = NULL; unsigned char *vend = NULL; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); tcpa = (struct TCPAbody *) sdp; switch (tcpa->platform_class) { case ACPI_TCPA_BIOS_CLIENT: len = tcpa->client.log_max_len; paddr = tcpa->client.log_start_addr; break; case ACPI_TCPA_BIOS_SERVER: len = tcpa->server.log_max_len; paddr = tcpa->server.log_start_addr; break; default: printf("XXX"); printf(END_COMMENT); return; } printf("\tClass %u Base Address 0x%jx Length %ju\n\n", tcpa->platform_class, paddr, len); if (len == 0) { printf("\tEmpty TCPA table\n"); printf(END_COMMENT); return; } if(sdp->Revision == 1){ printf("\tOLD TCPA spec log found. Dumping not supported.\n"); printf(END_COMMENT); return; } vaddr = (unsigned char *)acpi_map_physical(paddr, len); vend = vaddr + len; while (vaddr != NULL) { if ((vaddr + sizeof(struct TCPAevent) >= vend)|| (vaddr + sizeof(struct TCPAevent) < vaddr)) break; event = (struct TCPAevent *)(void *)vaddr; if (vaddr + event->event_size >= vend) break; if (vaddr + event->event_size < vaddr) break; if (event->event_type == 0 && event->event_size == 0) break; #if 0 { unsigned int i, j, k; printf("\n\tsize %d\n\t\t%p ", event->event_size, vaddr); for (j = 0, i = 0; i < sizeof(struct TCPAevent) + event->event_size; i++) { printf("%02x ", vaddr[i]); if ((i+1) % 8 == 0) { for (k = 0; k < 8; k++) printf("%c", isprint(vaddr[j+k]) ? vaddr[j+k] : '.'); printf("\n\t\t%p ", &vaddr[i + 1]); j = i + 1; } } printf("\n"); } #endif acpi_print_tcpa(event); vaddr += sizeof(struct TCPAevent) + event->event_size; } printf(END_COMMENT); } static const char * devscope_type2str(int type) { static char typebuf[16]; switch (type) { case 1: return ("PCI Endpoint Device"); case 2: return ("PCI Sub-Hierarchy"); case 3: return ("IOAPIC"); case 4: return ("HPET"); default: snprintf(typebuf, sizeof(typebuf), "%d", type); return (typebuf); } } static int acpi_handle_dmar_devscope(void *addr, int remaining) { char sep; int pathlen; ACPI_DMAR_PCI_PATH *path, *pathend; ACPI_DMAR_DEVICE_SCOPE *devscope = addr; if (remaining < (int)sizeof(ACPI_DMAR_DEVICE_SCOPE)) return (-1); if (remaining < devscope->Length) return (-1); printf("\n"); printf("\t\tType=%s\n", devscope_type2str(devscope->EntryType)); printf("\t\tLength=%d\n", devscope->Length); printf("\t\tEnumerationId=%d\n", devscope->EnumerationId); printf("\t\tStartBusNumber=%d\n", devscope->Bus); path = (ACPI_DMAR_PCI_PATH *)(devscope + 1); pathlen = devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE); pathend = path + pathlen / sizeof(ACPI_DMAR_PCI_PATH); if (path < pathend) { sep = '{'; printf("\t\tPath="); do { printf("%c%d:%d", sep, path->Device, path->Function); sep=','; path++; } while (path < pathend); printf("}\n"); } return (devscope->Length); } static void acpi_handle_dmar_drhd(ACPI_DMAR_HARDWARE_UNIT *drhd) { char *cp; int remaining, consumed; printf("\n"); printf("\tType=DRHD\n"); printf("\tLength=%d\n", drhd->Header.Length); #define PRINTFLAG(var, flag) printflag((var), ACPI_DMAR_## flag, #flag) printf("\tFlags="); PRINTFLAG(drhd->Flags, INCLUDE_ALL); PRINTFLAG_END(); #undef PRINTFLAG printf("\tSegment=%d\n", drhd->Segment); printf("\tAddress=0x%016jx\n", (uintmax_t)drhd->Address); remaining = drhd->Header.Length -
sizeof(ACPI_DMAR_HARDWARE_UNIT); if (remaining > 0) printf("\tDevice Scope:"); while (remaining > 0) { cp = (char *)drhd + drhd->Header.Length - remaining; consumed = acpi_handle_dmar_devscope(cp, remaining); if (consumed <= 0) break; else remaining -= consumed; } } static void acpi_handle_dmar_rmrr(ACPI_DMAR_RESERVED_MEMORY *rmrr) { char *cp; int remaining, consumed; printf("\n"); printf("\tType=RMRR\n"); printf("\tLength=%d\n", rmrr->Header.Length); printf("\tSegment=%d\n", rmrr->Segment); printf("\tBaseAddress=0x%016jx\n", (uintmax_t)rmrr->BaseAddress); printf("\tLimitAddress=0x%016jx\n", (uintmax_t)rmrr->EndAddress); remaining = rmrr->Header.Length - sizeof(ACPI_DMAR_RESERVED_MEMORY); if (remaining > 0) printf("\tDevice Scope:"); while (remaining > 0) { cp = (char *)rmrr + rmrr->Header.Length - remaining; consumed = acpi_handle_dmar_devscope(cp, remaining); if (consumed <= 0) break; else remaining -= consumed; } } static void acpi_handle_dmar_atsr(ACPI_DMAR_ATSR *atsr) { char *cp; int remaining, consumed; printf("\n"); printf("\tType=ATSR\n"); printf("\tLength=%d\n", atsr->Header.Length); #define PRINTFLAG(var, flag) printflag((var), ACPI_DMAR_## flag, #flag) printf("\tFlags="); PRINTFLAG(atsr->Flags, ALL_PORTS); PRINTFLAG_END(); #undef PRINTFLAG printf("\tSegment=%d\n", atsr->Segment); remaining = atsr->Header.Length - sizeof(ACPI_DMAR_ATSR); if (remaining > 0) printf("\tDevice Scope:"); while (remaining > 0) { cp = (char *)atsr + atsr->Header.Length - remaining; consumed = acpi_handle_dmar_devscope(cp, remaining); if (consumed <= 0) break; else remaining -= consumed; } } static void acpi_handle_dmar_rhsa(ACPI_DMAR_RHSA *rhsa) { printf("\n"); printf("\tType=RHSA\n"); printf("\tLength=%d\n", rhsa->Header.Length); printf("\tBaseAddress=0x%016jx\n", (uintmax_t)rhsa->BaseAddress); printf("\tProximityDomain=0x%08x\n", rhsa->ProximityDomain); } static int acpi_handle_dmar_remapping_structure(void *addr, int remaining) { ACPI_DMAR_HEADER *hdr = addr; if (remaining < (int)sizeof(ACPI_DMAR_HEADER)) return (-1); if (remaining < hdr->Length) return (-1); switch (hdr->Type) { case ACPI_DMAR_TYPE_HARDWARE_UNIT: acpi_handle_dmar_drhd(addr); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: acpi_handle_dmar_rmrr(addr); break; case ACPI_DMAR_TYPE_ROOT_ATS: acpi_handle_dmar_atsr(addr); break; case ACPI_DMAR_TYPE_HARDWARE_AFFINITY: acpi_handle_dmar_rhsa(addr); break; default: printf("\n"); printf("\tType=%d\n", hdr->Type); printf("\tLength=%d\n", hdr->Length); break; } return (hdr->Length); } #ifndef ACPI_DMAR_X2APIC_OPT_OUT #define ACPI_DMAR_X2APIC_OPT_OUT (0x2) #endif static void acpi_handle_dmar(ACPI_TABLE_HEADER *sdp) { char *cp; int remaining, consumed; ACPI_TABLE_DMAR *dmar; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); dmar = (ACPI_TABLE_DMAR *)sdp; printf("\tHost Address Width=%d\n", dmar->Width + 1); #define PRINTFLAG(var, flag) printflag((var), ACPI_DMAR_## flag, #flag) printf("\tFlags="); PRINTFLAG(dmar->Flags, INTR_REMAP); PRINTFLAG(dmar->Flags, X2APIC_OPT_OUT); PRINTFLAG_END(); #undef PRINTFLAG remaining = sdp->Length - sizeof(ACPI_TABLE_DMAR); while (remaining > 0) { cp = (char *)sdp + sdp->Length - remaining; consumed = acpi_handle_dmar_remapping_structure(cp, remaining); if (consumed <= 0) break; else remaining -= consumed; } printf(END_COMMENT); } static void acpi_print_srat_memory(ACPI_SRAT_MEM_AFFINITY *mp) { printf("\tFlags={"); if (mp->Flags & ACPI_SRAT_MEM_ENABLED) printf("ENABLED"); else printf("DISABLED"); if (mp->Flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) printf(",HOT_PLUGGABLE"); if 
(mp->Flags & ACPI_SRAT_MEM_NON_VOLATILE) printf(",NON_VOLATILE"); printf("}\n"); printf("\tBase Address=0x%016jx\n", (uintmax_t)mp->BaseAddress); printf("\tLength=0x%016jx\n", (uintmax_t)mp->Length); printf("\tProximity Domain=%d\n", mp->ProximityDomain); } -static const char *srat_types[] = { "CPU", "Memory", "X2APIC", "GICC" }; +static const char *srat_types[] = { + [ACPI_SRAT_TYPE_CPU_AFFINITY] = "CPU", + [ACPI_SRAT_TYPE_MEMORY_AFFINITY] = "Memory", + [ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY] = "X2APIC", + [ACPI_SRAT_TYPE_GICC_AFFINITY] = "GICC", + [ACPI_SRAT_TYPE_GIC_ITS_AFFINITY] = "GIC ITS", +}; static void acpi_print_srat(ACPI_SUBTABLE_HEADER *srat) { ACPI_SRAT_CPU_AFFINITY *cpu; ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic; ACPI_SRAT_GICC_AFFINITY *gic; if (srat->Type < nitems(srat_types)) printf("\tType=%s\n", srat_types[srat->Type]); else printf("\tType=%d (unknown)\n", srat->Type); switch (srat->Type) { case ACPI_SRAT_TYPE_CPU_AFFINITY: cpu = (ACPI_SRAT_CPU_AFFINITY *)srat; acpi_print_srat_cpu(cpu->ApicId, cpu->ProximityDomainHi[2] << 24 | cpu->ProximityDomainHi[1] << 16 | cpu->ProximityDomainHi[0] << 0 | cpu->ProximityDomainLo, cpu->Flags); break; case ACPI_SRAT_TYPE_MEMORY_AFFINITY: acpi_print_srat_memory((ACPI_SRAT_MEM_AFFINITY *)srat); break; case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)srat; acpi_print_srat_cpu(x2apic->ApicId, x2apic->ProximityDomain, x2apic->Flags); break; case ACPI_SRAT_TYPE_GICC_AFFINITY: gic = (ACPI_SRAT_GICC_AFFINITY *)srat; acpi_print_srat_cpu(gic->AcpiProcessorUid, gic->ProximityDomain, gic->Flags); break; } } static void acpi_handle_srat(ACPI_TABLE_HEADER *sdp) { ACPI_TABLE_SRAT *srat; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); srat = (ACPI_TABLE_SRAT *)sdp; printf("\tTable Revision=%d\n", srat->TableRevision); acpi_walk_subtables(sdp, (srat + 1), acpi_print_srat); printf(END_COMMENT); } +static const char *nfit_types[] = { + [ACPI_NFIT_TYPE_SYSTEM_ADDRESS] = "System Address", + [ACPI_NFIT_TYPE_MEMORY_MAP] = "Memory Map", + [ACPI_NFIT_TYPE_INTERLEAVE] = "Interleave", + [ACPI_NFIT_TYPE_SMBIOS] = "SMBIOS", + [ACPI_NFIT_TYPE_CONTROL_REGION] = "Control Region", + [ACPI_NFIT_TYPE_DATA_REGION] = "Data Region", + [ACPI_NFIT_TYPE_FLUSH_ADDRESS] = "Flush Address" +}; + + static void +acpi_print_nfit(ACPI_NFIT_HEADER *nfit) +{ + char *uuidstr; + uint32_t status; + + ACPI_NFIT_SYSTEM_ADDRESS *sysaddr; + ACPI_NFIT_MEMORY_MAP *mmap; + ACPI_NFIT_INTERLEAVE *ileave; + ACPI_NFIT_SMBIOS *smbios; + ACPI_NFIT_CONTROL_REGION *ctlreg; + ACPI_NFIT_DATA_REGION *datareg; + ACPI_NFIT_FLUSH_ADDRESS *fladdr; + + if (nfit->Type < nitems(nfit_types)) + printf("\tType=%s\n", nfit_types[nfit->Type]); + else + printf("\tType=%u (unknown)\n", nfit->Type); + switch (nfit->Type) { + case ACPI_NFIT_TYPE_SYSTEM_ADDRESS: + sysaddr = (ACPI_NFIT_SYSTEM_ADDRESS *)nfit; + printf("\tRangeIndex=%u\n", (u_int)sysaddr->RangeIndex); + printf("\tProximityDomain=%u\n", + (u_int)sysaddr->ProximityDomain); + uuid_to_string((uuid_t *)(sysaddr->RangeGuid), + &uuidstr, &status); + if (status != uuid_s_ok) + errx(1, "uuid_to_string: status=%u", status); + printf("\tRangeGuid=%s\n", uuidstr); + free(uuidstr); + printf("\tAddress=0x%016jx\n", (uintmax_t)sysaddr->Address); + printf("\tLength=0x%016jx\n", (uintmax_t)sysaddr->Length); + printf("\tMemoryMapping=0x%016jx\n", + (uintmax_t)sysaddr->MemoryMapping); + +#define PRINTFLAG(var, flag) printflag((var), ACPI_NFIT_## flag, #flag) + + printf("\tFlags="); + PRINTFLAG(sysaddr->Flags, ADD_ONLINE_ONLY); + PRINTFLAG(sysaddr->Flags, 
PROXIMITY_VALID); + PRINTFLAG_END(); + +#undef PRINTFLAG + + break; + case ACPI_NFIT_TYPE_MEMORY_MAP: + mmap = (ACPI_NFIT_MEMORY_MAP *)nfit; + printf("\tDeviceHandle=%u\n", (u_int)mmap->DeviceHandle); + printf("\tPhysicalId=%u\n", (u_int)mmap->PhysicalId); + printf("\tRegionId=%u\n", (u_int)mmap->RegionId); + printf("\tRangeIndex=%u\n", (u_int)mmap->RangeIndex); + printf("\tRegionIndex=%u\n", (u_int)mmap->RegionIndex); + printf("\tRegionSize=0x%016jx\n", (uintmax_t)mmap->RegionSize); + printf("\tRegionOffset=0x%016jx\n", + (uintmax_t)mmap->RegionOffset); + printf("\tAddress=0x%016jx\n", (uintmax_t)mmap->Address); + printf("\tInterleaveIndex=%u\n", (u_int)mmap->InterleaveIndex); + +#define PRINTFLAG(var, flag) printflag((var), ACPI_NFIT_MEM_## flag, #flag) + + printf("\tFlags="); + PRINTFLAG(mmap->Flags, SAVE_FAILED); + PRINTFLAG(mmap->Flags, RESTORE_FAILED); + PRINTFLAG(mmap->Flags, FLUSH_FAILED); + PRINTFLAG(mmap->Flags, NOT_ARMED); + PRINTFLAG(mmap->Flags, HEALTH_OBSERVED); + PRINTFLAG(mmap->Flags, HEALTH_ENABLED); + PRINTFLAG(mmap->Flags, MAP_FAILED); + PRINTFLAG_END(); + +#undef PRINTFLAG + + break; + case ACPI_NFIT_TYPE_INTERLEAVE: + ileave = (ACPI_NFIT_INTERLEAVE *)nfit; + printf("\tInterleaveIndex=%u\n", + (u_int)ileave->InterleaveIndex); + printf("\tLineCount=%u\n", (u_int)ileave->LineCount); + printf("\tLineSize=%u\n", (u_int)ileave->LineSize); + /* XXX ileave->LineOffset[i] output is not supported */ + break; + case ACPI_NFIT_TYPE_SMBIOS: + smbios = (ACPI_NFIT_SMBIOS *)nfit; + /* XXX smbios->Data[x] output is not supported */ + break; + case ACPI_NFIT_TYPE_CONTROL_REGION: + ctlreg = (ACPI_NFIT_CONTROL_REGION *)nfit; + printf("\tRegionIndex=%u\n", (u_int)ctlreg->RegionIndex); + printf("\tVendorId=0x%04x\n", (u_int)ctlreg->VendorId); + printf("\tDeviceId=0x%04x\n", (u_int)ctlreg->DeviceId); + printf("\tRevisionId=%u\n", (u_int)ctlreg->RevisionId); + printf("\tSubsystemVendorId=0x%04x\n", + (u_int)ctlreg->SubsystemVendorId); + printf("\tSubsystemDeviceId=0x%04x\n", + (u_int)ctlreg->SubsystemDeviceId); + printf("\tSubsystemRevisionId=%u\n", + (u_int)ctlreg->SubsystemRevisionId); + printf("\tValidFields=%u\n", (u_int)ctlreg->ValidFields); + printf("\tManufacturingLocation=%u\n", + (u_int)ctlreg->ManufacturingLocation); + printf("\tManufacturingDate=%u\n", + (u_int)ctlreg->ManufacturingDate); + printf("\tSerialNumber=%u\n", + (u_int)ctlreg->SerialNumber); + printf("\tWindows=%u\n", (u_int)ctlreg->Windows); + printf("\tWindowSize=0x%016jx\n", + (uintmax_t)ctlreg->WindowSize); + printf("\tCommandOffset=0x%016jx\n", + (uintmax_t)ctlreg->CommandOffset); + printf("\tCommandSize=0x%016jx\n", + (uintmax_t)ctlreg->CommandSize); + printf("\tStatusOffset=0x%016jx\n", + (uintmax_t)ctlreg->StatusOffset); + printf("\tStatusSize=0x%016jx\n", + (uintmax_t)ctlreg->StatusSize); + +#define PRINTFLAG(var, flag) printflag((var), ACPI_NFIT_CONTROL_## flag, #flag) + + printf("\tFlags="); + PRINTFLAG(ctlreg->Flags, BUFFERED); + PRINTFLAG_END(); + +#undef PRINTFLAG + + break; + case ACPI_NFIT_TYPE_DATA_REGION: + datareg = (ACPI_NFIT_DATA_REGION *)nfit; + printf("\tRegionIndex=%u\n", (u_int)datareg->RegionIndex); + printf("\tWindows=%u\n", (u_int)datareg->Windows); + printf("\tOffset=0x%016jx\n", (uintmax_t)datareg->Offset); + printf("\tSize=0x%016jx\n", (uintmax_t)datareg->Size); + printf("\tCapacity=0x%016jx\n", (uintmax_t)datareg->Capacity); + printf("\tStartAddress=0x%016jx\n", + (uintmax_t)datareg->StartAddress); + break; + case ACPI_NFIT_TYPE_FLUSH_ADDRESS: +
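/*
 * Possible extension matching the XXX note below (a sketch only, not
 * part of this change): the HintCount 64-bit flush hint addresses
 * trail the fixed fields of ACPI_NFIT_FLUSH_ADDRESS, so they could be
 * dumped with something like
 *
 *	for (i = 0; i < fladdr->HintCount; i++)
 *		printf("\tHintAddress[%u]=0x%016jx\n", i,
 *		    (uintmax_t)fladdr->HintAddress[i]);
 *
 * assuming a local "u_int i" were added to this function.
 */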
fladdr = (ACPI_NFIT_FLUSH_ADDRESS *)nfit; + printf("\tDeviceHandle=%u\n", (u_int)fladdr->DeviceHandle); + printf("\tHintCount=%u\n", (u_int)fladdr->HintCount); + /* XXX fladdr->HintAddress[i] output is not supported */ + break; + } +} + +static void +acpi_handle_nfit(ACPI_TABLE_HEADER *sdp) +{ + ACPI_TABLE_NFIT *nfit; + + printf(BEGIN_COMMENT); + acpi_print_sdt(sdp); + nfit = (ACPI_TABLE_NFIT *)sdp; + acpi_walk_nfit(sdp, (nfit + 1), acpi_print_nfit); + printf(END_COMMENT); +} + +static void acpi_print_sdt(ACPI_TABLE_HEADER *sdp) { printf(" "); acpi_print_string(sdp->Signature, ACPI_NAME_SIZE); printf(": Length=%d, Revision=%d, Checksum=%d,\n", sdp->Length, sdp->Revision, sdp->Checksum); printf("\tOEMID="); acpi_print_string(sdp->OemId, ACPI_OEM_ID_SIZE); printf(", OEM Table ID="); acpi_print_string(sdp->OemTableId, ACPI_OEM_TABLE_ID_SIZE); printf(", OEM Revision=0x%x,\n", sdp->OemRevision); printf("\tCreator ID="); acpi_print_string(sdp->AslCompilerId, ACPI_NAME_SIZE); printf(", Creator Revision=0x%x\n", sdp->AslCompilerRevision); } static void acpi_print_rsdt(ACPI_TABLE_HEADER *rsdp) { ACPI_TABLE_RSDT *rsdt; ACPI_TABLE_XSDT *xsdt; int i, entries; rsdt = (ACPI_TABLE_RSDT *)rsdp; xsdt = (ACPI_TABLE_XSDT *)rsdp; printf(BEGIN_COMMENT); acpi_print_sdt(rsdp); entries = (rsdp->Length - sizeof(ACPI_TABLE_HEADER)) / addr_size; printf("\tEntries={ "); for (i = 0; i < entries; i++) { if (i > 0) printf(", "); if (addr_size == 4) printf("0x%08x", le32toh(rsdt->TableOffsetEntry[i])); else printf("0x%016jx", (uintmax_t)le64toh(xsdt->TableOffsetEntry[i])); } printf(" }\n"); printf(END_COMMENT); } static const char *acpi_pm_profiles[] = { "Unspecified", "Desktop", "Mobile", "Workstation", "Enterprise Server", "SOHO Server", "Appliance PC" }; static void acpi_print_fadt(ACPI_TABLE_HEADER *sdp) { ACPI_TABLE_FADT *fadt; const char *pm; fadt = (ACPI_TABLE_FADT *)sdp; printf(BEGIN_COMMENT); acpi_print_sdt(sdp); printf(" \tFACS=0x%x, DSDT=0x%x\n", fadt->Facs, fadt->Dsdt); printf("\tINT_MODEL=%s\n", fadt->Model ? 
"APIC" : "PIC"); if (fadt->PreferredProfile >= sizeof(acpi_pm_profiles) / sizeof(char *)) pm = "Reserved"; else pm = acpi_pm_profiles[fadt->PreferredProfile]; printf("\tPreferred_PM_Profile=%s (%d)\n", pm, fadt->PreferredProfile); printf("\tSCI_INT=%d\n", fadt->SciInterrupt); printf("\tSMI_CMD=0x%x, ", fadt->SmiCommand); printf("ACPI_ENABLE=0x%x, ", fadt->AcpiEnable); printf("ACPI_DISABLE=0x%x, ", fadt->AcpiDisable); printf("S4BIOS_REQ=0x%x\n", fadt->S4BiosRequest); printf("\tPSTATE_CNT=0x%x\n", fadt->PstateControl); printf("\tPM1a_EVT_BLK=0x%x-0x%x\n", fadt->Pm1aEventBlock, fadt->Pm1aEventBlock + fadt->Pm1EventLength - 1); if (fadt->Pm1bEventBlock != 0) printf("\tPM1b_EVT_BLK=0x%x-0x%x\n", fadt->Pm1bEventBlock, fadt->Pm1bEventBlock + fadt->Pm1EventLength - 1); printf("\tPM1a_CNT_BLK=0x%x-0x%x\n", fadt->Pm1aControlBlock, fadt->Pm1aControlBlock + fadt->Pm1ControlLength - 1); if (fadt->Pm1bControlBlock != 0) printf("\tPM1b_CNT_BLK=0x%x-0x%x\n", fadt->Pm1bControlBlock, fadt->Pm1bControlBlock + fadt->Pm1ControlLength - 1); if (fadt->Pm2ControlBlock != 0) printf("\tPM2_CNT_BLK=0x%x-0x%x\n", fadt->Pm2ControlBlock, fadt->Pm2ControlBlock + fadt->Pm2ControlLength - 1); printf("\tPM_TMR_BLK=0x%x-0x%x\n", fadt->PmTimerBlock, fadt->PmTimerBlock + fadt->PmTimerLength - 1); if (fadt->Gpe0Block != 0) printf("\tGPE0_BLK=0x%x-0x%x\n", fadt->Gpe0Block, fadt->Gpe0Block + fadt->Gpe0BlockLength - 1); if (fadt->Gpe1Block != 0) printf("\tGPE1_BLK=0x%x-0x%x, GPE1_BASE=%d\n", fadt->Gpe1Block, fadt->Gpe1Block + fadt->Gpe1BlockLength - 1, fadt->Gpe1Base); if (fadt->CstControl != 0) printf("\tCST_CNT=0x%x\n", fadt->CstControl); printf("\tP_LVL2_LAT=%d us, P_LVL3_LAT=%d us\n", fadt->C2Latency, fadt->C3Latency); printf("\tFLUSH_SIZE=%d, FLUSH_STRIDE=%d\n", fadt->FlushSize, fadt->FlushStride); printf("\tDUTY_OFFSET=%d, DUTY_WIDTH=%d\n", fadt->DutyOffset, fadt->DutyWidth); printf("\tDAY_ALRM=%d, MON_ALRM=%d, CENTURY=%d\n", fadt->DayAlarm, fadt->MonthAlarm, fadt->Century); #define PRINTFLAG(var, flag) printflag((var), ACPI_FADT_## flag, #flag) printf("\tIAPC_BOOT_ARCH="); PRINTFLAG(fadt->BootFlags, LEGACY_DEVICES); PRINTFLAG(fadt->BootFlags, 8042); PRINTFLAG(fadt->BootFlags, NO_VGA); PRINTFLAG(fadt->BootFlags, NO_MSI); PRINTFLAG(fadt->BootFlags, NO_ASPM); PRINTFLAG(fadt->BootFlags, NO_CMOS_RTC); PRINTFLAG_END(); printf("\tFlags="); PRINTFLAG(fadt->Flags, WBINVD); PRINTFLAG(fadt->Flags, WBINVD_FLUSH); PRINTFLAG(fadt->Flags, C1_SUPPORTED); PRINTFLAG(fadt->Flags, C2_MP_SUPPORTED); PRINTFLAG(fadt->Flags, POWER_BUTTON); PRINTFLAG(fadt->Flags, SLEEP_BUTTON); PRINTFLAG(fadt->Flags, FIXED_RTC); PRINTFLAG(fadt->Flags, S4_RTC_WAKE); PRINTFLAG(fadt->Flags, 32BIT_TIMER); PRINTFLAG(fadt->Flags, DOCKING_SUPPORTED); PRINTFLAG(fadt->Flags, RESET_REGISTER); PRINTFLAG(fadt->Flags, SEALED_CASE); PRINTFLAG(fadt->Flags, HEADLESS); PRINTFLAG(fadt->Flags, SLEEP_TYPE); PRINTFLAG(fadt->Flags, PCI_EXPRESS_WAKE); PRINTFLAG(fadt->Flags, PLATFORM_CLOCK); PRINTFLAG(fadt->Flags, S4_RTC_VALID); PRINTFLAG(fadt->Flags, REMOTE_POWER_ON); PRINTFLAG(fadt->Flags, APIC_CLUSTER); PRINTFLAG(fadt->Flags, APIC_PHYSICAL); PRINTFLAG(fadt->Flags, HW_REDUCED); PRINTFLAG(fadt->Flags, LOW_POWER_S0); PRINTFLAG_END(); #undef PRINTFLAG if (fadt->Flags & ACPI_FADT_RESET_REGISTER) { printf("\tRESET_REG="); acpi_print_gas(&fadt->ResetRegister); printf(", RESET_VALUE=%#x\n", fadt->ResetValue); } if (acpi_get_fadt_revision(fadt) > 1) { printf("\tX_FACS=0x%016jx, ", (uintmax_t)fadt->XFacs); printf("X_DSDT=0x%016jx\n", (uintmax_t)fadt->XDsdt); printf("\tX_PM1a_EVT_BLK="); 
acpi_print_gas(&fadt->XPm1aEventBlock); if (fadt->XPm1bEventBlock.Address != 0) { printf("\n\tX_PM1b_EVT_BLK="); acpi_print_gas(&fadt->XPm1bEventBlock); } printf("\n\tX_PM1a_CNT_BLK="); acpi_print_gas(&fadt->XPm1aControlBlock); if (fadt->XPm1bControlBlock.Address != 0) { printf("\n\tX_PM1b_CNT_BLK="); acpi_print_gas(&fadt->XPm1bControlBlock); } if (fadt->XPm2ControlBlock.Address != 0) { printf("\n\tX_PM2_CNT_BLK="); acpi_print_gas(&fadt->XPm2ControlBlock); } printf("\n\tX_PM_TMR_BLK="); acpi_print_gas(&fadt->XPmTimerBlock); if (fadt->XGpe0Block.Address != 0) { printf("\n\tX_GPE0_BLK="); acpi_print_gas(&fadt->XGpe0Block); } if (fadt->XGpe1Block.Address != 0) { printf("\n\tX_GPE1_BLK="); acpi_print_gas(&fadt->XGpe1Block); } printf("\n"); } printf(END_COMMENT); } static void acpi_print_facs(ACPI_TABLE_FACS *facs) { printf(BEGIN_COMMENT); printf(" FACS:\tLength=%u, ", facs->Length); printf("HwSig=0x%08x, ", facs->HardwareSignature); printf("Firm_Wake_Vec=0x%08x\n", facs->FirmwareWakingVector); printf("\tGlobal_Lock="); if (facs->GlobalLock != 0) { if (facs->GlobalLock & ACPI_GLOCK_PENDING) printf("PENDING,"); if (facs->GlobalLock & ACPI_GLOCK_OWNED) printf("OWNED"); } printf("\n"); printf("\tFlags="); if (facs->Flags & ACPI_FACS_S4_BIOS_PRESENT) printf("S4BIOS"); printf("\n"); if (facs->XFirmwareWakingVector != 0) printf("\tX_Firm_Wake_Vec=%016jx\n", (uintmax_t)facs->XFirmwareWakingVector); printf("\tVersion=%u\n", facs->Version); printf(END_COMMENT); } static void acpi_print_dsdt(ACPI_TABLE_HEADER *dsdp) { printf(BEGIN_COMMENT); acpi_print_sdt(dsdp); printf(END_COMMENT); } int acpi_checksum(void *p, size_t length) { uint8_t *bp; uint8_t sum; bp = p; sum = 0; while (length--) sum += *bp++; return (sum); } static ACPI_TABLE_HEADER * acpi_map_sdt(vm_offset_t pa) { ACPI_TABLE_HEADER *sp; sp = acpi_map_physical(pa, sizeof(ACPI_TABLE_HEADER)); sp = acpi_map_physical(pa, sp->Length); return (sp); } static void acpi_print_rsd_ptr(ACPI_TABLE_RSDP *rp) { printf(BEGIN_COMMENT); printf(" RSD PTR: OEM="); acpi_print_string(rp->OemId, ACPI_OEM_ID_SIZE); printf(", ACPI_Rev=%s (%d)\n", rp->Revision < 2 ? 
"1.0x" : "2.0x", rp->Revision); if (rp->Revision < 2) { printf("\tRSDT=0x%08x, cksum=%u\n", rp->RsdtPhysicalAddress, rp->Checksum); } else { printf("\tXSDT=0x%016jx, length=%u, cksum=%u\n", (uintmax_t)rp->XsdtPhysicalAddress, rp->Length, rp->ExtendedChecksum); } printf(END_COMMENT); } static void acpi_handle_rsdt(ACPI_TABLE_HEADER *rsdp) { ACPI_TABLE_HEADER *sdp; ACPI_TABLE_RSDT *rsdt; ACPI_TABLE_XSDT *xsdt; vm_offset_t addr; int entries, i; acpi_print_rsdt(rsdp); rsdt = (ACPI_TABLE_RSDT *)rsdp; xsdt = (ACPI_TABLE_XSDT *)rsdp; entries = (rsdp->Length - sizeof(ACPI_TABLE_HEADER)) / addr_size; for (i = 0; i < entries; i++) { if (addr_size == 4) addr = le32toh(rsdt->TableOffsetEntry[i]); else addr = le64toh(xsdt->TableOffsetEntry[i]); if (addr == 0) continue; sdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(addr); if (acpi_checksum(sdp, sdp->Length)) { warnx("RSDT entry %d (sig %.4s) is corrupt", i, sdp->Signature); continue; } if (!memcmp(sdp->Signature, ACPI_SIG_FADT, 4)) acpi_handle_fadt(sdp); else if (!memcmp(sdp->Signature, ACPI_SIG_MADT, 4)) acpi_handle_madt(sdp); else if (!memcmp(sdp->Signature, ACPI_SIG_HPET, 4)) acpi_handle_hpet(sdp); else if (!memcmp(sdp->Signature, ACPI_SIG_ECDT, 4)) acpi_handle_ecdt(sdp); else if (!memcmp(sdp->Signature, ACPI_SIG_MCFG, 4)) acpi_handle_mcfg(sdp); else if (!memcmp(sdp->Signature, ACPI_SIG_SLIT, 4)) acpi_handle_slit(sdp); else if (!memcmp(sdp->Signature, ACPI_SIG_SRAT, 4)) acpi_handle_srat(sdp); else if (!memcmp(sdp->Signature, ACPI_SIG_TCPA, 4)) acpi_handle_tcpa(sdp); else if (!memcmp(sdp->Signature, ACPI_SIG_DMAR, 4)) acpi_handle_dmar(sdp); + else if (!memcmp(sdp->Signature, ACPI_SIG_NFIT, 4)) + acpi_handle_nfit(sdp); else { printf(BEGIN_COMMENT); acpi_print_sdt(sdp); printf(END_COMMENT); } } } ACPI_TABLE_HEADER * sdt_load_devmem(void) { ACPI_TABLE_RSDP *rp; ACPI_TABLE_HEADER *rsdp; rp = acpi_find_rsd_ptr(); if (!rp) errx(1, "Can't find ACPI information"); if (tflag) acpi_print_rsd_ptr(rp); if (rp->Revision < 2) { rsdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(rp->RsdtPhysicalAddress); if (memcmp(rsdp->Signature, "RSDT", 4) != 0 || acpi_checksum(rsdp, rsdp->Length) != 0) errx(1, "RSDT is corrupted"); addr_size = sizeof(uint32_t); } else { rsdp = (ACPI_TABLE_HEADER *)acpi_map_sdt(rp->XsdtPhysicalAddress); if (memcmp(rsdp->Signature, "XSDT", 4) != 0 || acpi_checksum(rsdp, rsdp->Length) != 0) errx(1, "XSDT is corrupted"); addr_size = sizeof(uint64_t); } return (rsdp); } /* Write the DSDT to a file, concatenating any SSDTs (if present). */ static int write_dsdt(int fd, ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdt) { ACPI_TABLE_HEADER sdt; ACPI_TABLE_HEADER *ssdt; uint8_t sum; /* Create a new checksum to account for the DSDT and any SSDTs. */ sdt = *dsdt; if (rsdt != NULL) { sdt.Checksum = 0; sum = acpi_checksum(dsdt + 1, dsdt->Length - sizeof(ACPI_TABLE_HEADER)); ssdt = sdt_from_rsdt(rsdt, ACPI_SIG_SSDT, NULL); while (ssdt != NULL) { sdt.Length += ssdt->Length - sizeof(ACPI_TABLE_HEADER); sum += acpi_checksum(ssdt + 1, ssdt->Length - sizeof(ACPI_TABLE_HEADER)); ssdt = sdt_from_rsdt(rsdt, ACPI_SIG_SSDT, ssdt); } sum += acpi_checksum(&sdt, sizeof(ACPI_TABLE_HEADER)); sdt.Checksum -= sum; } /* Write out the DSDT header and body. */ write(fd, &sdt, sizeof(ACPI_TABLE_HEADER)); write(fd, dsdt + 1, dsdt->Length - sizeof(ACPI_TABLE_HEADER)); /* Write out any SSDTs (if present.) 
*/ if (rsdt != NULL) { ssdt = sdt_from_rsdt(rsdt, "SSDT", NULL); while (ssdt != NULL) { write(fd, ssdt + 1, ssdt->Length - sizeof(ACPI_TABLE_HEADER)); ssdt = sdt_from_rsdt(rsdt, "SSDT", ssdt); } } return (0); } void dsdt_save_file(char *outfile, ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdp) { int fd; mode_t mode; assert(outfile != NULL); mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; fd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, mode); if (fd == -1) { perror("dsdt_save_file"); return; } write_dsdt(fd, rsdt, dsdp); close(fd); } void aml_disassemble(ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdp) { char buf[PATH_MAX], tmpstr[PATH_MAX], wrkdir[PATH_MAX]; const char *iname = "/acpdump.din"; const char *oname = "/acpdump.dsl"; const char *tmpdir; FILE *fp; size_t len; int fd, status; pid_t pid; tmpdir = getenv("TMPDIR"); if (tmpdir == NULL) tmpdir = _PATH_TMP; if (realpath(tmpdir, buf) == NULL) { perror("realpath tmp dir"); return; } len = sizeof(wrkdir) - strlen(iname); if ((size_t)snprintf(wrkdir, len, "%s/acpidump.XXXXXX", buf) > len-1 ) { fprintf(stderr, "$TMPDIR too long\n"); return; } if (mkdtemp(wrkdir) == NULL) { perror("mkdtemp tmp working dir"); return; } len = (size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, iname); assert(len <= sizeof(tmpstr) - 1); fd = open(tmpstr, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); if (fd < 0) { perror("iasl tmp file"); return; } write_dsdt(fd, rsdt, dsdp); close(fd); /* Run iasl -d on the temp file */ if ((pid = fork()) == 0) { close(STDOUT_FILENO); if (vflag == 0) close(STDERR_FILENO); execl("/usr/sbin/iasl", "iasl", "-d", tmpstr, NULL); err(1, "exec"); } if (pid > 0) wait(&status); if (unlink(tmpstr) < 0) { perror("unlink"); goto out; } if (pid < 0) { perror("fork"); goto out; } if (status != 0) { fprintf(stderr, "iasl exit status = %d\n", status); } /* Dump iasl's output to stdout */ len = (size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, oname); assert(len <= sizeof(tmpstr) - 1); fp = fopen(tmpstr, "r"); if (unlink(tmpstr) < 0) { perror("unlink"); goto out; } if (fp == NULL) { perror("iasl tmp file (read)"); goto out; } while ((len = fread(buf, 1, sizeof(buf), fp)) > 0) fwrite(buf, 1, len, stdout); fclose(fp); out: if (rmdir(wrkdir) < 0) perror("rmdir"); } void sdt_print_all(ACPI_TABLE_HEADER *rsdp) { acpi_handle_rsdt(rsdp); } /* Fetch a table matching the given signature via the RSDT. */ ACPI_TABLE_HEADER * sdt_from_rsdt(ACPI_TABLE_HEADER *rsdp, const char *sig, ACPI_TABLE_HEADER *last) { ACPI_TABLE_HEADER *sdt; ACPI_TABLE_RSDT *rsdt; ACPI_TABLE_XSDT *xsdt; vm_offset_t addr; int entries, i; rsdt = (ACPI_TABLE_RSDT *)rsdp; xsdt = (ACPI_TABLE_XSDT *)rsdp; entries = (rsdp->Length - sizeof(ACPI_TABLE_HEADER)) / addr_size; for (i = 0; i < entries; i++) { if (addr_size == 4) addr = le32toh(rsdt->TableOffsetEntry[i]); else addr = le64toh(xsdt->TableOffsetEntry[i]); if (addr == 0) continue; sdt = (ACPI_TABLE_HEADER *)acpi_map_sdt(addr); if (last != NULL) { if (sdt == last) last = NULL; continue; } if (memcmp(sdt->Signature, sig, strlen(sig))) continue; if (acpi_checksum(sdt, sdt->Length)) errx(1, "RSDT entry %d is corrupt", i); return (sdt); } return (NULL); } ACPI_TABLE_HEADER * dsdt_from_fadt(ACPI_TABLE_FADT *fadt) { ACPI_TABLE_HEADER *sdt; /* Use the DSDT address if it is version 1, otherwise use XDSDT.
*/ if (acpi_get_fadt_revision(fadt) == 1) sdt = (ACPI_TABLE_HEADER *)acpi_map_sdt(fadt->Dsdt); else sdt = (ACPI_TABLE_HEADER *)acpi_map_sdt(fadt->XDsdt); if (acpi_checksum(sdt, sdt->Length)) errx(1, "DSDT is corrupt"); return (sdt); } Index: projects/clang500-import/usr.sbin/cron/cron/cron.8 =================================================================== --- projects/clang500-import/usr.sbin/cron/cron/cron.8 (revision 321306) +++ projects/clang500-import/usr.sbin/cron/cron/cron.8 (revision 321307) @@ -1,228 +1,232 @@ .\"/* Copyright 1988,1990,1993 by Paul Vixie .\" * All rights reserved .\" * .\" * Distribute freely, except: don't remove my name from the source or .\" * documentation (don't take credit for my work), mark your changes (don't .\" * get me blamed for your possible bugs), don't alter or remove this .\" * notice. May be sold if buildable source is provided to buyer. No .\" * warrantee of any kind, express or implied, is included with this .\" * software; use at your own risk, responsibility for damages (if any) to .\" * anyone resulting from the use of this software rests entirely with the .\" * user. .\" * .\" * Send bug reports, bug fixes, enhancements, requests, flames, etc., and .\" * I'll try to keep a version up to date. I can be reached as follows: .\" * Paul Vixie uunet!decwrl!vixie!paul .\" */ .\" .\" $FreeBSD$ .\" -.Dd October 31, 2016 +.Dd July 19, 2017 .Dt CRON 8 .Os .Sh NAME .Nm cron .Nd daemon to execute scheduled commands (Vixie Cron) .Sh SYNOPSIS .Nm .Op Fl j Ar jitter .Op Fl J Ar rootjitter .Op Fl m Ar mailto .Op Fl n .Op Fl s .Op Fl o .Op Fl x Ar debugflag Ns Op , Ns Ar ... .Sh DESCRIPTION The .Nm utility should be started from .Pa /etc/rc or .Pa /etc/rc.local . It will return immediately, so you do not need to start it with '&'. .Pp The .Nm utility searches .Pa /var/cron/tabs for crontab files which are named after accounts in .Pa /etc/passwd ; crontabs found are loaded into memory. The .Nm utility also searches for .Pa /etc/crontab and files in .Pa /etc/cron.d and .Pa /usr/local/etc/cron.d which are in a different format (see .Xr crontab 5 ) . .Pp The .Nm utility then wakes up every minute, examining all stored crontabs, checking each command to see if it should be run in the current minute. Before running a command from a per-account crontab file, .Nm checks the status of the account with .Xr pam 3 and skips the command if the account is unavailable, e.g., locked out or expired. Commands from .Pa /etc/crontab bypass this check. When executing commands, any output is mailed to the owner of the crontab (or to the user named in the .Ev MAILTO environment variable in the crontab, if such exists). .Pp Additionally, .Nm checks each minute to see if its spool directory's modification time (or the modification time on .Pa /etc/crontab ) has changed, and if it has, .Nm will then examine the modification time on all crontabs and reload those which have changed. Thus .Nm need not be restarted whenever a crontab file is modified. Note that the .Xr crontab 1 command updates the modification time of the spool directory whenever it changes a crontab. .Pp Available options: .Bl -tag -width indent .It Fl j Ar jitter Enable time jitter. Prior to executing commands, .Nm will sleep a random number of seconds in the range from 0 to .Ar jitter . This will not affect superuser jobs (see .Fl J ) . A value for .Ar jitter must be between 0 and 60 inclusive. Default is 0, which effectively disables time jitter.
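As a rough illustration of this jitter behaviour, here is a minimal sketch in C (apply_jitter is an invented name; this is not cron's actual implementation):

	#include <stdlib.h>
	#include <unistd.h>

	/*
	 * Illustrative sketch only: before executing a job, sleep a random
	 * number of seconds in the range [0, jitter].  A jitter of 0 leaves
	 * the start time unchanged.  arc4random_uniform(n) returns a
	 * uniformly distributed value in [0, n).
	 */
	static void
	apply_jitter(unsigned int jitter)	/* cron limits this to 0..60 */
	{
		if (jitter > 0)
			sleep(arc4random_uniform(jitter + 1));
	}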
.Pp This option can help to smooth out system load spikes when many jobs are likely to start at once, e.g., at the beginning of the first minute of each hour. .It Fl J Ar rootjitter Enable time jitter for superuser jobs. The same as .Fl j except that it will affect jobs run by the superuser only. .It Fl m Ar mailto Overrides the default recipient for .Nm mail. Each .Xr crontab 5 without .Ev MAILTO explicitly set will send mail to the .Ar mailto mailbox. Sending mail will be disabled by default if .Ar mailto is set to a null string, usually specified in a shell as .Li '' or .Li \*q\*q . .It Fl n -Don't daemonize, run in foreground instead. +Do not daemonize; run in foreground instead. .It Fl s Enable special handling of situations when the GMT offset of the local timezone changes, such as the switches between standard time and daylight saving time. .Pp Jobs scheduled during a GMT offset change run as intuitively expected. If a job falls into a time interval that disappears (for example, during the switch from standard time to daylight saving time) or is duplicated (for example, during the reverse switch), then it is handled in one of two ways: .Pp The first case is for jobs that run at every hour of a time interval overlapping with the disappearing or duplicated interval. In other words, this covers a job that had run within one hour before the GMT offset change (and cron was not restarted nor the .Xr crontab 5 changed after that) or that would run at the next hour after the change. Such jobs work as always, skipping the removed time or running in the added time as usual. .Pp The second case is for jobs that run less frequently. They are executed exactly once; they are neither skipped nor executed twice (unless cron is restarted or the user's .Xr crontab 5 is changed during such a time interval). If an interval disappears due to the GMT offset change, such jobs are executed at the same absolute point in time as they would be in the old time zone. For example, if exactly one hour disappears, this point would be during the next hour at the first minute that is specified for them in .Xr crontab 5 . .It Fl o Disable the special handling of situations when the GMT offset of the local timezone changes, to be compatible with the old (default) behavior. If both options .Fl o and .Fl s are specified, the option specified last wins. .It Fl x Ar debugflag Ns Op , Ns Ar ... Enable writing of debugging information to standard output. One or more of the following comma-separated .Ar debugflag identifiers must be specified: .Pp .Bl -tag -width ".Cm proc" -compact .It Cm bit currently not used .It Cm ext make the other debug flags more verbose .It Cm load be verbose when loading crontab files .It Cm misc be verbose about miscellaneous one-off events .It Cm pars be verbose about parsing individual crontab lines .It Cm proc be verbose about the state of the process, including all of its offspring .It Cm sch be verbose when iterating through the scheduling algorithms .It Cm test trace through the execution, but do not perform any actions .El .El .Sh FILES -.Bl -tag -width /etc/pam.d/cron -compact +.Bl -tag -width /usr/local/etc/cron.d -compact .It Pa /etc/crontab System crontab file +.It Pa /etc/cron.d +Directory for optional/modularized system crontab files. .It Pa /etc/pam.d/cron .Xr pam.conf 5 configuration file for .Nm +.It Pa /usr/local/etc/cron.d +Directory for crontab files provided by third-party packages.
.It Pa /var/cron/tabs Directory for personal crontab files .El .Sh SEE ALSO .Xr crontab 1 , .Xr pam 3 , .Xr crontab 5 , .Xr pam.conf 5 .Sh AUTHORS .An Paul Vixie Aq Mt paul@vix.com Index: projects/clang500-import/usr.sbin/newsyslog/tests/legacy_test.sh =================================================================== --- projects/clang500-import/usr.sbin/newsyslog/tests/legacy_test.sh (revision 321306) +++ projects/clang500-import/usr.sbin/newsyslog/tests/legacy_test.sh (revision 321307) @@ -1,536 +1,536 @@ #!/bin/sh # $FreeBSD$ # A regular expression matching the format of an RFC-5424 log line header, # including the timestamp up through the seconds indicator; it does not include # the (optional) timezone offset. RFC5424_FMT='^<[0-9][0-9]*>1 [0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}' # A regular expression matching the format of an RFC-3164 (traditional syslog) # log line header, including the timestamp. RFC3164_FMT='^[A-Z][a-z]{2} [ 0-9][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}' COUNT=0 TMPDIR=$(pwd)/work if [ $? -ne 0 ]; then - echo "$0: Can't create temp dir, exiting..." - exit 1 + echo "$0: Can't create temp dir, exiting..." + exit 1 fi # Begin an individual test begin() { COUNT=`expr $COUNT + 1` OK=1 NAME="$1" } # End an individual test end() { local message if [ $OK = 1 ] then message='ok ' else message='not ok ' fi message="$message $COUNT - $NAME" if [ -n "$TODO" ] then message="$message # TODO $TODO" fi echo "$message" } # Make a file that can later be verified mkf() { CN=`basename $1` echo "$CN-$CN" >$1 } # Verify that the file specified is correct ckf() { if [ -f $2 ] && echo "$1-$1" | diff - $2 >/dev/null then ok else notok fi } # Check that a file exists ckfe() { if [ -f $1 ] then ok else notok fi } # Verify that the specified file does not exist # (is not there) cknt() { if [ -r $1 ] then notok else ok fi } # Check if a file is there, depending on whether it's supposed to be there or not - # basically how many log files we are supposed to keep vs. how many we # actually keep. ckntfe() { curcnt=$1 keepcnt=$2 f=$3 if [ $curcnt -le $keepcnt ] then #echo Assuming file there ckfe $f else #echo Assuming file NOT there cknt $f fi } # Verify that the specified file has RFC-5424 rotation messages. ckrfc5424() { local lc=$(wc -l $1 | cut -w -f2) local rc=$(grep -cE "${RFC5424_FMT}" $1) if [ "$lc" -eq 0 -o "$rc" -eq 0 -o "$lc" -ne "$rc" ] then notok else ok fi } # Verify that the specified file has RFC-3164 rotation messages.
ckrfc3164() { local lc=$(wc -l $1 | cut -w -f2) local rc=$(grep -cE "${RFC3164_FMT}" $1) if [ "$lc" -eq 0 -o "$rc" -eq 0 -o "$lc" -ne "$rc" ] then notok else ok fi } # A part of a test succeeds ok() { : } # A part of a test fails notok() { OK=0 } # Verify that the exit code passed is for unsuccessful termination ckfail() { if [ $1 -gt 0 ] then ok else notok fi } # Verify that the exit code passed is for successful termination ckok() { if [ $1 -eq 0 ] then ok else notok fi } # Check that there are X files which match expr chkfcnt() { cnt=$1; shift if [ $cnt -eq `echo "$@" | wc -w` ] then ok else notok fi } # Check that two strings are alike ckstr() { if [ "$1" = "$2" ] then ok else notok fi } tmpdir_create() { mkdir -p ${TMPDIR}/log ${TMPDIR}/alog cd ${TMPDIR}/log } tmpdir_clean() { cd ${TMPDIR} rm -rf "${TMPDIR}/log" "${TMPDIR}/alog" newsyslog.conf } run_newsyslog() { newsyslog -f ../newsyslog.conf -F -r "$@" } tests_normal_rotate() { ext="$1" dir="$2" if [ -n "$dir" ]; then newsyslog_args=" -a ${dir}" name_postfix="${ext} archive dir" else newsyslog_args="" name_postfix="${ext}" fi tmpdir_create begin "create file ${name_postfix}" -newdir run_newsyslog -C ckfe $LOGFNAME cknt ${dir}${LOGFNAME}.0${ext} end begin "rotate normal 1 ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckfe ${dir}${LOGFNAME}.0${ext} cknt ${dir}${LOGFNAME}.1${ext} end begin "rotate normal 2 ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckfe ${dir}${LOGFNAME}.0${ext} ckfe ${dir}${LOGFNAME}.1${ext} cknt ${dir}${LOGFNAME}.2${ext} end begin "rotate normal 3 ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckfe ${dir}${LOGFNAME}.0${ext} ckfe ${dir}${LOGFNAME}.1${ext} ckfe ${dir}${LOGFNAME}.2${ext} cknt ${dir}${LOGFNAME}.3${ext} end begin "rotate normal 4 ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckfe ${dir}${LOGFNAME}.0${ext} ckfe ${dir}${LOGFNAME}.1${ext} ckfe ${dir}${LOGFNAME}.2${ext} cknt ${dir}${LOGFNAME}.4${ext} end begin "rotate normal 5 ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckfe ${dir}${LOGFNAME}.0${ext} ckfe ${dir}${LOGFNAME}.1${ext} ckfe ${dir}${LOGFNAME}.2${ext} cknt ${dir}${LOGFNAME}.4${ext} end # Wait a bit so we can see if the noaction test rotates files sleep 1.1 begin "noaction ${name_postfix}" ofiles=`ls -Tl ${dir}${LOGFNAME}.*${ext} | tr -d '\n'` run_newsyslog ${newsyslog_args} -n >/dev/null ckfe ${LOGFNAME} ckstr "$ofiles" "`ls -lT ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`" end tmpdir_clean } tests_normal_rotate_keepn() { cnt="$1" ext="$2" dir="$3" if [ -n "$dir" ]; then newsyslog_args=" -a ${dir}" name_postfix="${ext} archive dir" else newsyslog_args="" name_postfix="${ext}" fi tmpdir_create begin "create file ${name_postfix}" -newdir run_newsyslog -C ckfe $LOGFNAME cknt ${dir}${LOGFNAME}.0${ext} end begin "rotate normal 1 cnt=$cnt ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext} cknt ${dir}${LOGFNAME}.1${ext} end begin "rotate normal 2 cnt=$cnt ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext} ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext} cknt ${dir}${LOGFNAME}.2${ext} end begin "rotate normal 3 cnt=$cnt ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext} ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext} ckntfe 3 $cnt ${dir}${LOGFNAME}.2${ext} cknt ${dir}${LOGFNAME}.3${ext} end begin "rotate normal 3 cnt=$cnt ${name_postfix}" run_newsyslog $newsyslog_args ckfe 
${LOGFNAME} ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext} ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext} ckntfe 3 $cnt ${dir}${LOGFNAME}.2${ext} ckntfe 4 $cnt ${dir}${LOGFNAME}.3${ext} cknt ${dir}${LOGFNAME}.4${ext} end # Wait a bit so we can see if the noaction test rotates files sleep 1.1 begin "noaction ${name_postfix}" osum=`md5 ${dir}${LOGFNAME} | tr -d '\n'` run_newsyslog ${newsyslog_args} -n >/dev/null ckfe ${LOGFNAME} ckstr "$osum" "`md5 ${dir}${LOGFNAME} | tr -d '\n'`" end tmpdir_clean } tests_time_rotate() { ext="$1" dir="$2" if [ -n "$dir" ]; then newsyslog_args="-t DEFAULT -a ${dir}" name_postfix="${ext} archive dir" else newsyslog_args="-t DEFAULT" name_postfix="${ext}" fi tmpdir_create begin "create file ${name_postfix}" -newdir run_newsyslog -C ${newsyslog_args} ckfe ${LOGFNAME} end begin "rotate time 1 ${name_postfix}" run_newsyslog ${newsyslog_args} ckfe ${LOGFNAME} chkfcnt 1 ${dir}${LOGFNAME}.*${ext} end sleep 1.1 begin "rotate time 2 ${name_postfix}" run_newsyslog ${newsyslog_args} ckfe ${LOGFNAME} chkfcnt 2 ${dir}${LOGFNAME}.*${ext} end sleep 1.1 begin "rotate time 3 ${name_postfix}" run_newsyslog ${newsyslog_args} ckfe ${LOGFNAME} chkfcnt 3 ${dir}${LOGFNAME}.*${ext} end sleep 1.1 begin "rotate time 4 ${name_postfix}" run_newsyslog ${newsyslog_args} ckfe ${LOGFNAME} chkfcnt 3 ${dir}${LOGFNAME}.*${ext} end begin "noaction ${name_postfix}" ofiles=`ls -1 ${dir}${LOGFNAME}.*${ext} | tr -d '\n'` run_newsyslog ${newsyslog_args} -n >/dev/null ckfe ${LOGFNAME} ckstr "$ofiles" "`ls -1 ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`" end tmpdir_clean } tests_rfc5424() { ext="$1" dir="$2" if [ -n "$dir" ]; then newsyslog_args=" -a ${dir}" name_postfix="${ext} archive dir" else newsyslog_args="" name_postfix="${ext}" fi tmpdir_create begin "RFC-5424 - create file ${name_postfix}" -newdir run_newsyslog -C ckfe $LOGFNAME cknt ${dir}${LOGFNAME}.0${ext} ckfe $LOGFNAME5424 cknt ${dir}${LOGFNAME5424}.0${ext} - ckrfc3164 ${LOGFNAME} - ckrfc5424 ${LOGFNAME5424} + ckrfc3164 ${LOGFNAME} + ckrfc5424 ${LOGFNAME5424} end begin "RFC-5424 - rotate normal 1 ${name_postfix}" run_newsyslog $newsyslog_args ckfe ${LOGFNAME} ckfe ${dir}${LOGFNAME}.0${ext} ckfe $LOGFNAME5424 ckfe ${dir}${LOGFNAME5424}.0${ext} - ckrfc3164 ${LOGFNAME} - ckrfc3164 ${dir}${LOGFNAME}.0${ext} - ckrfc5424 ${LOGFNAME5424} - ckrfc5424 ${dir}${LOGFNAME5424}.0${ext} + ckrfc3164 ${LOGFNAME} + ckrfc3164 ${dir}${LOGFNAME}.0${ext} + ckrfc5424 ${LOGFNAME5424} + ckrfc5424 ${dir}${LOGFNAME5424}.0${ext} end tmpdir_clean } echo 1..128 mkdir -p ${TMPDIR} cd ${TMPDIR} LOGFNAME=foo.log LOGFPATH=${TMPDIR}/log/${LOGFNAME} # Log file for RFC-5424 testing LOGFNAME5424=foo5424.log LOGFPATH5424=${TMPDIR}/log/${LOGFNAME5424} # Normal, no archive dir, keep X files echo "$LOGFPATH 640 0 * @T00 NC" > newsyslog.conf tests_normal_rotate_keepn 0 echo "$LOGFPATH 640 1 * @T00 NC" > newsyslog.conf tests_normal_rotate_keepn 1 echo "$LOGFPATH 640 2 * @T00 NC" > newsyslog.conf tests_normal_rotate_keepn 2 echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_normal_rotate_keepn 3 # Normal, no archive dir, keep X files, gz echo "$LOGFPATH 640 0 * @T00 NCZ" > newsyslog.conf tests_normal_rotate_keepn 0 ".gz" echo "$LOGFPATH 640 1 * @T00 NCZ" > newsyslog.conf tests_normal_rotate_keepn 1 ".gz" echo "$LOGFPATH 640 2 * @T00 NCZ" > newsyslog.conf tests_normal_rotate_keepn 2 ".gz" echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_normal_rotate_keepn 3 ".gz" # Normal, no archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_normal_rotate echo "$LOGFPATH 640 3 * @T00 
NCZ" > newsyslog.conf tests_normal_rotate ".gz" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_normal_rotate ".bz2" # Normal, archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_normal_rotate "" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_normal_rotate ".gz" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_normal_rotate ".bz2" "${TMPDIR}/alog/" # Time based, no archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_time_rotate echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_time_rotate "gz" "" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_time_rotate "bz2" "" # Time based, archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_time_rotate "" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_time_rotate "gz" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_time_rotate "bz2" "${TMPDIR}/alog/" # RFC-5424; Normal, no archive dir echo "$LOGFPATH5424 640 3 * @T00 NCT" > newsyslog.conf echo "$LOGFPATH 640 3 * @T00 NC" >> newsyslog.conf tests_rfc5424 rm -rf "${TMPDIR}" Index: projects/clang500-import/usr.sbin/nfsd/nfsv4.4 =================================================================== --- projects/clang500-import/usr.sbin/nfsd/nfsv4.4 (revision 321306) +++ projects/clang500-import/usr.sbin/nfsd/nfsv4.4 (revision 321307) @@ -1,331 +1,359 @@ .\" Copyright (c) 2009 Rick Macklem, University of Guelph .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd July 1, 2013 +.Dd July 19, 2017 .Dt NFSV4 4 .Os .Sh NAME .Nm NFSv4 .Nd NFS Version 4 Protocol .Sh DESCRIPTION The NFS client and server provides support for the .Tn NFSv4 specification; see -.%T "Network File System (NFS) Version 4 Protocol RFC 3530" . +.%T "Network File System (NFS) Version 4 Protocol RFC 7530" and +.%T "Network File System (NFS) Version 4 Minor Version 1 Protocol RFC 5661" . The protocol is somewhat similar to NFS Version 3, but differs in significant ways. It uses a single compound RPC that concatenates operations to-gether. Each of these operations are similar to the RPCs of NFS Version 3. 
The operations in the compound are performed in order, until one of them fails (returns an error), at which point the RPC terminates. .Pp It has integrated locking support, which implies that the server is no longer stateless. As such, the .Nm server remains in recovery mode for a grace period (always greater than the lease duration the server uses) after a reboot. During this grace period, clients may recover state but not perform other open/lock state-changing operations. To provide for correct recovery semantics, a small file described by .Xr stablerestart 5 is used by the server during the recovery phase. If this file is missing or empty, there is a backup copy maintained by .Xr nfsd 8 that will be used. If either file is missing, it will be created by the .Xr nfsd 8 . If both the file and the backup copy are empty, it will result in the server starting without providing a grace period for recovery. Note that recovery only occurs when the server machine is rebooted, not when the .Xr nfsd 8 are just restarted. .Pp It provides several optional features not present in NFS Version 3: .sp .Bd -literal -offset indent -compact - NFS Version 4 ACLs - Referrals, which redirect subtrees to other servers (not yet implemented) - Delegations, which allow a client to operate on a file locally +- pNFS, where I/O operations are separated from Metadata operations .Ed .Pp The .Nm protocol does not use a separate mount protocol and assumes that the server provides a single file system tree structure, rooted at the point in the local file system tree specified by one or more .sp 1 .Bd -literal -offset indent -compact V4: [-sec=secflavors] [host(s) or net] .Ed .sp 1 line(s) in the .Xr exports 5 file. (See .Xr exports 5 for details.) The .Xr nfsd 8 allows a limited subset of operations to be performed on non-exported subtrees of the local file system, so that traversal of the tree to the exported subtrees is possible. As such, the ``<rootdir>'' can be in a non-exported file system. The exception is ZFS, which checks exports and, as such, all ZFS file systems below the ``<rootdir>'' must be exported. However, the entire tree that is rooted at that point must be in local file systems that are of types that can be NFS exported. Since the .Nm file system is rooted at ``<rootdir>'', setting this to anything other than ``/'' will result in clients being required to use different mount paths for .Nm than for NFS Version 2 or 3. Unlike NFS Version 2 and 3, Version 4 allows a client mount to span across multiple server file systems, although not all clients are capable of doing this. .Pp .Nm -uses names for users and groups instead of numbers. -On the wire, they +uses strings for users and groups instead of numbers. +On the wire, these strings can either have the numbers in the string or take the form: .sp .Bd -literal -offset indent -compact <user>@<dns.domain> .Ed .sp where ``<dns.domain>'' is not the same as the DNS domain used for host name lookups, but is usually set to the same string. Most systems set this ``<dns.domain>'' to the domain name part of the machine's .Xr hostname 1 by default. However, this can normally be overridden by a command line option or configuration file for the daemon used to do the name<->number mapping. Under FreeBSD, the mapping daemon is called .Xr nfsuserd 8 and has a command line option that overrides the domain component of the machine's hostname. -For use of +For use of this form of string on .Nm , either client or server, this daemon must be running.
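The hunk that follows adds a second, purely numeric owner-string form (e.g. "1001" instead of "user@dns.domain"). A tiny sketch of telling the two on-wire forms apart (owner_is_numeric is an invented helper, not the FreeBSD implementation):

	#include <ctype.h>

	/*
	 * Sketch only: an NFSv4 owner string is either all digits ("1001")
	 * or of the "user@dns.domain" form.  Returns nonzero if the purely
	 * numeric form was used.
	 */
	static int
	owner_is_numeric(const char *s)
	{
		if (*s == '\0')
			return (0);
		for (; *s != '\0'; s++)
			if (!isdigit((unsigned char)*s))
				return (0);
		return (1);
	}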
-If this ``<dns.domain>'' is not set correctly or the daemon is not running, ``ls -l'' will typically +.Pp +The form where the numbers are in the strings can only be used for AUTH_SYS. +To configure your systems this way, the +.Xr nfsuserd 8 +daemon does not need to be running on the server, but the following sysctls need to be +set to 1 on the server. +.sp +.Bd -literal -offset indent -compact +vfs.nfs.enable_uidtostring +vfs.nfsd.enable_stringtouid +.Ed +.sp +On the client, the sysctl +.sp +.Bd -literal -offset indent -compact +vfs.nfs.enable_uidtostring +.Ed +.sp +must be set to 1 and the +.Xr nfsuserd 8 +daemon does not need to be running. +.Pp +If these strings are not configured correctly, ``ls -l'' will typically report a lot of ``nobody'' and ``nogroup'' ownerships. .Pp Although uid/gid numbers are no longer used in the .Nm -protocol, they will still be in the RPC authentication fields when +protocol except optionally in the above strings, they will still be in the RPC authentication fields when using AUTH_SYS (sec=sys), which is the default. As such, in this case both the user/group name and number spaces must be consistent between the client and server. .Pp However, if you run .Nm with RPCSEC_GSS (sec=krb5, krb5i, krb5p), only names and KerberosV tickets will go on the wire. .Sh SERVER SETUP To set up the NFS server that supports .Nm , -you will need to either set the variables in +you will need to set the variables in .Xr rc.conf 5 as follows: .sp .Bd -literal -offset indent -compact nfs_server_enable="YES" nfsv4_server_enable="YES" +.Ed +.sp +plus +.sp +.Bd -literal -offset indent -compact nfsuserd_enable="YES" .Ed .sp -or start -.Xr mountd 8 -and -.Xr nfsd 8 -without the ``-o'' option, which would force use of the old server. -The -.Xr nfsuserd 8 -daemon must also be running. +if the server is using the ``<user>@<dns.domain>'' form of user/group strings or +is using the ``-manage-gids'' option for +.Xr nfsuserd 8 . .Pp You will also need to add at least one ``V4:'' line to the .Xr exports 5 file for .Nm to work. .Pp If the file systems you are exporting are only being accessed via .Nm , there are a couple of .Xr sysctl 8 variables that you can change, which might improve performance. .Bl -tag -width Ds .It Cm vfs.nfsd.issue_delegations when set non-zero, allows the server to issue Open Delegations to clients. These delegations permit the client to manipulate the file locally on the client. Unfortunately, at this time, client use of delegations is limited, so performance gains may not be observed. This can only be enabled when the file systems being exported to .Nm clients are not being accessed locally on the server and, if being accessed via NFS Version 2 or 3 clients, these clients cannot be using the NLM. .It Cm vfs.nfsd.enable_locallocks can be set to 0 to disable acquisition of local byte range locks. Disabling local locking can only be done if neither local accesses to the exported file systems nor the NLM is operating on them. .El .sp Note that Samba server access would be considered ``local access'' for the above discussion. .Pp To build a kernel with the NFS server that supports .Nm linked into it, the .sp .Bd -literal -offset indent -compact options NFSD .Ed .sp must be specified in the kernel's .Xr config 5 file. .Sh CLIENT MOUNTS To do an .Nm mount, specify the ``nfsv4'' option on the .Xr mount_nfs 8 command line. This will force use of the client that supports .Nm plus set ``tcp'' and .Nm . .Pp The .Xr nfsuserd 8 -must be running, as above.
+must be running if name<->uid/gid mapping is being used, as above. Also, since an .Nm mount uses the host uuid to identify the client uniquely to the server, you cannot safely do an .Nm mount when .sp .Bd -literal -offset indent -compact hostid_enable="NO" .Ed .sp is set in .Xr rc.conf 5 . .sp If the .Nm server that is being mounted on supports delegations, you can start the .Xr nfscbd 8 daemon to handle client side callbacks. This will occur if .sp .Bd -literal -offset indent -compact -nfsuserd_enable="YES" +nfsuserd_enable="YES" <-- If name<->uid/gid mapping is being used. nfscbd_enable="YES" .Ed .sp are set in .Xr rc.conf 5 . .sp Without a functioning callback path, a server will never issue Delegations to a client. .sp -By default, the callback address will be set to the IP address acquired via +For NFSv4.0, by default, the callback address will be set to the IP address acquired via rtalloc() in the kernel and port# 7745. To override the default port#, a command line option for .Xr nfscbd 8 can be used. .sp To get callbacks to work when behind a NAT gateway, a port for the callback service will need to be set up on the NAT gateway and then the address of the NAT gateway (host IP plus port#) will need to be set by assigning the .Xr sysctl 8 variable vfs.nfs.callback_addr to a string of the form: .sp N.N.N.N.N.N .sp where the first 4 Ns are the host IP address and the last two are the port# in network byte order (all decimal #s in the range 0-255). For example, a gateway at 192.0.2.10 with the default port 7745 (0x1e41) would be written as ``192.0.2.10.30.65''. +.Pp +For NFSv4.1, the callback path (called a backchannel) uses the same TCP connection as the mount, +so none of the above applies and callbacks should work through gateways without +any issues. .Pp To build a kernel with the client that supports .Nm linked into it, the option .sp .Bd -literal -offset indent -compact options NFSCL .Ed .sp must be specified in the kernel's .Xr config 5 file. .Pp Options can be specified for the .Xr nfsuserd 8 and .Xr nfscbd 8 daemons at boot time via the ``nfsuserd_flags'' and ``nfscbd_flags'' .Xr rc.conf 5 variables. .Pp NFSv4 mount(s) against exported volume(s) on the same host are not recommended, since this can result in a hung NFS server. It occurs when an nfsd thread tries to do an NFSv4 VOP_RECLAIM()/Close RPC as part of acquiring a new vnode. If all other nfsd threads are blocked waiting for lock(s) held by this nfsd thread, then there isn't an nfsd thread to service the Close RPC. .Sh FILES .Bl -tag -width /var/db/nfs-stablerestart.bak -compact .It Pa /var/db/nfs-stablerestart NFS V4 stable restart file .It Pa /var/db/nfs-stablerestart.bak backup copy of the file .El .Sh SEE ALSO .Xr stablerestart 5 , .Xr mountd 8 , .Xr nfscbd 8 , .Xr nfsd 8 , .Xr nfsdumpstate 8 , .Xr nfsrevoke 8 , .Xr nfsuserd 8 .Sh BUGS At this time, there is no recall of delegations for local file system operations. As such, delegations should only be enabled for file systems that are being used solely as NFS export volumes and are not being accessed via local system calls nor services such as Samba. Index: projects/clang500-import =================================================================== --- projects/clang500-import (revision 321306) +++ projects/clang500-import (revision 321307) Property changes on: projects/clang500-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r321239-321306